How do I make my custom loss function return a scalar?

When I call backward() on my loss function, I get the following error: RuntimeError: grad can be implicitly created only for scalar outputs

Here’s my code:

#!/usr/bin/env python
# coding: utf-8

# In[41]:


# Here we import all libraries
import numpy as np
import gym
import matplotlib.pyplot as plt
import os
import torch
import random
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from collections import deque 
import sys
env = gym.make("CliffWalking-v0")


# In[103]:


#Hyperparameters
episodes = 5000
eps = 1.0
learning_rate = 0.1
discount_factor = 0.99
tot_rewards = []
decay_val = 0.001
mem_size = 50000
batch_size = 2
gamma = 0.99


# In[104]:


class NeuralNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super(NeuralNetwork, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(1, 30),
            nn.ReLU(),
            nn.Linear(30, 30),
            nn.ReLU(),
            nn.Linear(30, action_size)
        )
    def forward(self, x):
        x = self.linear_relu_stack(x)
        return x


# In[105]:


model = NeuralNetwork(env.observation_space.n, env.action_space.n)
opt = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss = nn.MSELoss()
replay_buffer = deque(maxlen=mem_size)


# In[106]:


state = torch.tensor(env.reset(), dtype=torch.float32)
state = state.unsqueeze(dim=0)
print(state.shape)
out = model(state)


# In[111]:


def compute_td_loss(batch_size):
    # Sample a random minibatch of transitions from the replay buffer
    state, next_state, reward, done, action = zip(*random.sample(replay_buffer, batch_size))
    state = torch.from_numpy(np.array(state).reshape(-1, 1)).unsqueeze(dim=0).type(torch.float32)
    next_state = torch.from_numpy(np.array(next_state).reshape(-1, 1)).unsqueeze(dim=0).type(torch.float32)
    reward = torch.from_numpy(np.array(reward))
    done = torch.from_numpy(np.array(done))
    action = torch.from_numpy(np.array(action)).type(torch.int64)
    # Q-values for the current and next states
    q_values = model(state)
    next_q_values = model(next_state)
    # Q-values of the actions actually taken, shape (1, batch_size)
    q_vals = q_values.squeeze().gather(dim=-1, index=action.reshape(-1, 1)).reshape(1, -1)
    max_next_q_values = torch.max(next_q_values, 2)[0].detach()
    print("q_vals = ", q_vals)
    print("max_next_q_values = ", max_next_q_values)
    # Elementwise squared TD error
    loss = 0.5*(reward + gamma*max_next_q_values - q_vals)**2
    print("reward = ", reward)
    print("loss = ", loss)
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss
    


# In[112]:


for i in range(episodes):
    state = env.reset()
    done = False
    steps = 0
    eps_rew = 0 
    while not done and steps<50:
        if np.random.uniform(0,1)<eps:
            action = env.action_space.sample()
        else:
            state = torch.tensor(state, dtype=torch.float32)
            state = state.unsqueeze(dim=0)
            action = np.argmax(model(state).detach().numpy())
        next_state, reward, done, info = env.step(action)
        replay_buffer.append((state, next_state, reward, done, action))
        if len(replay_buffer)>batch_size:
            loss = compute_td_loss(batch_size)
            sys.exit()
        eps = eps/(1 + 0.001)
        eps_rew += reward 
        if done:
            break
        state = next_state
    tot_rewards.append(eps_rew)

Here’s the error I get:

RuntimeError                              Traceback (most recent call last)
<ipython-input-112-015fd74c95d9> in <module>
     14         replay_buffer.append((state, next_state, reward, done, action))
     15         if len(replay_buffer)>batch_size:
---> 16             loss = compute_td_loss(batch_size)
     17             sys.exit()
     18         eps = eps/(1 + 0.001)

<ipython-input-111-3e1e02c32b4f> in compute_td_loss(batch_size)
     16     print("loss = ", loss)
     17     opt.zero_grad()
---> 18     loss.backward()
     19     opt.step()
     20     return loss

c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    253                 create_graph=create_graph,
    254                 inputs=inputs)
--> 255         torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
    256 
    257     def register_hook(self, hook):

c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    141 
    142     grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
--> 143     grad_tensors_ = _make_grads(tensors, grad_tensors_)
    144     if retain_graph is None:
    145         retain_graph = create_graph

c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\autograd\__init__.py in _make_grads(outputs, grads)
     48             if out.requires_grad:
     49                 if out.numel() != 1:
---> 50                     raise RuntimeError("grad can be implicitly created only for scalar outputs")
     51                 new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
     52             else:

RuntimeError: grad can be implicitly created only for scalar outputs

Can you print the shape of loss? You might just need to take the mean, but this is highly dependent on your problem.
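For example, something along these lines (a minimal sketch, assuming the tensors keep the shapes they have in your compute_td_loss above):

# td_error has shape (1, batch_size), so it is not a scalar yet
td_error = reward + gamma*max_next_q_values - q_vals
# Reducing with .mean() (or .sum()) gives a 0-dim tensor, which is what
# backward() needs in order to create the implicit gradient
loss = (0.5*td_error**2).mean()
loss.backward()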

Actually taking the mean worked! Thanks so much.