In the following example, the gradient values for critic.bias.grad and critic.weight.grad are zero, but the loss is -0.5. I set up the custom loss to make sure there was nothing funny happening with mse_loss but using mse_loss gives the same result. Why is this the case? I would expect bias grad to be 1 because:

Error = ((target_output - output)^2)/2, so dE/dY would be 2(out[1]-1-out[1])^2)*(-1)/2 = 1

Any help that guides me into why this happens would be very useful to be able to implement custom layers successfully in pytorch.

import torch.nn as nn

import torch

class CartPoleNN(nn.Module):

def **init**(self, input_size):

super(CartPoleNN, self).**init**()

self.fc1 = nn.Linear(input_size, 20) # Fully connected layer 1

self.fc1.weight.data.uniform_(-0.4, 0.4)

self.fc1.bias.data.uniform_(-0.4, 0.4)

```
# Actor network
self.actor = nn.Sequential(
nn.Linear(20, 2),
nn.Softmax(dim=-1) # Apply softmax to get action probabilities
)
for layer in self.actor:
if isinstance(layer, nn.Linear):
layer.weight.data.uniform_(-0.4, 0.4)
layer.bias.data.uniform_(-0.4, 0.4)
# Critic network
self.critic = nn.Sequential(
nn.Linear(20, 1),
)
for layer in self.critic:
if isinstance(layer, nn.Linear):
layer.weight.data.uniform_(-0.4, 0.4)
layer.bias.data.uniform_(-0.4, 0.4)
def forward(self, x):
x = self.fc1(x) # Pass through the first fully connected layer
action_probs = self.actor(x)
critic_value = self.critic(x)
return action_probs, critic_value
```

class CustomLoss(nn.Module):

def **init**(self):

super(CustomLoss, self).**init**()

```
def forward(self, inputs, targets):
loss = -0.5 * (targets - inputs)**2
return loss.sum()
```

torch.manual_seed(0)

torch.set_grad_enabled(True)

model = CartPoleNN(4)

Inputs = torch.tensor([0.0132, -0.2175, -0.0469, 0.2295])

out = model(Inputs)

Values = out[1]

AdvantagePY = out[1]

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Loss = CustomLoss()

CriticLoss = Loss(out[1], out[1]-1)

model.critic.zero_grad() # Zero the gradients

CriticLoss.backward()

print(model.critic[0].bias.grad)