I have been trying to get the gradients for input in this thread for more than a week now.
Looking at the official tutorial here, for getting gradients with respect to the input where the tensor used for backward is not a scaler, it says,
Now in this case
y
is no longer a scalar.torch.autograd
could not compute the full Jacobian directly, but if we just want the vector-Jacobian product, simply pass the vector tobackward
as argument:
x = torch.randn(3, requires_grad=True)
y = x * 2 while y.data.norm() < 1000: y = y * 2 print(y) v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float) y.backward(v) print(x.grad)
However, when I try to sth like this the input gradient is always None!
The input will only have a gradient if I backprop using the loss1! which doesnt make any sense to me!
def fc_batchnorm_act(in_, out_, use_bn=True, act=nn.ReLU()):
return nn.Sequential(nn.Linear(in_,out_),
act,
nn.BatchNorm1d(out_) if use_bn else nn.Identity())
class Reshape(nn.Module):
def __init__(self, shape):
super().__init__()
self.shape = shape
def forward(self, input):
return input.view(self.shape)
class Contractive_AutoEncoder(nn.Module):
def __init__(self):
super().__init__()
self.encoder = nn.Sequential(Reshape(shape=(-1, 28*28)),
fc_batchnorm_act(28*28, 400, False))
self.decoder = nn.Sequential(fc_batchnorm_act(400, 28*28, False, nn.Sigmoid()),
Reshape(shape=(-1, 1, 28, 28)))
def forward(self, input):
outputs_e = self.encoder(input)
outputs = self.decoder(outputs_e)
return outputs_e, outputs
def loss_function(output_e, outputs, imgs, device):
criterion = nn.MSELoss()
assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
imgs.requires_grad = True
loss1 = criterion(outputs, imgs)
# loss1.backward(retain_graph=True)
output_e.backward(torch.ones(outputs_e.size()).to(device), retain_graph=True)
print(imgs.grad)
loss2 = torch.mean(pow(imgs.grad,2))
imgs.requires_grad = False
imgs.grad.data.zero_()
loss = loss1 + loss2
return loss
and this is how it is used :
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
outputs_e, outputs = model(imgs)
loss = loss_function(outputs_e, outputs, imgs,device)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'epoch/epoechs: {e}/{epochs} loss : {loss.item():.4f} ')
What am I missing here? I’d grately appreciate this