I am trying to implement something that tells me the gradients of each pixel with respect to class probability. I am not sure why, but the gradients for each pixel are always 0.
Here is the main part of the code that is likely causing the issue (but I am not sure what it is):
model.eval() # no dropout, in eval mode
grad_X=X.type(torch.FloatTensor)
grad_X = torch.reshape(grad_X, (60000,1,28,28))
grad_X = grad_X[:10]
print("images:")
print(grad_X[0])
""" if get weird errors, make sure this is ok"""
grad_Y = train_dataset.targets
grad_Y = grad_Y[:10] # number of training examples to debug with
print("labels")
print(grad_Y)
# print("dones splicing")
grad_X.requires_grad = True
# am i doing backprop on the correct thing?
node_sel = grad_Y.shape
ones = torch.ones(node_sel) # nx1, one for each training example
# print("feeding forward")
logits = model(grad_X) # feed forward, keeping track of the calculation graph
# print("done with logits")
# print("probs")
probs = torch.nn.Softmax(dim=1)(logits) # n x 10
#print(probs)
sel_nodes = probs[torch.arange(len(grad_Y)), grad_Y.type(torch.LongTensor)]
print("sel nodes shape")
print(sel_nodes.shape)
print(sel_nodes)
# print("running gradients")
sel_nodes.backward(ones) # is this correct?
print("nodes to do backprop on")
print(sel_nodes)
grad = grad_X.grad.data.numpy()
# grad = grad.squeeze()
print("gradient")
print(grad.shape)
print(grad[3]) # RUN THIS, see if it is really just 0s, it is for all i