Gradients all 0

rjsdebug · July 13, 2022, 2:28pm

I am trying to implement something that tells me the gradients of each pixel with respect to class probability. I am not sure why, but the gradients for each pixel are always 0.

Here is the main part of the code that is likely causing the issue (but I am not sure what it is):

    model.eval() # no dropout, in eval mode
    grad_X=X.type(torch.FloatTensor)
    grad_X = torch.reshape(grad_X, (60000,1,28,28))
    grad_X = grad_X[:10]
    print("images:")
    print(grad_X[0])
    """ if get weird errors, make sure this is ok"""
    grad_Y = train_dataset.targets
    grad_Y = grad_Y[:10] # number of training examples to debug with
    print("labels")
    print(grad_Y)
#  print("dones splicing")

    grad_X.requires_grad = True
    # am i doing backprop on the correct thing?
    node_sel = grad_Y.shape
    ones = torch.ones(node_sel) # nx1, one for each training example
#  print("feeding forward")
    logits  = model(grad_X) # feed forward, keeping track of the calculation graph
#  print("done with logits")
   # print("probs")
    probs = torch.nn.Softmax(dim=1)(logits) # n x 10
    #print(probs)
    sel_nodes = probs[torch.arange(len(grad_Y)), grad_Y.type(torch.LongTensor)]
    print("sel nodes shape")
    print(sel_nodes.shape)
    print(sel_nodes)
# print("running gradients")
    sel_nodes.backward(ones) # is this correct?
    print("nodes to do backprop on")
    print(sel_nodes)
    grad = grad_X.grad.data.numpy()
   # grad = grad.squeeze()
    print("gradient")
    print(grad.shape)
    print(grad[3]) # RUN THIS, see if it is really just 0s, it is for all i