CNN for regression, element 0 of tensors does not require grad and does not have a grad_fn

Hello, I want to train a CNN for regression, because the output is a continuous variable, but I got this message: "element 0 of tensors does not require grad and does not have a grad_fn".

I saw forum threads where the same problem appears, but I still can't find the solution.
Can anyone help me?

Here is my code:

#class SimpleNet(nn.Module):
class CNN(nn.Module):
    def __init__(self):
        #super(SimpleNet, self).__init__()
        super(CNN, self).__init__()

        self.conv1= nn.Conv2d(in_channels=3, out_channels=96, kernel_size=5, stride=1)
        self.relu1= nn.ReLU()
        self.norm1= nn.BatchNorm2d(96)
        
        
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.norm2= nn.BatchNorm2d(256)

        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()
        
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.pool1= nn.MaxPool2d(kernel_size=2, stride=2)
        
        
        self.fc1 = nn.Linear(in_features=4096, out_features=4096) 
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        #self.fc3 = nn.Linear(in_features=4096, out_features=238)
        self.fc3 = nn.Linear(in_features=4096, out_features=1)
       
        
    def forward(self, x):
        x=x.float() 
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.norm1(out)
        
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.norm2(out)
        
        out = self.conv3(out)
        out = self.relu3(out)
        
        out = self.conv4(out)
        out = self.relu4(out)
        
        out = self.conv5(out)
        out = self.relu5(out)
        out = self.pool1(out)
        
        
        out = out.view(-1, 4096)

        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        
        return out

    
    
# Model
model = CNN()
CUDA = torch.cuda.is_available()
if CUDA:
    model = model.cuda()    
loss_fn = nn.MSELoss()       
#optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)   
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=0.0001) 


#Training CNN
import time

num_epochs = 2

#Define the lists to store the results of loss and accuracy
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []


for epoch in range(num_epochs): 
    # Reset the variables to zero in each epoch
    start = time.time()
    correct = 0
    iterations = 0
    iter_loss = 0.0
    
    model.train()  # Training Mode
    
    for i, (input, lab) in enumerate(train_load):
        
        # Convert torch tensor to Variable
        input = Variable(input)
        lab = Variable(lab)
        lab = torch.DoubleTensor(25)
        lab = lab.type(torch.cuda.FloatTensor)
        
        
        #  GPU
        CUDA = torch.cuda.is_available()
        if CUDA:
            input = input.cuda()
            lab = lab.cuda()
        
        optimizer.zero_grad()  # clean gradient
        outputs = model(input)  
        outputs = torch.DoubleTensor(25)
        outputs = outputs.type(torch.cuda.FloatTensor)
        
        loss = loss_fn(outputs, lab)  
        iter_loss += loss.item()      
        loss.backward()                 # Backpropagation 
        optimizer.step()                # update weights
        torch.cuda.empty_cache()
        iterations += 1   
    # Record the training loss
    train_loss.append(iter_loss/iterations)

   
    #Test
    loss = 0.0
    correct = 0
    iterations = 0

    model.eval() 
    
    for i, (input, lab) in enumerate(test_load):
        
        # Convert torch tensor to Variable
        input = Variable(input)
        lab = Variable(lab)
        lab = torch.DoubleTensor(25)
        lab = lab.type(torch.cuda.FloatTensor)
        
        
        CUDA = torch.cuda.is_available()
        if CUDA:
            input = input.cuda()
            lab = lab.cuda()
        
        outputs = model(input)   
        outputs = torch.DoubleTensor(25)
        outputs = outputs.type(torch.cuda.FloatTensor)
        loss = loss_fn(outputs, lab) # Calculate the loss
        loss += loss.item()
        torch.cuda.empty_cache()
        iterations += 1

    # Record the Testing loss
    test_loss.append(loss/iterations)
    stop = time.time()

It seems you are overwriting the model output in these lines:

        outputs = model(input)  
        outputs = torch.DoubleTensor(25)
        outputs = outputs.type(torch.cuda.FloatTensor)

If you create a new, uninitialized outputs tensor, the computation graph will be detached and you won't be able to update the parameters.
Could you explain a bit why you would like to do this?
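
Here is a minimal sketch of the issue (a toy nn.Linear, not your model), showing how overwriting the output with a freshly created tensor drops the grad_fn and leads to exactly this error:

import torch
import torch.nn as nn

model = nn.Linear(4, 1)
x = torch.randn(2, 4)
target = torch.randn(2, 1)

outputs = model(x)
print(outputs.grad_fn)   # a valid grad_fn, the output is attached to the graph

outputs = torch.DoubleTensor(2).float().view(2, 1)   # overwrites the model output
print(outputs.grad_fn)   # None, this tensor has no gradient history

loss = nn.MSELoss()(outputs, target)
# loss.backward()  # would raise: element 0 of tensors does not require grad and does not have a grad_fn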

PS: I’ve formatted your code for better readability. If you want to post code snippets, you can wrap them in three backticks ``` :wink:

Thanks so much for your answer.

You are right, the two lines after "outputs = model(input)" don't make sense. That was the problem.

Hey, can you please check my code and let me know how to correct this error?

%%time
loss_arr = []
loss_epoch_arr = []
max_epochs = 30

for epoch in range(max_epochs):

    for i, data in enumerate(trainloader, 0):

        inputs, labels = data
        inputs, labels = inputs.to(cuda0), labels.to(cuda0)

        opt.zero_grad()

        outputs = model(inputs)
        _, labels = torch.max(labels.data, 1)
        _, pred = torch.max(outputs.data, 1)
        loss = soft_dice_loss(pred, labels)
        loss.backward()
        opt.step()

        loss_arr.append(loss.item())

    loss_epoch_arr.append(loss.item())
    print('Epoch: %d/%d, valid Dloss: %0.2f, Train DLoss: %0.2f' % (epoch, max_epochs, evaluation(validloader, model), evaluation(trainloader, model)))

plt.plot(loss_epoch_arr)
plt.show()

“RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn”

The indices returned by torch.argmax, or by torch.max as the second return value, are not differentiable, as seen here:

outputs = torch.randn(10, 10, requires_grad=True)
_, preds = torch.max(outputs, dim=1)
print(preds.grad_fn)
> None

To calculate the dice loss you could pass F.softmax(outputs, dim=1) as the predictions.
Also, don’t use the .data attribute, as it might have unwanted side effects since Autograd won’t be able to track these operations.
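
For reference, here is a possible sketch of a differentiable soft dice loss; your actual soft_dice_loss may differ, and I'm assuming raw logits of shape [N, C, H, W] and one-hot encoded targets of the same shape:

import torch
import torch.nn.functional as F

def soft_dice_loss(outputs, targets, eps=1e-6):
    # outputs: raw logits [N, C, H, W]; targets: one-hot [N, C, H, W] (assumed)
    probs = F.softmax(outputs, dim=1)          # differentiable, keeps the graph intact
    dims = (0, 2, 3)                           # reduce over batch and spatial dimensions
    intersection = (probs * targets).sum(dims)
    cardinality = probs.sum(dims) + targets.sum(dims)
    dice = (2.0 * intersection + eps) / (cardinality + eps)
    return 1.0 - dice.mean()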

Thank you for the very quick response.
But after modifying my code, I am getting another error.

%%time
loss_arr = []
loss_epoch_arr = []
max_epochs = 30

for epoch in range(max_epochs):

    for i, data in enumerate(trainloader, 0):

        inputs, labels = data
        inputs, labels = inputs.to(cuda0), labels.to(cuda0)

        opt.zero_grad()

        outputs = model(inputs)
        pred = torch.nn.functional.softmax(out, dim=1)
        # _, labels = torch.max(labels, dim=1)
        # _, pred = torch.max(outputs, dim=1)
        loss = soft_dice_loss(pred, labels)
        loss.backward()
        opt.step()

        loss_arr.append(loss.item())

    loss_epoch_arr.append(loss.item())
    print('Epoch: %d/%d, valid Dloss: %0.2f, Train DLoss: %0.2f' % (epoch, max_epochs, evaluation(validloader, model), evaluation(trainloader, model)))

plt.plot(loss_epoch_arr)
plt.show()

“RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling backward the first time.”

Are you running backward multiple times in each iteration somewhere?
E.g. is soft_dice_loss calling loss.backward already?

No, it's being run only once in that loop.

At some point an intermediate tensor, whose buffers have already been freed, is still needed by a subsequent backward call.
Could you post a minimal, executable code snippet to reproduce this error so that we can debug it?
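
For reference, this is the kind of pattern that raises the error (a toy sketch, unrelated to your model): calling backward twice through the same graph without retain_graph=True:

import torch

x = torch.randn(3, requires_grad=True)
y = (x * x).sum()      # x * x saves its inputs for the backward pass

y.backward()           # the first backward frees the saved intermediate buffers
# y.backward()         # raises: Trying to backward through the graph a second time ...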

Is it okay if I share a Colab notebook with you?

The error was due to the use of the .data attribute. It runs perfectly fine if I just pass the output directly for backpropagation.

%%time
import copy
#loss_arr = []
loss_epoch_arr = []
max_epochs = 30
train_loss =[]
valid_loss = []
min_loss = 100

for epoch in range(max_epochs):

    for i, data in enumerate(trainloader, 0):

        inputs, labels = data
        inputs, labels = inputs.to(cuda0), labels.to(cuda0)

        opt.zero_grad()

        outputs = model(inputs)
        # _, labels = torch.max(labels.data, 1)
        # _, pred = torch.max(outputs.data, 1)
        loss = soft_dice_loss(labels, outputs)
        loss.backward()
        opt.step()

        if min_loss > loss.item():
            min_loss = loss.item()
            best_model = copy.deepcopy(model.state_dict())
            #print('Min loss %0.2f' % min_loss)

        del inputs, labels, outputs
        torch.cuda.empty_cache()

        #loss_arr.append(loss.item())

    loss_epoch_arr.append(loss.item())
    train_loss.append(evaluation(trainloader, model))
    valid_loss.append(evaluation(validloader, model))
    print('Epoch: %d/%d, Train DLoss: %0.2f, valid Dloss: %0.2f' % (epoch, max_epochs, evaluation(trainloader, model), evaluation(validloader, model)))

plt.plot(loss_epoch_arr, 'r')
plt.plot(train_loss, 'b')
plt.plot(valid_loss, 'g')
plt.show()

I don't understand why there is a problem with using torch.max. Does using the output directly for backpropagation have any effect on accuracy?

You can calculate the gradients for the values returned by torch.max, but not for the returned indices, as this operation is not differentiable:

x = torch.randn(10, 10, requires_grad=True)
val, idx = torch.max(x, dim=1)
print(val, idx)
> tensor([1.4675, 2.2521, 1.3766, 1.8801, 1.5437, 1.3367, 0.8476, 0.7166, 1.0904,
        1.5543], grad_fn=<MaxBackward0>) tensor([3, 9, 4, 8, 3, 1, 0, 2, 1, 2])

As you can see, the val tensor contains a valid .grad_fn, while the idx tensor does not.

Also, where did you use the .data attribute? Generally it shouldn't be used at all, as Autograd cannot track operations on this attribute, and I also thought you had already removed its usage in the previous code snippet.
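
To illustrate with a small sketch: .data returns a tensor that shares the same storage but is invisible to Autograd, so operations on it are not tracked; .detach() is the recommended way if you explicitly want to cut the graph:

import torch

x = torch.randn(3, requires_grad=True)
y = x * 2

print(y.grad_fn)            # a valid grad_fn, tracked by Autograd
print(y.data.grad_fn)       # None, operations on .data are not tracked
print(y.detach().grad_fn)   # None as well, but .detach() is the safer, documented way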

Yeah, right. Thank you