Why do I get "loss does not require grad and does not have a grad_fn"?

I'm trying to fine-tune a ResNet-18 on CIFAR-10. Everything is straightforward, yet for some weird reason I'm getting:
**RuntimeError** : element 0 of tensors does not require grad and does not have a grad_fn

I have seen a similar question here, but I can't understand why I'm getting this error, as I'm not doing anything like that here!
Can anyone please tell me what I'm missing?

Here is the full stack trace:

Files already downloaded and verified
Files already downloaded and verified
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
 in 
     27 dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size = batch_size, shuffle=False, num_workers = 2)
     28 
---> 29 train_info, val_info = training(resnet18, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device)

 in training(model, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device)
     56 
     57             optimizer.zero_grad()
---> 58             loss.backward()
     59             optimizer.step()
     60 

~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph)
    105                 products. Defaults to ``False``.
    106         """
--> 107         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    108 
    109     def register_hook(self, hook):

~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
     91     Variable._execution_engine.run_backward(
     92         tensors, grad_tensors, retain_graph, create_graph,
---> 93         allow_unreachable=True)  # allow_unreachable flag
     94 
     95 

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

And this is my code:

import torch
from torchvision import models, datasets, transforms
import torch.nn as nn 

resnet18 = models.resnet18(pretrained=True)
resnet18.fc = nn.Linear(512, 10)

for param in resnet18.parameters():
    param.requires_grad = False 
# and for fc 
for param in resnet18.fc.parameters():
    param.requires_grad = False

#%%

# let's create training and testing functions for our case
def validation(model, dataloader_test, criterion, k, device):
    loss_total = 0.0
    acc_perbatch = 0.0

    with torch.no_grad(): 
        for imgs, labels in dataloader_test:
            imgs, labels = imgs.to(device), labels.to(device)
            # activate evaluation mode
            model.eval()
            preds = model(imgs)
            loss_val = criterion(preds, labels)
            _, indexes = preds.topk(k, dim=1)
            results = (indexes.view(*labels.shape) == labels).float()
            acc_perbatch += torch.mean(results)
            loss_total += loss_val.item()
        
        acc = acc_perbatch/len(dataloader_test)
        loss_final = loss_total/len(dataloader_test)
    return loss_final, acc

def training(model, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device):

    model = model.to(device)
    # activate training mode
    model.train()
    training_acc_losses = []
    val_acc_losses = []
    training_batch_count = len(dataloader_train)
    test_batch_count = len(dataloader_test)

    for e in range(epochs):
        
        acc_per_batch = 0.0
        training_loss = 0.0
        for i, (imgs, labels) in enumerate(dataloader_train):
            imgs = imgs.to(device)
            labels = labels.to(device)

            preds = model(imgs)
            loss = criterion(preds, labels)
            
            # calculate training accuracy 
            _, class_indexes = preds.topk(k=1, dim=1)
            results = (class_indexes.view(*labels.shape) == labels).float()
            acc_per_batch += torch.mean(results)

            training_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % interval == 0:
                print(f'(epoch/iter): {e}/{i} train-loss: {loss.item():.6f} train-acc: {acc_per_batch/training_batch_count:.4f}')
                
        
        # accumulate accuracies and losses per epoch
        training_acc_losses.append((acc_per_batch/training_batch_count, training_loss/training_batch_count))
        # run validation test at every epoch! 
        val_acc_loss = validation(model, dataloader_test, criterion, k=1, device=device)
        val_acc_losses.append(val_acc_loss)
        print(f'val_loss : {val_acc_loss[0]:.4f} val_acc: {val_acc_loss[1]:.4f}')
    return training_acc_losses, val_acc_losses

#%%
epochs = 10
k = 1
batch_size = 32
interval = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet18.parameters(), lr = 0.00001)

transformations_train = transforms.Compose([transforms.Resize(224),
                                            transforms.RandomHorizontalFlip(),
                                            transforms.ToTensor(),
                                            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                                 std=(0.229, 0.224, 0.225))
                                            ])

transformations_test = transforms.Compose([transforms.Resize(224),
                                           transforms.ToTensor(),
                                           transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                                std=(0.229, 0.224, 0.225))
                                           ])

dataset_train = datasets.CIFAR10('CIFAR10', train=True, transform = transformations_train, download=True)
dataset_test = datasets.CIFAR10('CIFAR10', train=False, transform = transformations_test, download=True)

dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size = batch_size, shuffle=True, num_workers = 2)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size = batch_size, shuffle=False, num_workers = 2)

train_info, val_info = training(resnet18, dataloader_train, dataloader_test, epochs, criterion, optimizer, k, interval, device)


Thank you all in advance

You set the requires_grad attribute of all parameters to False, thus nothing in your computation graph requires gradients, which will raise this error.
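
If the intent (as the comments in the posted code suggest) is to freeze the backbone and train only the newly added fc layer, the classifier parameters need requires_grad = True. A minimal sketch of that setup, assuming only resnet18.fc should be updated:

for param in resnet18.parameters():
    param.requires_grad = False   # freeze the pretrained backbone
for param in resnet18.fc.parameters():
    param.requires_grad = True    # keep the new classifier head trainable

# optionally, give the optimizer only the trainable parameters
optimizer = torch.optim.Adam(resnet18.fc.parameters(), lr=0.00001)

With at least one parameter requiring gradients, loss.backward() will again have a grad_fn to backpropagate through.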


I tried this:

for param in model.parameters():
    param.requires_grad = False

which also didn't work for me, and I still get the same error at loss.backward().