RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn (alexnet)

Hey everybody,

I’ve just started out with PyTorch and have been trying to debug this, but I don’t understand what’s wrong.

Would really appreciate the help!


def trained_model(model, criterion, optimizer, epochs):
    for epoch in range(epochs):
        print('Epoch:', str(epoch+1) + '/' + str(epochs))
        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train() #switch to training mode (enables dropout and batchnorm updates)
            else:
                model.eval() #switch to evaluation mode

            running_loss, running_corrects = 0.0, 0 

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device) #move the batch to the same device as the model
                labels = labels.to(device)

                outputs = model(inputs) #forward pass through the model
                loss = criterion(outputs, labels) #compare predictions against the labels
                # loss.requires_grad = True

                if phase == 'train':
                    optimizer.zero_grad() #sets gradients to zero
                    loss.backward() #backpropagates and accumulates gradients into .grad
                    optimizer.step() #performs an optimization step

                _, preds = torch.max(outputs, 1) #predicted class indices (argmax over the class dimension)
                running_loss += loss.item() * inputs.size(0) #accumulate the loss, weighted by batch size
                running_corrects += torch.sum(preds == labels.data) #count the correct predictions

            epoch_loss = running_loss / len(data_images[phase]) #average loss over the whole dataset
            epoch_accuracy = running_corrects.double() / len(data_images[phase]) #fraction of correct predictions

            print(phase, 'loss:', epoch_loss, 'accuracy:', epoch_accuracy)

    return model 
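
For context, here’s roughly the setup the function expects (a minimal sketch; the paths, transforms, batch size, and hyperparameters are placeholders rather than my exact values):

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.Resize((224, 224)), #AlexNet expects 224x224 inputs
    transforms.ToTensor(),
])
data_images = {phase: datasets.ImageFolder('data/' + phase, transform=transform)
               for phase in ['train', 'validation']}
dataloaders = {phase: torch.utils.data.DataLoader(data_images[phase], batch_size=32,
                                                  shuffle=(phase == 'train'))
               for phase in ['train', 'validation']}

model = models.alexnet(pretrained=True).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

model = trained_model(model, criterion, optimizer, epochs=5)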

The loss tensor seems to be detached from the computation graph (or rather, the output already is); you can verify this by checking its .grad_fn, which will be None for a detached tensor.
To fix this, inspect the intermediate tensors in the forward pass (starting with the output) and try to isolate where the graph gets detached.
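For example, printing .grad_fn after each step of the forward pass shows where the graph breaks (a sketch; the grad_fn names below are just what I’d typically expect for a linear output layer and cross-entropy loss):

outputs = model(inputs)
print(outputs.grad_fn) #expect something like <AddmmBackward0>; None means the graph is already broken here
loss = criterion(outputs, labels)
print(loss.grad_fn)    #expect something like <NllLossBackward0>; None reproduces the reported error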
Also, make sure you didn’t globally disable the gradient calculation (e.g. via torch.no_grad() or torch.set_grad_enabled(False)) and that the model’s parameters actually require gradients.
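A quick sanity check for both conditions:

print(torch.is_grad_enabled()) #False means a torch.no_grad()/set_grad_enabled(False) context is active
print(all(p.requires_grad for p in model.parameters())) #False means some parameters are frozen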