BCEWithLogitsLoss RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Muhammad4hmed · December 6, 2020, 2:51pm

Hi, I am trying to run following network:

class CustomVGG16(torch.nn.Module):
    def __init__(self):
        super(CustomVGG16, self).__init__()
        self.vgg = torchvision.models.vgg16_bn(pretrained = True)
        self.vgg.classifier[-1] = torch.nn.Linear(4096,25)
        self.softmax = torch.nn.Softmax() 
        

    def forward(self, x):
        x = self.vgg(x)
        x = self.softmax(x)
        return x

with following parameters:

criterion = torch.nn.BCEWithLogitsLoss() # log loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)

and my training loop is as follows:

def train(model,criterion,optimizer,train_loader,epoch=1, val_loader = None):
    accuracy = []
    val_accuracy = []
    
    train_loss = []
    model = model.to(device)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, gamma = 0.1)

    for ep in range(epoch):
        running_loss = 0.0
        correct = 0
        total = 0
        start_time = time.time()
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(torch.argmax(outputs,axis=1).float(), labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            correct += (outputs.argmax(1) == labels).float().sum()
            total += len(labels)
        accuracy_local = (correct / total)*100
        accuracy_local = accuracy_local.data.cpu().numpy()
        
        train_loss.append(running_loss)
        accuracy.append(accuracy_local)
        val_acc, val_loss = valid(model,criterion,optimizer,val_loader)
        val_accuracy.append(val_acc)
        scheduler.step()

        print('EPOCH: {:} Accuracy: {:.2f}% Val_Accuracy: {:.2f}% \
        Train_Loss: {:.2f} Validation_Loss: {:.2f} Time: {:.2f} seconds'.format(ep, accuracy_local, val_acc,running_loss, val_loss, time.time() - start_time))

    return accuracy, val_accuracy, train_loss, val_loss

and validation loop:

def valid(model,criterion,optimizer,val_loader):
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(val_loader, 0):
        inputs, labels = data

        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(torch.argmax(outputs,axis=1).float(), labels.float())
        running_loss += loss.item()
        correct += (outputs.argmax(1) == labels).float().sum()
        total += len(labels)
    return ((correct / total)*100).data.cpu().numpy(), running_loss

for BCEWithLogitLoss, one hot encoded outputs were not working and were giving shape error so I changed it to a 1D array but still this error, I searched through previous answers on this error but nothing helped.

I also tried loss.require_grad = True but it also didn’t work.

What could be the problem here?
Thanks.

ptrblck · December 6, 2020, 9:06pm

You are detaching the computation graph by calling torch.argmax on the model output as this operation is not differentiable:

torch.argmax(outputs,axis=1).float()

nn.BCCEWithLogitsLoss expects logits as the model output and can be used for a multi-label classification (zero, one, or more classes can be active for each sample).
This would also mean that you should remove the softmax operation in your model.

Muhammad4hmed · December 6, 2020, 9:18pm

So what could be the solution? other than removing softmax, as I have to include it in my architecture

ptrblck · December 6, 2020, 9:29pm

Remove the softmax and the torch.argmax.

Also, if your target is one-hot encoded, I assume you are dealing with a multi-class classification, so replace nn.BCEWithLogitsLoss with nn.CrossEntropyLoss.