RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn, in UNet model

Hi, I was trying to build a UNet model using a dataset of brain images with mask images of a tumour, but I'm getting this error: RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn.
Here’s the code for my model

import torch
import torch.nn as nn
import torch.optim as optim


def double_conv(in_channel, out_channel):
    conv = nn.Sequential(
        nn.Conv2d(in_channel, out_channel, kernel_size=3),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channel, out_channel, kernel_size=3),
        nn.ReLU(inplace=True)
    )
    return conv
  
def crop_img(tensor, target_tensor):
    # Center-crop `tensor` so its spatial size matches `target_tensor`
    target_size = target_tensor.size()[2]
    tensor_size = tensor.size()[2]
    delta = tensor_size - target_size
    delta = delta // 2
    return tensor[:, :, delta:tensor_size-delta, delta:tensor_size-delta]


class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        
        self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.down_conv_1 = double_conv(1, 64)
        self.down_conv_2 = double_conv(64, 128)
        self.down_conv_3 = double_conv(128, 256)
        self.down_conv_4 = double_conv(256, 512)
        self.down_conv_5 = double_conv(512, 1024)

        self.up_trans_1 = nn.ConvTranspose2d(
            in_channels=1024,
            out_channels=512,
            kernel_size=2,
            stride=2)
        
        self.up_conv_1 = double_conv(1024, 512)

        self.up_trans_2 = nn.ConvTranspose2d(
            in_channels=512,
            out_channels=256,
            kernel_size=2,
            stride=2)
        
        self.up_conv_2 = double_conv(512, 256)

        self.up_trans_3 = nn.ConvTranspose2d(
            in_channels=256,
            out_channels=128,
            kernel_size=2,
            stride=2)
        
        self.up_conv_3 = double_conv(256, 128)

        self.up_trans_4 = nn.ConvTranspose2d(
            in_channels=128,
            out_channels=64,
            kernel_size=2,
            stride=2)
        
        self.up_conv_4 = double_conv(128, 64)

        self.out = nn.Conv2d(
            in_channels = 64,
            out_channels = 2,
            kernel_size = 1
        )
        
    def forward(self, image):
        x1 = self.down_conv_1(image)
        
        x2 = self.max_pool_2x2(x1)
        x3 = self.down_conv_2(x2)
        
        x4 = self.max_pool_2x2(x3)
        x5 = self.down_conv_3(x4)
        
        x6 = self.max_pool_2x2(x5)
        x7 = self.down_conv_4(x6)
        
        x8 = self.max_pool_2x2(x7)
        x9 = self.down_conv_5(x8)
        

        x = self.up_trans_1(x9)
        print(x.size())
        y = crop_img(x7, x)
        print(y.size())
        x = self.up_conv_1(torch.cat((x, y), 1))
        print(x.size())
        x = self.up_trans_2(x)
        y = crop_img(x5, x)
        x = self.up_conv_2(torch.cat((x, y), 1))
       
        x = self.up_trans_3(x)
        y = crop_img(x3, x)
        x = self.up_conv_3(torch.cat((x, y), 1))
        
        x = self.up_trans_4(x)
        y = crop_img(x1, x)
        x = self.up_conv_4(torch.cat((x, y), 1))
        
        x = self.out(x)
    
        return x
model = UNet()

for param in model.parameters():
    param.requires_grad = False
                      
# Move the model to the available device
model = model.to(device)

lmbda = lambda epoch: 0.95

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.5)
exp_lr_scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda, last_epoch=-1)

And here’s the code for my train function:

import copy
import time


def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e10

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training phase and a validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluation mode

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in data_loader[phase]:
                labels = labels.type(torch.LongTensor)
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the optimizer gradients
                optimizer.zero_grad()

                # Track gradients only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.unsqueeze(0)[0])
                    loss = criterion(outputs, labels)
                    #y_pred = outputs.data.cpu().numpy().ravel()
                    #y_true = labels.data.cpu().numpy().ravel()

                    # 'loss.backward()' + 'optimizer.step()' only during training
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss 

            print('{} Loss: {:.4f}'.format(
                phase, epoch_loss))

            # Deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    torch.save(best_model_wts,"model.pt")

    # Load the best model weights
    model.load_state_dict(best_model_wts)
    return model

And here’s the full error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-22-f314c3b5d742> in <module>()
----> 1 model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=30)

2 frames
<ipython-input-16-8878c14c2b34> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     36                     # 'loss.backward()' + 'optimizer.step()' only during training
     37                     if phase == 'train':
---> 38                         loss.backward()
     39                         optimizer.step()
     40 

/usr/local/lib/python3.6/dist-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
    196                 products. Defaults to ``False``.
    197         """
--> 198         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    199 
    200     def register_hook(self, hook):

/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
     98     Variable._execution_engine.run_backward(
     99         tensors, grad_tensors, retain_graph, create_graph,
--> 100         allow_unreachable=True)  # allow_unreachable flag
    101 
    102 

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

You are freezing all parameters in:

for param in model.parameters():
    param.requires_grad = False

which will yield this error, since the output won’t have a valid grad_fn (no trainable parameters were used to compute the output).
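
A minimal sketch of the fix, keeping the rest of your setup unchanged (this assumes you don't actually want any layers frozen): either delete the freezing loop, or set requires_grad back to True before building the optimizer. Parameters of a freshly constructed module require gradients by default, so the loop is not needed at all.

model = UNet()

# Parameters of a new nn.Module already require gradients by default; only
# re-enable them explicitly if they were frozen somewhere earlier.
for param in model.parameters():
    param.requires_grad = True

model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.5)
exp_lr_scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda, last_epoch=-1)

With that change the forward pass output carries a grad_fn and loss.backward() can build the graph. If you later do want to freeze part of the network (e.g. the encoder), pass only the still-trainable parameters to the optimizer, for example optim.SGD([p for p in model.parameters() if p.requires_grad], lr=0.5).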