RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4]] is at version 1; expected version 0 instead

Hi,
I ran into the following problem:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

This is my code:


from model import Generator,Discriminator
from config import *
from loss import *

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.utils import save_image


import re
import os
import unicodedata
import numpy as np
from PIL import Image

class Train():

    def __init__(self):

        device = torch.device("cuda")

        self.G = Generator(g1_pram,g2_pram,g3_pram,g4_pram).to(device)
        self.D = Discriminator(d1_pram,d2_pram,d3_pram,d4_pram).to(device)

        self.G_optimizer = optim.SGD(self.G.parameters(),lr = G_lr)
        self.D_optimizer = optim.SGD(self.D.parameters(),lr = D_lr)

        self.down1 = nn.Upsample(scale_factor=(0.5,0.5),mode="nearest")
        self.down2 = nn.Upsample(scale_factor=(0.5,0.5),mode="nearest")
        self.down3 = nn.Upsample(scale_factor=(0.5,0.5),mode="nearest")

        self.scheduler_D = torch.optim.lr_scheduler.StepLR(self.D_optimizer, step_size = step_size, gamma = gamma)
        self.scheduler_G = torch.optim.lr_scheduler.StepLR(self.G_optimizer, step_size = step_size, gamma = gamma)
        self.BCEloss = nn.BCELoss()

        self.gen_img = None

    def D_loss(self,discriminate,label):
        
        D_loss_1 = self.BCEloss(discriminate[0].view(-1),label)
        D_loss_2 = self.BCEloss(discriminate[1].view(-1),label)
        D_loss_3 = self.BCEloss(discriminate[2].view(-1),label)
        D_loss_4 = self.BCEloss(discriminate[3].view(-1),label)

        return D_loss_1 + D_loss_2 + D_loss_3 + D_loss_4

    def GDL_loss(self, y, gen_img):
        d4_in = y
        d3_in = self.down3(d4_in)
        d2_in = self.down2(d3_in)
        d1_in = self.down1(d2_in)
        y = [d1_in,d2_in,d3_in,d4_in]

        G_loss_1 = loss_gdl(gen_img[0],y[0],1) * lambda_gdl
        G_loss_2 = loss_gdl(gen_img[1],y[1],1) * lambda_gdl
        G_loss_3 = loss_gdl(gen_img[2],y[2],1) * lambda_gdl
        G_loss_4 = loss_gdl(gen_img[3],y[3],1) * lambda_gdl

        return G_loss_1 + G_loss_2 + G_loss_3 + G_loss_4


    def scheduler(self):
        self.scheduler_D.step()
        self.scheduler_G.step()


    def train(self,x,y):
        
        real_label = 1.
        fake_label = 0.

        self.G.train()
        self.D.train()

        b_size = x.size()[0]
        label = torch.full((b_size,), real_label, device=torch.device("cuda"))

        # Train D
        
        self.D_optimizer.zero_grad()
        real = self.D(x,y)
        real_loss = self.D_loss(real,label)
        #real_loss.backward()
        
        #self.D_optimizer.zero_grad()
        fake_img = self.G(x)
        fake_img = [i.detach() for i in fake_img]
        fake = self.D(x,fake_img)
        label.fill_(fake_label)
        fake_loss = self.D_loss(fake, label)
        #fake_loss.backward()
        dloss = real_loss + fake_loss
        dloss.backward()
        self.D_optimizer.step()

        # Train G
        
        self.G_optimizer.zero_grad()
        gen_img = self.G(x)
        gen = self.D(x,y)
        label.fill_(real_label)
        
        adv_loss = self.D_loss(gen, label)
        gdl_loss = self.GDL_loss(y, gen_img)
        
        gloss = adv_loss + gdl_loss
        gloss.backward()
        self.G_optimizer.step()

        True_loss = real_loss
        False_loss = fake_loss + adv_loss
        criterion = nn.MSELoss()
        MSE_loss = criterion(gen_img[-1],y[-1])

        self.gen_img = gen_img

        return True_loss, False_loss, MSE_loss

    def predict(self,x,y):
        
        with torch.no_grad():
            real_label = 1.
            fake_label = 0.

            self.G.train()
            self.D.train()

            b_size = x.size()[0]
            label = torch.full((b_size,), real_label, device=torch.device("cuda"))

            # Predict D
            
            real = self.D(x,y)
            real_loss = self.D_loss(real,label)
            
            fake_img = self.G(x)
            fake_img = [i.detach() for i in fake_img]
            fake = self.D(x,fake_img)
            label.fill_(fake_label)
            fake_loss = self.D_loss(fake, label)

            # Predict G
            
            gen_img = self.G(x)
            gen = self.D(x,y)
            label.fill_(real_label)
            
            adv_loss = self.D_loss(gen, label)
            gdl_loss = self.GDL_loss(y, gen_img)
            
            gloss = adv_loss + gdl_loss

            True_loss = real_loss
            False_loss = fake_loss + adv_loss
            criterion = nn.MSELoss()
            MSE_loss = criterion(gen_img[-1],y[-1])

        self.gen_img = gen_img
        return True_loss, False_loss, MSE_loss


    def img_save(self, img_path):
        print(self.gen_img[-1][0].size())
        save_image(self.gen_img[-1][0], img_path)

    def model_save(self, model_path):
        torch.save(self.D.state_dict(),model_path[0])
        torch.save(self.G.state_dict(),model_path[1])

        

if __name__ == '__main__':
    T = Train()
    device = torch.device("cuda")
    import time
    for i in range(1000):
        print(i)
        print("start", [...])
        start = time.time()
        x = torch.autograd.Variable(torch.randn(4,12,105,80)).to(device)
        y = torch.autograd.Variable(torch.randn(4,3,105,80)).to(device)

        z = T.train(x,y)
        save_img = True if i % 10 == 0 else False
        w = T.predict(x,y)
        if save_img:
            T.img_save("oppai.png")
        save_model = True if i % 10 == 0 else False
        if save_model:
            T.model_save(["D","G"])
        
        print("end batch", [...])
        print(time.time()-start, [...])

0
start [Ellipsis]
Traceback (most recent call last):
  File "train_ops.py", line 184, in <module>
    z = T.train(x,y)
  File "train_ops.py", line 94, in train
    dloss.backward()
  File "/home/hikarukondo/.local/lib/python3.6/site-packages/torch/tensor.py", line 150, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/hikarukondo/.local/lib/python3.6/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

Hi,

Could you provide the stack trace you get after enabling anomaly mode, as proposed in the hint?
This will point you to the operation whose output was modified in place.
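For reference, enabling it is a one-liner placed before the training loop (debug-only, since it slows execution down):

import torch

# With anomaly mode on, the RuntimeError is accompanied by a second
# traceback that points at the forward operation which produced the
# tensor that was later modified in place.
torch.autograd.set_detect_anomaly(True)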

Thanks for your comment. I was able to solve my problem, which was similar to the question posted above.

Hi, I have run into the same error. Could you tell me how you solved the problem? Thank you very much, looking forward to your reply.

Hi, could you tell me how you solved the problem? Thank you so much!

Hi,

This error can happen for many different reasons, so if you see the same error, I would suggest opening a new post with the full stack trace, the stack trace you get with anomaly mode enabled, and the surrounding code.
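For this particular snippet, the [torch.cuda.FloatTensor [4]] in the message matches the shape of label (batch size 4), and nn.BCELoss saves its target tensor for the backward pass. So a plausible culprit (an assumption, not a fix confirmed by the original poster) is the label.fill_(fake_label) call sitting between the computation of real_loss and dloss.backward(): it bumps the version counter of a tensor that real_loss still needs. A minimal sketch of that fix is to give each loss its own target tensor instead of refilling one in place:

# Hypothetical rework of the "Train D" block: one target per loss,
# so no tensor saved for backward is mutated before dloss.backward().
real_targets = torch.full((b_size,), real_label, device=x.device)
fake_targets = torch.full((b_size,), fake_label, device=x.device)

real_loss = self.D_loss(real, real_targets)
fake_loss = self.D_loss(fake, fake_targets)
dloss = real_loss + fake_loss
dloss.backward()   # both targets are still at version 0 here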

Thank you so much! I have tried "with torch.autograd.detect_anomaly():", but I don't get any more information about this error than before.
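One thing to check, assuming the context manager only wraps the backward call: anomaly mode has to be active during the forward pass as well, because the extra traceback is recorded while the graph is being built. A self-contained repro of the same class of error, with detection enabled up front:

import torch

torch.autograd.set_detect_anomaly(True)

x = torch.randn(4, requires_grad=True)
y = x.exp()          # exp() saves its output for the backward pass
y[0] = 0.0           # the in-place write bumps y's version counter
y.sum().backward()   # raises the same "inplace operation" RuntimeError,
                     # now with a traceback pointing at the exp() call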