Hi,
I emerged with the problem.
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
this is my code
from model import Generator,Discriminator
from config import *
from loss import *
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.utils import save_image
import re
import os
import unicodedata
import numpy as np
from PIL import Image
class Train():
def __init__(self):
device = torch.device("cuda")
self.G = Generator(g1_pram,g2_pram,g3_pram,g4_pram).to(device)
self.D = Discriminator(d1_pram,d2_pram,d3_pram,d4_pram).to(device)
self.G_optimizer = optim.SGD(self.G.parameters(),lr = G_lr)
self.D_optimizer = optim.SGD(self.D.parameters(),lr = D_lr)
self.down1 = nn.Upsample(scale_factor=(0.5,0.5),mode="nearest")
self.down2 = nn.Upsample(scale_factor=(0.5,0.5),mode="nearest")
self.down3 = nn.Upsample(scale_factor=(0.5,0.5),mode="nearest")
self.scheduler_D = torch.optim.lr_scheduler.StepLR(self.D_optimizer, step_size = step_size, gamma = gamma)
self.scheduler_G = torch.optim.lr_scheduler.StepLR(self.G_optimizer, step_size = step_size, gamma = gamma)
self.BCEloss = nn.BCELoss()
self.gen_img = None
def D_loss(self,discriminate,label):
D_loss_1 = self.BCEloss(discriminate[0].view(-1),label)
D_loss_2 = self.BCEloss(discriminate[1].view(-1),label)
D_loss_3 = self.BCEloss(discriminate[2].view(-1),label)
D_loss_4 = self.BCEloss(discriminate[3].view(-1),label)
return D_loss_1 + D_loss_2 + D_loss_3 + D_loss_4
def GDL_loss(self, y, gen_img):
d4_in = y
d3_in = self.down3(d4_in)
d2_in = self.down2(d3_in)
d1_in = self.down1(d2_in)
y = [d1_in,d2_in,d3_in,d4_in]
G_loss_1 = loss_gdl(gen_img[0],y[0],1) * lambda_gdl
G_loss_2 = loss_gdl(gen_img[1],y[1],1) * lambda_gdl
G_loss_3 = loss_gdl(gen_img[2],y[2],1) * lambda_gdl
G_loss_4 = loss_gdl(gen_img[3],y[3],1) * lambda_gdl
return G_loss_1 + G_loss_2 + G_loss_3 + G_loss_4
def scheduler(self):
self.scheduler_D.step()
self.scheduler_G.step()
def train(self,x,y):
real_label = 1.
fake_label = 0.
self.G.train()
self.D.train()
b_size = x.size()[0]
label = torch.full((b_size,), real_label, device=torch.device("cuda"))
# Train D
self.D_optimizer.zero_grad()
real = self.D(x,y)
real_loss = self.D_loss(real,label)
#real_loss.backward()
#self.D_optimizer.zero_grad()
fake_img = self.G(x)
fake_img = [i.detach() for i in fake_img]
fake = self.D(x,fake_img)
label.fill_(fake_label)
fake_loss = self.D_loss(fake, label)
#fake_loss.backward()
dloss = real_loss + fake_loss
dloss.backward()
self.D_optimizer.step()
# Train G
self.G_optimizer.zero_grad()
gen_img = self.G(x)
gen = self.D(x,y)
label.fill_(real_label)
adv_loss = self.D_loss(gen, label)
gdl_loss = self.GDL_loss(y, gen_img)
gloss = adv_loss + gdl_loss
gloss.backward()
self.G_optimizer.step()
True_loss = real_loss
False_loss = fake_loss + adv_loss
criterion = nn.MSELoss()
MSE_loss = criterion(gen_img[-1],y[-1])
self.gen_img = gen_img
return True_loss, False_loss, MSE_loss
def predict(self,x,y):
with torch.no_grad():
real_label = 1.
fake_label = 0.
self.G.train()
self.D.train()
b_size = x.size()[0]
label = torch.full((b_size,), real_label, device=torch.device("cuda"))
# Predict D
real = self.D(x,y)
real_loss = self.D_loss(real,label)
fake_img = self.G(x)
fake_img = [i.detach() for i in fake_img]
fake = self.D(x,fake_img)
label.fill_(fake_label)
fake_loss = self.D_loss(fake, label)
# Predict G
gen_img = self.G(x)
gen = self.D(x,y)
label.fill_(real_label)
adv_loss = self.D_loss(gen, label)
gdl_loss = self.GDL_loss(y, gen_img)
gloss = adv_loss + gdl_loss
True_loss = real_loss
False_loss = fake_loss + adv_loss
criterion = nn.MSELoss()
MSE_loss = criterion(gen_img[-1],y[-1])
self.gen_img = gen_img
return True_loss, False_loss, MSE_loss
def img_save(self, img_path):
print(self.gen_img[-1][0].size())
save_image(self.gen_img[-1][0], img_path)
def model_save(self, model_path):
torch.save(self.D.state_dict(),model_path[0])
torch.save(self.G.state_dict(),model_path[1])
if __name__ == '__main__':
T = Train()
device = torch.device("cuda")
import time
for i in range(1000):
print(i)
print("start", [...])
start = time.time()
x = torch.autograd.Variable(torch.randn(4,12,105,80)).to(device)
y = torch.autograd.Variable(torch.randn(4,3,105,80)).to(device)
z = T.train(x,y)
save_img = True if i % 10 == 0 else False
w = T.predict(x,y)
if save_img:
T.img_save("oppai.png")
save_model = True if i % 10 == 0 else False
if save_model:
T.model_save(["D","G"])
print("end batch", [...])
print(time.time()-start, [...])
0
start [Ellipsis]
Traceback (most recent call last):
File "train_ops.py", line 184, in <module>
z = T.train(x,y)
File "train_ops.py", line 94, in train
dloss.backward()
File "/home/hikarukondo/.local/lib/python3.6/site-packages/torch/tensor.py", line 150, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/home/hikarukondo/.local/lib/python3.6/site-packages/torch/autograd/__init__.py", line 99, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [4]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).