Pytorch RuntimeError in gradient computation

Hi all
I am working with the DRIT model code. I copied it from GitHub and am trying to remove some unnecessary loss functions from the model part.
The modified model.py below:

import networks
import torch
import torch.nn as nn

class DRIT(nn.Module):
    """DRIT translation model with the latent-regression / KL losses removed.

    Sub-networks: two image discriminators (disA/disB), a content
    discriminator (disContent), a content encoder (enc_c), an attribute
    encoder (enc_a) and a two-branch generator (gen), each with its own
    Adam optimizer.
    """

    def __init__(self, opts):
        super(DRIT, self).__init__()  # the double underscores matter

        # hyper-parameters
        lr = 0.0001
        lr_dcontent = lr / 2.5  # content discriminator trains with a smaller lr
        self.nz = 8  # dimensionality of the attribute latent code

        # discriminators
        self.disA = networks.Dis(opts.input_dim_a, norm=opts.dis_norm, sn=opts.dis_spectral_norm)
        self.disB = networks.Dis(opts.input_dim_b, norm=opts.dis_norm, sn=opts.dis_spectral_norm)
        self.disContent = networks.Dis_content()

        # encoders
        self.enc_c = networks.E_content(opts.input_dim_a, opts.input_dim_b)  # content encoder
        self.enc_a = networks.E_attr(opts.input_dim_a, opts.input_dim_b, self.nz)  # attribute encoder

        # generator
        self.gen = networks.G(opts.input_dim_a, opts.input_dim_b, nz=self.nz)

        # optimizers (one per sub-network)
        self.disA_opt = torch.optim.Adam(self.disA.parameters(), lr=lr, betas=(0.5, 0.999), weight_decay=0.0001)
        self.disB_opt = torch.optim.Adam(self.disB.parameters(), lr=lr, betas=(0.5, 0.999), weight_decay=0.0001)
        self.disContent_opt = torch.optim.Adam(self.disContent.parameters(), lr=lr_dcontent, betas=(0.5, 0.999), weight_decay=0.0001)
        self.enc_c_opt = torch.optim.Adam(self.enc_c.parameters(), lr=lr, betas=(0.5, 0.999), weight_decay=0.0001)
        self.enc_a_opt = torch.optim.Adam(self.enc_a.parameters(), lr=lr, betas=(0.5, 0.999), weight_decay=0.0001)
        self.gen_opt = torch.optim.Adam(self.gen.parameters(), lr=lr, betas=(0.5, 0.999), weight_decay=0.0001)

        # reconstruction loss
        self.criterionL1 = torch.nn.L1Loss()

    def initialize(self):
        """Apply Gaussian weight initialization to every sub-network."""
        self.disA.apply(networks.gaussian_weights_init)
        self.disB.apply(networks.gaussian_weights_init)
        self.disContent.apply(networks.gaussian_weights_init)
        self.gen.apply(networks.gaussian_weights_init)
        self.enc_c.apply(networks.gaussian_weights_init)
        self.enc_a.apply(networks.gaussian_weights_init)

    def set_scheduler(self, opts, last_ep=0):
        """Create one LR scheduler per optimizer, resuming from last_ep."""
        self.disA_sch = networks.get_scheduler(self.disA_opt, opts, last_ep)
        self.disB_sch = networks.get_scheduler(self.disB_opt, opts, last_ep)
        self.disContent_sch = networks.get_scheduler(self.disContent_opt, opts, last_ep)
        self.enc_c_sch = networks.get_scheduler(self.enc_c_opt, opts, last_ep)
        self.enc_a_sch = networks.get_scheduler(self.enc_a_opt, opts, last_ep)
        self.gen_sch = networks.get_scheduler(self.gen_opt, opts, last_ep)

    def setgpu(self, gpu):
        """Move every sub-network to the given GPU id."""
        self.gpu = gpu
        self.disA.cuda(self.gpu)
        self.disB.cuda(self.gpu)
        self.disContent.cuda(self.gpu)
        self.enc_c.cuda(self.gpu)
        self.enc_a.cuda(self.gpu)
        self.gen.cuda(self.gpu)

    def get_z_random(self, batchSize, nz, random_type='gauss'):
        """Sample a random attribute code.

        NOTE(review): this method is called by test_forward / test_forward_transfer
        but was missing from the pasted file; restored from the upstream DRIT code.
        """
        z = torch.randn(batchSize, nz).cuda(self.gpu)
        return z

    def test_forward(self, image, a2b=True):
        """Translate a single image using a randomly sampled attribute code."""
        self.z_random = self.get_z_random(image.size(0), self.nz, 'gauss')
        if a2b:
            self.z_content = self.enc_c.forward_a(image)
            output = self.gen.forward_b(self.z_content, self.z_random)
        else:
            self.z_content = self.enc_c.forward_b(image)
            output = self.gen.forward_a(self.z_content, self.z_random)
        return output

    def test_forward_transfer(self, image_a, image_b, a2b=True):
        """Translate using the attribute code extracted from the other domain's image."""
        self.z_content_a, self.z_content_b = self.enc_c.forward(image_a, image_b)
        # NOTE(review): self.concat is never set in __init__ in this trimmed
        # version; default to the deterministic attribute encoder when absent.
        if getattr(self, 'concat', False):
            self.mu_a, self.logvar_a, self.mu_b, self.logvar_b = self.enc_a.forward(image_a, image_b)
            std_a = self.logvar_a.mul(0.5).exp_()
            eps = self.get_z_random(std_a.size(0), std_a.size(1), 'gauss')
            self.z_attr_a = eps.mul(std_a).add_(self.mu_a)
            std_b = self.logvar_b.mul(0.5).exp_()
            eps = self.get_z_random(std_b.size(0), std_b.size(1), 'gauss')
            self.z_attr_b = eps.mul(std_b).add_(self.mu_b)
        else:
            self.z_attr_a, self.z_attr_b = self.enc_a.forward(image_a, image_b)
        if a2b:
            output = self.gen.forward_b(self.z_content_a, self.z_attr_b)
        else:
            output = self.gen.forward_a(self.z_content_b, self.z_attr_a)
        return output

    def forward(self):
        """Encode, run the first cross translation, then the cycle reconstruction.

        Consumes self.input_A / self.input_B and stores every intermediate
        tensor on self for the update_* methods.
        """
        half_size = 1  # use a single image per domain
        real_A = self.input_A
        real_B = self.input_B
        self.real_A_encoded = real_A[0:half_size]
        self.real_B_encoded = real_B[0:half_size]

        # encoded content codes
        self.z_content_a, self.z_content_b = self.enc_c.forward(self.real_A_encoded, self.real_B_encoded)

        # encoded attribute codes
        self.z_attr_a, self.z_attr_b = self.enc_a.forward(self.real_A_encoded, self.real_B_encoded)

        # first translation: cross-domain and within-domain decoded in one batch
        input_content_forA = torch.cat((self.z_content_b, self.z_content_a), 0)
        input_content_forB = torch.cat((self.z_content_a, self.z_content_b), 0)
        input_attr_forA = torch.cat((self.z_attr_a, self.z_attr_a), 0)
        input_attr_forB = torch.cat((self.z_attr_b, self.z_attr_b), 0)
        output_fakeA = self.gen.forward_a(input_content_forA, input_attr_forA)
        output_fakeB = self.gen.forward_b(input_content_forB, input_attr_forB)
        self.fake_A_encoded, self.fake_AA_encoded = torch.split(output_fakeA, self.z_content_a.size(0), dim=0)
        self.fake_B_encoded, self.fake_BB_encoded = torch.split(output_fakeB, self.z_content_a.size(0), dim=0)

        # second cross translation: re-encode the fakes ...
        self.z_content_recon_b, self.z_content_recon_a = self.enc_c.forward(self.fake_A_encoded, self.fake_B_encoded)
        self.z_attr_recon_a, self.z_attr_recon_b = self.enc_a.forward(self.fake_A_encoded, self.fake_B_encoded)
        # ... and decode them back to their original domains
        self.fake_A_recon = self.gen.forward_a(self.z_content_recon_a, self.z_attr_recon_a)
        self.fake_B_recon = self.gen.forward_b(self.z_content_recon_b, self.z_attr_recon_b)

    def forward_content(self):
        """Lightweight forward: only the content codes (for update_D_content)."""
        half_size = 1
        self.real_A_encoded = self.input_A[0:half_size]
        self.real_B_encoded = self.input_B[0:half_size]
        self.z_content_a, self.z_content_b = self.enc_c.forward(self.real_A_encoded, self.real_B_encoded)

    def update_D_content(self, image_a, image_b):
        """One optimization step for the content discriminator only."""
        self.input_A = image_a
        self.input_B = image_b
        self.forward_content()
        self.disContent_opt.zero_grad()
        loss_D_Content = self.backward_contentD(self.z_content_a, self.z_content_b)
        self.disContent_loss = loss_D_Content.item()
        nn.utils.clip_grad_norm_(self.disContent.parameters(), 5)
        self.disContent_opt.step()

    def update_D(self, image_a, image_b):
        """One discriminator step for disA, disB and disContent.

        Also runs self.forward(); its intermediate tensors are reused by
        update_EG immediately afterwards.
        """
        self.input_A = image_a
        self.input_B = image_b
        self.forward()

        # update disA
        self.disA_opt.zero_grad()
        loss_D1_A = self.backward_D(self.disA, self.real_A_encoded, self.fake_A_encoded)
        self.disA_loss = loss_D1_A.item()
        self.disA_opt.step()

        # update disB
        self.disB_opt.zero_grad()
        loss_D1_B = self.backward_D(self.disB, self.real_B_encoded, self.fake_B_encoded)
        self.disB_loss = loss_D1_B.item()
        self.disB_opt.step()

        # update disContent a second time (it is also updated in update_D_content)
        self.disContent_opt.zero_grad()
        loss_D_Content = self.backward_contentD(self.z_content_a, self.z_content_b)
        self.disContent_loss = loss_D_Content.item()
        nn.utils.clip_grad_norm_(self.disContent.parameters(), 5)
        self.disContent_opt.step()

    def backward_D(self, netD, real, fake):
        """Standard GAN discriminator BCE loss (fake -> 0, real -> 1).

        fake is detached so no gradient reaches the generator here.
        Returns the summed loss after calling backward().
        """
        pred_fake = netD.forward(fake.detach())
        pred_real = netD.forward(real)
        loss_D = 0
        for it, (out_a, out_b) in enumerate(zip(pred_fake, pred_real)):
            out_fake = torch.sigmoid(out_a)
            out_real = torch.sigmoid(out_b)
            all0 = torch.zeros_like(out_fake).cuda(self.gpu)
            all1 = torch.ones_like(out_real).cuda(self.gpu)
            ad_fake_loss = nn.functional.binary_cross_entropy(out_fake, all0)
            ad_true_loss = nn.functional.binary_cross_entropy(out_real, all1)
            loss_D = loss_D + (ad_true_loss + ad_fake_loss)
        loss_D.backward()
        return loss_D

    def backward_contentD(self, imageA, imageB):
        """Content discriminator loss: domain-A content labeled 0, domain-B
        content labeled 1. Both inputs are detached so only disContent
        receives gradients.

        Accumulates over discriminator outputs like backward_D (Dis_content
        returns a single-element list, so behavior is unchanged).
        """
        pred_fake = self.disContent.forward(imageA.detach())
        pred_real = self.disContent.forward(imageB.detach())
        loss_D = 0
        for it, (out_a, out_b) in enumerate(zip(pred_fake, pred_real)):
            out_fake = torch.sigmoid(out_a)
            out_real = torch.sigmoid(out_b)
            all1 = torch.ones((out_real.size(0))).cuda(self.gpu)
            all0 = torch.zeros((out_fake.size(0))).cuda(self.gpu)
            ad_true_loss = nn.functional.binary_cross_entropy(out_real, all1)
            ad_fake_loss = nn.functional.binary_cross_entropy(out_fake, all0)
            loss_D = loss_D + (ad_true_loss + ad_fake_loss)
        loss_D.backward()
        return loss_D

    def update_EG(self):
        """Single combined generator/encoder update (the upstream DRIT splits
        this into backward_EG and backward_G_alone)."""
        self.enc_c_opt.zero_grad()
        self.enc_a_opt.zero_grad()
        self.gen_opt.zero_grad()

        # content adversarial loss for the generator side
        loss_G_GAN_Acontent = self.backward_G_GAN_content(self.z_content_a)
        loss_G_GAN_Bcontent = self.backward_G_GAN_content(self.z_content_b)

        # image adversarial losses
        loss_G_GAN_A = self.backward_G_GAN(self.fake_A_encoded, self.disA)
        loss_G_GAN_B = self.backward_G_GAN(self.fake_B_encoded, self.disB)

        # cross-cycle and self-reconstruction L1 losses
        loss_G_L1_A = self.criterionL1(self.fake_A_recon, self.real_A_encoded) * 10
        loss_G_L1_B = self.criterionL1(self.fake_B_recon, self.real_B_encoded) * 10
        loss_G_L1_AA = self.criterionL1(self.fake_AA_encoded, self.real_A_encoded) * 10
        loss_G_L1_BB = self.criterionL1(self.fake_BB_encoded, self.real_B_encoded) * 10

        loss_G = (loss_G_GAN_A + loss_G_GAN_B
                  + loss_G_GAN_Acontent + loss_G_GAN_Bcontent
                  + loss_G_L1_AA + loss_G_L1_BB
                  + loss_G_L1_A + loss_G_L1_B)

        # NOTE(review): this backward reuses the graph built in forward()
        # during update_D(). Any in-place op inside that graph (e.g. an
        # `out += residual` in the decoder residual blocks) mutates a saved
        # activation and raises the "modified by an inplace operation"
        # RuntimeError here.
        loss_G.backward(retain_graph=True)

        self.enc_c_opt.step()
        self.enc_a_opt.step()
        self.gen_opt.step()

        # bookkeeping for logging
        self.gan_loss_a = loss_G_GAN_A.item()
        self.gan_loss_b = loss_G_GAN_B.item()
        self.gan_loss_acontent = loss_G_GAN_Acontent.item()
        self.gan_loss_bcontent = loss_G_GAN_Bcontent.item()
        self.l1_recon_A_loss = loss_G_L1_A.item()
        self.l1_recon_B_loss = loss_G_L1_B.item()
        self.l1_recon_AA_loss = loss_G_L1_AA.item()
        self.l1_recon_BB_loss = loss_G_L1_BB.item()
        self.G_loss = loss_G.item()

    def backward_G_GAN_content(self, data):
        """Generator-side content adversarial loss: push disContent's
        prediction toward 0.5 (domain-indistinguishable content)."""
        outs = self.disContent.forward(data)
        for out in outs:
            outputs_fake = torch.sigmoid(out)
            all_half = 0.5 * torch.ones((outputs_fake.size(0))).cuda(self.gpu)
            ad_loss = nn.functional.binary_cross_entropy(outputs_fake, all_half)
        return ad_loss

    def backward_G_GAN(self, fake, netD=None):
        """Generator-side image adversarial loss: fool netD into predicting 1."""
        outs_fake = netD.forward(fake)
        loss_G = 0
        for out_a in outs_fake:
            outputs_fake = torch.sigmoid(out_a)
            all_ones = torch.ones_like(outputs_fake).cuda(self.gpu)
            loss_G = loss_G + nn.functional.binary_cross_entropy(outputs_fake, all_ones)
        return loss_G

    def update_lr(self):
        """Step every learning-rate scheduler."""
        self.disA_sch.step()
        self.disB_sch.step()
        self.disContent_sch.step()
        self.enc_c_sch.step()
        self.enc_a_sch.step()
        self.gen_sch.step()

    def save(self, filename, ep, total_it):
        """Checkpoint all sub-networks and optimizers.

        Fixed: 'disB_opt' was missing from the state dict, so the disB
        optimizer could not be restored on resume.
        """
        state = {
            'disA': self.disA.state_dict(),
            'disB': self.disB.state_dict(),
            'disContent': self.disContent.state_dict(),
            'enc_c': self.enc_c.state_dict(),
            'enc_a': self.enc_a.state_dict(),
            'gen': self.gen.state_dict(),
            'disA_opt': self.disA_opt.state_dict(),
            'disB_opt': self.disB_opt.state_dict(),
            'disContent_opt': self.disContent_opt.state_dict(),
            'enc_c_opt': self.enc_c_opt.state_dict(),
            'enc_a_opt': self.enc_a_opt.state_dict(),
            'gen_opt': self.gen_opt.state_dict(),
            'ep': ep,
            'total_it': total_it,
        }
        torch.save(state, filename)
        return

    def normalize_image(self, x):
        """Keep only the first 3 channels for visualization.

        NOTE(review): referenced by assemble_outputs but missing from the
        pasted file; restored from the upstream DRIT code.
        """
        return x[:, 0:3, :, :]

    def assemble_outputs(self):
        """Build a 2x4 image grid (real / fake / self-recon / cycle-recon).

        NOTE(review): images_a and images_a1 skip normalize_image while every
        other tile uses it — looks unintentional; confirm against upstream.
        """
        images_a = self.real_A_encoded.detach()
        images_a1 = self.fake_A_encoded.detach()
        images_a2 = self.normalize_image(self.fake_AA_encoded).detach()
        images_a3 = self.normalize_image(self.fake_A_recon).detach()

        images_b = self.real_B_encoded.detach()
        images_b1 = self.normalize_image(self.fake_B_encoded).detach()
        images_b2 = self.normalize_image(self.fake_BB_encoded).detach()
        images_b3 = self.normalize_image(self.fake_B_recon).detach()

        row1 = torch.cat((images_a[0:1, ::], images_b1[0:1, ::], images_a2[0:1, ::], images_a3[0:1, ::]), 3)
        row2 = torch.cat((images_b[0:1, ::], images_a1[0:1, ::], images_b2[0:1, ::], images_b3[0:1, ::]), 3)
        return torch.cat((row1, row2), 2)

and the train part is:

train

# Training driver: restored valid indentation and plain ASCII quotes
# (the pasted version had lost both).
print('\n--- train ---')
for ep in range(ep0, opts.n_ep):
    gc.collect()
    torch.cuda.empty_cache()
    for it, (images_a, images_b) in enumerate(tqdm(train_loader, desc='Epoch: {}/{}'.format(ep, opts.n_ep))):
        # input data (detached: the loader tensors carry no graph)
        images_a = images_a.cuda(opts.gpu).detach()
        images_b = images_b.cuda(opts.gpu).detach()

        # update model: content-discriminator-only steps interleaved with
        # full D + EG steps every opts.d_iter iterations
        if (it + 1) % opts.d_iter != 0 and it < len(train_loader) - 2:
            model.update_D_content(images_a, images_b)
            continue
        else:
            model.update_D(images_a, images_b)
            model.update_EG()

        # print('total_it: %d (ep %d, it %d), lr %08f' % (total_it, ep, it, model.gen_opt.param_groups[0]['lr']))
        total_it += 1

    model.update_lr()

    # save result image
    saver.write_img(ep, model)

    # save network weights every 50 epochs
    if (ep % 50 == 0):
        saver.write_model(ep, total_it, model)
        print('total_it: %d (ep %d, it %d), lr %08f' % (total_it, ep, it, model.gen_opt.param_groups[0]['lr']))

after running the train part the runtime error below is coming:

RuntimeError Traceback (most recent call last)
Input In [6], in <cell line: 4>()
16 else:
17 model.update_D(images_a, images_b)
—> 18 model.update_EG()
20 # print(‘total_it: %d (ep %d, it %d), lr %08f’ % (total_it, ep, it, model.gen_opt.param_groups[0][‘lr’]))
21 total_it += 1

File ~\Project_All\DRIT-master\model.py:237, in DRIT.update_EG(self)
233 loss_G = loss_G_GAN_A + loss_G_GAN_B + loss_G_GAN_Acontent + loss_G_GAN_Bcontent + loss_G_L1_AA + loss_G_L1_BB + loss_G_L1_A + loss_G_L1_B
236 #do backward()
→ 237 loss_G.backward(retain_graph=True)
238 #self.backward_EG()
239
240
241 # do optimisation
242 self.enc_c_opt.step()

File ~\anaconda3\envs\Deeplearning\lib\site-packages\torch_tensor.py:363, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
354 if has_torch_function_unary(self):
355 return handle_torch_function(
356 Tensor.backward,
357 (self,),
(…)
361 create_graph=create_graph,
362 inputs=inputs)
→ 363 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)

File ~\anaconda3\envs\Deeplearning\lib\site-packages\torch\autograd_init_.py:173, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
168 retain_graph = create_graph
170 # The reason we repeat same the comment below is that
171 # some Python versions print out the first line of a multi-line function
172 # calls in the traceback and some print out the last line
→ 173 Variable.execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
174 tensors, grad_tensors
, retain_graph, create_graph, inputs,
175 allow_unreachable=True, accumulate_grad=True)

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 256, 64, 64]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

I deleted the in-place operations in the model part, and I also set `inplace=False` in the ReLU operations, but I still have this error!

How can I solve this issue?

Hi @Sarmad_GTU,

Have you run your code within a torch.autograd.set_detect_anomaly context manager? (And shared the entire stack trace?)

Also, make sure to correctly initialize your nn.Module objects.

Should be,

class DRIT(nn.Module):
  def __init__(self, opts):
    super(DRIT, self).__init__()

(the underscores matter)

Thank you for reply
The nn.Module is initialized correctly, but when I posted the code some underscores were deleted.
I ran the code below to enable anomaly detection:

train

with torch.autograd.set_detect_anomaly(True):
print(‘\n— train —’)
for ep in range(ep0, opts.n_ep):
gc.collect()
torch.cuda.empty_cache()
for it,(images_a, images_b) in enumerate(tqdm(train_loader, desc=‘Epoch: {}/{}’.format(ep, opts.n_ep))):
# input data
images_a = images_a.cuda(opts.gpu).detach()
images_b = images_b.cuda(opts.gpu).detach()

        # update model
        if (it + 1) % opts.d_iter != 0 and it < len(train_loader) - 2:
            model.update_D_content(images_a, images_b)
            continue
        else:
            model.update_D(images_a, images_b)
            model.update_EG()

print(‘total_it: %d (ep %d, it %d), lr %08f’ % (total_it, ep, it, model.gen_opt.param_groups[0][‘lr’]))

        total_it += 1


       

        model.update_lr()

# save result image
    saver.write_img(ep, model)

# Save network weights
    if (ep % 50 == 0):
        saver.write_model(ep, total_it, model)
        print('total_it: %d (ep %d, it %d), lr %08f' % (total_it, ep, it, model.gen_opt.param_groups[0]['lr']))

and I got:

train

with torch.autograd.set_detect_anomaly(True):
print(‘\n— train —’)
for ep in range(ep0, opts.n_ep):
gc.collect()
torch.cuda.empty_cache()
for it,(images_a, images_b) in enumerate(tqdm(train_loader, desc=‘Epoch: {}/{}’.format(ep, opts.n_ep))):
# input data
images_a = images_a.cuda(opts.gpu).detach()
images_b = images_b.cuda(opts.gpu).detach()

        # update model
        if (it + 1) % opts.d_iter != 0 and it < len(train_loader) - 2:
            model.update_D_content(images_a, images_b)
            continue
        else:
            model.update_D(images_a, images_b)
            model.update_EG()

print(‘total_it: %d (ep %d, it %d), lr %08f’ % (total_it, ep, it, model.gen_opt.param_groups[0][‘lr’]))

        total_it += 1


       

        model.update_lr()

# save result image
    saver.write_img(ep, model)

# Save network weights
    if (ep % 50 == 0):
        saver.write_model(ep, total_it, model)
        print('total_it: %d (ep %d, it %d), lr %08f' % (total_it, ep, it, model.gen_opt.param_groups[0]['lr']))

Where is the stacktrace?

Epoch: 0/50: 0%| | 1/1573 [00:08<3:44:09, 8.56s/it]
Epoch: 0/50: 0%| | 2/1573 [00:08<2:37:47, 6.03s/it]C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\autograd_init_.py:173: UserWarning: Error detected in ReluBackward0. Traceback of forward call that caused the error:
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\runpy.py”, line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\runpy.py”, line 87, in _run_code
exec(code, run_globals)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel_launcher.py”, line 16, in
app.launch_new_instance()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\traitlets\config\application.py”, line 846, in launch_instance
app.start()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelapp.py”, line 677, in start
self.io_loop.start()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\tornado\platform\asyncio.py”, line 199, in start
self.asyncio_loop.run_forever()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\base_events.py”, line 570, in run_forever
self._run_once()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\base_events.py”, line 1859, in _run_once
handle._run()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\events.py”, line 81, in _run
self._context.run(self._callback, *self._args)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 471, in dispatch_queue
await self.process_one()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 460, in process_one
await dispatch(*args)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 367, in dispatch_shell
await result
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 662, in execute_request
reply_content = await reply_content
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\ipkernel.py”, line 360, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\zmqshell.py”, line 532, in run_cell
return super().run_cell(*args, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 2863, in run_cell
result = self._run_cell(
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 2909, in _run_cell
return runner(coro)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\async_helpers.py”, line 129, in pseudo_sync_runner
coro.send(None)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3106, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3309, in run_ast_nodes
if await self.run_code(code, result, async
=asy):
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3369, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File “C:\Users\gtu\AppData\Local\Temp\ipykernel_8216\2562760679.py”, line 18, in <cell line: 2>
model.update_EG()
File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 216, in update_EG
File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 135, in forward
self.fake_B_recon = self.gen.forward_b(self.z_content_recon_b, self.z_attr_recon_b)
File “C:\Users\gtu\Project_All\DRIT-master\networks.py”, line 233, in forward_b
out4 = self.decB4(out3, z4)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\Project_All\DRIT-master\networks.py”, line 424, in forward
out = self.blk2(torch.cat([o3, z_expand], dim=1))
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\container.py”, line 141, in forward
input = module(input)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\activation.py”, line 98, in forward
return F.relu(input, inplace=self.inplace)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\functional.py”, line 1442, in relu
result = torch.relu(input)
(Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass

Can you check how blk2 is defined within C:\Users\gtu\Project_All\DRIT-master\networks.py at line 424? You seem to have an in-place operation there.

There is no in-place operation there.
The networks.py is below:

import torch
import torch.nn as nn
from torch.autograd import Variable
import functools
from torch.optim import lr_scheduler
import torch.nn.functional as F

####################################################################
#------------------------- Discriminators --------------------------
####################################################################
class Dis_content(nn.Module):
    """Discriminator on the shared content space (256-channel feature maps).

    Returns its flattened logits wrapped in a one-element list, matching the
    multi-output interface of the other discriminators.
    """

    def __init__(self):
        super(Dis_content, self).__init__()  # restored: markdown stripped the underscores
        model = []
        model += [LeakyReLUConv2d(256, 256, kernel_size=7, stride=2, padding=1, norm='Instance')]
        model += [LeakyReLUConv2d(256, 256, kernel_size=7, stride=2, padding=1, norm='Instance')]
        model += [LeakyReLUConv2d(256, 256, kernel_size=7, stride=2, padding=1, norm='Instance')]
        model += [LeakyReLUConv2d(256, 256, kernel_size=4, stride=1, padding=0)]
        model += [nn.Conv2d(256, 1, kernel_size=1, stride=1, padding=0)]
        self.model = nn.Sequential(*model)

    def forward(self, x):
        out = self.model(x)
        out = out.view(-1)
        outs = []
        outs.append(out)
        return outs

class MultiScaleDis(nn.Module):
    """Multi-scale patch discriminator: the same conv stack applied to
    progressively average-pool-downsampled copies of the input."""

    def __init__(self, input_dim, n_scale=3, n_layer=4, norm='None', sn=False):
        super(MultiScaleDis, self).__init__()  # restored double underscores
        ch = 64
        self.downsample = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False)
        self.Diss = nn.ModuleList()
        for _ in range(n_scale):
            self.Diss.append(self._make_net(ch, input_dim, n_layer, norm, sn))

    def _make_net(self, ch, input_dim, n_layer, norm, sn):
        """Build one per-scale discriminator stack ending in a 1x1 conv logit map."""
        model = []
        model += [LeakyReLUConv2d(input_dim, ch, 4, 2, 1, norm, sn)]
        tch = ch
        for _ in range(1, n_layer):
            model += [LeakyReLUConv2d(tch, tch * 2, 4, 2, 1, norm, sn)]
            tch *= 2
        if sn:
            model += [spectral_norm(nn.Conv2d(tch, 1, 1, 1, 0))]
        else:
            model += [nn.Conv2d(tch, 1, 1, 1, 0)]
        return nn.Sequential(*model)

    def forward(self, x):
        outs = []
        for Dis in self.Diss:
            outs.append(Dis(x))
            x = self.downsample(x)  # halve resolution for the next scale
        return outs

class Dis(nn.Module):
    """Single-scale image discriminator: 6 strided LeakyReLU convs plus a
    1x1 logit conv; output is returned flattened in a one-element list."""

    def __init__(self, input_dim, norm='None', sn=False):
        super(Dis, self).__init__()  # restored double underscores
        ch = 64
        n_layer = 6
        self.model = self._make_net(ch, input_dim, n_layer, norm, sn)

    def _make_net(self, ch, input_dim, n_layer, norm, sn):
        model = []
        model += [LeakyReLUConv2d(input_dim, ch, kernel_size=3, stride=2, padding=1, norm=norm, sn=sn)]  # 16
        tch = ch
        for i in range(1, n_layer - 1):
            model += [LeakyReLUConv2d(tch, tch * 2, kernel_size=3, stride=2, padding=1, norm=norm, sn=sn)]  # 8
            tch *= 2
        model += [LeakyReLUConv2d(tch, tch * 2, kernel_size=3, stride=2, padding=1, norm='None', sn=sn)]  # 2
        tch *= 2
        if sn:
            model += [spectral_norm(nn.Conv2d(tch, 1, kernel_size=1, stride=1, padding=0))]  # 1
        else:
            model += [nn.Conv2d(tch, 1, kernel_size=1, stride=1, padding=0)]  # 1
        return nn.Sequential(*model)

    def cuda(self, gpu):
        # overrides nn.Module.cuda; all parameters live in self.model
        self.model.cuda(gpu)

    def forward(self, x_A):
        out_A = self.model(x_A)
        out_A = out_A.view(-1)
        outs_A = []
        outs_A.append(out_A)
        return outs_A

####################################################################
#---------------------------- Encoders -----------------------------
####################################################################
class E_content(nn.Module):
    """Content encoder: one conv/residual stack per domain followed by a
    shared residual block with Gaussian noise."""

    def __init__(self, input_dim_a, input_dim_b):
        super(E_content, self).__init__()  # restored double underscores
        encA_c = []
        tch = 64
        encA_c += [LeakyReLUConv2d(input_dim_a, tch, kernel_size=7, stride=1, padding=3)]
        for i in range(1, 3):
            encA_c += [ReLUINSConv2d(tch, tch * 2, kernel_size=3, stride=2, padding=1)]
            tch *= 2
        for i in range(0, 3):
            encA_c += [INSResBlock(tch, tch)]

        encB_c = []
        tch = 64
        encB_c += [LeakyReLUConv2d(input_dim_b, tch, kernel_size=7, stride=1, padding=3)]
        for i in range(1, 3):
            encB_c += [ReLUINSConv2d(tch, tch * 2, kernel_size=3, stride=2, padding=1)]
            tch *= 2
        for i in range(0, 3):
            encB_c += [INSResBlock(tch, tch)]

        enc_share = []
        for i in range(0, 1):
            enc_share += [INSResBlock(tch, tch)]
            enc_share += [GaussianNoiseLayer()]
        # moved out of the loop: the Sequential only needs to be built once
        # (the loop runs a single iteration, so behavior is unchanged)
        self.conv_share = nn.Sequential(*enc_share)

        self.convA = nn.Sequential(*encA_c)
        self.convB = nn.Sequential(*encB_c)

    def forward(self, xa, xb):
        outputA = self.convA(xa)
        outputB = self.convB(xb)
        outputA = self.conv_share(outputA)
        outputB = self.conv_share(outputB)
        return outputA, outputB

    def forward_a(self, xa):
        outputA = self.convA(xa)
        outputA = self.conv_share(outputA)
        return outputA

    def forward_b(self, xb):
        outputB = self.convB(xb)
        outputB = self.conv_share(outputB)
        return outputB

class E_attr(nn.Module):
    """Attribute encoder: per-domain conv stacks that pool each image to an
    output_nc-dimensional attribute vector.

    Restored the markdown-garbled channel widths (dim*2 / dim*4) and the
    lost indentation; the user's inplace=False on the ReLUs is preserved.
    """

    def __init__(self, input_dim_a, input_dim_b, output_nc=8):
        super(E_attr, self).__init__()  # restored double underscores
        dim = 64
        self.model_a = nn.Sequential(
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_dim_a, dim, 7, 1),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim * 2, 4, 2),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim * 2, dim * 4, 4, 2),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim * 4, dim * 4, 4, 2),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim * 4, dim * 4, 4, 2),
            nn.ReLU(inplace=False),
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(dim * 4, output_nc, 1, 1, 0))
        self.model_b = nn.Sequential(
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_dim_b, dim, 7, 1),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim * 2, 4, 2),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim * 2, dim * 4, 4, 2),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim * 4, dim * 4, 4, 2),
            nn.ReLU(inplace=False),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim * 4, dim * 4, 4, 2),
            nn.ReLU(inplace=False),
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(dim * 4, output_nc, 1, 1, 0))
        return

    def forward(self, xa, xb):
        xa = self.model_a(xa)
        xb = self.model_b(xb)
        output_A = xa.view(xa.size(0), -1)
        output_B = xb.view(xb.size(0), -1)
        return output_A, output_B

    def forward_a(self, xa):
        xa = self.model_a(xa)
        output_A = xa.view(xa.size(0), -1)
        return output_A

    def forward_b(self, xb):
        xb = self.model_b(xb)
        output_B = xb.view(xb.size(0), -1)
        return output_B

class E_attr_concat(nn.Module):
    """Variational attribute encoder: per-domain conv stacks with separate
    mean (fc) and log-variance (fcVar) heads.

    NOTE(review): nl_layer defaults to None but is called unconditionally in
    __init__; callers must supply a nonlinearity factory — confirm upstream.
    """

    def __init__(self, input_dim_a, input_dim_b, output_nc=8, norm_layer=None, nl_layer=None):
        super(E_attr_concat, self).__init__()  # restored double underscores

        ndf = 64
        n_blocks = 4
        max_ndf = 4

        conv_layers_A = [nn.ReflectionPad2d(1)]
        conv_layers_A += [nn.Conv2d(input_dim_a, ndf, kernel_size=4, stride=2, padding=0, bias=True)]
        for n in range(1, n_blocks):
            input_ndf = ndf * min(max_ndf, n)  # 2**(n-1)
            output_ndf = ndf * min(max_ndf, n + 1)  # 2**n
            conv_layers_A += [BasicBlock(input_ndf, output_ndf, norm_layer, nl_layer)]
        conv_layers_A += [nl_layer(), nn.AdaptiveAvgPool2d(1)]  # AvgPool2d(13)
        self.fc_A = nn.Sequential(*[nn.Linear(output_ndf, output_nc)])
        self.fcVar_A = nn.Sequential(*[nn.Linear(output_ndf, output_nc)])
        self.conv_A = nn.Sequential(*conv_layers_A)

        conv_layers_B = [nn.ReflectionPad2d(1)]
        conv_layers_B += [nn.Conv2d(input_dim_b, ndf, kernel_size=4, stride=2, padding=0, bias=True)]
        for n in range(1, n_blocks):
            input_ndf = ndf * min(max_ndf, n)  # 2**(n-1)
            output_ndf = ndf * min(max_ndf, n + 1)  # 2**n
            conv_layers_B += [BasicBlock(input_ndf, output_ndf, norm_layer, nl_layer)]
        conv_layers_B += [nl_layer(), nn.AdaptiveAvgPool2d(1)]  # AvgPool2d(13)
        self.fc_B = nn.Sequential(*[nn.Linear(output_ndf, output_nc)])
        self.fcVar_B = nn.Sequential(*[nn.Linear(output_ndf, output_nc)])
        self.conv_B = nn.Sequential(*conv_layers_B)

    def forward(self, xa, xb):
        x_conv_A = self.conv_A(xa)
        conv_flat_A = x_conv_A.view(xa.size(0), -1)
        output_A = self.fc_A(conv_flat_A)
        outputVar_A = self.fcVar_A(conv_flat_A)
        x_conv_B = self.conv_B(xb)
        conv_flat_B = x_conv_B.view(xb.size(0), -1)
        output_B = self.fc_B(conv_flat_B)
        outputVar_B = self.fcVar_B(conv_flat_B)
        return output_A, outputVar_A, output_B, outputVar_B

    def forward_a(self, xa):
        x_conv_A = self.conv_A(xa)
        conv_flat_A = x_conv_A.view(xa.size(0), -1)
        output_A = self.fc_A(conv_flat_A)
        outputVar_A = self.fcVar_A(conv_flat_A)
        return output_A, outputVar_A

    def forward_b(self, xb):
        x_conv_B = self.conv_B(xb)
        conv_flat_B = x_conv_B.view(xb.size(0), -1)
        output_B = self.fc_B(conv_flat_B)
        outputVar_B = self.fcVar_B(conv_flat_B)
        return output_B, outputVar_B

####################################################################
#--------------------------- Generators ----------------------------
####################################################################
class G(nn.Module):
    """Generator: per domain, four residual blocks that inject the attribute
    code (via an MLP split into four chunks) followed by two upsampling
    layers and a Tanh output."""

    def __init__(self, output_dim_a, output_dim_b, nz):
        super(G, self).__init__()  # restored double underscores
        self.nz = nz
        ini_tch = 256
        tch_add = ini_tch
        tch = ini_tch
        self.tch_add = tch_add
        self.decA1 = MisINSResBlock(tch, tch_add)
        self.decA2 = MisINSResBlock(tch, tch_add)
        self.decA3 = MisINSResBlock(tch, tch_add)
        self.decA4 = MisINSResBlock(tch, tch_add)

        decA5 = []
        decA5 += [ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)]
        tch = tch // 2
        decA5 += [ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)]
        tch = tch // 2
        decA5 += [nn.ConvTranspose2d(tch, output_dim_a, kernel_size=1, stride=1, padding=0)]
        decA5 += [nn.Tanh()]
        self.decA5 = nn.Sequential(*decA5)

        tch = ini_tch
        self.decB1 = MisINSResBlock(tch, tch_add)
        self.decB2 = MisINSResBlock(tch, tch_add)
        self.decB3 = MisINSResBlock(tch, tch_add)
        self.decB4 = MisINSResBlock(tch, tch_add)
        decB5 = []
        decB5 += [ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)]
        tch = tch // 2
        decB5 += [ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)]
        tch = tch // 2
        decB5 += [nn.ConvTranspose2d(tch, output_dim_b, kernel_size=1, stride=1, padding=0)]
        decB5 += [nn.Tanh()]
        self.decB5 = nn.Sequential(*decB5)

        # NOTE(review): the hard-coded 8 assumes nz == 8 — confirm before
        # changing nz.
        self.mlpA = nn.Sequential(
            nn.Linear(8, 256),
            nn.ReLU(inplace=False),
            nn.Linear(256, 256),
            nn.ReLU(inplace=False),
            nn.Linear(256, tch_add * 4))
        self.mlpB = nn.Sequential(
            nn.Linear(8, 256),
            nn.ReLU(inplace=False),
            nn.Linear(256, 256),
            nn.ReLU(inplace=False),
            nn.Linear(256, tch_add * 4))
        return

    def forward_a(self, x, z):
        z = self.mlpA(z)
        z1, z2, z3, z4 = torch.split(z, self.tch_add, dim=1)
        z1, z2, z3, z4 = z1.contiguous(), z2.contiguous(), z3.contiguous(), z4.contiguous()
        out1 = self.decA1(x, z1)
        out2 = self.decA2(out1, z2)
        out3 = self.decA3(out2, z3)
        out4 = self.decA4(out3, z4)
        out = self.decA5(out4)
        return out

    def forward_b(self, x, z):
        # NOTE(review): the anomaly-mode trace points inside decB4
        # (MisINSResBlock). If that block ends with `out += residual`,
        # replace it with `out = out + residual`: the in-place add mutates
        # the output of a ReLU saved for backward and causes the reported
        # "modified by an inplace operation" RuntimeError.
        z = self.mlpB(z)
        z1, z2, z3, z4 = torch.split(z, self.tch_add, dim=1)
        z1, z2, z3, z4 = z1.contiguous(), z2.contiguous(), z3.contiguous(), z4.contiguous()
        out1 = self.decB1(x, z1)
        out2 = self.decB2(out1, z2)
        out3 = self.decB3(out2, z3)
        out4 = self.decB4(out3, z4)
        out = self.decB5(out4)
        return out

class G_concat(nn.Module):
    """DRIT generator variant that injects the attribute code by concatenation.

    At every decoding stage the attribute vector `z` is spatially broadcast
    and concatenated to the feature map along the channel axis, so each stage
    consumes `nz` extra input channels.

    Fixes vs. the pasted original: `__init__` was misspelled `init`, and the
    `nn.Sequential` constructions passed raw lists (``nn.Sequential(decA1)`` /
    ``nn.Sequential([decA2])``), which raises a TypeError — Sequential takes
    modules as positional arguments, so the lists must be unpacked.
    """

    def __init__(self, output_dim_a, output_dim_b, nz):
        super(G_concat, self).__init__()
        self.nz = nz
        tch = 256
        # Shared residual block applied before the branches split.
        dec_share = []
        dec_share += [INSResBlock(tch, tch)]
        self.dec_share = nn.Sequential(*dec_share)

        # Domain-A branch.
        tch = 256 + self.nz
        decA1 = []
        for i in range(0, 3):
            decA1 += [INSResBlock(tch, tch)]
        tch = tch + self.nz
        decA2 = ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)
        tch = tch // 2
        tch = tch + self.nz
        decA3 = ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)
        tch = tch // 2
        tch = tch + self.nz
        decA4 = [nn.ConvTranspose2d(tch, output_dim_a, kernel_size=1, stride=1, padding=0)] + [nn.Tanh()]
        self.decA1 = nn.Sequential(*decA1)
        self.decA2 = nn.Sequential(decA2)
        self.decA3 = nn.Sequential(decA3)
        self.decA4 = nn.Sequential(*decA4)

        # Domain-B branch (same architecture, separate weights).
        tch = 256 + self.nz
        decB1 = []
        for i in range(0, 3):
            decB1 += [INSResBlock(tch, tch)]
        tch = tch + self.nz
        decB2 = ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)
        tch = tch // 2
        tch = tch + self.nz
        decB3 = ReLUINSConvTranspose2d(tch, tch // 2, kernel_size=3, stride=2, padding=1, output_padding=1)
        tch = tch // 2
        tch = tch + self.nz
        decB4 = [nn.ConvTranspose2d(tch, output_dim_b, kernel_size=1, stride=1, padding=0)] + [nn.Tanh()]
        self.decB1 = nn.Sequential(*decB1)
        self.decB2 = nn.Sequential(decB2)
        self.decB3 = nn.Sequential(decB3)
        self.decB4 = nn.Sequential(*decB4)

    def forward_a(self, x, z):
        """Decode content map `x` with attribute code `z` into a domain-A image."""
        out0 = self.dec_share(x)
        # Broadcast z over the spatial grid and re-concatenate at every stage.
        z_img = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), x.size(2), x.size(3))
        x_and_z = torch.cat([out0, z_img], 1)
        out1 = self.decA1(x_and_z)
        z_img2 = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), out1.size(2), out1.size(3))
        x_and_z2 = torch.cat([out1, z_img2], 1)
        out2 = self.decA2(x_and_z2)
        z_img3 = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), out2.size(2), out2.size(3))
        x_and_z3 = torch.cat([out2, z_img3], 1)
        out3 = self.decA3(x_and_z3)
        z_img4 = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), out3.size(2), out3.size(3))
        x_and_z4 = torch.cat([out3, z_img4], 1)
        out4 = self.decA4(x_and_z4)
        return out4

    def forward_b(self, x, z):
        """Decode content map `x` with attribute code `z` into a domain-B image."""
        out0 = self.dec_share(x)
        z_img = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), x.size(2), x.size(3))
        x_and_z = torch.cat([out0, z_img], 1)
        out1 = self.decB1(x_and_z)
        z_img2 = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), out1.size(2), out1.size(3))
        x_and_z2 = torch.cat([out1, z_img2], 1)
        out2 = self.decB2(x_and_z2)
        z_img3 = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), out2.size(2), out2.size(3))
        x_and_z3 = torch.cat([out2, z_img3], 1)
        out3 = self.decB3(x_and_z3)
        z_img4 = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), out3.size(2), out3.size(3))
        x_and_z4 = torch.cat([out3, z_img4], 1)
        out4 = self.decB4(x_and_z4)
        return out4

####################################################################
#------------------------- Basic Functions -------------------------
####################################################################
def get_scheduler(optimizer, opts, cur_ep=-1):
    """Build a learning-rate scheduler from `opts.lr_policy`.

    Args:
        optimizer: the torch optimizer to schedule.
        opts: options namespace with lr_policy ('lambda' or 'step'),
            n_ep (total epochs) and n_ep_decay (epoch decay starts).
        cur_ep: last finished epoch, used to resume scheduling.

    Raises:
        NotImplementedError: for an unknown lr_policy. (The original
        *returned* the exception object instead of raising it.)
    """
    if opts.lr_policy == 'lambda':
        def lambda_rule(ep):
            # Linear decay from 1.0 to ~0 between n_ep_decay and n_ep.
            lr_l = 1.0 - max(0, ep - opts.n_ep_decay) / float(opts.n_ep - opts.n_ep_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule, last_epoch=cur_ep)
    elif opts.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opts.n_ep_decay, gamma=0.1, last_epoch=cur_ep)
    else:
        raise NotImplementedError('no such learn rate policy')
    return scheduler

def meanpoolConv(inplanes, outplanes):
    """2x2 average pooling followed by a 1x1 convolution (pool-then-project shortcut)."""
    return nn.Sequential(
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, padding=0, bias=True),
    )

def convMeanpool(inplanes, outplanes):
    """Reflection-padded 3x3 convolution followed by 2x2 average pooling.

    Same layers as the original (which built the conv via the module-level
    conv3x3 helper); here the helper's two layers are written out inline.
    """
    return nn.Sequential(
        nn.ReflectionPad2d(1),
        nn.Conv2d(inplanes, outplanes, kernel_size=3, stride=1, padding=0, bias=True),
        nn.AvgPool2d(kernel_size=2, stride=2),
    )

def get_norm_layer(layer_type='instance'):
    """Return a normalization-layer factory for the given type.

    Args:
        layer_type: 'batch', 'instance', or 'none'.

    Returns:
        A callable taking a channel count (functools.partial over the
        norm class), or None for layer_type == 'none'.

    Raises:
        NotImplementedError: for an unknown layer_type.
    """
    if layer_type == 'batch':
        norm_layer = functools.partial(nn.BatchNorm2d, affine=True)
    elif layer_type == 'instance':
        norm_layer = functools.partial(nn.InstanceNorm2d, affine=False)
    elif layer_type == 'none':
        norm_layer = None
    else:
        raise NotImplementedError('normalization layer [%s] is not found' % layer_type)
    return norm_layer

def get_non_linearity(layer_type='relu'):
    """Return an activation-layer factory for the given type.

    Args:
        layer_type: 'relu', 'lrelu' (LeakyReLU, slope 0.2), or 'elu'.

    Returns:
        A zero-argument callable (functools.partial) building the activation.

    Raises:
        NotImplementedError: for an unknown layer_type.
    """
    if layer_type == 'relu':
        nl_layer = functools.partial(nn.ReLU, inplace=False)
    elif layer_type == 'lrelu':
        nl_layer = functools.partial(nn.LeakyReLU, negative_slope=0.2, inplace=False)
    elif layer_type == 'elu':
        nl_layer = functools.partial(nn.ELU, inplace=False)
    else:
        raise NotImplementedError('nonlinearity activitation [%s] is not found' % layer_type)
    return nl_layer
def conv3x3(in_planes, out_planes):
    """Reflection-padded 3x3 convolution, returned as a two-element layer list."""
    pad = nn.ReflectionPad2d(1)
    conv = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, padding=0, bias=True)
    return [pad, conv]

def gaussian_weights_init(m):
    """Initialize convolution weights from N(0, 0.02); intended for Module.apply().

    Only touches modules whose class name starts with 'Conv' (Conv2d,
    ConvTranspose2d, ...). The original's double condition
    (`find(...) != -1 and find(...) == 0`) is redundant — `== 0` implies
    `!= -1` — and `m.class.name` was a paste-mangled `m.__class__.__name__`.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') == 0:
        m.weight.data.normal_(0.0, 0.02)

####################################################################
#-------------------------- Basic Blocks --------------------------
####################################################################

# The code of LayerNorm is modified from MUNIT (GitHub - NVlabs/MUNIT: Multimodal Unsupervised Image-to-Image Translation)

class LayerNorm(nn.Module):
    """Layer normalization over all non-batch dimensions (C, H, W) per sample.

    The affine weight/bias have shape (n_out, 1, 1) and are broadcast over
    the spatial grid in forward(). Modified from MUNIT.
    """

    def __init__(self, n_out, eps=1e-5, affine=True):
        super(LayerNorm, self).__init__()
        self.n_out = n_out
        self.affine = affine
        # NOTE: `eps` is accepted but not forwarded to F.layer_norm,
        # matching the original behavior.
        if self.affine:
            self.weight = nn.Parameter(torch.ones(n_out, 1, 1))
            self.bias = nn.Parameter(torch.zeros(n_out, 1, 1))
        return

    def forward(self, x):
        # Normalize over everything except the batch dimension.
        normalized_shape = x.size()[1:]
        if self.affine:
            return F.layer_norm(x, normalized_shape, self.weight.expand(normalized_shape), self.bias.expand(normalized_shape))
        else:
            return F.layer_norm(x, normalized_shape)

class BasicBlock(nn.Module):
    """Pre-activation residual block that downsamples 2x.

    Conv path: [norm] -> act -> 3x3 conv -> [norm] -> act -> conv+meanpool.
    Shortcut: meanpool + 1x1 conv. Outputs are summed.

    Args:
        inplanes/outplanes: input/output channel counts.
        norm_layer: optional factory taking a channel count (may be None).
        nl_layer: zero-argument activation factory (required).
    """

    def __init__(self, inplanes, outplanes, norm_layer=None, nl_layer=None):
        super(BasicBlock, self).__init__()
        layers = []
        if norm_layer is not None:
            layers += [norm_layer(inplanes)]
        layers += [nl_layer()]
        layers += conv3x3(inplanes, inplanes)
        if norm_layer is not None:
            layers += [norm_layer(inplanes)]
        layers += [nl_layer()]
        layers += [convMeanpool(inplanes, outplanes)]
        self.conv = nn.Sequential(*layers)
        self.shortcut = meanpoolConv(inplanes, outplanes)

    def forward(self, x):
        # Residual sum of the conv path and the pooled shortcut.
        out = self.conv(x) + self.shortcut(x)
        return out

class LeakyReLUConv2d(nn.Module):
    """Reflection-padded conv + LeakyReLU, with optional spectral norm and
    optional instance normalization.

    Args:
        n_in/n_out: channel counts.
        kernel_size, stride, padding: conv geometry (padding via ReflectionPad2d).
        norm: 'Instance' to insert InstanceNorm2d after the conv.
        sn: wrap the conv in spectral normalization.
    """

    def __init__(self, n_in, n_out, kernel_size, stride, padding=0, norm='None', sn=False):
        super(LeakyReLUConv2d, self).__init__()
        model = []
        model += [nn.ReflectionPad2d(padding)]
        if sn:
            model += [spectral_norm(nn.Conv2d(n_in, n_out, kernel_size=kernel_size, stride=stride, padding=0, bias=True))]
        else:
            model += [nn.Conv2d(n_in, n_out, kernel_size=kernel_size, stride=stride, padding=0, bias=True)]
        # Bug fix: the original compared the string literal 'norm' against
        # 'Instance' (always False), so InstanceNorm2d was never inserted.
        if norm == 'Instance':
            model += [nn.InstanceNorm2d(n_out, affine=False)]
        model += [nn.LeakyReLU(inplace=False)]
        self.model = nn.Sequential(*model)
        self.model.apply(gaussian_weights_init)
        # elif norm == 'Group': not implemented in the original either.

    def forward(self, x):
        return self.model(x)

class ReLUINSConv2d(nn.Module):
    """Reflection-padded conv -> InstanceNorm2d -> ReLU."""

    def __init__(self, n_in, n_out, kernel_size, stride, padding=0):
        super(ReLUINSConv2d, self).__init__()
        model = []
        model += [nn.ReflectionPad2d(padding)]
        model += [nn.Conv2d(n_in, n_out, kernel_size=kernel_size, stride=stride, padding=0, bias=True)]
        model += [nn.InstanceNorm2d(n_out, affine=False)]
        model += [nn.ReLU(inplace=False)]
        self.model = nn.Sequential(*model)
        self.model.apply(gaussian_weights_init)

    def forward(self, x):
        return self.model(x)

class INSResBlock(nn.Module):
    """Instance-normalized residual block: two reflection-padded 3x3 convs.

    Optional dropout after the second normalization.
    """

    def conv3x3(self, inplanes, out_planes, stride=1):
        return [nn.ReflectionPad2d(1), nn.Conv2d(inplanes, out_planes, kernel_size=3, stride=stride)]

    def __init__(self, inplanes, planes, stride=1, dropout=0.0):
        super(INSResBlock, self).__init__()
        model = []
        model += self.conv3x3(inplanes, planes, stride)
        model += [nn.InstanceNorm2d(planes)]
        model += [nn.ReLU(inplace=False)]
        model += self.conv3x3(planes, planes)
        model += [nn.InstanceNorm2d(planes)]
        if dropout > 0:
            model += [nn.Dropout(p=dropout)]
        self.model = nn.Sequential(*model)
        self.model.apply(gaussian_weights_init)

    def forward(self, x):
        residual = x
        out = self.model(x)
        # Out-of-place add: `out += residual` mutates a tensor that autograd
        # may still need (e.g. when backward() is called with
        # retain_graph=True), causing in-place-modification errors.
        out = out + residual
        return out

class MisINSResBlock(nn.Module):
    """Residual block modulated by an extra (attribute) code.

    Each of the two conv stages concatenates the spatially-broadcast code
    `z` (dim_extra channels) and mixes it back down to `dim` channels with
    a pair of 1x1 convolutions.
    """

    def conv3x3(self, dim_in, dim_out, stride=1):
        return nn.Sequential(nn.ReflectionPad2d(1), nn.Conv2d(dim_in, dim_out, kernel_size=3, stride=stride))

    def conv1x1(self, dim_in, dim_out):
        return nn.Conv2d(dim_in, dim_out, kernel_size=1, stride=1, padding=0)

    def __init__(self, dim, dim_extra, stride=1, dropout=0.0):
        super(MisINSResBlock, self).__init__()
        self.conv1 = nn.Sequential(
            self.conv3x3(dim, dim, stride),
            nn.InstanceNorm2d(dim))
        self.conv2 = nn.Sequential(
            self.conv3x3(dim, dim, stride),
            nn.InstanceNorm2d(dim))
        # 1x1 mixing blocks that fold the concatenated code back to `dim` channels.
        self.blk1 = nn.Sequential(
            self.conv1x1(dim + dim_extra, dim + dim_extra),
            nn.ReLU(inplace=False),
            self.conv1x1(dim + dim_extra, dim),
            nn.ReLU(inplace=False))
        self.blk2 = nn.Sequential(
            self.conv1x1(dim + dim_extra, dim + dim_extra),
            nn.ReLU(inplace=False),
            self.conv1x1(dim + dim_extra, dim),
            nn.ReLU(inplace=False))
        model = []
        if dropout > 0:
            model += [nn.Dropout(p=dropout)]
        self.model = nn.Sequential(*model)
        self.model.apply(gaussian_weights_init)
        self.conv1.apply(gaussian_weights_init)
        self.conv2.apply(gaussian_weights_init)
        self.blk1.apply(gaussian_weights_init)
        self.blk2.apply(gaussian_weights_init)

    def forward(self, x, z):
        residual = x
        # Broadcast z over the spatial grid of x.
        z_expand = z.view(z.size(0), z.size(1), 1, 1).expand(z.size(0), z.size(1), x.size(2), x.size(3))
        o1 = self.conv1(x)
        o2 = self.blk1(torch.cat([o1, z_expand], dim=1))
        o3 = self.conv2(o2)
        out = self.blk2(torch.cat([o3, z_expand], dim=1))
        # Out-of-place add: `out += residual` mutates a tensor that autograd
        # may still need (this block is where the reported
        # "in-place operation" backward error originated).
        out = out + residual
        return out

class GaussianNoiseLayer(nn.Module):
    """Adds standard Gaussian noise to the input during training; identity in eval.

    Improvement: uses torch.randn_like(x), which matches the input's device
    and dtype, instead of the deprecated `Variable(torch.randn(...).cuda(...))`
    that crashed on CPU-only inputs.
    """

    def __init__(self):
        super(GaussianNoiseLayer, self).__init__()

    def forward(self, x):
        if not self.training:
            return x
        noise = torch.randn_like(x)
        return x + noise

class ReLUINSConvTranspose2d(nn.Module):
    """Transposed conv -> LayerNorm -> ReLU upsampling block.

    Despite the 'INS' in the name, the normalization is the module-level
    LayerNorm class, not InstanceNorm (matching the original code).
    """

    def __init__(self, n_in, n_out, kernel_size, stride, padding, output_padding):
        super(ReLUINSConvTranspose2d, self).__init__()
        model = []
        model += [nn.ConvTranspose2d(n_in, n_out, kernel_size=kernel_size, stride=stride, padding=padding, output_padding=output_padding, bias=True)]
        model += [LayerNorm(n_out)]
        model += [nn.ReLU(inplace=False)]
        self.model = nn.Sequential(*model)
        self.model.apply(gaussian_weights_init)

    def forward(self, x):
        return self.model(x)

####################################################################
#--------------------- Spectral Normalization ---------------------

# This part of code is copied from the PyTorch master branch (0.5.0)

####################################################################
class SpectralNorm(object):
    """Spectral-norm weight reparameterization (backport of PyTorch 0.5.0).

    Registered as a forward pre-hook: before each training forward it runs
    power iteration on `weight_orig` to estimate the largest singular value
    sigma and stores `weight = weight_orig / sigma` on the module.

    The pasted original had several paste-mangled names restored here:
    `__init__`/`__call__` dunders, the `_u`/`_orig` suffixes, the in-place
    `.detach_()`/`.requires_grad_()`/`.normal_()` calls, and
    `module._parameters[name]` (not `module.parameters[name]`).
    """

    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
        self.name = name
        self.dim = dim
        if n_power_iterations <= 0:
            raise ValueError('Expected n_power_iterations to be positive, but '
                             'got n_power_iterations={}'.format(n_power_iterations))
        self.n_power_iterations = n_power_iterations
        self.eps = eps

    def compute_weight(self, module):
        """Run power iteration and return (normalized weight, updated u)."""
        weight = getattr(module, self.name + '_orig')
        u = getattr(module, self.name + '_u')
        weight_mat = weight
        if self.dim != 0:
            # permute dim to front
            weight_mat = weight_mat.permute(self.dim,
                                            *[d for d in range(weight_mat.dim()) if d != self.dim])
        height = weight_mat.size(0)
        weight_mat = weight_mat.reshape(height, -1)
        with torch.no_grad():
            for _ in range(self.n_power_iterations):
                v = F.normalize(torch.matmul(weight_mat.t(), u), dim=0, eps=self.eps)
                u = F.normalize(torch.matmul(weight_mat, v), dim=0, eps=self.eps)
        sigma = torch.dot(u, torch.matmul(weight_mat, v))
        weight = weight / sigma
        return weight, u

    def remove(self, module):
        """Undo the reparameterization, restoring a plain Parameter."""
        weight = getattr(module, self.name)
        delattr(module, self.name)
        delattr(module, self.name + '_u')
        delattr(module, self.name + '_orig')
        module.register_parameter(self.name, torch.nn.Parameter(weight))

    def __call__(self, module, inputs):
        if module.training:
            weight, u = self.compute_weight(module)
            setattr(module, self.name, weight)
            setattr(module, self.name + '_u', u)
        else:
            # In eval mode, freeze the last computed weight in place.
            r_g = getattr(module, self.name + '_orig').requires_grad
            getattr(module, self.name).detach_().requires_grad_(r_g)

    @staticmethod
    def apply(module, name, n_power_iterations, dim, eps):
        fn = SpectralNorm(name, n_power_iterations, dim, eps)
        weight = module._parameters[name]
        height = weight.size(dim)
        u = F.normalize(weight.new_empty(height).normal_(0, 1), dim=0, eps=fn.eps)
        delattr(module, fn.name)
        module.register_parameter(fn.name + "_orig", weight)
        # Plain buffer so the normalized weight is not double-counted as a Parameter.
        module.register_buffer(fn.name, weight.data)
        module.register_buffer(fn.name + "_u", u)
        module.register_forward_pre_hook(fn)
        return fn

def spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None):
    """Apply spectral normalization to `module.<name>` and return the module.

    For transposed convolutions the output-channel axis is dim 1, so power
    iteration is run over that dimension instead of dim 0.
    """
    if dim is None:
        if isinstance(module, (torch.nn.ConvTranspose1d,
                               torch.nn.ConvTranspose2d,
                               torch.nn.ConvTranspose3d)):
            dim = 1
        else:
            dim = 0
    SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
    return module

def remove_spectral_norm(module, name='weight'):
    """Remove the spectral-norm reparameterization `name` from `module`.

    Raises:
        ValueError: if no SpectralNorm pre-hook with that name is registered.
    """
    for k, hook in module._forward_pre_hooks.items():
        if isinstance(hook, SpectralNorm) and hook.name == name:
            hook.remove(module)
            del module._forward_pre_hooks[k]
            return module
    raise ValueError("spectral_norm of '{}' not found in {}".format(name, module))

Replace this with `out = out + residual`, because `out += residual` is an in-place operation.

I changed them and same issue:

Epoch: 0/50: 0%| | 2/1573 [00:10<3:09:45, 7.25s/it]C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\autograd_init_.py:173: UserWarning: Error detected in ReluBackward0. Traceback of forward call that caused the error:
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\runpy.py”, line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\runpy.py”, line 87, in _run_code
exec(code, run_globals)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel_launcher.py”, line 16, in
app.launch_new_instance()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\traitlets\config\application.py”, line 846, in launch_instance
app.start()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelapp.py”, line 677, in start
self.io_loop.start()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\tornado\platform\asyncio.py”, line 199, in start
self.asyncio_loop.run_forever()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\base_events.py”, line 570, in run_forever
self._run_once()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\base_events.py”, line 1859, in _run_once
handle._run()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\events.py”, line 81, in _run
self._context.run(self._callback, *self._args)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 471, in dispatch_queue
await self.process_one()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 460, in process_one
await dispatch(*args)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 367, in dispatch_shell
await result
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 662, in execute_request
reply_content = await reply_content
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\ipkernel.py”, line 360, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\zmqshell.py”, line 532, in run_cell
return super().run_cell(*args, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 2863, in run_cell
result = self._run_cell(
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 2909, in _run_cell
return runner(coro)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\async_helpers.py”, line 129, in pseudo_sync_runner
coro.send(None)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3106, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3309, in run_ast_nodes
if await self.run_code(code, result, async
=asy):
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3369, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File “C:\Users\gtu\AppData\Local\Temp\ipykernel_9588\2562760679.py”, line 17, in <cell line: 2>
model.update_D(images_a, images_b)
File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 157, in update_D
self.forward()
File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 135, in forward
self.fake_B_recon = self.gen.forward_b(self.z_content_recon_b, self.z_attr_recon_b)
File “C:\Users\gtu\Project_All\DRIT-master\networks.py”, line 233, in forward_b
out4 = self.decB4(out3, z4)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\Project_All\DRIT-master\networks.py”, line 424, in forward
out = self.blk2(torch.cat([o3, z_expand], dim=1))
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\container.py”, line 141, in forward
input = module(input)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\activation.py”, line 98, in forward
return F.relu(input, inplace=self.inplace)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\functional.py”, line 1442, in relu
result = torch.relu(input)
(Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass

Did you replace all the `+=` usages in the shared code? For example, also when placing nn.Module objects within a list?

Also, I see you’re running this within ipython, make sure you re-run all previous cells otherwise your code won’t have updated.

Whereabouts is `forward` defined? It seems that this is the function that triggers the in-place call somewhere. It’s defined within File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 135, in forward. Can you share that code only?

Now the train has progressed for one epoch but still shows a new error:

Epoch: 0/50: 87%|████████▋ | 1367/1573 [11:03<01:13, 2.79it/s]C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\autograd_init_.py:173: UserWarning: Error detected in ConvolutionBackward0. Traceback of forward call that caused the error:
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\runpy.py”, line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\runpy.py”, line 87, in _run_code
exec(code, run_globals)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel_launcher.py”, line 16, in
app.launch_new_instance()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\traitlets\config\application.py”, line 846, in launch_instance
app.start()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelapp.py”, line 677, in start
self.io_loop.start()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\tornado\platform\asyncio.py”, line 199, in start
self.asyncio_loop.run_forever()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\base_events.py”, line 570, in run_forever
self._run_once()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\base_events.py”, line 1859, in _run_once
handle._run()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\asyncio\events.py”, line 81, in _run
self._context.run(self._callback, *self._args)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 471, in dispatch_queue
await self.process_one()
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 460, in process_one
await dispatch(*args)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 367, in dispatch_shell
await result
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\kernelbase.py”, line 662, in execute_request
reply_content = await reply_content
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\ipkernel.py”, line 360, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\ipykernel\zmqshell.py”, line 532, in run_cell
return super().run_cell(*args, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 2863, in run_cell
result = self._run_cell(
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 2909, in _run_cell
return runner(coro)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\async_helpers.py”, line 129, in pseudo_sync_runner
coro.send(None)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3106, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3309, in run_ast_nodes
if await self.run_code(code, result, async
=asy):
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\IPython\core\interactiveshell.py”, line 3369, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File “C:\Users\gtu\AppData\Local\Temp\ipykernel_12992\2562760679.py”, line 17, in <cell line: 2>
model.update_D(images_a, images_b)
File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 160, in update_D
self.forward()
File “C:\Users\gtu\Project_All\DRIT-master\model.py”, line 128, in forward
output_fakeB = self.gen.forward_b(input_content_forB, input_attr_forB)
File “C:\Users\gtu\Project_All\DRIT-master\networks.py”, line 230, in forward_b
out1 = self.decB1(x, z1)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\Project_All\DRIT-master\networks.py”, line 422, in forward
o2 = self.blk1(torch.cat([o1, z_expand], dim=1))
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\container.py”, line 141, in forward
input = module(input)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\conv.py”, line 447, in forward
return self._conv_forward(input, self.weight, self.bias)
File “C:\Users\gtu\anaconda3\envs\Deeplearning\lib\site-packages\torch\nn\modules\conv.py”, line 443, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
(Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass

RuntimeError Traceback (most recent call last)
Input In [6], in <cell line: 2>()
16 else:
17 model.update_D(images_a, images_b)
—> 18 model.update_EG()
20 # print(‘total_it: %d (ep %d, it %d), lr %08f’ % (total_it, ep, it, model.gen_opt.param_groups[0][‘lr’]))
21 total_it += 1

File ~\Project_All\DRIT-master\model.py:241, in DRIT.update_EG(self)
236 loss_G = loss_G_GAN_A + loss_G_GAN_B + loss_G_GAN_Acontent + loss_G_GAN_Bcontent + loss_G_L1_AA + loss_G_L1_BB + loss_G_L1_A + loss_G_L1_B
237 #loss_G = loss_G_GAN_A + loss_G_GAN_B + loss_G_GAN_Acontent + loss_G_GAN_Bcontent + loss_G_L1_AA + loss_G_L1_BB
238
239
240 #do backward()
→ 241 loss_G.backward(retain_graph=True)
242 #self.backward_EG()
243
244
245 # do optimisation
246 self.enc_c_opt.step()

File ~\anaconda3\envs\Deeplearning\lib\site-packages\torch_tensor.py:363, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
354 if has_torch_function_unary(self):
355 return handle_torch_function(
356 Tensor.backward,
357 (self,),
(…)
361 create_graph=create_graph,
362 inputs=inputs)
→ 363 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)

File ~\anaconda3\envs\Deeplearning\lib\site-packages\torch\autograd_init_.py:173, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
168 retain_graph = create_graph
170 # The reason we repeat same the comment below is that
171 # some Python versions print out the first line of a multi-line function
172 # calls in the traceback and some print out the last line
→ 173 Variable.execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
174 tensors, grad_tensors
, retain_graph, create_graph, inputs,
175 allow_unreachable=True, accumulate_grad=True)

RuntimeError: Function ‘ConvolutionBackward0’ returned nan values in its 1th output.

Can you check the self.blk1 definition for any Conv layers? That seems to be the last line before the error emerges?

it is just:

self.blk1 = nn.Sequential(
self.conv1x1(dim + dim_extra, dim + dim_extra),
nn.ReLU(inplace=False),
self.conv1x1(dim + dim_extra, dim),
nn.ReLU(inplace=False))

def conv1x1(self, dim_in, dim_out):
return nn.Conv2d(dim_in, dim_out, kernel_size=1, stride=1, padding=0)