I am using the newest PyTorch version (built from source, CUDA 8, cuDNN 6) to train improved WGANs.
Everything works fine at first, but after some number of iterations I receive this error:
RuntimeError                              Traceback (most recent call last)
<ipython-input-10-4c802ee3c846> in <module>()
29
30 fake_batch = Variable(gen.forward(noise).data,requires_grad=True)
---> 31 loss_d = disc.train_batch(X_batch,fake_batch,lambd=lambd)
32
33 del X_batch,noise,fake_batch
/home/hartmank/braindecode/GAN_env/GAN/train_modules.pyc in train_batch(self, batch_real, batch_fake, lambd)
250 # gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * lambd
251 loss_penalty = self.calc_gradient_penalty(batch_real, batch_fake,lambd)
--> 252 loss_penalty.backward()
253
254 #loss = loss_fake - loss_real + loss_penalty
/home/hartmank/anaconda2/envs/pytorch_GAN_CUDNN6/lib/python2.7/site-packages/torch/autograd/variable.pyc in backward(self, gradient, retain_graph, create_graph, retain_variables)
155 Variable.
156 """
--> 157 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
158
159 def register_hook(self, hook):
/home/hartmank/anaconda2/envs/pytorch_GAN_CUDNN6/lib/python2.7/site-packages/torch/autograd/__init__.pyc in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
96
97 Variable._execution_engine.run_backward(
---> 98 variables, grad_variables, retain_graph)
99
100
/home/hartmank/anaconda2/envs/pytorch_GAN_CUDNN6/lib/python2.7/site-packages/torch/autograd/function.pyc in apply(self, *args)
89
90 def apply(self, *args):
---> 91 return self._forward_cls.backward(self, *args)
92
93
/home/hartmank/anaconda2/envs/pytorch_GAN_CUDNN6/lib/python2.7/site-packages/torch/nn/_functions/thnn/pooling.pyc in backward(ctx, ggI, _ggIndices)
139 # ggO is equivalent to the 1d case, but the indices are given wrt the last two dimensions combined
140 indices_view = indices.view(indices.size()[:-2] + (-1,))
--> 141 ggO = ggI.contiguous().view(ggI.size()[:-2] + (-1,)).gather(dim=2, index=indices_view).view_as(indices)
142 return gI, None, ggO, None, None, None, None, None, None
143
/home/hartmank/anaconda2/envs/pytorch_GAN_CUDNN6/lib/python2.7/site-packages/torch/autograd/variable.pyc in gather(self, dim, index)
683
684 def gather(self, dim, index):
--> 685 return Gather.apply(self, dim, index)
686
687 def scatter(self, dim, index, source):
/home/hartmank/anaconda2/envs/pytorch_GAN_CUDNN6/lib/python2.7/site-packages/torch/autograd/_functions/tensor.pyc in forward(ctx, input, dim, index)
558 ctx.save_for_backward(index)
559 ctx.dim = dim
--> 560 return input.gather(dim, index)
561
562 @staticmethod
RuntimeError: cuda runtime error (59) : device-side assert triggered at /home/hartmank/pytorch_install/torch/lib/THC/generic/THCTensorScatterGather.cu:70
The error occurs after fewer epochs when the learning rate is higher.
Could the problem stem from excessively large losses or gradients?
Functions of interest:
def train_batch(self, batch_real, batch_fake, lambd=10):
    """Run one optimizer step on a batch of real and a batch of fake samples.

    Three terms are back-propagated separately, so their gradients
    accumulate in the parameters before the single ``optimizer.step()``:
    the negated mean output on ``batch_real``, the mean output on
    ``batch_fake``, and the gradient penalty computed by
    ``calc_gradient_penalty`` with weight ``lambd``.

    Args:
        batch_real: Batch passed to ``self.forward`` as real data.
        batch_fake: Batch passed to ``self.forward`` as fake data.
        lambd: Weight forwarded to ``calc_gradient_penalty``.

    Returns:
        Tuple ``(loss_f, loss_r, penalty)``: the mean output on the fake
        batch, the mean output on the real batch, and the penalty value,
        each read from the first element of the loss's ``.data``.
    """
    # One-time training setup, performed lazily on the first call.
    if not self.did_init_train:
        self.train_init()

    self.optimizer.zero_grad()
    for param in self.parameters():
        param.requires_grad = True

    # Real batch: minimise the negated mean output.
    real_term = -self.forward(batch_real).mean()
    real_term.backward()
    mean_on_real = -real_term.data[0]

    # Fake batch: minimise the mean output.
    fake_term = self.forward(batch_fake).mean()
    fake_term.backward()
    mean_on_fake = fake_term.data[0]

    # Gradient penalty, following
    # https://github.com/caogang/wgan-gp/blob/master/gan_toy.py
    penalty_term = self.calc_gradient_penalty(batch_real, batch_fake, lambd)
    penalty_term.backward()
    penalty_value = penalty_term.data[0]

    # Apply the accumulated gradients of all three terms.
    self.optimizer.step()
    return mean_on_fake, mean_on_real, penalty_value
def calc_gradient_penalty(self, real_data, fake_data, lambd):
    """Return the gradient penalty on random real/fake interpolates.

    For every sample a coefficient ``alpha`` is drawn uniformly from
    [0, 1) and the point ``alpha * real + (1 - alpha) * fake`` is fed
    through ``self``. The penalty is the batch mean of
    ``(||grad||_2 - 1) ** 2`` scaled by ``lambd``, where ``grad`` is the
    gradient of the output with respect to that interpolated input.

    Args:
        real_data: Batch of real samples; its ``.data`` tensor is used.
        fake_data: Batch of fake samples; its ``.data`` tensor is used.
            Assumed to have the same size as ``real_data`` -- TODO confirm
            against callers.
        lambd: Multiplicative weight of the penalty.

    Returns:
        The penalty as a differentiable scalar (built with
        ``create_graph=True`` so ``backward()`` can be called on it).
    """
    batch_size = real_data.size(0)

    # One coefficient per sample, shaped (batch, 1, ..., 1) and created with
    # the same tensor type as the data, so CPU and CUDA inputs both work
    # instead of unconditionally requiring a GPU.
    trailing_ones = [1] * (real_data.dim() - 1)
    alpha = torch.rand(batch_size, *trailing_ones).type_as(real_data.data)
    alpha = alpha.expand(real_data.size())

    # Interpolate on the raw tensors so the new leaf is detached from
    # whatever graph produced the input batches.
    interpolates = alpha * real_data.data + (1 - alpha) * fake_data.data
    interpolates = Variable(interpolates, requires_grad=True)

    disc_interpolates = self(interpolates)
    grad_outputs = torch.ones(disc_interpolates.size()).type_as(
        disc_interpolates.data)
    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=grad_outputs,
                              create_graph=True, retain_graph=True,
                              only_inputs=True)[0]
    del alpha, interpolates, disc_interpolates

    # Flatten each sample before taking the norm: norm(2, dim=1) on an input
    # with more than two dimensions would only reduce axis 1 and penalise
    # per-position norms instead of one gradient norm per sample.
    # contiguous() is needed because the gradient may be an expanded view.
    flat_gradients = gradients.contiguous().view(batch_size, -1)
    gradient_penalty = ((flat_gradients.norm(2, dim=1) - 1) ** 2).mean() * lambd
    return gradient_penalty