I have the following code:
# Discriminator update: sample noise and soft labels, run the generator and
# the discriminator forward, compute the loss and the accuracy, and step the
# discriminator optimizer only while the accuracy is at or below
# cfg.TRAIN.DISCRIMINATOR_ACC_THRESHOLD.
# NOTE(review): presumably the body of the per-batch loop in train_net -- confirm.
# rendering_images and voxels are defined outside this snippet.
# Generate Gaussian noise (mean 0, std .33): BATCH_SIZE rows of Z_SIZE values
z = torch.Tensor(cfg.CONST.BATCH_SIZE, cfg.CONST.Z_SIZE).normal_(0, .33)
# Use soft labels: real targets sampled between .7 and 1.2, fake targets
# between 0 and .3, one per batch item
# NOTE(review): real targets can exceed 1 -- confirm bce_loss is meant to accept that.
labels_real = torch.Tensor(cfg.CONST.BATCH_SIZE).uniform_(.7, 1.2)
labels_fake = torch.Tensor(cfg.CONST.BATCH_SIZE).uniform_(0, .3)
# Use CUDA if it is available
# (rendering_images is moved as well, although it is not used below)
if torch.cuda.is_available():
rendering_images = rendering_images.cuda()
voxels = voxels.cuda()
z = z.cuda()
labels_real = labels_real.cuda()
labels_fake = labels_fake.cuda()
# Train the discriminator
# generated_voxels is fed to the discriminator as-is (no detach()).
generated_voxels = generator(z, None)
pred_labels_fake = discriminator(generated_voxels, None)
pred_labels_real = discriminator(voxels, None)
# Total loss is the sum of the loss on generated and on real samples.
discriminator_loss_fake = bce_loss(pred_labels_fake, labels_fake)
discriminator_loss_real = bce_loss(pred_labels_real, labels_real)
discriminator_loss = discriminator_loss_fake + discriminator_loss_real
# Accuracy: a fake sample counts as correct when its prediction is <= .5,
# a real sample when its prediction is >= .5; the mean over both groups is
# the fraction of correct predictions.
discriminator_acuracy_fake = torch.le(pred_labels_fake.squeeze(), .5).float()
discriminator_acuracy_real = torch.ge(pred_labels_real.squeeze(), .5).float()
discriminator_acuracy = torch.mean(torch.cat((discriminator_acuracy_fake, discriminator_acuracy_real)), 0)
# Balance the learning speed of discriminator and generator
# NOTE(review): when the condition is false, backward() is never called in
# this snippet; presumably the autograd graph referenced by discriminator_loss
# then stays alive until these names are rebound, which may explain the
# out-of-memory error -- confirm.
if discriminator_acuracy <= cfg.TRAIN.DISCRIMINATOR_ACC_THRESHOLD:
discriminator.zero_grad()
discriminator_loss.backward()
discriminator_solver.step()
Running it fails with the following runtime error:
THCudaCheck FAIL file=/pytorch/aten/src/THC/generic/THCStorage.cu line=58 error=2 : out of memory
Traceback (most recent call last):
File "runner.py", line 96, in <module>
main()
File "runner.py", line 82, in main
train_net(cfg)
File "/home/hzxie/Development/Python/Pixel2Voxel/core/train.py", line 148, in train_net
generated_voxels = generator(z, None)
File "/usr/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/home/hzxie/Development/Python/Pixel2Voxel/models/generator.py", line 48, in forward
out = self.layer4(out)
File "/usr/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/usr/lib/python3.6/site-packages/torch/nn/modules/container.py", line 91, in forward
input = module(input)
File "/usr/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/usr/lib/python3.6/site-packages/torch/nn/modules/activation.py", line 46, in forward
return F.threshold(input, self.threshold, self.value, self.inplace)
File "/usr/lib/python3.6/site-packages/torch/nn/functional.py", line 601, in threshold
return torch._C._nn.threshold(input, threshold, value)
RuntimeError: cuda runtime error (2) : out of memory at /pytorch/aten/src/THC/generic/THCStorage.cu:58
However, if I remove the `if` statement, as follows:
# Discriminator update without the accuracy check: sample noise and soft
# labels, run the generator and the discriminator forward, compute the loss
# and the accuracy, then always run zero_grad/backward/step.
# rendering_images and voxels are defined outside this snippet.
# Generate Gaussian noise (mean 0, std .33): BATCH_SIZE rows of Z_SIZE values
z = torch.Tensor(cfg.CONST.BATCH_SIZE, cfg.CONST.Z_SIZE).normal_(0, .33)
# Use soft labels: real targets sampled between .7 and 1.2, fake targets
# between 0 and .3, one per batch item
labels_real = torch.Tensor(cfg.CONST.BATCH_SIZE).uniform_(.7, 1.2)
labels_fake = torch.Tensor(cfg.CONST.BATCH_SIZE).uniform_(0, .3)
# Use CUDA if it is available
if torch.cuda.is_available():
rendering_images = rendering_images.cuda()
voxels = voxels.cuda()
z = z.cuda()
labels_real = labels_real.cuda()
labels_fake = labels_fake.cuda()
# Train the discriminator
generated_voxels = generator(z, None)
pred_labels_fake = discriminator(generated_voxels, None)
pred_labels_real = discriminator(voxels, None)
# Total loss is the sum of the loss on generated and on real samples.
discriminator_loss_fake = bce_loss(pred_labels_fake, labels_fake)
discriminator_loss_real = bce_loss(pred_labels_real, labels_real)
discriminator_loss = discriminator_loss_fake + discriminator_loss_real
# Accuracy: fraction of predictions on the correct side of .5
# (<= .5 for generated samples, >= .5 for real samples).
# It is still computed here but no longer gates the update below.
discriminator_acuracy_fake = torch.le(pred_labels_fake.squeeze(), .5).float()
discriminator_acuracy_real = torch.ge(pred_labels_real.squeeze(), .5).float()
discriminator_acuracy = torch.mean(torch.cat((discriminator_acuracy_fake, discriminator_acuracy_real)), 0)
# Balance the learning speed of discriminator and generator
# (the accuracy threshold check has been removed: backward() and step() now
# run on every pass)
discriminator.zero_grad()
discriminator_loss.backward()
discriminator_solver.step()
The version above runs without the out-of-memory error.
How can I solve this problem while keeping the `if` statement?
As you know, the `volatile` option was removed in PyTorch 0.4.0.
I tried `del`, but it still does not work:
# Balance the learning speed of discriminator and generator
# Variant of the conditional update that prints which path was taken and
# drops references to the batch tensors with del. Every name used here is
# defined outside this snippet.
if discriminator_acuracy <= cfg.TRAIN.DISCRIMINATOR_ACC_THRESHOLD:
print('With backward')
discriminator.zero_grad()
discriminator_loss.backward()
discriminator_solver.step()
else:
print('Without backward')
# NOTE(review): indentation was lost in this paste; the del statements below
# presumably belong to the else branch -- confirm.
# NOTE(review): discriminator_loss, discriminator_loss_fake,
# discriminator_loss_real and labels_fake are not deleted here; the loss
# tensors presumably still reference the autograd graph, so its memory may
# not be released by these statements -- confirm.
del rendering_images
del voxels
del z
del labels_real
del generated_voxels
del pred_labels_fake
del pred_labels_real