I am using a CycleGAN on the CelebA dataset
def train(G, F, Dg, Df, epochs = 10, batch_size = 32, D_real_loss = [], D_fake_loss = [], G_loss = [], F_loss = []):
torch.cuda.empty_cache()
for epoch in list(range(epochs)):
print('Epoch number: {0}'.format(epoch))
for batch,(rude_batch, smile_batch) in enumerate( zip(rude_loader, smile_loader) ):
rude_real, smile_real = Variable(rude_batch[0]).type(dtype), Variable(smile_batch[0]).type(dtype)
del rude_batch, smile_batch
torch.cuda.empty_cache()
# first half
smile_fake = G(rude_real)
scores_real, scores_real_np = pass_through_discriminator(Dg, smile_real), Dgnp(smile_real)
scores_fake, scores_fake_np = pass_through_discriminator(Dg, smile_fake), Dgnp(smile_fake)
scores_real, scores_fake = 0.8 * scores_real + 0.2 * scores_real_np, 0.8 * scores_fake + 0.2 * scores_fake_np
loss1,loss2 = torch.mean((scores_real - label_real)**2), torch.mean((scores_fake - label_fake)**2)
Dg_optim.zero_grad()
loss_dg = loss1 + loss2
del loss1, loss2
torch.cuda.empty_cache()
loss_dg.backward(retain_graph=True)
Dg_optim.step()
loss_g = torch.mean((scores_fake - label_real)**2) + 10 * torch.mean(torch.abs(G(F(smile_real)) - smile_real))
G_optim.zero_grad()
loss_g.backward(retain_graph=True)
G_optim.step()
# second half
rude_fake = F(smile_real)
scores_real, scores_real_np = pass_through_discriminator(Df, rude_real), Dfnp(rude_real)
scores_fake, scores_fake_np = pass_through_discriminator(Df, rude_fake), Dfnp(rude_fake)
scores_real, scores_fake = 0.8 * scores_real + 0.2 * scores_real_np, 0.8 * scores_fake + 0.2 * scores_fake_np
loss1,loss2 = torch.mean((scores_real - label_real)**2), torch.mean((scores_fake - label_fake)**2)
Df_optim.zero_grad()
loss_df = loss1 + loss2
del loss1, loss2
torch.cuda.empty_cache()
loss_df.backward(retain_graph=True)
Df_optim.step()
loss_f = torch.mean((scores_fake - label_real)**2) + 10 * torch.mean(torch.abs(F(G(rude_real)) - rude_real))
F_optim.zero_grad()
loss_f.backward()
F_optim.step()
del smile_fake, smile_real, rude_fake, rude_real
torch.cuda.empty_cache()
if batch % 100 == 0:
print('**Batch number: {0}**'.format(batch))
print('Discriminator G loss: {0}'.format(loss_dg.data.item()))
print('Generator G loss: {0}'.format(loss_g.data.item()))
print('Discriminator F loss: {0}'.format(loss_df.data.item()))
print('Generator F loss: {0}'.format(loss_f.data.item()))
else:
del loss_dg, loss_g, loss_df, loss_f
G_scheduler.step()
F_scheduler.step()
Dg_scheduler.step()
Df_scheduler.step()
D_real_loss += [loss_f.data.item()]
D_fake_loss += [loss_dg.data.item()]
F_loss += [loss_f.data.item()]
G_loss += [loss_g.data.item()]
test_image(rude_real, G, F)
saver(G.state_dict(), F.state_dict(), Dg.state_dict(), Df.state_dict())
G,F,Dg,Df = loader(G,F,Dg,Df)
plotter(D_real_loss, D_fake_loss, G_loss, F_loss)
But I get the following error when I run the code,
RuntimeError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 11.00 GiB total capacity; 8.37 GiB already allocated; 6.86 MiB free; 8.42 GiB reserved in total by PyTorch)
I did delete variables that I no longer used and used torch.cuda.empty_cache()
Any suggestions as to how I can free memory would be really helpful, thanks in advance !