Hi,
I am starting with PyTorch, and I have noticed that my implementation requires more GPU memory than my TensorFlow implementation of the same architecture.
Here’s a part of my code:
class seg_GAN(object):
    """Trainer for a segmentation network with an optional adversarial loss.

    Builds a ``UNet`` generator and, when ``adversarial`` is true, a
    ``Discriminator``; both are initialised with ``weights_init``. The
    training data comes from ``myDataSet`` wrapped in a ``DataLoader``.

    NOTE(review): ``self.loss_G`` is called by ``train`` but is not defined
    in the visible part of this class -- it must be provided elsewhere.
    """

    def __init__(self, batch_size=10, height=512, width=512, channels=3,
                 wd=0.0005, nfilters_d=64, checkpoint_dir=None,
                 path_imgs=None, learning_rate=2e-8, lr_step=30000,
                 lam_fcn=1, lam_adv=1, adversarial=False, nclasses=5):
        """Store the hyper-parameters and build the networks and data loader.

        Args:
            batch_size: Batch size given to the ``DataLoader`` and used to
                pre-allocate the label / one-hot buffers in ``train``.
            height: Height used for the one-hot buffer and the discriminator.
            width: Width used for the one-hot buffer and the discriminator.
            channels: Passed to ``UNet`` as its second argument.
            wd: Stored on the instance; not used in the visible code.
            nfilters_d: Passed to ``Discriminator``.
            checkpoint_dir: Stored on the instance; not used in the visible
                code.
            path_imgs: Passed to ``myDataSet``.
            learning_rate: Learning rate of both Adam optimizers.
            lr_step: Stored on the instance; not used in the visible code.
            lam_fcn: Stored on the instance (presumably a loss weight read
                by ``loss_G`` -- TODO confirm).
            lam_adv: Stored on the instance (presumably a loss weight read
                by ``loss_G`` -- TODO confirm).
            adversarial: When true, a discriminator is built and trained.
            nclasses: Number of classes; first argument of ``UNet`` and
                ``Discriminator`` and channel count of the one-hot buffer.
        """
        self.adversarial = adversarial
        self.channels = channels
        self.lam_fcn = lam_fcn
        self.lam_adv = lam_adv
        self.lr_step = lr_step
        self.wd = wd
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.checkpoint_dir = checkpoint_dir
        self.path_imgs = path_imgs
        self.nfilters_d = nfilters_d
        self.organ_target = 1  # 1 eso 2 heart 3 trach 4 aorta
        self.nclasses = nclasses

        self.netG = UNet(self.nclasses, self.channels)
        self.netG.apply(weights_init)
        if self.adversarial:
            self.netD = Discriminator(self.nclasses, self.nfilters_d,
                                      self.height, self.width)
            self.netD.apply(weights_init)

        self.dst = myDataSet(self.path_imgs, is_transform=True)
        self.trainloader = data.DataLoader(self.dst,
                                           batch_size=self.batch_size,
                                           shuffle=True, num_workers=2)

    def train(self, config):
        """Run the training loop on the GPU.

        Args:
            config: Object exposing ``iterations``, the upper bound of the
                outer loop (each outer iteration is one pass over
                ``self.trainloader``).
        """
        print('verion ', torch.__version__)
        start = 0  # TODO change this so that it can continue when loading a model
        print("Start from:", start)

        # Buffers are allocated once and resized in place for every batch.
        label_ones = torch.ones(self.batch_size)
        label_zeros = torch.zeros(self.batch_size)
        y_onehot = torch.FloatTensor(self.batch_size, self.nclasses,
                                     self.height, self.width)

        if self.adversarial:
            self.netD.cuda()
        self.netG.cuda()
        label_ones, label_zeros, y_onehot = (label_ones.cuda(),
                                             label_zeros.cuda(),
                                             y_onehot.cuda())

        # The Variables wrap the buffers above, so the in-place updates
        # performed inside the loop are what the networks receive.
        y_onehot_var = Variable(y_onehot)
        label_ones_var = Variable(label_ones)
        label_zeros_var = Variable(label_zeros)

        if self.adversarial:
            optimizerD = optim.Adam(self.netD.parameters(),
                                    lr=self.learning_rate,
                                    betas=(0.5, 0.999))
        optimizerG = optim.Adam(self.netG.parameters(),
                                lr=self.learning_rate,
                                betas=(0.5, 0.999))

        for it in range(start, config.iterations):  # epochs
            for i, (images, GT) in enumerate(self.trainloader):
                # Resize to the actual batch size of this batch.
                y_onehot.resize_(GT.size(0), self.nclasses,
                                 self.height, self.width)
                y_onehot.zero_()
                label_ones.resize_(GT.size(0))
                label_zeros.resize_(GT.size(0))

                images = Variable(images.cuda())
                GT = GT.cuda()
                # We need to add a singleton dim so that the number of dims
                # of the index matches the one-hot buffer.
                y_onehot.scatter_(
                    1, GT.view(GT.size(0), 1, GT.size(1), GT.size(2)), 1)
                GT = Variable(GT)  # N,H,W

                if self.adversarial:
                    ##########################
                    # Update Discriminator
                    ##########################
                    # Train with real samples.
                    self.netD.zero_grad()
                    output = self.netD(y_onehot_var)  # this must be in one hot
                    errD_real = F.binary_cross_entropy(output, label_ones_var)
                    errD_real.backward()  # update grads of netD

                    # Train with fake samples.
                    # `fake` is a prob map which we want to be similar to
                    # y_onehot.
                    fake = self.netG(images)
                    # detach(): only for speed, so grads of netG are not
                    # computed during the discriminator step.
                    output = self.netD(fake.detach())
                    errD_fake = F.binary_cross_entropy(output,
                                                       label_zeros_var)
                    errD_fake.backward()
                    optimizerD.step()  # update the parameters of netD

                ############################
                # Update G network
                ###########################
                self.netG.zero_grad()
                if self.adversarial:
                    # Reuse the generator output from the discriminator
                    # step; here the fakes are scored against the "ones"
                    # labels.
                    output_D = self.netD(fake)
                    errG = self.loss_G(fake, GT, label_ones_var, output_D)
                else:
                    fake = self.netG(images)
                    errG = self.loss_G(fake, GT)
                errG.backward()  # backprop errors
                optimizerG.step()  # optimize only netG params
I guess I am not converting tensors to Variables correctly, or maybe the problem is that I am doing it inside the training loop. Could you please take a look and let me know what I can change to gain efficiency and save memory, if possible?
Thanks!