I have noticed that the GPU memory actually used after a transposed convolution does not match my estimate of the required memory.
def get_gpu_memory_map():
    """Return the GPU memory currently in use, in MiB, as reported by nvidia-smi.

    Queries ``nvidia-smi --query-gpu=memory.used`` and sums the per-GPU
    figures.  The original ``float(result)`` crashed on multi-GPU machines
    (nvidia-smi prints one line per GPU) and on Python 3 (``check_output``
    returns ``bytes``); on a single-GPU Python 2 setup the result is the same.
    """
    result = subprocess.check_output(
        [
            'nvidia-smi', '--query-gpu=memory.used',
            '--format=csv,nounits,noheader'
        ])
    # Decode bytes (Python 3) and parse one "used MiB" value per line.
    lines = result.decode('ascii').strip().splitlines()
    return float(sum(float(line) for line in lines))
# Plain 3D conv block, 8 -> 8 channels (kernel 4, stride 1, padding 1);
# defined here but not used in the measurements below.
conv1 = nn.Sequential(
    nn.Conv3d(in_channels=8, out_channels=8, kernel_size=4, stride=1, padding=1),
    nn.LeakyReLU(negative_slope=0.1, inplace=True),
    nn.BatchNorm3d(8),
).float().cuda()

# First decoder block: transposed conv with stride 2 doubles each spatial dim.
deconv1 = nn.Sequential(
    nn.ConvTranspose3d(in_channels=8, out_channels=8, kernel_size=4, stride=2, padding=1),
    nn.LeakyReLU(negative_slope=0.1, inplace=True),
    nn.BatchNorm3d(8),
).float().cuda()

# Second decoder: 8 -> 1 channels, again doubling the spatial dimensions.
deconv2 = nn.ConvTranspose3d(
    in_channels=8, out_channels=1, kernel_size=4, stride=2, padding=1
).float().cuda()
# Baseline usage once the modules themselves are resident on the GPU.
gpu_init = get_gpu_memory_map()

# 1 x 8 x 64 x 256 x 256 float32 tensor: 8*64*256*256 * 4 bytes = 128 MiB.
inp = Variable(th.rand(1, 8, 64, 256, 256).cuda())
gpu_before = get_gpu_memory_map()
print('before deconvolution %f, should be %f'
      % (gpu_before - gpu_init,
         float(8 * 64 * 256 * 256 * 32) / (8 * 1024 * 1024)))

# NOTE(review): the "should be" figures count only the input plus the most
# recent output tensor.  Real usage is expected to be higher because
# (a) autograd retains intermediate activations inside deconv1 (the
# ConvTranspose3d/LeakyReLU/BatchNorm outputs) for the backward pass, and
# (b) cudnn.benchmark = True allocates extra workspace while auto-tuning
# algorithms -- so a gap here does not by itself indicate a leak.
out = deconv1(inp)
gpu_after1 = get_gpu_memory_map()
print('after 1st deconvolution %f, should be %f'
      % (gpu_after1 - gpu_init,
         float(8 * 128 * 512 * 512 * 32 + 8 * 64 * 256 * 256 * 32) / (8 * 1024 * 1024)))

out = deconv2(out)
gpu_after2 = get_gpu_memory_map()
print('after 2nd deconvolution %f, should be %f'
      % (gpu_after2 - gpu_init,
         float(1 * 128 * 1024 * 1024 * 32 + 8 * 64 * 256 * 256 * 32) / (8 * 1024 * 1024)))
before deconvolution 128.000000, should be 128.000000
after 1st deconvolution 2241.000000, should be 1152.000000
after 2nd deconvolution 3265.000000, should be 640.000000
Does this suggest a memory leak?
I am using PyTorch 0.2.0_4 with CUDA 8, cuDNN 7, and the cudnn.benchmark = True flag.