Hi,
I have been hunting a memory leak on GPU for a few days, and there seems to be a pytorch issue.
In the code below, I define a dummy Function that does nothing, and just forward through it a large tensor. Depending on .cuda() being inside or outside Variable(), there may or may not be a leak.
Did I miss something?
import torch
from torch import Tensor
from torch.autograd import Variable
from torch.autograd import Function
######################################################################
# That's not pretty
import os
import re
def cuda_memory():
    """Return (total, used) GPU framebuffer memory in MiB, parsed from nvidia-smi.

    Returns:
        (fb_total, fb_used) as ints, or (-1, -1) if the "FB Memory Usage"
        section is not found (e.g. nvidia-smi unavailable or no GPU).
    """
    fb_total, fb_used = -1, -1
    # `with` closes the pipe when done -- the original left it open (leaked fd).
    with os.popen('nvidia-smi -q') as f:
        for line in f:
            if re.match('^ *FB Memory Usage', line):
                # The two lines right after the header are "Total : N MiB"
                # and "Used : N MiB"; pull each line and extract the number.
                fb_total = int(re.search(': ([0-9]*) MiB', f.readline()).group(1))
                fb_used = int(re.search(': ([0-9]*) MiB', f.readline()).group(1))
                break  # first device section is enough; skip the rest
    return fb_total, fb_used
######################################################################
class Blah(Function):
    """Identity autograd Function used to probe allocator behavior.

    NOTE(review): this is the legacy (pre-0.2 style) Function API with an
    instance-level forward; it performs no computation at all.
    """

    def forward(self, input):
        # Pass the tensor through untouched.
        return input
######################################################################
# Single Function instance reused across every call, as in the report.
blah = Blah()

# Case 1: build the tensor on CPU, wrap it, then move the Variable to GPU.
# Reported behavior: memory usage stays flat after warm-up.
for step in range(10):
    x = Variable(Tensor(10000, 200).normal_()).cuda()
    y = blah(x)
    fb_total, fb_used = cuda_memory()
    print(step, fb_used, '/', fb_total)

# Case 2: move the raw tensor to GPU first, then wrap it in a Variable.
# Reported behavior: usage grows by ~8 MiB per iteration (the leak).
for step in range(10):
    x = Variable(Tensor(10000, 200).cuda().normal_())
    y = blah(x)
    fb_total, fb_used = cuda_memory()
    print(step, fb_used, '/', fb_total)
prints:
0 257 / 8113
1 265 / 8113
2 265 / 8113
3 265 / 8113
4 265 / 8113
5 265 / 8113
6 265 / 8113
7 265 / 8113
8 265 / 8113
9 265 / 8113
0 267 / 8113
1 267 / 8113
2 275 / 8113
3 283 / 8113
4 291 / 8113
5 299 / 8113
6 307 / 8113
7 315 / 8113
8 323 / 8113
9 331 / 8113