I ran the following code:
import torch as t
import torchvision as tv


def main():
    # mem_get_info(0) returns (free, total) bytes for device 0; print free memory in MiB
    print('memory 1: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    print('init model')
    data = t.rand(1, 3, 224, 224).cuda()
    model = tv.models.resnet50().cuda().eval()
    print('memory 2: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    print('inference...:')
    with t.no_grad():
        model(data)
    print('memory 3: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    print('empty cache: ')
    t.cuda.empty_cache()
    print('memory 4: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    print('delete variable')
    del data
    del model
    print('memory 4.1: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    print('exit function')


if __name__ == '__main__':
    main()
    print('memory 5: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    t.cuda.empty_cache()
    print('empty cache again')
    print('memory 6: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
    print('synchronize')
    t.cuda.synchronize()
    print('memory 7: ', t.cuda.mem_get_info(0)[0] / 1024 / 1024)
The output is:
memory 1: 6987.54296875
init model
memory 2: 6359.54296875
inference...:
memory 3: 5835.54296875
empty cache:
memory 4: 5883.54296875
delete variable
memory 4.1: 5883.54296875
exit function
memory 5: 5883.54296875
empty cache again
memory 6: 6003.54296875
synchronize
memory 7: 6003.54296875
So, can anyone tell me why the free GPU memory is not the same before and after the main() function? Even after deleting data and model, calling empty_cache(), and synchronizing, memory 7 (6003.5 MiB) is still about 984 MiB lower than memory 1 (6987.5 MiB).
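In case it helps narrow this down, here is a small helper I could wrap around each step (just a sketch, I have not run this version yet). My understanding is that torch.cuda.memory_allocated() reports memory held by live tensors and torch.cuda.memory_reserved() reports the caching allocator's pool, while mem_get_info() is the driver-level view, which also includes things like the CUDA context itself.

import torch as t

def report(tag):
    # driver-level free/total memory for device 0, in bytes
    free, total = t.cuda.mem_get_info(0)
    print(tag,
          'free MiB:', free / 1024 / 1024,
          # bytes currently held by live tensors
          'allocated MiB:', t.cuda.memory_allocated(0) / 1024 / 1024,
          # bytes reserved by PyTorch's caching allocator (including cached blocks)
          'reserved MiB:', t.cuda.memory_reserved(0) / 1024 / 1024)

If allocated and reserved both return to 0 after main() while the driver-level free number stays lower, I would guess the remaining gap is the CUDA/cuDNN context rather than leaked tensors, but I am not sure.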