del tensor
should work. Note that this only releases the memory back into PyTorch's caching allocator, not to the device. To return the cached memory to the device (e.g. so other processes can use it), you would additionally need to call torch.cuda.empty_cache() afterwards.
Here is a small example:
def _report_cuda_memory():
    """Print currently allocated and reserved CUDA memory, in MiB.

    ``memory_allocated`` counts bytes occupied by live tensors;
    ``memory_reserved`` counts bytes held by the caching allocator
    (a superset of allocated). ``memory_cached()`` was deprecated in
    PyTorch 1.4 and renamed to ``memory_reserved()``.
    """
    print(torch.cuda.memory_allocated()/1024**2)
    print(torch.cuda.memory_reserved()/1024**2)

# Baseline: nothing allocated yet.
_report_cuda_memory()
# 1M float32 values -> exactly 4 MiB allocated; the allocator may reserve more.
x = torch.randn(1024*1024).cuda()
# 4MB allocation and potentially larger cache
_report_cuda_memory()
# 8M float32 values -> 32 MiB more.
y = torch.randn(8*1024*1024).cuda()
# 4+32=36MB allocation and potentially larger cache
_report_cuda_memory()
del x
# 32MB allocation, cache should stay the same: deleting a tensor returns its
# memory to the caching allocator, not to the device.
_report_cuda_memory()
torch.cuda.empty_cache()
# 32MB allocation and cache: empty_cache() releases unused cached blocks
# back to the device, but cannot touch memory still held by live tensors (y).
_report_cuda_memory()
del y
# 0MB allocation, 32MB cache
_report_cuda_memory()
torch.cuda.empty_cache()
# 0MB allocation and cache
_report_cuda_memory()