Hello, I’m using a pretrained resnet50 for inference only, and I noticed that the cached memory explodes during the first pass through the network. I was wondering if someone could shed some light on what is being stored, and whether there is a way to control it. Here is an example:
import torch
from torchvision import models
def fmt_MB(alloc, cached):
    """Pretty-print allocated and cached CUDA memory figures (both in MB)."""
    # Two separate prints produce the exact same two-line output as one
    # print of a joined multi-line f-string.
    print(f'Alloc : {alloc:>8,.1f} MB')
    print(f'Cached: {cached:>8,.1f} MB')
def mem_usage():
    """Return (allocated_MB, reserved_MB) for the current CUDA device.

    Returns
    -------
    tuple[float, float]
        Bytes reported by the CUDA caching allocator, converted to MB.
        Both values are 0.0 when CUDA has not been initialized.
    """
    # memory_cached() was deprecated and renamed to memory_reserved()
    # in PyTorch 1.4; use the current name.
    return (torch.cuda.memory_allocated() / 2**20,
            torch.cuda.memory_reserved() / 2**20)
# Baseline reading before anything has touched the GPU — both counters are 0.
alloc0, cached0 = mem_usage()
fmt_MB(alloc0, cached0)
Alloc: 0.0 MB
Cached: 0.0 MB
# Moving the pretrained weights to the GPU accounts for the ~98 MB allocated.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13
# (use `weights=...` instead) — kept as-is to match the transcript.
model = models.resnet50(pretrained=True).cuda()
alloc1, cached1 = mem_usage()
fmt_MB(alloc1, cached1)
Alloc: 97.7 MB
Cached: 118.0 MB
# A batch of 1024 random 3x224x224 images on the GPU.
x = torch.rand((1024, 3, 224, 224)).cuda()
# Size in MB: numel * 32 bits per float32, divided by 8 bits/byte and 2**20.
# 1024 * 3 * 224 * 224 * 4 bytes = 588 MB, matching the deltas below.
x_size = x.numel() * 32 / (8 * 2**20)
alloc2, cached2 = mem_usage()
fmt_MB(alloc2, cached2)
alloc2 - alloc1, cached2 - cached1, x_size
Alloc: 685.7 MB
Cached: 706.0 MB
(588.0, 588.0, 588.0)
# Run a forward pass in inference mode: no_grad() stops autograd from
# keeping activations, so the *allocated* delta is just the output tensor
# (~3.9 MB). The cached figure balloons because the caching allocator keeps
# every intermediate activation buffer it had to allocate during the pass.
model.eval()
with torch.no_grad():
    y = model(x)
y_size = y.numel() * 32 / (8 * 2**20)
alloc3, cached3 = mem_usage()
fmt_MB(alloc3, cached3)
alloc3 - alloc2, y_size
Alloc: 689.6 MB
Cached: 13,250.0 MB
(3.90625, 3.90625)
# empty_cache() releases cached blocks that hold no live tensors back to the
# driver; allocated memory is unchanged, cached drops but does not reach the
# pre-forward level.
torch.cuda.empty_cache()
alloc4, cached4 = mem_usage()
fmt_MB(alloc4, cached4)
Alloc: 689.6 MB
Cached: 3,842.0 MB