Is increase in model memory post prediction/forward call expected?

I am trying out the following vgg11 example from torch hub. I have modified it using psutil to capture process memory usage at different stages.

Observation - after the inference/prediction call, the process memory increases, e.g. for vgg11:
pre inference/after load call -> ~691.62890625 MB
post inference/after forward call -> ~747.203125 MB

Question -> Is this expected behavior, and if so, why? Or am I missing something in the following code snippet?

import torch

import sys
import os
import psutil
import gc

def memory_usage_psutil():
    # return the resident set size (RSS) of this process in MB
    process = psutil.Process(os.getpid())
    mem = process.memory_info()[0] / float(2 ** 20)
    #mem = process.memory_percent()
    return mem

from time import sleep

mem1 = str(memory_usage_psutil())

model = torch.hub.load('pytorch/vision:v0.6.0', 'vgg11', pretrained=True)
model.eval()

mem2 = str(memory_usage_psutil())

import urllib.request
url, filename = ("https://github.com/pytorch/hub/raw/master/dog.jpg", "dog.jpg")
urllib.request.urlretrieve(url, filename)  # download the sample image

from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

mem3 = str(memory_usage_psutil())

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)

mem4 = str(memory_usage_psutil())

gc.collect()
sleep(60)

mem5 = str(memory_usage_psutil())

print(mem1+','+mem2+','+mem3+','+mem4+','+mem5)

# Tensor of shape 1000, with confidence scores over ImageNet's 1000 classes
#print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
#print(torch.nn.functional.softmax(output[0], dim=0))
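
To check whether this is a one-time increase (e.g. buffers/workspaces allocated lazily on the first call) rather than something that grows with every call, here is a quick follow-up sketch, reusing the model, input_batch and memory_usage_psutil() defined above:

# repeat the forward pass a few times and re-measure after each call;
# if the usage stabilizes after the first call, the growth is a one-time allocation
with torch.no_grad():
    for i in range(5):
        model(input_batch)
        print('after forward #%d: %.2f MB' % (i + 1, memory_usage_psutil()))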

Thanks!

Having the same issue here: the process PSS consumption roughly doubles after a single call to with torch.no_grad(): model(torch.rand((1, 3, 400, 400)))

 PID User     Command                         Swap      USS      PSS      RSS                                                 
>> python
16346 lilyai   python                             0     3536     3658     8844
>> import torch
16346 lilyai   python                             0    45440    47373    99584
>> model = torch.jit.load
>> model.eval()
16346 lilyai   python                             0   205188   207152   260088
>> with torch.no_grad(): model(torch.rand((1, 3, 400, 400)))
16346 lilyai   python                             0   433532   435863   495292
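
For reference, the same USS/PSS numbers can also be captured from inside the process with psutil (memory_full_info() exposes them on Linux). A rough sketch of the steps above; the model path is just a placeholder:

import os
import psutil
import torch

def mem_mb():
    # uss/pss require Linux and read access to /proc/<pid>/smaps
    info = psutil.Process(os.getpid()).memory_full_info()
    return {k: round(getattr(info, k) / 2 ** 20, 1) for k in ('rss', 'uss', 'pss')}

print('after import torch:', mem_mb())
model = torch.jit.load('model.pt')  # placeholder path
model.eval()
print('after load:', mem_mb())
with torch.no_grad():
    model(torch.rand((1, 3, 400, 400)))
print('after forward:', mem_mb())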