Torch still uses GPU memory after the function has executed

import glob

import clip
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm

def process_images():
    # loads the CLIP model (onto the GPU by default when available)
    clip_model, preprocess = clip.load("ViT-B/32")
    clip_model.eval()
    files = glob.glob('./**/*.jpg', recursive=True)
    batch_size = 64
    with torch.inference_mode():
        for i in tqdm(range(0, len(files), batch_size)):
            batch_files = files[i:i+batch_size]
            # preprocess each image and move the batch to the GPU
            images = torch.stack([preprocess(Image.open(file)) for file in batch_files], dim=0).cuda()
            features = clip_model.encode_image(images)
            features /= features.norm(dim=-1, keepdim=True)
            # move the normalized features back to the CPU as numpy arrays
            features = features.detach().cpu().clone().numpy()
            for feature, file in zip(features, batch_files):
                np.save(file.replace('.jpg', '.npy'), feature)

process_images()
torch.cuda.empty_cache()

My processing looks like this, and torch still uses GPU memory after the function returns, while I expect the memory to be freed. How should I fix it?

I don’t know which memory you are measuring, but in case you are checking the GPU memory, I can’t reproduce the issue: using a resnet18, I get zero allocated memory after process_images() and zero reserved memory after empty_cache(), as expected. Note that memory_allocated() only counts memory occupied by live tensors, while memory_reserved() also includes the cache the allocator keeps for reuse, which is what empty_cache() releases:

import torch
from torchvision import models

def process_images():
    clip_model = models.resnet18().cuda()
    clip_model.eval()
    with torch.inference_mode():
        for i in range(0, 10):
            # random inputs standing in for preprocessed images
            images = torch.randn(64, 3, 224, 224).cuda()
            features = clip_model(images)
            features /= features.norm(dim=-1, keepdim=True)
            features = features.detach().cpu().clone().numpy()

# before the call: nothing allocated or reserved yet
print(torch.cuda.memory_allocated())
# > 0
print(torch.cuda.memory_reserved())
# > 0

process_images()
# after the call: no tensors are alive, but the caching allocator still holds memory
print(torch.cuda.memory_allocated())
# > 0
print(torch.cuda.memory_reserved())
# > 723517440

torch.cuda.empty_cache()
# empty_cache() returns the cached memory to the device
print(torch.cuda.memory_allocated())
# > 0
print(torch.cuda.memory_reserved())
# > 0
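
If you still see nonzero numbers with your CLIP setup, here is a minimal sketch for instrumenting your own run the same way (it assumes process_images is the function from your question; the measuring calls are standard PyTorch):

import torch

def report(tag):
    # print both counters so you can tell live tensors apart from the allocator cache
    print(f"{tag}: allocated={torch.cuda.memory_allocated()}, "
          f"reserved={torch.cuda.memory_reserved()}")

report("before")
process_images()              # the CLIP loop from the question
report("after call")          # allocated should drop to 0 once all references are gone
torch.cuda.empty_cache()
report("after empty_cache")   # reserved should drop to 0 as well

Also note that tools such as nvidia-smi will still show memory used by the CUDA context itself, which empty_cache() cannot release; and if allocated stays above zero, some tensor (e.g. the model or a result kept in a global) is still referenced.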