I’m trying to do some simple feature extraction using a pretrained ResNet50 on the CIFAR 100-20 dataset. It should be pretty straightforward, but after a certain number of batches the CUDA out of memory errors would appear. It seems very strange to me as something must have been accumulating across the batches and overwhelmed the GPU, but I could not locate the problem. Here’s a simple code snippet
import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import ExponentialLR import torchvision.datasets as datasets from torchvision import transforms from torch.utils.data import TensorDataset, DataLoader, Dataset from torchvision.models import resnet50 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model = resnet50(pretrained=True) model.eval() model = model.to(device) transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((224,224)), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ]) trainset = datasets.CIFAR100(root='./', train=True, download=True, transform=transform) trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True) testset = datasets.CIFAR100(root='./', train=False, download=True, transform=transform) testloader = torch.utils.data.DataLoader(testset, batch_size=8, shuffle=False) train_f =  train_l =  for i,(im,label) in enumerate(testloader): print(i) im = im.to(device) train_f.append(model(im).cpu()) train_l.append(label) im = im.to('cpu') torch.cuda.empty_cache() train_f = torch.cat(train_f) train_l = torch.cat(train_l) np.save("features",train_f.detach().numpy()) np.save("labels",train_l.detach().numpy())
Any suggestions would be appreciated!
UPDATE: Even when I run the code above only on CPU, my laptop would kill the program after a certain number of batches.