RuntimeError: CUDA out of memory with ResNet + LSTM

I am using a pretrained ResNet model with an LSTM added on top.
Unfortunately, whenever I run the model I get an error saying that there is not enough memory on my GPU. I have tried several things but couldn't fix it; even with batch_size set to 1 it still fails.
The error already occurs in the first epoch, at the line out = self.model(x) inside the ResNet model.
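To make the shape flow concrete: as far as I can tell from my own reshapes, even with batch_size = 1 the ResNet still receives all seq_dim frames of one sequence in a single forward pass. A rough sketch (the values 1 and 16 are only placeholders; seq_dim really comes from reader.SEQ_SIZE):

import torch

batch_size, seq_dim = 1, 16                              # placeholder values
images = torch.zeros(batch_size, seq_dim, 256, 256, 3)   # one batch as returned by get_batch
frames = images.view(-1, 3, 256, 256)                    # what actually goes into the ResNet
print(frames.shape)                                      # torch.Size([16, 3, 256, 256])
features = torch.zeros(frames.size(0), 2048)             # ResNet-101 features, 2048 per frame
print(features.view(-1, seq_dim, 2048).shape)            # torch.Size([1, 16, 2048]) -> LSTM input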

Below is my code:

import math
from collections import namedtuple

import numpy as np
import torch
import torch.nn as nn
from tqdm import trange  # progress bar

# reader below is my own data-loading module

def get_batch(data, batch_size):
    # stack batch_size samples from the data iterator into one numpy batch
    batch_x = []
    batch_y = []

    for _ in range(batch_size):
        ims, labels = next(data)
        batch_x.append(ims[np.newaxis, ...])
        batch_y.append(labels[np.newaxis, ...])

    batch_x = np.concatenate(batch_x, axis=0)
    batch_y = np.concatenate(batch_y, axis=0)

    return batch_x, batch_y

# ResNet Model
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x

class ResNetModel(nn.Module):
    def __init__(self):
        super(ResNetModel, self).__init__()
        self.model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet101', pretrained=True)
        self.model.fc = Identity()  # drop the classification head, keep the 2048-dim features

    # forward function of ResNet model
    def forward(self, x):
        out = self.model(x)
        return out

# LSTM
class Combine(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(Combine, self).__init__()
        # ResNet
        self.resnetmodel = ResNetModel()

        # Building LSTM
        # Hidden dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
        
        # Sigmoid
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        # call convolutional NN
        out = self.resnetmodel(x)

        # reshape the ResNet features to (batch, seq_dim, input_dim) for the LSTM
        # (seq_dim and input_dim are the globals defined further below)
        out = out.view(-1, seq_dim, input_dim)

        # Initialize hidden state
        h0 = torch.zeros(self.layer_dim, out.size(0), self.hidden_dim).requires_grad_().to(next(self.parameters()).device)
        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, out.size(0), self.hidden_dim).requires_grad_().to(next(self.parameters()).device)
                
        out, (hn, cn) = self.lstm(out, (h0.detach(), c0.detach()))
        
        out = self.fc(out)
        
        out = self.sigmoid(out)
        out = out.view(out.size(0)*seq_dim)
        return out


# initialize model
input_dim = 2048   # feature size of the ResNet-101 backbone
hidden_dim = 100
layer_dim = 1      # number of stacked LSTM layers
output_dim = 1

model = Combine(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.BCELoss()

# hyperparameters
seq_dim = reader.SEQ_SIZE
n_iters = 1000 # 1000
batch_size = 16
num_epochs = int(n_iters/(len(reader.train) / batch_size))

learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# move model to GPU
torch.cuda.empty_cache()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# run model on training and test set
Record = namedtuple("Record", ("epoch", "iteration", "accuracy", "loss"))
metrics = []

iteration = 0
epochs_bar = trange(num_epochs, desc="Epochs", leave=True)
shuffle = True

for epoch in epochs_bar:
        
    model.train()  # set the model to training mode
    train_data = reader.train_data(shuffle=shuffle)  # my own data loader
        
    for i in range(math.ceil(len(reader.train)/batch_size)): 
        images, labels = get_batch(train_data, batch_size)

        # transform numpy arrays to torch tensors
        images = torch.from_numpy(images)
        labels = torch.from_numpy(labels)

        # move to GPU
        images, labels = images.to(device, dtype=torch.float), labels.to(device, dtype=torch.float)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Reshape images: flatten batch and sequence dimensions into single frames
        images = images.view(-1, 256, 256, 3)
        labels = labels.view(-1)
        images = images.view(-1, 3, 256, 256)

        # Forward pass only to get logits/output
        outputs = model(images)

        # Calculate Loss: sigmoid BCELoss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iteration += 1


    # Calculate Accuracy         
    correct = 0
    total = 0
        
    # Iterate through test dataset
    model.eval()  # set the model to evaluation mode

    test_data = reader.test_data(shuffle=shuffle)  # my own data loader

    for i in range(math.ceil(len(reader.test)/batch_size)):
        images, labels = get_batch(test_data, batch_size)

        # transform numpy arrays to torch tensors
        images = torch.from_numpy(images)
        labels = torch.from_numpy(labels)

        # move to GPU
        images, labels = images.to(device, dtype=torch.float), labels.to(device, dtype=torch.float)

        # Reshape images: flatten batch and sequence dimensions into single frames
        images = images.view(-1, 256, 256, 3)
        labels = labels.view(-1)
        images = images.view(-1, 3, 256, 256)

        # Forward pass only to get logits/output
        outputs = model(images)

        # Get predictions from the maximum value
        predicted = outputs
        predicted = predicted.view(-1)

        total += labels.size(0)

        # accuracy
        predicted = torch.as_tensor((predicted - 0.5) > 0, dtype=torch.float)
        correct += (predicted == labels).sum()

    accuracy = 100 * correct / total
    accuracy = accuracy.detach().cpu().item()
    loss = loss.detach().cpu().item()
               
    metrics.append(Record(epoch=epoch, iteration=iteration, accuracy=accuracy, loss=loss))

    epochs_bar.set_postfix_str('Loss: {:.6f} \t Test Acc: {:.6f}'.format(loss, accuracy))
    epochs_bar.refresh() # to show immediately the update
        

Hi,

Maybe your GPU simply doesn't have enough free memory. Can you run nvidia-smi in a terminal to check how much memory is available and whether anything else is already using it?
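You can also check from inside PyTorch. A rough sketch, assuming the model sits on GPU 0:

import torch

# total memory on the card vs. what this process has allocated/reserved so far
props = torch.cuda.get_device_properties(0)
print(f"total:     {props.total_memory / 1024**2:.0f} MiB")
print(f"allocated: {torch.cuda.memory_allocated(0) / 1024**2:.0f} MiB")
print(f"reserved:  {torch.cuda.memory_reserved(0) / 1024**2:.0f} MiB")

If nvidia-smi shows that most of the memory is already taken by another process, the out-of-memory error will happen no matter how small you make the batch.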