I am using a pretrained ResNet model with an LSTM added on top.
Unfortunately, when I run the model I always get a CUDA error saying there is not enough memory on my GPU. I have tried several things but could not fix it; even with batch_size set to 1 the error still occurs.
The error is raised already in the first epoch, at the line out = self.model(x) inside the ResNet model's forward pass.
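To narrow it down, this is roughly the diagnostic I print right before the failing line (a minimal sketch; report_gpu_memory is a helper name I made up, but torch.cuda.memory_allocated and torch.cuda.memory_reserved are the standard PyTorch calls):

import torch

def report_gpu_memory(tag):
    # tensors currently allocated vs. memory reserved by the caching allocator, in MiB
    alloc = torch.cuda.memory_allocated() / 1024**2
    reserved = torch.cuda.memory_reserved() / 1024**2
    print(f"[{tag}] allocated: {alloc:.0f} MiB, reserved: {reserved:.0f} MiB")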
Below is my full code:
import math
from collections import namedtuple

import numpy as np
import torch
import torch.nn as nn
from tqdm import trange

import reader  # my own data module (SEQ_SIZE, train/test splits, loaders)


def get_batch(data, batch_size):
    batch_x = []
    batch_y = []
    for _ in range(batch_size):
        ims, labels = next(data)
        batch_x.append(ims[np.newaxis, ...])
        batch_y.append(labels[np.newaxis, ...])
    batch_x = np.concatenate(batch_x, axis=0)
    batch_y = np.concatenate(batch_y, axis=0)
    return batch_x, batch_y
# ResNet model used as a feature extractor
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x

class ResNetModel(nn.Module):
    def __init__(self):
        super(ResNetModel, self).__init__()
        self.model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet101', pretrained=True)
        # replace the classification head so the model returns 2048-d features
        self.model.fc = Identity()

    def forward(self, x):
        out = self.model(x)  # this is the line that raises the OOM error
        return out
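Since the ResNet only serves as a fixed feature extractor here, one memory-saving variant worth noting (sketched below, not verified on my data; the class name is made up) is to freeze the backbone. When no backbone parameter requires gradients and the input does not either, autograd does not retain the backbone's intermediate activations:

class FrozenResNetModel(nn.Module):
    # like ResNetModel above, but with the backbone frozen
    def __init__(self):
        super(FrozenResNetModel, self).__init__()
        self.model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet101', pretrained=True)
        self.model.fc = Identity()
        for p in self.model.parameters():
            p.requires_grad = False  # no gradients or optimizer state for these weights

    def forward(self, x):
        return self.model(x)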
# LSTM on top of the ResNet features
class Combine(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(Combine, self).__init__()
        # ResNet feature extractor
        self.resnetmodel = ResNetModel()
        # Building the LSTM
        self.hidden_dim = hidden_dim    # hidden dimensions
        self.layer_dim = layer_dim      # number of stacked LSTM layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
        # Sigmoid
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # run the convolutional feature extractor
        out = self.resnetmodel(x)
        # reshape to (batch, seq_dim, input_dim) for the LSTM
        # (seq_dim and input_dim are module-level globals defined below)
        out = out.view(-1, seq_dim, input_dim)
        # initialize hidden and cell state with zeros on the model's device
        device = next(self.parameters()).device
        h0 = torch.zeros(self.layer_dim, out.size(0), self.hidden_dim, device=device)
        c0 = torch.zeros(self.layer_dim, out.size(0), self.hidden_dim, device=device)
        out, (hn, cn) = self.lstm(out, (h0, c0))
        out = self.fc(out)
        out = self.sigmoid(out)
        # flatten to one prediction per frame
        out = out.view(out.size(0) * seq_dim)
        return out
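To double-check the reshaping in forward, here is a small CPU smoke test I would run (the sizes are made up; note that forward reads the module-level globals seq_dim and input_dim, which the real run sets below):

# CPU smoke test with made-up sizes (the real values are set further down)
seq_dim = 4                                    # pretend: 4 frames per sequence
input_dim = 2048                               # ResNet-101 feature size
m = Combine(input_dim, hidden_dim=100, layer_dim=1, output_dim=1)
dummy = torch.randn(2 * seq_dim, 3, 256, 256)  # 2 sequences, frames flattened into the batch
print(m(dummy).shape)                          # expected: torch.Size([8]) == 2 * seq_dim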
# initialize model
input_dim = 2048    # ResNet-101 feature size
hidden_dim = 100
layer_dim = 1       # number of LSTM layers (reduced from 3)
output_dim = 1
model = Combine(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.BCELoss()

# hyperparameters
seq_dim = reader.SEQ_SIZE
n_iters = 1000
batch_size = 16
num_epochs = int(n_iters / (len(reader.train) / batch_size))
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# move model to GPU
torch.cuda.empty_cache()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
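For reference, this is how I check the parameter count of the combined model, which dominates the static memory footprint before any activations are allocated (a quick sketch, parameters only):

# rough model-size check; activations and optimizer state are not included
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.1f} M parameters, ~{n_params * 4 / 1024**2:.0f} MiB as float32")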
# run the model on the training and test set
Record = namedtuple("Record", ("epoch", "iteration", "accuracy", "loss"))
metrics = []
iteration = 0
epochs_bar = trange(num_epochs, desc="Epochs", leave=True)
shuffle = True

for epoch in epochs_bar:
    model.train()  # set model to train mode
    train_data = reader.train_data(shuffle=shuffle)  # self-programmed dataloader
    for i in range(math.ceil(len(reader.train) / batch_size)):
        images, labels = get_batch(train_data, batch_size)
        # transform numpy arrays to torch tensors and move them to the GPU
        images = torch.from_numpy(images).to(device, dtype=torch.float)
        labels = torch.from_numpy(labels).to(device, dtype=torch.float)
        # clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # flatten the sequence dimension into the batch: (batch*seq, 3, 256, 256)
        images = images.view(-1, 3, 256, 256)
        labels = labels.view(-1)
        # forward pass; outputs are sigmoid probabilities
        outputs = model(images)
        # calculate loss: BCELoss on the sigmoid outputs
        loss = criterion(outputs, labels)
        # compute gradients w.r.t. parameters
        loss.backward()
        # update parameters
        optimizer.step()
        iteration += 1
    # calculate accuracy on the test set
    correct = 0
    total = 0
    model.eval()  # set model to eval mode
    test_data = reader.test_data(shuffle=shuffle)  # self-programmed dataloader
    for i in range(math.ceil(len(reader.test) / batch_size)):
        images, labels = get_batch(test_data, batch_size)
        # transform numpy arrays to torch tensors and move them to the GPU
        images = torch.from_numpy(images).to(device, dtype=torch.float)
        labels = torch.from_numpy(labels).to(device, dtype=torch.float)
        # flatten the sequence dimension into the batch, as in training
        images = images.view(-1, 3, 256, 256)
        labels = labels.view(-1)
        # forward pass
        outputs = model(images)
        # threshold the sigmoid outputs at 0.5 to get hard predictions
        predicted = (outputs.view(-1) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum()

    accuracy = 100 * correct / total
    accuracy = accuracy.detach().cpu().item()
    loss = loss.detach().cpu().item()
    metrics.append(Record(epoch=epoch, iteration=iteration, accuracy=accuracy, loss=loss))
    epochs_bar.set_postfix_str('Loss: {:.6f} \t Test Acc: {:.6f}'.format(loss, accuracy))
    epochs_bar.refresh()  # show the update immediately
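One thing I have read but not yet verified on my own data: without torch.no_grad(), every forward pass in the test loop above keeps its autograd graph alive, which can exhaust GPU memory on its own, independent of batch size. This is the variant of the evaluation loop I plan to try next (a sketch using the same names as above):

model.eval()
correct, total = 0, 0
test_data = reader.test_data(shuffle=shuffle)
with torch.no_grad():  # no graph is built, so activations are freed right after each batch
    for i in range(math.ceil(len(reader.test) / batch_size)):
        images, labels = get_batch(test_data, batch_size)
        images = torch.from_numpy(images).to(device, dtype=torch.float).view(-1, 3, 256, 256)
        labels = torch.from_numpy(labels).to(device, dtype=torch.float).view(-1)
        outputs = model(images)
        predicted = (outputs.view(-1) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total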