Trying to train LSTM on GPU

I’ve been trying to train an LSTM cell on the GPU. The same code runs fine on the CPU, but on the GPU it fails with an error.

I’ve found this post (How to train LSTM with GPU - #17 by Crohn_eng), but I’m using a custom collate function and that thread doesn’t answer my issue.

Below is the LSTM code:

import torch
import torch.nn.utils.rnn as rnn_utils
import torch.nn as nn
from torchUtils import SplitDataset

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def my_collate(batch):
    # batch contains a list of tuples of structure (sequence, target)
    data = [item[0] for item in batch]
    data = rnn_utils.pack_sequence(data, enforce_sorted=False)
    targets = [item[1] for item in batch]
    return [data, targets]
	

dataset = SplitDataset(shuffleData=False)  # ImageDataset()
dataset.selectPhase('train')
trainingDL = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=my_collate)
inputs, labels = next(iter(trainingDL))
print(inputs[0].shape)  # inputs is a PackedSequence; inputs[0] is its flattened data tensor

'''
a = torch.Tensor([[1, 2], [2, 3], [3, 4]])
b = torch.Tensor([[4, 5], [5, 6]])
c = torch.Tensor([[6, 7]])
packed = rnn_utils.pack_sequence([a, b, c])
'''

lstm = nn.LSTM(1000, 1000, batch_first=True).to(device)  # input size 1000 (VGG16 output features), hidden size 1000
#device = torch.device("cpu")

packed_output, (h, c) = lstm(inputs.to(device))

y = rnn_utils.pad_packed_sequence(packed_output)  # returns (padded_output, lengths)
predictionLayer = nn.Linear(1000, 2)
print(len(y))
print(y[1])        # sequence lengths
print(y[0].shape)  # (max_seq_len, batch, hidden), since batch_first defaults to False here
currentIdx = -1
for count, pos in enumerate(y[1]):
    print(pos, count)
    # classify the output at the last valid timestep of each sequence
    print(predictionLayer(y[0][pos - 1, count, :]))

The custom dataset is the following:

class SplitDataset(MongoDataset):
	def __init__(self, trainingProportion = 0.8, shuffleData = False):
		MongoDataset.__init__(self)
		trainingSetSize = int(len(self)*trainingProportion)
		validSetSize =  len(self) - trainingSetSize
		# Could be replaced by something other than a random split
		random.shuffle(self.ids)
		self.phase = 'train'
		self.trainingSet = self.ids[0:trainingSetSize]
		self.validationSet = self.ids[trainingSetSize:]
		self.shuffleData = shuffleData
		self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
		self.featureExtractor = models.vgg16(pretrained=True)

	def __getitem__(self, i):
		profile, label = MongoDataset.__getitem__(self,i)
		# You could replace this with the online data loader
		with torch.no_grad():
			self.featureExtractor.eval()
			preppedData = [self.featureExtractor(self.imageTransform(img).unsqueeze(0)).squeeze().to(self.device) for img in profile.images]

		# TODO: Replace this with a proper transform using PyTorch libs
		if self.phase == 'train' and self.shuffleData:
			np.random.shuffle(preppedData)
		return torch.stack(preppedData), label
		
	def selectPhase(self, phase):
		self.phase = phase
		if phase == 'val':
			self.loadData = self.validationSet
			self.transform = ProfileDataset.validationTransform
		elif phase == 'train':
			self.loadData = self.trainingSet
			self.transform = ProfileDataset.trainingTransform
		else:
			raise("Invalid phase")

The error appears to be caused by the model’s weights. Is it possible the model is only partially on the GPU?
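
For reference, a quick way to check which device each piece actually lives on (just a diagnostic sketch, using the names from the script above):

print(next(lstm.parameters()).device)             # cuda:0 after the .to(device) call
print(next(predictionLayer.parameters()).device)  # nn.Linear parameters stay on the CPU unless .to(device) is called
print(y[0].device)                                # device of the padded LSTM output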

predictionLayer(y[0][pos - 1, count, :]).to(device)


That wasn’t the solution, but it was helpful. It was a silly mistake: the prediction layer wasn’t sent to the device. Adding

predictionLayer = nn.Linear(1000, 2).to(device)

fixed it.
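
For anyone hitting the same thing: one pattern that avoids this class of mistake is to register both submodules inside a single nn.Module, so that one .to(device) call moves every parameter. This is only a sketch (the class name and the use of the final hidden state are mine, not the original code):

import torch
import torch.nn as nn

class SequenceClassifier(nn.Module):
    # Hypothetical wrapper: the LSTM and the linear head are registered as
    # submodules, so model.to(device) moves all of their parameters at once.
    def __init__(self, feature_size=1000, hidden_size=1000, num_classes=2):
        super().__init__()
        self.lstm = nn.LSTM(feature_size, hidden_size, batch_first=True)
        self.head = nn.Linear(hidden_size, num_classes)

    def forward(self, packed_input):
        _, (h, _) = self.lstm(packed_input)
        # h[-1] is each sequence's hidden state at its last valid timestep
        return self.head(h[-1])

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = SequenceClassifier().to(device)   # one call moves lstm and head together

With the packed batch from my_collate above, the forward pass would then be something like model(inputs.to(device)).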