Hi All,
I tried all the solutions on the forum but nothing seems to work.
Kindly help me with this issue. I am not able to figure out whats wrong.
This is my first time trying to run LSTM on GPU
Please find below the model, training loop and error trace
1. Model:
class LSTM(nn.ModuleList):
def __init__(self, sequence_length, input_size, hidden_size, batch_size):
super(LSTM,self).__init__()
# used for live debugging, use next command to move to next line
# set_trace()
self.sequence_length = sequence_length
self.input_size = input_size
self.hidden_size = hidden_size
self.batch_size = batch_size
# LSTMCell 1
self.lstm1 = nn.LSTMCell(input_size = self.input_size,
hidden_size = self.hidden_size,
bias = True)
# LSTMCell 2
self.lstm2 = nn.LSTMCell(input_size = self.hidden_size,
hidden_size = self.hidden_size,
bias = True)
# LSTMCell 3
self.lstm3 = nn.LSTMCell(input_size = self.hidden_size ,
hidden_size = self.hidden_size,
bias = True)
# LSTMCell 4
self.lstm4 = nn.LSTMCell(input_size = self.hidden_size,
hidden_size = self.hidden_size,
bias = True)
# Dropout Layer 1
self.drop1 = nn.Dropout(p=0.4)
# Fully-connected Layer, out_features = input_size = vocab_size
self.fc1 = nn.Linear(in_features = self.hidden_size,
out_features = self.hidden_size,
bias = True)
# Dropout Layer 2
self.drop2 = nn.Dropout(p=0.4)
self.fc2 = nn.Linear(in_features = self.hidden_size,
out_features = self.input_size,
bias = True)
def forward(self,inputs, initial_states):
'''
Function responsible for the forward pass through the network
Args:
inputs : Tuple of batch, input_size at each timestep t
initail_states : Tuple of hidden state and cell state, initailly set to 0s
Returns:
Collection of output sequences at each timestep t
'''
# More debugguing
# set_trace()
# Initialize empty output sequence
output_sequence = torch.empty((self.sequence_length,
self.batch_size,
self.input_size))
# For every time step in the sequence
for t in range(self.sequence_length):
# Passing the input sequentially through all LSTMCells
initial_hidden_state, initial_cell_state = initial_states
initial_hidden_state.requires_grad_()
initial_hidden_state.cuda()
initial_cell_state.requires_grad_()
initial_cell_state.cuda()
state1 = self.lstm1(inputs[t], (initial_hidden_state,initial_cell_state))
hidden_state1, cell_state1 = state1
state2 = self.lstm2(hidden_state1, (initial_hidden_state,initial_cell_state))
hidden_state2, cell_state2 = state2
state3 = self.lstm3(hidden_state2, (initial_hidden_state,initial_cell_state))
hidden_state3, cell_state3 = state3
state4 = self.lstm4(hidden_state3, (initial_hidden_state,initial_cell_state))
hidden_state4, cell_state4 = state4
# Passing output of LSTMCells through dropout and fc layers
output = self.fc1(self.drop1(hidden_state4))
# add final output to output sequence
output_sequence[t] = self.fc2(self.drop2(output))
# returning output sequence
return output_sequence.view((self.sequence_length * self.batch_size, -1))
def initialize_hidden_and_cell_states(self):
# The hidden and cell state at the start are all zeros
return (torch.zeros(self.batch_size, self.hidden_size),
torch.zeros(self.batch_size, self.hidden_size))
2. Instantiating the model and moving to GPU:
# Setting the device that will be used for training.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# Creating the model and load it into the GPU
model = LSTM(sequence_length=128,
input_size=len(character_to_integer),
hidden_size=512,
batch_size=128)
model = model.to(device)
3. Training Loop:
# Training the model
for epoch in range(10):
# Initialize hidden and cell state to zero at the beginning
hidden_and_cell_state = model.initialize_hidden_and_cell_states()
print(type(hidden_and_cell_state))
print(type(hidden_and_cell_state[0]))
# Creating instance of dataloader class
T = TextDataLoader(text_corpus_encoded, 128,128)
for i, (feature, label) in enumerate(T.make_batches()):
# One-hot encode inputs, convert into tensor and transpose them
feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
# Converting input to FloatTensor
feature = feature.float()
# Transpose labels and convert into LongTensor
label = torch.from_numpy(label.T).type(torch.LongTensor)
# Wrap features and labels in PyTorch Variable
feature, label = Variable(feature),Variable(label)
# Convert features and labels to type torch.cuda and load them into the GPU
feature.requires_grad_()
feature = feature.to(device)
label.requires_grad_()
label = feature.to(device)
optimizer.zero_grad() # Makes sure the gradients are initially zero
out = model(feature, hidden_and_cell_state) # Forward pass
loss = criteron(out,label.contiguous().view(128*128)) # Computing loss
loss.backward() # Back-Prop the loss / Backward Pass
optimizer.step() # Update the gradients
print(f'Batch : {i+1} Loss : {loss}')
print(f'Epoch : {epoch+1} Loss : {loss}')
4. Error trace:
<class ‘tuple’>
<class ‘torch.Tensor’>
RuntimeError Traceback (most recent call last)
in ()
31 optimizer.zero_grad() # Makes sure the gradients are initially zero
32
—> 33 out = model(feature, hidden_and_cell_state) # Forward pass
34
35 loss = criteron(out,label.contiguous().view(128*128)) # Computing loss
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
–> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
in forward(self, inputs, initial_states)
98 initial_cell_state.cuda()
99
–> 100 state1 = self.lstm1(inputs[t], (initial_hidden_state,initial_cell_state))
101 hidden_state1, cell_state1 = state1
102
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
–> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
682 input, hx,
683 self.weight_ih, self.weight_hh,
–> 684 self.bias_ih, self.bias_hh,
685 )
686
/usr/local/lib/python3.6/dist-packages/torch/nn/_functions/rnn.py in LSTMCell(input, hidden, w_ih, w_hh, b_ih, b_hh)
26 if input.is_cuda:
27 igates = F.linear(input, w_ih)
—> 28 hgates = F.linear(hidden[0], w_hh)
29 state = fusedBackend.LSTMFused.apply
30 return state(igates, hgates, hidden[1]) if b_ih is None else state(igates, hgates, hidden[1], b_ih, b_hh)
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
992 return torch.addmm(bias, input, weight.t())
993
–> 994 output = input.matmul(weight.t())
995 if bias is not None:
996 output += bias
RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'mat2’
next(model.parameters()).is_cuda => True
feature.is_cuda => True
label.is_cuda => True