Hi All,

I tried all the solutions on the forum but nothing seems to work.

Kindly help me with this issue. I am not able to figure out whats wrong.

This is my first time trying to run LSTM on GPU

Please find below the model, training loop and error trace

**1. Model:**

```
class LSTM(nn.ModuleList):
def __init__(self, sequence_length, input_size, hidden_size, batch_size):
super(LSTM,self).__init__()
# used for live debugging, use next command to move to next line
# set_trace()
self.sequence_length = sequence_length
self.input_size = input_size
self.hidden_size = hidden_size
self.batch_size = batch_size
# LSTMCell 1
self.lstm1 = nn.LSTMCell(input_size = self.input_size,
hidden_size = self.hidden_size,
bias = True)
# LSTMCell 2
self.lstm2 = nn.LSTMCell(input_size = self.hidden_size,
hidden_size = self.hidden_size,
bias = True)
# LSTMCell 3
self.lstm3 = nn.LSTMCell(input_size = self.hidden_size ,
hidden_size = self.hidden_size,
bias = True)
# LSTMCell 4
self.lstm4 = nn.LSTMCell(input_size = self.hidden_size,
hidden_size = self.hidden_size,
bias = True)
# Dropout Layer 1
self.drop1 = nn.Dropout(p=0.4)
# Fully-connected Layer, out_features = input_size = vocab_size
self.fc1 = nn.Linear(in_features = self.hidden_size,
out_features = self.hidden_size,
bias = True)
# Dropout Layer 2
self.drop2 = nn.Dropout(p=0.4)
self.fc2 = nn.Linear(in_features = self.hidden_size,
out_features = self.input_size,
bias = True)
def forward(self,inputs, initial_states):
'''
Function responsible for the forward pass through the network
Args:
inputs : Tuple of batch, input_size at each timestep t
initail_states : Tuple of hidden state and cell state, initailly set to 0s
Returns:
Collection of output sequences at each timestep t
'''
# More debugguing
# set_trace()
# Initialize empty output sequence
output_sequence = torch.empty((self.sequence_length,
self.batch_size,
self.input_size))
# For every time step in the sequence
for t in range(self.sequence_length):
# Passing the input sequentially through all LSTMCells
initial_hidden_state, initial_cell_state = initial_states
initial_hidden_state.requires_grad_()
initial_hidden_state.cuda()
initial_cell_state.requires_grad_()
initial_cell_state.cuda()
state1 = self.lstm1(inputs[t], (initial_hidden_state,initial_cell_state))
hidden_state1, cell_state1 = state1
state2 = self.lstm2(hidden_state1, (initial_hidden_state,initial_cell_state))
hidden_state2, cell_state2 = state2
state3 = self.lstm3(hidden_state2, (initial_hidden_state,initial_cell_state))
hidden_state3, cell_state3 = state3
state4 = self.lstm4(hidden_state3, (initial_hidden_state,initial_cell_state))
hidden_state4, cell_state4 = state4
# Passing output of LSTMCells through dropout and fc layers
output = self.fc1(self.drop1(hidden_state4))
# add final output to output sequence
output_sequence[t] = self.fc2(self.drop2(output))
# returning output sequence
return output_sequence.view((self.sequence_length * self.batch_size, -1))
def initialize_hidden_and_cell_states(self):
# The hidden and cell state at the start are all zeros
return (torch.zeros(self.batch_size, self.hidden_size),
torch.zeros(self.batch_size, self.hidden_size))
```

**2. Instantiating the model and moving to GPU:**

```
# Setting the device that will be used for training.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# Creating the model and load it into the GPU
model = LSTM(sequence_length=128,
input_size=len(character_to_integer),
hidden_size=512,
batch_size=128)
model = model.to(device)
```

**3. Training Loop:**

```
# Training the model
for epoch in range(10):
# Initialize hidden and cell state to zero at the beginning
hidden_and_cell_state = model.initialize_hidden_and_cell_states()
print(type(hidden_and_cell_state))
print(type(hidden_and_cell_state[0]))
# Creating instance of dataloader class
T = TextDataLoader(text_corpus_encoded, 128,128)
for i, (feature, label) in enumerate(T.make_batches()):
# One-hot encode inputs, convert into tensor and transpose them
feature = torch.from_numpy(to_categorical(feature, num_classes=model.input_size).transpose([1, 0, 2]))
# Converting input to FloatTensor
feature = feature.float()
# Transpose labels and convert into LongTensor
label = torch.from_numpy(label.T).type(torch.LongTensor)
# Wrap features and labels in PyTorch Variable
feature, label = Variable(feature),Variable(label)
# Convert features and labels to type torch.cuda and load them into the GPU
feature.requires_grad_()
feature = feature.to(device)
label.requires_grad_()
label = feature.to(device)
optimizer.zero_grad() # Makes sure the gradients are initially zero
out = model(feature, hidden_and_cell_state) # Forward pass
loss = criteron(out,label.contiguous().view(128*128)) # Computing loss
loss.backward() # Back-Prop the loss / Backward Pass
optimizer.step() # Update the gradients
print(f'Batch : {i+1} Loss : {loss}')
print(f'Epoch : {epoch+1} Loss : {loss}')
```

**4. Error trace:**

## <class ‘tuple’>

<class ‘torch.Tensor’>

RuntimeError Traceback (most recent call last)

in ()

31 optimizer.zero_grad() # Makes sure the gradients are initially zero

32

—> 33 out = model(feature, hidden_and_cell_state) # Forward pass

34

35 loss = criteron(out,label.contiguous().view(128*128)) # Computing loss

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in **call**(self, *input, **kwargs)

489 result = self._slow_forward(*input, **kwargs)

490 else:

–> 491 result = self.forward(*input, **kwargs)

492 for hook in self._forward_hooks.values():

493 hook_result = hook(self, input, result)

in forward(self, inputs, initial_states)

98 initial_cell_state.cuda()

99

–> 100 state1 = self.lstm1(inputs[t], (initial_hidden_state,initial_cell_state))

101 hidden_state1, cell_state1 = state1

102

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in **call**(self, *input, **kwargs)

489 result = self._slow_forward(*input, **kwargs)

490 else:

–> 491 result = self.forward(*input, **kwargs)

492 for hook in self._forward_hooks.values():

493 hook_result = hook(self, input, result)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)

682 input, hx,

683 self.weight_ih, self.weight_hh,

–> 684 self.bias_ih, self.bias_hh,

685 )

686

/usr/local/lib/python3.6/dist-packages/torch/nn/_functions/rnn.py in LSTMCell(input, hidden, w_ih, w_hh, b_ih, b_hh)

26 if input.is_cuda:

27 igates = F.linear(input, w_ih)

—> 28 hgates = F.linear(hidden[0], w_hh)

29 state = fusedBackend.LSTMFused.apply

30 return state(igates, hgates, hidden[1]) if b_ih is None else state(igates, hgates, hidden[1], b_ih, b_hh)

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)

992 return torch.addmm(bias, input, weight.t())

993

–> 994 output = input.matmul(weight.t())

995 if bias is not None:

996 output += bias

**RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'mat2’**

next(model.parameters()).is_cuda => True

feature.is_cuda => True

label.is_cuda => True