The code below runs without errors on the CPU but raises an error when run on the GPU.
import torch.nn as nn
class RNN(nn.Module):
    """Embedding -> stacked LSTM -> linear layer, scoring the last time step.

    The module embeds a batch of index sequences, runs them through a
    batch-first LSTM and projects the LSTM output of the final time step to
    ``output_size`` scores.
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5):
        """
        Initialize the PyTorch RNN Module
        :param vocab_size: The number of input dimensions of the neural network (the size of the vocabulary)
        :param output_size: The number of output dimensions of the neural network
        :param embedding_dim: The size of embeddings
        :param hidden_dim: The size of the hidden layer outputs
        :param n_layers: The number of stacked LSTM layers
        :param dropout: dropout to add in between LSTM layers
        """
        super(RNN, self).__init__()

        # Sizes needed later by forward() and init_hidden().
        self.output_size = output_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Model layers.
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, nn_input, hidden):
        """
        Forward propagation of the neural network
        :param nn_input: The input to the neural network, a batch-first tensor of
            token indices (batch_size x seq_length)
        :param hidden: The hidden state, a tuple ``(h, c)`` as returned by
            ``init_hidden`` or by a previous call; ``None`` is passed through to
            the LSTM unchanged
        :return: Two Tensors, the output of the neural network
            (batch_size x output_size) and the latest hidden state
        """
        batch_size = nn_input.shape[0]
        embed_out = self.embed(nn_input)  # batch_size x seq_length x embedding_dim

        # The hidden state may have been created before the model was moved or
        # converted (e.g. init_hidden() followed by .cuda()); the LSTM rejects a
        # state whose device/dtype differs from its input, so align it here.
        # .to() returns the same tensor when nothing needs to change.
        if hidden is not None:
            hidden = tuple(
                state.to(device=embed_out.device, dtype=embed_out.dtype)
                for state in hidden
            )

        # lstm_out: batch_size x seq_length x hidden_dim
        # hidden:   two tensors of n_layers x batch_size x hidden_dim
        lstm_out, hidden = self.lstm(embed_out, hidden)

        # Flatten the time dimension so the linear layer sees one row per step.
        flat_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        scores = self.fc(flat_out)  # (batch_size * seq_length) x output_size
        scores = scores.view(batch_size, -1, self.output_size)

        # Keep only the prediction made at the last step of every sequence.
        last_scores = scores[:, -1, :]

        # return one batch of output word scores and the hidden state
        return last_scores, hidden

    def init_hidden(self, batch_size):
        """
        Initialize the hidden state of an LSTM
        :param batch_size: The batch_size of the hidden state
        :return: tuple of two zero tensors (hidden state, cell state), each of
            dims (n_layers, batch_size, hidden_dim)
        """
        # new_zeros() allocates with the dtype and device of the model's first
        # parameter, so no separate CPU/GPU branch is needed.
        weight = next(self.parameters()).data
        state_shape = (self.n_layers, batch_size, self.hidden_dim)
        return (weight.new_zeros(state_shape), weight.new_zeros(state_shape))
# Run the external check on the RNN class; `tests` and `train_on_gpu` are
# defined outside this snippet (presumably a helper module and a CUDA
# availability flag — confirm against the surrounding file).
tests.test_rnn(RNN, train_on_gpu)