Runtime Error: CUDNN_STATUS_EXECUTION_FAILED

The code below runs without errors on the CPU but raises an error when run on the GPU:

import torch.nn as nn

class RNN(nn.Module):
    """Embedding -> stacked LSTM -> linear model that scores the last time step."""

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5):
        """
        Initialize the PyTorch RNN Module
        :param vocab_size: The number of input dimensions of the neural network (the size of the vocabulary)
        :param output_size: The number of output dimensions of the neural network
        :param embedding_dim: The size of the embedding vectors
        :param hidden_dim: The size of the hidden layer outputs
        :param n_layers: The number of stacked LSTM layers
        :param dropout: dropout to add in between LSTM layers
        """
        super().__init__()

        # set class variables
        self.output_size = output_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # define model layers
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, nn_input, hidden):
        """
        Forward propagation of the neural network
        :param nn_input: The input to the neural network (token indices, batch first)
        :param hidden: The hidden state, a tuple (h, c) as produced by init_hidden
        :return: Two Tensors, the output of the neural network for the last
                 time step of each sequence and the latest hidden state
        """
        embedded = self.embed(nn_input)
        # lstm_out: batch_size x seq_length x hidden_dim (batch_first=True)
        lstm_out, hidden = self.lstm(embedded, hidden)

        # Only the last time step is returned, so slice before the linear
        # layer instead of projecting every step and discarding the rest.
        last_step = lstm_out[:, -1, :]
        scores = self.fc(last_step)  # batch_size x output_size

        # return one batch of output word scores and the hidden state
        return scores, hidden

    def init_hidden(self, batch_size):
        '''
        Initialize the hidden state of an LSTM
        :param batch_size: The batch_size of the hidden state
        :return: tuple (h, c) of zero tensors, each of dims
                 (n_layers, batch_size, hidden_dim)
        '''
        # new_zeros allocates on the same device and with the same dtype as
        # the model's parameters, so no explicit .cuda() branch (and no
        # access to the discouraged .data attribute) is needed.
        weight = next(self.parameters())
        state_shape = (self.n_layers, batch_size, self.hidden_dim)
        return (weight.new_zeros(state_shape), weight.new_zeros(state_shape))

# Run the test helper against the RNN class; `tests` and `train_on_gpu`
# are defined outside this snippet.
tests.test_rnn(RNN, train_on_gpu)

Could you post all arguments to create the model as well as all necessary input shapes to execute it?
Also, could you post your current setup, i.e.

  • GPU
  • CUDA, cudnn version
  • PyTorch version and how you’ve installed it

I figured out the problem. The condition in the init_hidden function always evaluates to false, whether on GPU or CPU, and thus the hidden state tensors are never moved to the GPU when running on the GPU.

if next(self.parameters()).data.is_cuda:

Any idea why it evaluates to false always?

This should not be the case if you are using a GPU; your code snippet works for me locally:



model = RNN(1, 1, 1, 1, 1)
print(next(model.parameters()).is_cuda)
> False
print(model.init_hidden(1))
> (tensor([[[0.]]]), tensor([[[0.]]]))

model.to('cuda')
print(next(model.parameters()).is_cuda)
> True
print(model.init_hidden(1))
> (tensor([[[0.]]], device='cuda:0'), tensor([[[0.]]], device='cuda:0'))

Also, you shouldn’t use the .data attribute, as it might have unwanted side effects (should be unrelated to your current issue).