RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /pytorch/aten/src/THC/generic/THCTensorCopy.cpp:20

Hi,
I have just started to work with NLP and was working to create a model for Sentiment Analysis.

When I try to build my model on Google Colab I get the error that has been mentioned in the title.

# Build the sentiment model, then move it to the GPU when CUDA is available.
model = SentiModel(vocab_size, embedding_dim, hidden_size, output_size)
if torch.cuda.is_available():
    # Transfers the model's parameters to the GPU; this is the call that
    # raises the reported RuntimeError.
    model.cuda()
    print("Check Done")

The error occurs specifically on the line where I call model.cuda().

This is the complete error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-14-311fd88a1a30> in <module>()
    166 model = SentiModel(vocab_size, embedding_dim, hidden_size, output_size)
    167 if torch.cuda.is_available():
--> 168     model.cuda()
    169     print("Check Done")
    170 

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in cuda(self, device)
    256             Module: self
    257         """
--> 258         return self._apply(lambda t: t.cuda(device))
    259 
    260     def cpu(self):

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    183     def _apply(self, fn):
    184         for module in self.children():
--> 185             module._apply(fn)
    186 
    187         for param in self._parameters.values():

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
    189                 # Tensors stored in modules are graph leaves, and we don't
    190                 # want to create copy nodes, so we have to unpack the data.
--> 191                 param.data = fn(param.data)
    192                 if param._grad is not None:
    193                     param._grad.data = fn(param._grad.data)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in <lambda>(t)
    256             Module: self
    257         """
--> 258         return self._apply(lambda t: t.cuda(device))
    259 
    260     def cpu(self):

RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /pytorch/aten/src/THC/generic/THCTensorCopy.cpp:20

This is the model class I am using:

class SentiModel(nn.Module):
    """Sentiment classifier: embedding -> single-layer RNN -> linear layer.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_size: Size of each embedding vector (the RNN input size).
        hidden_size: Size of the RNN hidden state.
        output_size: Number of output features of the final linear layer.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        super(SentiModel, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.rnn = nn.RNN(embedding_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, lens):
        """Run a padded batch through the network.

        Args:
            x: Tensor of token indices; dimension 1 is taken as the batch
                size, so the layout is (seq_len, batch).
            lens: Sequence lengths handed to ``pack_padded_sequence``.

        Returns:
            The linear layer applied to the final RNN hidden state, with
            shape (1, batch, output_size). The hidden state is also kept on
            ``self.hidden_out``.
        """
        batch_size = x.size(1)
        self.hidden_out = self.init_hidden(batch_size)
        embeddings = self.embed(x)
        embeddings_packed = pack_padded_sequence(embeddings, lens)
        # Only the final hidden state feeds the classifier, so the packed
        # per-step outputs are discarded instead of being re-padded.
        _, self.hidden_out = self.rnn(embeddings_packed, self.hidden_out)
        return self.fc(self.hidden_out)

    def init_hidden(self, batch_size):
        """Return a zero initial hidden state of shape (1, batch_size, hidden_size).

        The tensor is created with the dtype and device of the model's own
        parameters. Deciding from ``torch.cuda.is_available()`` puts the
        state on the GPU even when the model itself is still on the CPU,
        which makes the RNN call fail with a device mismatch.
        """
        reference = next(self.parameters())
        return torch.zeros(
            1,
            batch_size,
            self.hidden_size,
            dtype=reference.dtype,
            device=reference.device,
        )

To reproduce the error, run the following piece of code on Google Colab; it produces a similar error:

from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn as nn
import torch


class SentiModel(nn.Module):
    """Sentiment classifier: embedding -> single-layer RNN -> linear layer.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_size: Size of each embedding vector (the RNN input size).
        hidden_size: Size of the RNN hidden state.
        output_size: Number of output features of the final linear layer.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        super(SentiModel, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.rnn = nn.RNN(embedding_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, lens):
        """Run a padded batch through the network.

        Args:
            x: Tensor of token indices; dimension 1 is taken as the batch
                size, so the layout is (seq_len, batch).
            lens: Sequence lengths handed to ``pack_padded_sequence``.

        Returns:
            The linear layer applied to the final RNN hidden state, with
            shape (1, batch, output_size). The hidden state is also kept on
            ``self.hidden_out``.
        """
        batch_size = x.size(1)  # batch size
        self.hidden_out = self.init_hidden(batch_size)
        embeddings = self.embed(x)
        embeddings_packed = pack_padded_sequence(embeddings, lens)
        # Only the final hidden state feeds the classifier, so the packed
        # per-step outputs are discarded instead of being re-padded.
        _, self.hidden_out = self.rnn(embeddings_packed, self.hidden_out)
        return self.fc(self.hidden_out)

    def init_hidden(self, batch_size):
        """Return a zero initial hidden state of shape (1, batch_size, hidden_size).

        The tensor is created with the dtype and device of the model's own
        parameters. Deciding from ``torch.cuda.is_available()`` puts the
        state on the GPU even when the model itself is still on the CPU,
        which makes the RNN call fail with a device mismatch.
        """
        reference = next(self.parameters())
        return torch.zeros(
            1,
            batch_size,
            self.hidden_size,
            dtype=reference.dtype,
            device=reference.device,
        )
          
          
# Hyperparameters for the minimal reproduction.
vocab_size = 5000
embedding_dim = 5
hidden_size = 5
output_size = 2
# NOTE: epochs and batch_size are defined but not used anywhere in this snippet.
epochs = 3000
batch_size = 3


# Build the model and move it to the GPU when CUDA is available.
model = SentiModel(vocab_size, embedding_dim, hidden_size, output_size)
if torch.cuda.is_available():
    # This is the call that triggers the reported CUDA runtime error.
    model.cuda()
    print("Check Done")

Thank you.

Hi!

Were you able to resolve this issue? I’m having the exact same problems…

Thanks

Could you post an executable code snippet as well as your current setup (PyTorch, CUDA, cudnn versions as well as the used GPU)?

Thanks for your help!

I realised that it was a problem with my VM, since I wasn’t even able to move a tensor to the GPU. Rebooting appears to have fixed it now!