RuntimeError: cuda runtime error (59) THCTensorIndex.cu:361

I’m new to PyTorch and tried to run a BiLSTM + attention sentiment classification model. It runs fine for a while until I get the error in the title. I’ve set CUDA_LAUNCH_BLOCKING=1 and get the same error every time, but strangely it happens at a different step of the training process on each run, even though I use SequentialSampler.


Please give me some suggestions. Thanks!

Could you post the model definition or run your code on the CPU?
Based on the error message you are getting an index error, but it’s a bit difficult to guess where it happens in your model.
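In my experience this assert usually comes from an embedding lookup receiving an out-of-range index, and on the CPU the same lookup fails with a readable IndexError instead of a device-side assert. Here is a tiny self-contained repro (the sizes are made up, not taken from your model):

import torch
import torch.nn as nn

emb = nn.Embedding(10, 4)              # valid indices are 0..9
bad_ids = torch.tensor([[1, 5, 10]])   # 10 is out of range
emb(bad_ids)                           # on the CPU this raises a clear IndexError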

This is my model definition:

import torch
import torch.nn as nn
import torch.nn.functional as F


class config():
    def __init__(self, num_cl, lr, keep_dr, cemb_size, pemb_size, l2_lmd, hid_dim, rnn_lay):
        self.num_classes = num_cl
        self.learning_rate = lr
        self.keep_dropout = keep_dr
        self.char_embedding_size = cemb_size
        self.pinyin_embedding_size = pemb_size
        self.l2_reg_lambda = l2_lmd
        self.hidden_dims = hid_dim
        self.rnn_layers = rnn_lay

class TextBILSTM(nn.Module):
    
    def __init__(self,
                 config,
                 char_size):
        super(TextBILSTM, self).__init__()
        self.num_classes = config.num_classes
        self.learning_rate = config.learning_rate
        self.keep_dropout = config.keep_dropout
        self.char_embedding_size = config.char_embedding_size
        self.pinyin_embedding_size = config.pinyin_embedding_size
        self.l2_reg_lambda = config.l2_reg_lambda
        self.hidden_dims = config.hidden_dims
        self.char_size = char_size
        self.rnn_layers = config.rnn_layers

        self.build_model()


    def build_model(self):
        self.char_embeddings = nn.Embedding(self.char_size, self.char_embedding_size)
        self.char_embeddings.weight.requires_grad = True
        # attention layer
        self.attention_layer = nn.Sequential(
            nn.Linear(self.hidden_dims, self.hidden_dims),
            nn.ReLU(inplace=True)
        )
        # self.attention_weights = self.attention_weights.view(self.hidden_dims, 1)

        # two-layer bidirectional LSTM
        self.lstm_net = nn.LSTM(self.char_embedding_size, self.hidden_dims,
                                num_layers=self.rnn_layers, dropout=self.keep_dropout,
                                bidirectional=True)
        # FC output layers
        # self.fc_out = nn.Linear(self.hidden_dims, self.num_classes)
        self.fc_out = nn.Sequential(
            nn.Dropout(self.keep_dropout),
            nn.Linear(self.hidden_dims, int(self.hidden_dims/2)),
            nn.ReLU(inplace=True),
            nn.Dropout(self.keep_dropout),
            nn.Linear(int(self.hidden_dims/2), int(self.hidden_dims/4)),
            nn.ReLU(inplace=True),
            nn.Dropout(self.keep_dropout),
            nn.Linear(int(self.hidden_dims/4), self.num_classes)
        )


    def attention_net_with_w(self, lstm_out):
        '''
        :param lstm_out: [batch_size, time_step, hidden_dims * num_directions(=2)]
        :return:
        '''
        lstm_tmp_out = torch.chunk(lstm_out, 2, -1)
        # h [batch_size, time_step, hidden_dims]
        h = lstm_tmp_out[0] + lstm_tmp_out[1]
        # atten_w [batch_size, time_step, hidden_dims]
        atten_w = self.attention_layer(h)
        # m [batch_size, time_step, hidden_dims]
        m = torch.tanh(h)
        # atten_context [batch_size, time_step, time_step]
        atten_context = torch.bmm(m, atten_w.transpose(1, 2))
        # softmax_w [batch_size, time_step, time_step]
        softmax_w = F.softmax(atten_context, dim=-1)
        # context [batch_size, time_step, hidden_dims]
#        context = torch.bmm(h.transpose(1,2), softmax_w)
#        context_with_attn = h.transpose(1, 2) + context
        context = torch.bmm(softmax_w, h)
        # average_att: mean attention weight per time step, [batch_size, 1, time_step]
        average_att = torch.sum(softmax_w, 1) / softmax_w.size(1)
        average_att = average_att.view(average_att.size(0), 1, average_att.size(1))
#        context_with_attn = h + context
        # result [batch_size, hidden_dims]
        result = torch.bmm(average_att, context).view(average_att.size(0), context.size(2))
#        result = torch.sum(context_with_attn, dim=1)
        return result


    def forward(self, char_id):
        # char_id = torch.from_numpy(np.array(input[0])).long()

        sen_char_input = self.char_embeddings(char_id)

#        sen_input = torch.cat((sen_char_input), dim=1)
        # input : [len_seq, batch_size, embedding_dim]
        sen_input = sen_char_input.permute(1, 0, 2)
        output, (final_hidden_state, final_cell_state) = self.lstm_net(sen_input)
        # output : [batch_size, len_seq, hidden_dims * 2]
        output = output.permute(1, 0, 2)
        # final_hidden_state = torch.mean(final_hidden_state, dim=0, keepdim=True)
        # atten_out = self.attention_net(output, final_hidden_state)
        atten_out = self.attention_net_with_w(output)
        return self.fc_out(atten_out)

It takes a while to run on the CPU; I’ll post the CPU result later.
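Side note: a full CPU run can be slow, but a single small dummy batch is usually enough to trigger the same index error with a readable trace. A minimal sketch using the TextBILSTM class above (all numbers are placeholder values):

import torch

cfg = config(num_cl=2, lr=1e-3, keep_dr=0.5, cemb_size=64,
             pemb_size=64, l2_lmd=0.0, hid_dim=128, rnn_lay=2)
model = TextBILSTM(cfg, char_size=5000)

# one fake batch: [batch_size, len_seq] with ids in [0, char_size - 1]
char_id = torch.randint(0, 5000, (4, 20), dtype=torch.long)
logits = model(char_id)
print(logits.shape)   # torch.Size([4, 2])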


The CPU run gave me the reason. I’ll try to fix it. Thanks!

Problem solved! The input indices for nn.Embedding(num_embeddings, embedding_dim) must be in the range 0 to num_embeddings - 1, not 1 to num_embeddings.
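For anyone who lands here with the same error: since nn.Embedding(num_embeddings, embedding_dim) only accepts indices in [0, num_embeddings - 1], a cheap sanity check on the id tensor catches this before the CUDA assert fires. A minimal sketch (check_ids is just an illustrative helper; char_id and char_size stand for your own batch tensor and vocabulary size):

import torch

def check_ids(char_id: torch.Tensor, char_size: int) -> None:
    # every index must lie in [0, char_size - 1]
    assert char_id.min().item() >= 0, f"negative id: {char_id.min().item()}"
    assert char_id.max().item() < char_size, (
        f"id {char_id.max().item()} >= num_embeddings {char_size}")

If the ids were built starting at 1, either subtract 1 when building the batches or make the table one row larger with nn.Embedding(char_size + 1, embedding_dim).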