C:/w/b/windows/pytorch/aten/src/THC/THCTensorIndex.cu:272: block: [178,0,0], thread: [31,0,0] Assertion srcIndex < srcSelectDimSize
failed.
Traceback (most recent call last):
File “C:/Users/lhuang93/PycharmProjects/pythonProject/train.py”, line 190, in
F = train(config, ‘ddi_e-5.log’)
File “C:/Users/lhuang93/PycharmProjects/pythonProject/train.py”, line 130, in train
train_loss, train_pred = run_iter(batch=train_batch, is_training=True)
File “C:/Users/lhuang93/PycharmProjects/pythonProject/train.py”, line 102, in run_iter
logits = model(input_ids, attention_mask, token_type_ids, label, e1_mask, e2_mask)
File “D:\Anaconda\Anaconda\lib\site-packages\torch\nn\modules\module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “C:\Users\lhuang93\PycharmProjects\pythonProject\model.py”, line 60, in forward
outputs = self.bert(input_ids, attention_mask=attention_mask,
File “D:\Anaconda\Anaconda\lib\site-packages\torch\nn\modules\module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “D:\Anaconda\Anaconda\lib\site-packages\transformers\modeling_bert.py”, line 829, in forward
embedding_output = self.embeddings(
File “D:\Anaconda\Anaconda\lib\site-packages\torch\nn\modules\module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “D:\Anaconda\Anaconda\lib\site-packages\transformers\modeling_bert.py”, line 211, in forward
token_type_embeddings = self.token_type_embeddings(token_type_ids)
File “D:\Anaconda\Anaconda\lib\site-packages\torch\nn\modules\module.py”, line 722, in _call_impl
result = self.forward(*input, **kwargs)
File “D:\Anaconda\Anaconda\lib\site-packages\torch\nn\modules\sparse.py”, line 124, in forward
return F.embedding(
File “D:\Anaconda\Anaconda\lib\site-packages\torch\nn\functional.py”, line 1814, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: CUDA error: device-side assert triggered
I had the same error in one of my machine translation projects.
I solved the error by removing the very long sentences in my datasets.
Maybe when I see your code I will be able to help you better.
But the max length of my sentences is no more than 300, so I set the max length as 300.
The following is my data-processing code for relation extraction. I added the special tokens `<e1>`, `</e1>`, `<e2>`, and `</e2>` to identify the entities.
import json
from transformers import BertTokenizer, BertConfig, BertForMaskedLM, BertForNextSentencePrediction
import random
from torch.utils.data import DataLoader, TensorDataset
import os
import logging
import torch
import numpy as np
from tqdm import tqdm
import config
# Log INFO and above, each line prefixed with a timestamp and the level name.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)-8s %(message)s')
# Not referenced by the code below; the tokenizer is loaded from
# config.pretrained_model_name instead.
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(config.pretrained_model_name, do_lower_case=config.do_lower_case)
# Register the entity markers as additional special tokens of the tokenizer.
# NOTE(review): this makes len(tokenizer) larger than the pretrained
# vocabulary; if any of these ids reach the model, the model needs
# model.resize_token_embeddings(len(tokenizer)) -- confirm in the training code.
tokenizer.add_special_tokens({"additional_special_tokens": ["<e1>", "</e1>", "<e2>", "</e2>"]})
label_path = 'label2id.json'
# Use a context manager so the label file is closed as soon as it is parsed.
with open(label_path, 'r') as label_file:
    label2id = json.load(label_file)
# Id used as the fallback label for relations missing from label2id.
NA_id = label2id['NA']
def tokenizer_entity(sent, headword_pos, tailword_pos):
    """Wrap the head and tail entity spans of a sentence in marker tags.

    The head entity is surrounded by ``<e1>``/``</e1>`` and the tail entity
    by ``<e2>``/``</e2>``.

    :param sent: the sentence; it is processed character by character
    :param headword_pos: ``[start, end]`` character offsets of the head
        entity, ``end`` exclusive
    :param tailword_pos: ``[start, end]`` character offsets of the tail
        entity, ``end`` exclusive
    :return: the sentence as a string with the four markers inserted
    """
    chars = list(sent)
    # `<=` rather than `<`: a head that ends exactly where the tail starts
    # (adjacent entities) still comes first. With `<` that case fell into the
    # "tail first" branch and produced interleaved, wrongly placed tags.
    if headword_pos[1] <= tailword_pos[0]:
        first_span, first_tags = headword_pos, ("<e1>", "</e1>")
        second_span, second_tags = tailword_pos, ("<e2>", "</e2>")
    else:
        first_span, first_tags = tailword_pos, ("<e2>", "</e2>")
        second_span, second_tags = headword_pos, ("<e1>", "</e1>")
    # Every marker already inserted shifts the following characters right by
    # one list element, hence the growing +1 / +2 / +3 corrections.
    chars.insert(first_span[0], first_tags[0])
    chars.insert(first_span[1] + 1, first_tags[1])
    chars.insert(second_span[0] + 2, second_tags[0])
    chars.insert(second_span[1] + 3, second_tags[1])
    return "".join(chars)
def read_data(file_dir, filename):
    """Load relation-extraction instances from a JSON file and shuffle them.

    Each instance in the file is read through the keys ``sentence``,
    ``relation``, ``tail.word`` and ``head.word``. Relations that are not in
    ``label2id`` are mapped to ``NA_id``.

    :param file_dir: directory that contains the file
    :param filename: name of the JSON file inside ``file_dir``
    :return: a shuffled list of ``[sentence, label_id, head_word, tail_word]``
        where ``sentence`` has its newlines removed and is lower-cased
    """
    data_path = os.path.join(file_dir, filename)
    # Context manager: the original left the file handle open.
    with open(data_path, 'r') as data_file:
        instances = json.load(data_file)
    data = []
    for ins in instances:
        sent = ins['sentence'].replace('\n', '').lower()
        label = label2id.get(ins['relation'], NA_id)
        tail_word = ins['tail']['word']
        head_word = ins['head']['word']
        # NOTE(review): the sentence is lower-cased but the entity words are
        # not; process_data locates them with sent.index(word), which raises
        # ValueError for a word containing upper-case characters -- confirm
        # the dataset stores entity words in lower case.
        data.append([sent, label, head_word, tail_word])
    random.shuffle(data)
    return data
def process_data(data, max_length):
    """Turn raw instances into padded model inputs with entity-span masks.

    For every instance the head and tail words are located in the sentence,
    wrapped in marker tags, tokenized, and the markers are then replaced by
    ``$`` (around the head entity) and ``#`` (around the tail entity).

    :param data: list of ``[sentence, label_id, head_word, tail_word]``
    :param max_length: every token sequence is truncated or zero-padded to
        exactly this length
    :return: six ``torch.long`` tensors: input ids, attention mask, segment
        ids, labels, head-entity mask and tail-entity mask. All but the labels
        have one row of ``max_length`` entries per instance.
    """
    def pad(x):
        # Truncate to max_length, or right-pad with id 0.
        return x[:max_length] if len(x) > max_length else x + [0] * (max_length - len(x))

    def span_mask(start, end, size, marked_sent, tokens):
        # 1 for every position from the opening to the closing marker
        # (inclusive), 0 elsewhere.
        mask = [0] * size
        for i in range(start, end + 1):
            if i > size - 1:
                # The entity lies beyond the truncated sequence: dump the
                # offending instance and stop, as the original code did.
                print(marked_sent)
                print(tokens)
                print(i)
                print(size)
                exit()
            mask[i] = 1
        return mask

    input_ids_pad = []
    input_mask_data = []
    input_segment_data = []
    input_labels = [y for _, y, _, _ in data]
    e1_mask_data = []
    e2_mask_data = []
    for sent_ins, _, head_word, tail_word in tqdm(data):
        # str.index returns the first occurrence and raises ValueError when
        # the word is not found in the sentence.
        head_start = sent_ins.index(head_word)
        head_pos = [head_start, head_start + len(head_word)]
        tail_start = sent_ins.index(tail_word)
        tail_pos = [tail_start, tail_start + len(tail_word)]
        sent_ins = tokenizer_entity(sent_ins, head_pos, tail_pos)
        # Fix: prepend the tokenizer's real classification token. The original
        # prepended the plain string "CLS", which is not the special token.
        tokenized_text = [tokenizer.cls_token] + tokenizer.tokenize(sent_ins)
        e11_p = tokenized_text.index("<e1>")
        e12_p = tokenized_text.index("</e1>")
        e21_p = tokenized_text.index("<e2>")
        e22_p = tokenized_text.index("</e2>")
        tokenized_text[e11_p] = "$"
        tokenized_text[e12_p] = "$"
        tokenized_text[e21_p] = "#"
        # Fix: the original assigned to e11_p a second time, which turned the
        # opening head marker into "#" and left "</e2>" in the sequence as an
        # added special token.
        tokenized_text[e22_p] = "#"
        input_ids = pad(tokenizer.convert_tokens_to_ids(tokenized_text))
        # Id 0 is treated as padding.
        input_mask = [1 if i != 0 else 0 for i in input_ids]
        input_segment = [0] * len(input_ids)
        # e1 mask, e2 mask
        e1_mask = span_mask(e11_p, e12_p, len(input_mask), sent_ins, tokenized_text)
        e2_mask = span_mask(e21_p, e22_p, len(input_mask), sent_ins, tokenized_text)
        input_ids_pad.append(input_ids)
        input_mask_data.append(input_mask)
        input_segment_data.append(input_segment)
        e1_mask_data.append(e1_mask)
        e2_mask_data.append(e2_mask)
    input_ids_pad = torch.tensor(input_ids_pad, dtype=torch.long)
    input_mask_data = torch.tensor(input_mask_data, dtype=torch.long)
    input_segment_data = torch.tensor(input_segment_data, dtype=torch.long)
    e1_mask_data = torch.tensor(e1_mask_data, dtype=torch.long)
    e2_mask_data = torch.tensor(e2_mask_data, dtype=torch.long)
    input_labels = torch.tensor(input_labels, dtype=torch.long)
    return input_ids_pad, input_mask_data, input_segment_data, input_labels, e1_mask_data, e2_mask_data
def get_dataset(file_dir, filename, max_length):
    """Read one data file and bundle its processed tensors into a TensorDataset.

    :param file_dir: directory that contains the data file
    :param filename: name of the JSON data file
    :param max_length: sequence length passed on to ``process_data``
    :return: a ``TensorDataset`` over input ids, attention mask, segment ids,
        labels, head-entity mask and tail-entity mask (in that order)
    """
    raw_instances = read_data(file_dir, filename)
    ids, mask, segments, labels, e1_mask, e2_mask = process_data(raw_instances, max_length)
    return TensorDataset(ids, mask, segments, labels, e1_mask, e2_mask)
def dump_dataset(data_name):
    """Build the dataset for ``<data_name>.json`` and save it as ``<data_name>.pt``.

    The JSON file is read from the current directory and sequences are sized
    with ``config.MAX_LENGTH``.

    :param data_name: base name of the split, without file extension
    """
    source_file = data_name + '.json'
    built = get_dataset(file_dir='.', filename=source_file, max_length=config.MAX_LENGTH)
    target_file = data_name + '.pt'
    torch.save(built, target_file)
# Pre-process and save every split, in the order train, valid, test.
for _split_name in ('train', 'valid', 'test'):
    dump_dataset(_split_name)
Model code:
from transformers import BertTokenizer, BertConfig, BertForMaskedLM, BertForNextSentencePrediction,BertPreTrainedModel
from transformers import BertModel
import torch.nn as nn
import config
import torch
class FCLayer(nn.Module):
    """Fully connected layer: dropout, then an optional tanh, then a linear map."""

    def __init__(self, input_dim, output_dim, dropout_rate=0., use_activation=True):
        """
        :param input_dim: size of the last dimension of the input
        :param output_dim: size of the last dimension of the output
        :param dropout_rate: probability handed to ``nn.Dropout``
        :param use_activation: apply tanh before the linear map when true
        """
        super().__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Apply dropout, the optional tanh, and the linear projection to ``x``."""
        dropped = self.dropout(x)
        activated = self.tanh(dropped) if self.use_activation else dropped
        return self.linear(activated)
class Mybert(BertPreTrainedModel):
    """BERT-based relation classifier.

    The pooled BERT output and the averaged hidden states of the two entity
    spans each pass through their own fully connected layer; the three
    results are concatenated and projected to ``num_labels`` logits.
    """

    def __init__(self, bert_config, args):
        """
        :param bert_config: BERT configuration; ``hidden_size`` and
            ``num_labels`` are read from it
        :param args: object providing ``pretrained_model_name`` and
            ``dropout_rate``
        """
        super(Mybert, self).__init__(bert_config)
        self.bert = BertModel.from_pretrained(args.pretrained_model_name, config=bert_config)  # Load pretrained bert
        self.num_labels = bert_config.num_labels
        hidden_size = bert_config.hidden_size
        self.cls_fc_layer = FCLayer(hidden_size, hidden_size, args.dropout_rate)
        self.e1_fc_layer = FCLayer(hidden_size, hidden_size, args.dropout_rate)
        self.e2_fc_layer = FCLayer(hidden_size, hidden_size, args.dropout_rate)
        # Final classifier over the three concatenated vectors, without tanh.
        self.label_classifier = FCLayer(hidden_size * 3, bert_config.num_labels, args.dropout_rate, use_activation=False)

    @staticmethod
    def entity_average(hidden_output, e_mask):
        """
        Average the hidden state vectors at the positions selected by e_mask.

        :param hidden_output: [batch_size, max_seq_len, dim]
        :param e_mask: [batch_size, max_seq_len]
                e.g. e_mask[0] == [0, 0, 0, 1, 1, 1, 0, 0, ... 0]
        :return: [batch_size, dim]; a row whose mask is all zeros yields a
                zero vector
        """
        e_mask_unsqueeze = e_mask.unsqueeze(1)  # [b, 1, max_seq_len]
        length_tensor = (e_mask != 0).sum(dim=1).unsqueeze(1)  # [batch_size, 1]
        # An all-zero mask would otherwise divide 0 by 0 and return NaN,
        # which then propagates through the classifier and the loss.
        length_tensor = length_tensor.clamp(min=1)
        # [b, 1, max_seq_len] * [b, max_seq_len, dim] = [b, 1, dim] -> [b, dim]
        sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)
        avg_vector = sum_vector.float() / length_tensor.float()  # broadcasting
        return avg_vector

    def forward(self, input_ids, attention_mask, token_type_ids, labels, e1_mask, e2_mask):
        """
        Compute relation logits for a batch.

        :param input_ids: token ids passed to BERT
        :param attention_mask: attention mask passed to BERT
        :param token_type_ids: segment ids passed to BERT
        :param labels: accepted for interface compatibility; not used here
        :param e1_mask: mask selecting the first entity's positions
        :param e2_mask: mask selecting the second entity's positions
        :return: the output of ``label_classifier`` on the concatenated vectors
        """
        outputs = self.bert(input_ids, attention_mask=attention_mask,
                            token_type_ids=token_type_ids)  # sequence_output, pooled_output, (hidden_states), (attentions)
        sequence_output = outputs[0]
        pooled_output = outputs[1]  # [CLS]

        # Average
        e1_h = self.entity_average(sequence_output, e1_mask)
        e2_h = self.entity_average(sequence_output, e2_mask)

        # Dropout -> tanh -> fc_layer
        pooled_output = self.cls_fc_layer(pooled_output)
        e1_h = self.e1_fc_layer(e1_h)
        e2_h = self.e2_fc_layer(e2_h)

        # Concat -> fc_layer
        concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)
        logits = self.label_classifier(concat_h)
        return logits
Besides the sequence length, you should also check the inputs (min and max values) to the embedding layer, which often contain out-of-bounds indices.
Thank you for your reply. I solved this problem by adding the following code:
`model.resize_token_embeddings(len(tokenizer))`
I hope this helps other people who run into the same problem.
2 Likes