Hi,
I have been working on fine-tuning a pre-trained language model; however, during the training stage I receive the following error.
File “training.py”, line 296, in
record_loss, perplexity = train_one_iter(batch, update_count, fp16=False)
File “training.py”, line 105, in train_one_iter
logits, past = model_A(dial_turn_inputs, past=past)
File “/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py”, line 547, in call
result = self.forward(*input, **kwargs)
File “/usr/local/lib/python3.6/dist-packages/transformers/modeling_gpt2.py”, line 528, in forward
head_mask=head_mask)
File “/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py”, line 547, in call
result = self.forward(*input, **kwargs)
File “/usr/local/lib/python3.6/dist-packages/transformers/modeling_gpt2.py”, line 418, in forward
inputs_embeds = self.wte(input_ids)
File “/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py”, line 547, in call
result = self.forward(*input, **kwargs)
File “/usr/local/lib/python3.6/dist-packages/torch/nn/modules/sparse.py”, line 114, in forward
self.norm_type, self.scale_grad_by_freq, self.sparse)
File “/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py”, line 1467, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: index out of range: Tried to access index 50297 out of table with 50296 rows. at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:237
I have checked the model weights' size and the vocab size. They turned out as follows, respectively:
torch.Size([50297, 768])
50297
Here is my code:
# continue training
# Initialize models from a saved checkpoint and extend the shared embedding
# table with two new tokens (a "movie plot" marker and a separator), each
# warm-started as the mean of the embeddings of related existing tokens.
model_A_states, model_B_states = torch.load(
    inspired_data_path + "model_weight.pth", map_location=torch.device('cpu')
)
print('model weights loaded')

old_weight = model_A_states["transformer.wte.weight"]

def _avg_embedding(weight, text):
    """Return a (1, hidden) row: the mean embedding of `text`'s token ids."""
    token_ids = tokenizer.encode(text)
    return weight[token_ids].mean(0, keepdim=True)

added_embedding_plot = _avg_embedding(old_weight, "movie plot")
added_embedding_sep = _avg_embedding(old_weight, "separate")
new_weight = torch.cat([old_weight, added_embedding_plot, added_embedding_sep], 0)
print(new_weight.size())

# GPT-2 ties the input embeddings and the LM head, so both state-dict entries
# must point at the same extended matrix, in both models.
for states in (model_A_states, model_B_states):
    states["transformer.wte.weight"] = new_weight
    states["lm_head.weight"] = new_weight

config = GPT2Config()
config.vocab_size = new_weight.shape[0]
print(config.vocab_size)

# Fail fast with a clear message instead of the later cryptic
# "index out of range" inside torch.embedding: every id the tokenizer can
# emit must be a valid row of the embedding matrix. The reported crash
# (index 50297 vs table of 50296 rows) means the new tokens were never
# registered on the tokenizer (tokenizer.add_tokens([...])) or one token
# too few was appended to the weight matrix.
if len(tokenizer) != config.vocab_size:
    raise ValueError(
        f"tokenizer has {len(tokenizer)} tokens but the embedding matrix has "
        f"{config.vocab_size} rows; register the added tokens with "
        f"tokenizer.add_tokens(...) so the sizes match"
    )

model_A = GPT2LMHeadModel(config)
model_B = GPT2LMHeadModel(config)
model_A.load_state_dict(model_A_states)
model_B.load_state_dict(model_B_states)

device = torch.device("cpu")
model_A = model_A.to(device)
model_B = model_B.to(device)
@ptrblck could you please assist in this regard?