def forward(self, enc_input, enc_input_ext, dec_input, target=None, teacher_forcing_ratio=0.5):
enc_output, enc_hidden = self.encoder(enc_input)
dec_hidden = enc_hidden
batch_size, seq_len = dec_input.size()
outputs = torch.zeros(batch_size, seq_len, self.vocab_size).to(
enc_input.device) # Adjust self.vocab_size accordingly
if self.training:
loss = 0
for t in range(seq_len - 1): # Assuming dec_input includes <sos> and <eos>
dec_input_t = dec_input[:, t].unsqueeze(1)
true_output = target[:, t + 1]
p_vocab, dec_hidden, p_gen = self.decoder(dec_input_t, dec_hidden, enc_output)
context_vector, attn = self.attention(dec_hidden, enc_output)
final_dist = self.get_final_distribution(enc_input_ext, p_gen, p_vocab, attn, self.max_oov) # Simplified
# final_dist = self.get_final_distribution(enc_input_ext, p_gen, p_vocab, attn, self.max_oov) # Simplified
loss += f.cross_entropy(final_dist, true_output, ignore_index=self.pad_token_id)
return loss / (seq_len - 1)
def get_final_distribution(self, x, p_gen, p_vocab, attention_weights, max_oov):
    """Mix the generator and copy distributions over the extended vocabulary.

    Pointer-generator combination: ``p_gen * p_vocab`` over the fixed vocab,
    plus ``(1 - p_gen) * attention`` scattered onto the source tokens' ids.

    Args:
        x: (batch, src_len) LongTensor of source ids in the *extended* vocab
            (OOV source tokens have temporary ids >= vocab_size).
        p_gen: generation probability, broadcastable against p_vocab / attention.
        p_vocab: (batch, vocab_size) generator distribution.
        attention_weights: (batch, src_len) copy distribution over the source.
        max_oov: minimum number of OOV slots to append after the fixed vocab.

    Returns:
        (batch, vocab_size + extra) tensor with extra >= max_oov. The extension
        is sized from ``x.max()`` so every id in ``x`` is addressable — the
        traceback showed ``IndexError: index 695 ... size 695`` because an
        extended id equalled vocab_size + max_oov exactly.
    """
    batch_size = x.size(0)
    # Clamp p_gen away from 0/1 so neither mixture component is zeroed out
    # and log(final_dist) stays finite in the loss.
    p_gen = torch.clamp(p_gen, 0.001, 0.999)
    p_vocab_weighted = p_gen * p_vocab
    attention_weighted = (1 - p_gen) * attention_weights
    # Grow the zero extension to cover the largest id actually present in x,
    # never shrinking below the requested max_oov slots.
    vocab_size = p_vocab_weighted.size(1)
    extra = int(max_oov)
    if x.numel() > 0:
        extra = max(extra, int(x.max().item()) + 1 - vocab_size)
    extra = max(extra, 0)
    extension = torch.zeros((batch_size, extra), dtype=p_vocab_weighted.dtype, device=x.device)
    p_vocab_extended = torch.cat([p_vocab_weighted, extension], dim=1)
    # Out-of-place scatter_add: adds each source token's copy probability onto
    # its (possibly OOV) id in the extended distribution.
    final_distribution = p_vocab_extended.scatter_add(1, x, attention_weighted)
    return final_distribution
Here is the problem brief, followed by the runtime traceback:
Traceback (most recent call last):
File “/Users/sagar/PycharmProjects/TouchFYP/V2_main.py”, line 201, in
train(model, train_loader,val_loader, optimizer, criterion,scheduler, device,5, )
File “/Users/sagar/PycharmProjects/TouchFYP/V2_main.py”, line 167, in train
loss, _ = model(articles, ext_enc_inp, summaries[:, :-1], summaries[:, 1:], teacher_forcing_ratio=0.5)
File “/Users/sagar/PycharmProjects/TouchFYP/venv/lib/python3.9/site-packages/torch/nn/modules/module.py”, line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File “/Users/sagar/PycharmProjects/TouchFYP/venv/lib/python3.9/site-packages/torch/nn/modules/module.py”, line 1520, in _call_impl
return forward_call(*args, **kwargs)
File “/Users/sagar/PycharmProjects/TouchFYP/my_V2.py”, line 283, in forward
final_dist = self.get_final_distribution(enc_input_ext, p_gen, p_vocab, attn, self.max_oov,
File “/Users/sagar/PycharmProjects/TouchFYP/my_V2.py”, line 359, in get_final_distribution
attn_dist_extended[b, vocab_idx] += attention_weighted[b, idx]
IndexError: index 695 is out of bounds for dimension 1 with size 695