class computeLoss(nn.Module):
def __init__(self, tgt_size, smoothing=0.0):
super(computeLoss, self).__init__()
self.criterion = nn.KLDivLoss(reduction='sum')
self.confidence = 1.0 - smoothing
self.smoothing = smoothing
self.tgt_size = tgt_size
self.padding_idx = padding_idx
def forward(self, y, target, src, generatedAttention):
# y has size (batch_size x tgt_length, vocab_size)
# target (batch_size, tgt_length)
# src (batch_size, src_length)
assert y.size(1) == self.tgt_size
smooth = self.smoothing / (self.tgt_size - 2)
indexFormDist = self.get_true_dist(target, src.clone())
true_dist = torch.zeros(size=(target.size(0), target.size(1), src_length + generative_vocab)).fill_(smooth).type_as(src)
indexFormDist = indexFormDist.unsqueeze(-1)
true_dist.scatter_(-1, indexFormDist.long(), 1)
finalTrueDist = None
for count in range(0, target.size(0)):
mask = (target[count] == self.padding_idx).nonzero(as_tuple=False).squeeze(-1).type_as(src).long()
dist = true_dist[count].index_fill_(0, mask, 0.0)
if finalTrueDist is None:
finalTrueDist = dist
else:
finalTrueDist = torch.cat((finalTrueDist, dist), dim=0)
return self.criterion(y, finalTrueDist)
Error:
Traceback (most recent call last):
File "D:\PycharmProjects\bestpointerGen\finalModel.py", line 707, in <module>
trainer.fit(model)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 771, in fit
self._call_and_handle_interrupt(
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 724, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 812, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1237, in _run
results = self._run_stage()
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1324, in _run_stage
return self._run_train()
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1354, in _run_train
self.fit_loop.run()
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\base.py", line 204, in run
self.advance(*args, **kwargs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 269, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\base.py", line 204, in run
self.advance(*args, **kwargs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 208, in advance
batch_output = self.batch_loop.run(batch, batch_idx)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\base.py", line 204, in run
self.advance(*args, **kwargs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\batch\training_batch_loop.py", line 90, in advance
outputs = self.manual_loop.run(split_batch, batch_idx)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\base.py", line 204, in run
self.advance(*args, **kwargs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\loops\optimization\manual_loop.py", line 115, in advance
training_step_output = self.trainer._call_strategy_hook("training_step", *step_kwargs.values())
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1766, in _call_strategy_hook
output = fn(*args, **kwargs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\pytorch_lightning\strategies\strategy.py", line 333, in training_step
return self.model.training_step(*args, **kwargs)
File "D:\PycharmProjects\bestpointerGen\finalModel.py", line 567, in training_step
loss.backward()
File "D:\anaconda\envs\originalTransformer\lib\site-packages\torch\_tensor.py", line 363, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "D:\anaconda\envs\originalTransformer\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: Found dtype Float but expected Int
return self.criterion(y, finalTrueDist)
for the line above, I did
print(y.type(), finalTrueDist.type())
I get
torch.cuda.FloatTensor torch.cuda.IntTensor
which I think makes sense because y is the probability distribution hence float, and finalTrueDist is the one hot encodding version of the output which is a long tensor. So what is causing this error.