I am coding Encoder Decoder model.
Now, add masking processing.
Before adding, the coding was working.
But, (# -> add) my code doesn’t work.
Could you help me?
class Encoder_Decoder(nn.Module):
def __init__(self, input_size, output_size, hidden_size):
super(Encoder_Decoder, self).__init__()
# batch_size = 50 , hidden_size = 256
self.embed_input = nn.Embedding(input_size, hidden_size, padding_idx=0)
self.embed_target = nn.Embedding(output_size, hidden_size, padding_idx=0)
self.lstm_input = nn.LSTMCell(hidden_size, hidden_size)
self.lstm_target = nn.LSTMCell(hidden_size, hidden_size)
self.linear = nn.Linear(hidden_size, output_size)
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
def create_mask(self, input_sentence_words):
mask = input_sentence_words.eq(0)
return mask
def forward(self, input_lines, target_lines):
global all_loss
hx = torch.zeros(batch_size, self.hidden_size).cuda()
cx = torch.zeros(batch_size, self.hidden_size).cuda()
for input_sentence_words in input_lines:
before_hx = hx
before_cx = cx
input_k = self.embed_input(input_sentence_words)
hx, cx = self.lstm_input(input_k, (hx, cx) )
# mask = self.create_mask(input_sentence_words) -> add
# indices = mask.nonzero() -> add
# hx[indices]= before_hx[indices] -> add
# cx[indices] = before_cx[indices] -> add
target_lines_not_last = target_lines[:(padding_num-1)]
target_lines_next = target_lines[1:]
loss = 0
k = 0
for target_sentence_words , target_sentence_words_next in zip(target_lines_not_last, target_lines_next):
target_k = self.embed_target(target_sentence_words)
k += 1
print(k)
print("target_k", target_k)
hx, cx = self.lstm_target(target_k, (hx, cx) )
print("hx", hx)
print("hx size", hx.size())
print("cx", cx)
print("cx size", cx.size())
print("target_sentence_words_next", target_sentence_words_next)
print("---------------")
loss += F.cross_entropy(self.linear(hx), target_sentence_words_next)
return loss
49
target_k tensor([[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.],
...,
[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 0., 0., 0.]], device='cuda:0')
target_sentence_words tensor([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')
hx tensor(1.00000e-02 *
[[ 1.8093, 2.3050, 1.3506, ..., -0.2192, 2.2486, -2.5922],
[ 1.8108, 2.3046, 1.3504, ..., -0.2191, 2.2489, -2.5926],
[ 1.8108, 2.3046, 1.3504, ..., -0.2191, 2.2489, -2.5926],
...,
[ 1.8108, 2.3046, 1.3504, ..., -0.2191, 2.2489, -2.5926],
[ 1.8069, 2.3096, 1.3515, ..., -0.2181, 2.2485, -2.5900],
[ 1.8108, 2.3046, 1.3504, ..., -0.2191, 2.2489, -2.5926]], device='cuda:0')
hx size torch.Size([50, 256])
cx tensor([[ 0.0342, 0.0479, 0.0270, ..., -0.0043, 0.0449, -0.0510],
[ 0.0342, 0.0479, 0.0270, ..., -0.0043, 0.0449, -0.0510],
[ 0.0342, 0.0479, 0.0270, ..., -0.0043, 0.0449, -0.0510],
...,
[ 0.0342, 0.0479, 0.0270, ..., -0.0043, 0.0449, -0.0510],
[ 0.0342, 0.0480, 0.0270, ..., -0.0043, 0.0449, -0.0510],
[ 0.0342, 0.0479, 0.0270, ..., -0.0043, 0.0449, -0.0510]], device='cuda:0')
cx size torch.Size([50, 256])
target_sentence_words_next tensor([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')
---------------
Traceback (most recent call last):
File "pytorch.py", line 109, in <module>
loss.backward()
File "/home/xx/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/home/xx/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
allow_unreachable=True) # allow_unreachable flag
File "/home/xx/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/autograd/function.py", line 76, in apply
return self._forward_cls.backward(self, *args)
File "/home/xx/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/autograd/function.py", line 188, in wrapper
outputs = fn(ctx, *args)
File "/home/xx/.pyenv/versions/3.6.3/lib/python3.6/site-packages/torch/nn/_functions/thnn/rnnFusedPointwise.py", line 86, in backward
saved_tens, cx, cy = ctx.saved_tensors
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation