I need help, please the batch_size=50 , hidden_size=200 when using torch.cat((h_t, encoder_outputs),dim=2) the dim results is [batch_size=50, max_len, hidden_size=600] in atten=nn.linear(hidden_size,hidden_size) the error is size mismtach
class SeqAttnDecoderRNN(nn.Module):
def init(self, embedding_size, hidden_size, output_size, dropout_p=0.1, max_length=30):
super(SeqAttnDecoderRNN, self).__init__()
# Define parameters
self.hidden_size = hidden_size
self.embed_size = embedding_size
self.output_size = output_size
self.n_layers = 1
self.dropout_p = dropout_p
# Define layers
self.embedding = embedding_size #nn.Embedding(output_size, embedding_size)
self.dropout = nn.Dropout(dropout_p)
self.rnn = nn.LSTM(hidden_size*2, hidden_size, dropout=self.dropout_p, batch_first=False)
self.out = nn.Linear(hidden_size*2, output_size)
self.score = nn.Linear(self.hidden_size + self.hidden_size, self.hidden_size)
self.attn_combine = nn.Linear(embedding_size + hidden_size, embedding_size)
# attention
self.attn = nn.Linear(self.hidden_size, hidden_size)
self.v = nn.Parameter(torch.rand(hidden_size))
stdv = 1. / math.sqrt(self.v.size(0))
self.v.data.normal_(mean=0, std=stdv)
def forward(self, _input, hidden, encoder_outputs):
if isinstance(hidden, tuple):
h_t = hidden[0]
else:
h_t = hidden
encoder_outputs = encoder_outputs.transpose(0, 1)
embedded = _input # self.embedding(_input) # .view(1, 1, -1)
# SCORE 3
max_len = encoder_outputs.size(1)
h_t = h_t.transpose(0, 1) # [1,B,D] -> [B,1,D]
h_t = h_t.repeat(1, max_len, 1) # [B,1,D] -> [B,T,D]
energy = self.attn(torch.cat((h_t, encoder_outputs), 2)) # [B,T,2D] -> [B,T,D]
energy = torch.tanh(energy)
energy = energy.transpose(2, 1) # [B,H,T]
v = self.v.repeat(encoder_outputs.size(0), 1).unsqueeze(1) # [B,1,H]
energy = torch.bmm(v, energy) # [B,1,T]
attn_weights = F.softmax(energy, dim=2) # [B,1,T]
# getting context
context = torch.bmm(attn_weights, encoder_outputs) # [B,1,H]
# context = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0)) #[B,1,H]
# Combine embedded input word and attended context, run through RNN
rnn_input = torch.cat((embedded, context), 2)
rnn_input = rnn_input.transpose(0, 1)
output, hidden = self.rnn(rnn_input, hidden)
output = output.squeeze(0) # (1,B,V)->(B,V)
output = F.log_softmax(self.out(output), dim=1)
return output, hidden # , attn_weights
RuntimeError Traceback (most recent call last)
in ()
1 model = Model(ontology, vocab)
----> 2 model.run_train(dataset[βtrainβ], dataset[βdevβ])
in run_train(self, train, dev)
145 iteration += 1
146 self.zero_grad()
β> 147 loss, scores = self.forward(batch)
148 loss.backward()
149 self.optimizer.step()
in forward(self, batch)
57
58
β> 59 dec_out, dec_h = self.decoder(sys_utter, hidden, enc_ou)
60 print(dec_out.shape, dec_h.shape)
61
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
β> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
in forward(self, _input, hidden, encoder_outputs)
45
46
β> 47 energy = self.attn(torch.cat((h_t, encoder_outputs), 2)) # [B,T,2D] -> [B,T,D]
48 energy = torch.tanh(energy)
49 energy = energy.transpose(2, 1) # [B,H,T]
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
β> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
65 @weak_script_method
66 def forward(self, input):
β> 67 return F.linear(input, self.weight, self.bias)
68
69 def extra_repr(self):
~\Anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1352 ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
1353 else:
-> 1354 output = input.matmul(weight.t())
1355 if bias is not None:
1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [1150 x 600], m2: [200 x 200] at c:\a\w\1\s\tmp_conda_3.6_091443\conda\conda-bld\pytorch_1544087948354\work\aten\src\th\generic/THTensorMath.cpp:940