Attention decoder size mismatch error in nn.Linear

khaldoon_alhussayni · April 1, 2019, 4:27pm

I need help, please. With batch_size=50 and hidden_size=200, torch.cat((h_t, encoder_outputs), dim=2) produces a tensor of shape [batch_size=50, max_len, 600]. Passing that tensor to attn = nn.Linear(hidden_size, hidden_size) raises a size mismatch error.

class SeqAttnDecoderRNN(nn.Module):
    """Single-step LSTM decoder that attends over the encoder outputs.

    The attention score for each encoder position is computed as
    ``v . tanh(attn([h_t; encoder_output]))`` and normalised with a softmax
    over the time axis. The resulting context vector is concatenated with the
    decoder input and fed to a one-layer LSTM.

    NOTE(review): the layer sizes below are kept exactly as posted. As
    written, ``self.attn`` is declared with ``hidden_size`` input features,
    while ``forward`` feeds it the concatenation of ``h_t`` and
    ``encoder_outputs``; that is the size mismatch this thread is about.
    """

    def __init__(self, embedding_size, hidden_size, output_size, dropout_p=0.1, max_length=30):
        """Build the decoder layers.

        Args:
            embedding_size: feature size of the (already embedded) decoder input.
            hidden_size: hidden size of the decoder LSTM.
            output_size: number of output classes scored by ``self.out``.
            dropout_p: dropout probability for ``self.dropout`` and the LSTM.
            max_length: accepted but not used by the code shown here.
        """
        super(SeqAttnDecoderRNN, self).__init__()
        # Define parameters
        self.hidden_size = hidden_size
        self.embed_size = embedding_size
        self.output_size = output_size
        self.n_layers = 1
        self.dropout_p = dropout_p
        # Define layers
        # The embedding lookup is disabled: only the size is stored, and
        # forward() uses its input as the embedded vector directly.
        self.embedding = embedding_size  # nn.Embedding(output_size, embedding_size)
        self.dropout = nn.Dropout(dropout_p)
        self.rnn = nn.LSTM(hidden_size * 2, hidden_size, dropout=self.dropout_p, batch_first=False)
        # NOTE(review): forward() passes the LSTM output (hidden_size features)
        # straight into this layer, which expects hidden_size * 2 -- looks like
        # a second mismatch; confirm against the intended design.
        self.out = nn.Linear(hidden_size * 2, output_size)
        # `score` and `attn_combine` are defined but not used in forward().
        self.score = nn.Linear(self.hidden_size + self.hidden_size, self.hidden_size)
        self.attn_combine = nn.Linear(embedding_size + hidden_size, embedding_size)

        # attention
        # NOTE(review): in_features must equal the last dimension of
        # torch.cat((h_t, encoder_outputs), 2) built in forward(), i.e. the
        # sum of both feature sizes, not hidden_size alone.
        self.attn = nn.Linear(self.hidden_size, hidden_size)
        # Scoring vector v; the uniform init is immediately overwritten by a
        # normal init with std = 1 / sqrt(hidden_size).
        self.v = nn.Parameter(torch.rand(hidden_size))
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.normal_(mean=0, std=stdv)

    def forward(self, _input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            _input: decoder input, used as the embedded vector as-is
                (presumably [B,1,embedding_size] -- TODO confirm with caller).
            hidden: LSTM state; if a tuple, its first element is used as the
                attention query.
            encoder_outputs: encoder states; axes 0 and 1 are swapped on
                entry (presumably [T,B,*] -> [B,T,*] -- TODO confirm).

        Returns:
            Tuple ``(output, hidden)``: log-softmax scores from ``self.out``
            and the updated LSTM state.
        """
        # For an LSTM state (h, c) only h is used as the attention query.
        if isinstance(hidden, tuple):
            h_t = hidden[0]
        else:
            h_t = hidden
        encoder_outputs = encoder_outputs.transpose(0, 1)
        embedded = _input  # self.embedding(_input)  # .view(1, 1, -1)

        # SCORE 3
        max_len = encoder_outputs.size(1)
        h_t = h_t.transpose(0, 1)  # [1,B,D] -> [B,1,D]
        h_t = h_t.repeat(1, max_len, 1)  # [B,1,D]  -> [B,T,D]

        # This is the call that raises the reported size mismatch: the
        # concatenated last dimension is larger than self.attn's in_features.
        energy = self.attn(torch.cat((h_t, encoder_outputs), 2))  # [B,T,2D] -> [B,T,D]
        energy = torch.tanh(energy)
        energy = energy.transpose(2, 1)  # [B,H,T]
        v = self.v.repeat(encoder_outputs.size(0), 1).unsqueeze(1)  # [B,1,H]
        energy = torch.bmm(v, energy)  # [B,1,T]
        attn_weights = F.softmax(energy, dim=2)  # [B,1,T]

        # getting context: attention-weighted sum of the encoder outputs
        context = torch.bmm(attn_weights, encoder_outputs)  # [B,1,H]

        # context = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0)) #[B,1,H]
        # Combine embedded input word and attended context, run through RNN
        rnn_input = torch.cat((embedded, context), 2)
        rnn_input = rnn_input.transpose(0, 1)  # batch-first -> time-first for the LSTM

        output, hidden = self.rnn(rnn_input, hidden)
        output = output.squeeze(0)  # (1,B,H)->(B,H)

        output = F.log_softmax(self.out(output), dim=1)
        return output, hidden  # , attn_weights

RuntimeError Traceback (most recent call last)
in ()
1 model = Model(ontology, vocab)
----> 2 model.run_train(dataset['train'], dataset['dev'])

in run_train(self, train, dev)
145 iteration += 1
146 self.zero_grad()
–> 147 loss, scores = self.forward(batch)
148 loss.backward()
149 self.optimizer.step()

in forward(self, batch)
57
58
—> 59 dec_out, dec_h = self.decoder(sys_utter, hidden, enc_ou)
60 print(dec_out.shape, dec_h.shape)
61

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
–> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)

in forward(self, _input, hidden, encoder_outputs)
45
46
—> 47 energy = self.attn(torch.cat((h_t, encoder_outputs), 2)) # [B,T,2D] -> [B,T,D]
48 energy = torch.tanh(energy)
49 energy = energy.transpose(2, 1) # [B,H,T]

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
–> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
65 @weak_script_method
66 def forward(self, input):
—> 67 return F.linear(input, self.weight, self.bias)
68
69 def extra_repr(self):

~\Anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1352 ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
1353 else:
-> 1354 output = input.matmul(weight.t())
1355 if bias is not None:
1356 output += torch.jit._unwrap_optional(bias)

RuntimeError: size mismatch, m1: [1150 x 600], m2: [200 x 200] at c:\a\w\1\s\tmp_conda_3.6_091443\conda\conda-bld\pytorch_1544087948354\work\aten\src\th\generic/THTensorMath.cpp:940

ptrblck · April 1, 2019, 11:18pm

Could you print the shapes of h_t and encoder_outputs before trying to concatenate them?
Neither the batch size nor the number of features seem to match.

khaldoon_alhussayni · April 3, 2019, 8:19am

Thank you for the reply. Solved by defining attn = nn.Linear((hidden_size * 2) + embedding_size, hidden_size),
where (hidden_size * 2) + embedding_size = 600,
because the result of torch.cat((h_t, encoder_outputs), dim=2) has shape [batch_size=50, max_len, 600].