Hi friends,
I’m adapting the conditional RNN Name Generator tutorial to do longer text generation and am having some trouble.
The tutorial makes the category, input and hidden state all LongTensors, but then I received this error when training:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-117-3570efd110ad> in <module>()
20
21 for epoch in range(1, n_epochs + 1):
---> 22 output, loss = train(*random_training_set())
23 loss_avg += loss
24
train(category_tensor, input_line_tensor, target_line_tensor)
9
10 for i in range(input_line_tensor.size()[1]):
---> 11 output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
12 print(output, target_line_tensor[i])
13 loss += criterion(output, target_line_tensor[i])
__call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
<ipython-input-116-31a4f7187888> in forward(self, category, input, hidden)
16 input_combined = torch.cat((category, input, hidden), 1)
17 print("Input_Combined: ", input_combined.data.type())
---> 18 hidden = self.i2h(input_combined)
19 output = self.i2o(input_combined)
20 output_combined = torch.cat((hidden, output), 1)
__call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
forward(self, input, hx)
160 flat_weight=flat_weight
161 )
--> 162 output, hidden = func(input, self.all_weights, hx)
163 if is_packed:
164 output = PackedSequence(output, batch_sizes)
forward(input, *fargs, **fkwargs)
349 else:
350 func = AutogradRNN(*args, **kwargs)
--> 351 return func(input, *fargs, **fkwargs)
352
353 return forward
forward(input, weight, hidden)
242 input = input.transpose(0, 1)
243
--> 244 nexth, output = func(input, hidden, weight)
245
246 if batch_first and batch_sizes is None:
forward(input, hidden, weight)
82 l = i * num_directions + j
83
---> 84 hy, output = inner(input, hidden[l], weight[l])
85 next_hidden.append(hy)
86 all_output.append(output)
forward(input, hidden, weight)
111 steps = range(input.size(0) - 1, -1, -1) if reverse else range(input.size(0))
112 for i in steps:
--> 113 hidden = inner(input[i], hidden, *weight)
114 # hack to handle LSTM
115 output.append(hidden[0] if isinstance(hidden, tuple) else hidden)
/usr/local/lib/python3.5/dist-packages/torch/nn/_functions/rnn.py in GRUCell(input, hidden, w_ih, w_hh, b_ih, b_hh)
52 return state(gi, gh, hidden) if b_ih is None else state(gi, gh, hidden, b_ih, b_hh)
53
---> 54 gi = F.linear(input, w_ih, b_ih)
55 gh = F.linear(hidden, w_hh, b_hh)
56 i_r, i_i, i_n = gi.chunk(3, 1)
/usr/local/lib/python3.5/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
553 return torch.addmm(bias, input, weight.t())
554
--> 555 output = input.matmul(weight.t())
556 if bias is not None:
557 output += bias
/usr/local/lib/python3.5/dist-packages/torch/autograd/variable.py in matmul(self, other)
558
559 def matmul(self, other):
--> 560 return torch.matmul(self, other)
561
562 @staticmethod
/usr/local/lib/python3.5/dist-packages/torch/functional.py in matmul(tensor1, tensor2, out)
166 elif dim_tensor1 == 1 and dim_tensor2 == 2:
167 if out is None:
--> 168 return torch.mm(tensor1.unsqueeze(0), tensor2).squeeze_(0)
169 else:
170 return torch.mm(tensor1.unsqueeze(0), tensor2, out=out).squeeze_(0)
/usr/local/lib/python3.5/dist-packages/torch/autograd/variable.py in mm(self, matrix)
577 def mm(self, matrix):
578 output = Variable(self.data.new(self.data.size(0), matrix.data.size(1)))
--> 579 return Addmm.apply(output, self, matrix, 0, 1, True)
580
581 def bmm(self, batch):
/usr/local/lib/python3.5/dist-packages/torch/autograd/_functions/blas.py in forward(ctx, add_matrix, matrix1, matrix2, alpha, beta, inplace)
24 output = _get_output(ctx, add_matrix, inplace=inplace)
25 return torch.addmm(alpha, add_matrix, beta,
---> 26 matrix1, matrix2, out=output)
27
28 @staticmethod
TypeError: torch.addmm received an invalid combination of arguments - got (int, torch.LongTensor, int, torch.LongTensor, torch.FloatTensor, out=torch.LongTensor), but expected one of:
* (torch.LongTensor source, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
* (torch.LongTensor source, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
* (int beta, torch.LongTensor source, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
* (torch.LongTensor source, int alpha, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
* (int beta, torch.LongTensor source, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
* (torch.LongTensor source, int alpha, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
* (int beta, torch.LongTensor source, int alpha, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
didn't match because some of the arguments have invalid types: (int, torch.LongTensor, int, torch.LongTensor, torch.FloatTensor, out=torch.LongTensor)
* (int beta, torch.LongTensor source, int alpha, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
didn't match because some of the arguments have invalid types: (int, torch.LongTensor, int, torch.LongTensor, torch.FloatTensor, out=torch.LongTensor)
I’m definitely making some sort of mistake here, it seems like the weights are FloatTensors which makes it impossible to do the matrix multiplication?
Please help! I will post whatever is needed
Here’s the RNN initialization in the code:
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(RNN, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.bias = False
print(input_size, hidden_size, output_size)
self.i2h = nn.GRU(n_categories + input_size + hidden_size, hidden_size)
self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
self.o2o = nn.Linear(hidden_size + output_size, output_size)
self.softmax = nn.LogSoftmax()
def forward(self, category, input, hidden):
print("Category: ",category.data.type(), category.size(), "\n", "Input: ",input.data.type(), input.size(), "\n", "Hidden: ", hidden.data.type(), hidden.size(), "\n")
input_combined = torch.cat((category, input, hidden), 1)
print("Input_Combined: ", input_combined.data.type())
hidden = self.i2h(input_combined)
output = self.i2o(input_combined)
output_combined = torch.cat((hidden, output), 1)
output = self.o2o(output_combined)
return output, hidden
def init_hidden(self):
hidden = Variable(torch.LongTensor(1, self.hidden_size))
if USE_CUDA: hidden = hidden.cuda()
return hidden