LongTensor vs. FloatTensor -- battle to the death?

cooganb · November 4, 2017, 8:32pm

Hi friends,

I’m adapting the conditional RNN Name Generator tutorial to do longer text generation and am having some trouble.

The tutorial makes the category, input and hidden state all LongTensors, but then I received this error when training:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-117-3570efd110ad> in <module>()
     20 
     21 for epoch in range(1, n_epochs + 1):
---> 22     output, loss = train(*random_training_set())
     23     loss_avg += loss
     24 

 train(category_tensor, input_line_tensor, target_line_tensor)
      9 
     10     for i in range(input_line_tensor.size()[1]):
---> 11         output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
     12         print(output, target_line_tensor[i])
     13         loss += criterion(output, target_line_tensor[i])

 __call__(self, *input, **kwargs)
    222         for hook in self._forward_pre_hooks.values():
    223             hook(self, input)
--> 224         result = self.forward(*input, **kwargs)
    225         for hook in self._forward_hooks.values():
    226             hook_result = hook(self, input, result)

<ipython-input-116-31a4f7187888> in forward(self, category, input, hidden)
     16         input_combined = torch.cat((category, input, hidden), 1)
     17         print("Input_Combined: ", input_combined.data.type())
---> 18         hidden = self.i2h(input_combined)
     19         output = self.i2o(input_combined)
     20         output_combined = torch.cat((hidden, output), 1)

 __call__(self, *input, **kwargs)
    222         for hook in self._forward_pre_hooks.values():
    223             hook(self, input)
--> 224         result = self.forward(*input, **kwargs)
    225         for hook in self._forward_hooks.values():
    226             hook_result = hook(self, input, result)

forward(self, input, hx)
    160             flat_weight=flat_weight
    161         )
--> 162         output, hidden = func(input, self.all_weights, hx)
    163         if is_packed:
    164             output = PackedSequence(output, batch_sizes)

 forward(input, *fargs, **fkwargs)
    349         else:
    350             func = AutogradRNN(*args, **kwargs)
--> 351         return func(input, *fargs, **fkwargs)
    352 
    353     return forward

 forward(input, weight, hidden)
    242             input = input.transpose(0, 1)
    243 
--> 244         nexth, output = func(input, hidden, weight)
    245 
    246         if batch_first and batch_sizes is None:

 forward(input, hidden, weight)
     82                 l = i * num_directions + j
     83 
---> 84                 hy, output = inner(input, hidden[l], weight[l])
     85                 next_hidden.append(hy)
     86                 all_output.append(output)

 forward(input, hidden, weight)
    111         steps = range(input.size(0) - 1, -1, -1) if reverse else range(input.size(0))
    112         for i in steps:
--> 113             hidden = inner(input[i], hidden, *weight)
    114             # hack to handle LSTM
    115             output.append(hidden[0] if isinstance(hidden, tuple) else hidden)

/usr/local/lib/python3.5/dist-packages/torch/nn/_functions/rnn.py in GRUCell(input, hidden, w_ih, w_hh, b_ih, b_hh)
     52         return state(gi, gh, hidden) if b_ih is None else state(gi, gh, hidden, b_ih, b_hh)
     53 
---> 54     gi = F.linear(input, w_ih, b_ih)
     55     gh = F.linear(hidden, w_hh, b_hh)
     56     i_r, i_i, i_n = gi.chunk(3, 1)

/usr/local/lib/python3.5/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
    553         return torch.addmm(bias, input, weight.t())
    554 
--> 555     output = input.matmul(weight.t())
    556     if bias is not None:
    557         output += bias

/usr/local/lib/python3.5/dist-packages/torch/autograd/variable.py in matmul(self, other)
    558 
    559     def matmul(self, other):
--> 560         return torch.matmul(self, other)
    561 
    562     @staticmethod

/usr/local/lib/python3.5/dist-packages/torch/functional.py in matmul(tensor1, tensor2, out)
    166     elif dim_tensor1 == 1 and dim_tensor2 == 2:
    167         if out is None:
--> 168             return torch.mm(tensor1.unsqueeze(0), tensor2).squeeze_(0)
    169         else:
    170             return torch.mm(tensor1.unsqueeze(0), tensor2, out=out).squeeze_(0)

/usr/local/lib/python3.5/dist-packages/torch/autograd/variable.py in mm(self, matrix)
    577     def mm(self, matrix):
    578         output = Variable(self.data.new(self.data.size(0), matrix.data.size(1)))
--> 579         return Addmm.apply(output, self, matrix, 0, 1, True)
    580 
    581     def bmm(self, batch):

/usr/local/lib/python3.5/dist-packages/torch/autograd/_functions/blas.py in forward(ctx, add_matrix, matrix1, matrix2, alpha, beta, inplace)
     24         output = _get_output(ctx, add_matrix, inplace=inplace)
     25         return torch.addmm(alpha, add_matrix, beta,
---> 26                            matrix1, matrix2, out=output)
     27 
     28     @staticmethod

TypeError: torch.addmm received an invalid combination of arguments - got (int, torch.LongTensor, int, torch.LongTensor, torch.FloatTensor, out=torch.LongTensor), but expected one of:
 * (torch.LongTensor source, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
 * (torch.LongTensor source, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
 * (int beta, torch.LongTensor source, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
 * (torch.LongTensor source, int alpha, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
 * (int beta, torch.LongTensor source, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
 * (torch.LongTensor source, int alpha, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
 * (int beta, torch.LongTensor source, int alpha, torch.LongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
      didn't match because some of the arguments have invalid types: (int, torch.LongTensor, int, torch.LongTensor, torch.FloatTensor, out=torch.LongTensor)
 * (int beta, torch.LongTensor source, int alpha, torch.SparseLongTensor mat1, torch.LongTensor mat2, *, torch.LongTensor out)
      didn't match because some of the arguments have invalid types: (int, torch.LongTensor, int, torch.LongTensor, torch.FloatTensor, out=torch.LongTensor)

I’m definitely making some sort of mistake here, it seems like the weights are FloatTensors which makes it impossible to do the matrix multiplication?

Please help! I will post whatever is needed

Here’s the RNN initialization in the code:

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.bias = False
        print(input_size, hidden_size, output_size)
        self.i2h = nn.GRU(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.softmax = nn.LogSoftmax()
    
    def forward(self, category, input, hidden):
        print("Category: ",category.data.type(), category.size(), "\n", "Input: ",input.data.type(), input.size(), "\n", "Hidden: ", hidden.data.type(), hidden.size(), "\n")
        input_combined = torch.cat((category, input, hidden), 1)
        print("Input_Combined: ", input_combined.data.type())
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        return output, hidden

    def init_hidden(self):
        hidden = Variable(torch.LongTensor(1, self.hidden_size))
        if USE_CUDA: hidden = hidden.cuda()
        return hidden

cooganb · November 4, 2017, 8:41pm

Here’s a public gist I created of the notebook I’m using:

gist.github.com

https://gist.github.com/cooganb/cde46d0fa3e1769e68724b0c81e9f2f4

Conditional-Word-level-RNN.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Conditional Word-level RNN"
   ]
  },
  {

This file has been truncated. show original

smth · November 5, 2017, 12:22am

The tutorial does not make input, hidden to be LongTensors.
It only makes target to be LongTensor.

cooganb · November 5, 2017, 1:00am

Thank you so much for your response!

Are you suggesting I run input and hidden as FloatTensors?