Runtime error in nn.Linear()

ShawnGuo · April 19, 2017, 1:44am

My model could run well. However, I got a runtime error and I can’t figure out how this happened. It seems that the weight matrix of nn.Linear() module is with wrong type.
The trace back information is as follow:

Traceback (most recent call last):
  File "/home/shawnguo/PythonWS/LinearAlignment_TE/trainer.py", line 187, in <module>
    t.train()
  File "/home/shawnguo/PythonWS/LinearAlignment_TE/trainer.py", line 102, in train
    train_loss, train_acc = self.train_step(self.data.train)
  File "/home/shawnguo/PythonWS/LinearAlignment_TE/trainer.py", line 151, in train_step
    output = self.model(_data['p_ids'], _data['h_ids'], _data['p_rels'], _data['h_rels'])
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/shawnguo/PythonWS/LinearAlignment_TE/align_model.py", line 69, in forward
    e = torch.mm(self.F(a_), self.F(b_).t())
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/shawnguo/PythonWS/LinearAlignment_TE/align_model.py", line 23, in forward
    x = self.Layer1(x)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/linear.py", line 54, in forward
    return self._backend.Linear()(input, self.weight, self.bias)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/linear.py", line 10, in forward
    output.addmm_(0, 1, input, weight.t())
TypeError: addmm_ received an invalid combination of arguments - got (int, int, torch.cuda.DoubleTensor, torch.cuda.FloatTensor), but expected one of:
 * (torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (float beta, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (float alpha, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (float beta, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (float alpha, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (float beta, float alpha, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
 * (float beta, float alpha, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)

And, my model is defined as:

class FeedForwardLayer(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, activation=F.tanh):
        super(FeedForwardLayer, self).__init__()
        self.name = 'FeedForwardLayer'

        self.Layer1 = nn.Linear(in_dim, hidden_dim)
        self.Layer2 = nn.Linear(hidden_dim, out_dim)

        self.activation = activation

    def forward(self, x):
        x = self.Layer1(x)
        x = self.activation(x)
        x = self.Layer2(x)
        x = self.activation(x)
        return x

class DecomposableModel(nn.Module):
    def __init__(self, word_embedding, config, train=True):
        super(DecomposableModel, self).__init__()
        self.name = 'DecomposableModel'

        self.train = train
        self.activation = config['activation']
        self.drop_p = config['drop_p']

        self.word_dim = word_embedding.embeddings.size(1)
        self.embedding = nn.Embedding(word_embedding.embeddings.size(0), self.word_dim)
        self.embedding.weight = nn.Parameter(word_embedding.embeddings, requires_grad=False)

        self.F = FeedForwardLayer(self.word_dim, config['hidden_dim'], config['F_dim'], self.activation)
        self.G = FeedForwardLayer(2 * self.word_dim, config['hidden_dim'], config['G_dim'], self.activation)
        self.H = FeedForwardLayer(2 * config['G_dim'], config['hidden_dim'],
                                  config['relation_num'], self.activation)

        self.cuda_flag = config['cuda_flag']

    def forward(self, *inputs):
        p_ids = inputs[0]
        h_ids = inputs[1]

        if self.cuda_flag:
            p_ids = p_ids.cuda()
            h_ids = h_ids.cuda()
        p = Variable(p_ids)
        h = Variable(h_ids)

        # project the word ids into continuous space
        a_ = self.embedding(p)
        b_ = self.embedding(h)

        e = torch.mm(self.F(a_), self.F(b_).t())

        e_ = F.softmax(e)
        e_t = F.softmax(e.t())

        beta = torch.mm(e_, b_)
        alpha = torch.mm(e_t, a_)

        v1 = self.G(torch.cat((a_, beta), 1)).mean(0)
        v2 = self.G(torch.cat((b_, alpha), 1)).mean(0)

        if self.train:
            v = self.H(F.dropout(torch.cat((v1, v2), 1), self.drop_p))
        else:
            v = self.H(torch.cat((v1, v2), 1))

        return v

    def set_train_flag(self, flag):
        self.train = flag

Henderake · April 19, 2017, 8:13am

The type of the inputs to addmm_ should be both FloatTensor or DoubleTensor.
What’s the type of word_embedding.embeddings in nn.Parameter(word_embedding.embeddings, requires_grad=False)? It seems that you pass a word_embedding of type DoubleTensor to DecomposableModel’s __init__ anywhere else in the code (I guess it’s in your trainer.py). Make it to FloatTensor.

ShawnGuo · April 19, 2017, 9:03am

Oh, I see. This does solve my problem. Thank you very much.