My model could run well. However, I got a runtime error and I can’t figure out how this happened. It seems that the weight matrix of nn.Linear() module is with wrong type.
The trace back information is as follow:
Traceback (most recent call last):
File "/home/shawnguo/PythonWS/LinearAlignment_TE/trainer.py", line 187, in <module>
t.train()
File "/home/shawnguo/PythonWS/LinearAlignment_TE/trainer.py", line 102, in train
train_loss, train_acc = self.train_step(self.data.train)
File "/home/shawnguo/PythonWS/LinearAlignment_TE/trainer.py", line 151, in train_step
output = self.model(_data['p_ids'], _data['h_ids'], _data['p_rels'], _data['h_rels'])
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
result = self.forward(*input, **kwargs)
File "/home/shawnguo/PythonWS/LinearAlignment_TE/align_model.py", line 69, in forward
e = torch.mm(self.F(a_), self.F(b_).t())
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
result = self.forward(*input, **kwargs)
File "/home/shawnguo/PythonWS/LinearAlignment_TE/align_model.py", line 23, in forward
x = self.Layer1(x)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 206, in __call__
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/linear.py", line 54, in forward
return self._backend.Linear()(input, self.weight, self.bias)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/linear.py", line 10, in forward
output.addmm_(0, 1, input, weight.t())
TypeError: addmm_ received an invalid combination of arguments - got (int, int, torch.cuda.DoubleTensor, torch.cuda.FloatTensor), but expected one of:
* (torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (float beta, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (float alpha, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (float beta, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (float alpha, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (float beta, float alpha, torch.cuda.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
* (float beta, float alpha, torch.cuda.sparse.DoubleTensor mat1, torch.cuda.DoubleTensor mat2)
And, my model is defined as:
class FeedForwardLayer(nn.Module):
def __init__(self, in_dim, hidden_dim, out_dim, activation=F.tanh):
super(FeedForwardLayer, self).__init__()
self.name = 'FeedForwardLayer'
self.Layer1 = nn.Linear(in_dim, hidden_dim)
self.Layer2 = nn.Linear(hidden_dim, out_dim)
self.activation = activation
def forward(self, x):
x = self.Layer1(x)
x = self.activation(x)
x = self.Layer2(x)
x = self.activation(x)
return x
class DecomposableModel(nn.Module):
def __init__(self, word_embedding, config, train=True):
super(DecomposableModel, self).__init__()
self.name = 'DecomposableModel'
self.train = train
self.activation = config['activation']
self.drop_p = config['drop_p']
self.word_dim = word_embedding.embeddings.size(1)
self.embedding = nn.Embedding(word_embedding.embeddings.size(0), self.word_dim)
self.embedding.weight = nn.Parameter(word_embedding.embeddings, requires_grad=False)
self.F = FeedForwardLayer(self.word_dim, config['hidden_dim'], config['F_dim'], self.activation)
self.G = FeedForwardLayer(2 * self.word_dim, config['hidden_dim'], config['G_dim'], self.activation)
self.H = FeedForwardLayer(2 * config['G_dim'], config['hidden_dim'],
config['relation_num'], self.activation)
self.cuda_flag = config['cuda_flag']
def forward(self, *inputs):
p_ids = inputs[0]
h_ids = inputs[1]
if self.cuda_flag:
p_ids = p_ids.cuda()
h_ids = h_ids.cuda()
p = Variable(p_ids)
h = Variable(h_ids)
# project the word ids into continuous space
a_ = self.embedding(p)
b_ = self.embedding(h)
e = torch.mm(self.F(a_), self.F(b_).t())
e_ = F.softmax(e)
e_t = F.softmax(e.t())
beta = torch.mm(e_, b_)
alpha = torch.mm(e_t, a_)
v1 = self.G(torch.cat((a_, beta), 1)).mean(0)
v2 = self.G(torch.cat((b_, alpha), 1)).mean(0)
if self.train:
v = self.H(F.dropout(torch.cat((v1, v2), 1), self.drop_p))
else:
v = self.H(torch.cat((v1, v2), 1))
return v
def set_train_flag(self, flag):
self.train = flag