I apologise if this is silly, but I'm unable to understand this error message:
<ipython-input-164-b29eb2d49cef> in <module>()
      2
      3 for i in range(num_iters):
----> 4     l = train()
      5     print(l)

<ipython-input-163-818cc46545d8> in train()
     10     for u in range(num_unrollings):
     11         one_hot_input = Variable(torch.from_numpy(batches[u].T).type(dtype), requires_grad=False)
---> 12         cell, hidden, output_prior = lstm(one_hot_input, cell, hidden)
     13         loss_function = nn.CrossEntropyLoss()
     14         logits = output_prior.view(output_prior.shape[1], -1).data

/usr/local/lib/python3.5/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    323         for hook in self._forward_pre_hooks.values():
    324             hook(self, input)
--> 325         result = self.forward(*input, **kwargs)
    326         for hook in self._forward_hooks.values():
    327             hook_result = hook(self, input, result)

<ipython-input-162-7f7723fe4c71> in forward(self, one_hot_input, cell, hidden)
     29
     30     def forward(self, one_hot_input, cell, hidden):
---> 31         input_gate = nn.Sigmoid(torch.mm(self.Wi,one_hot_input) + torch.mm(self.Ui,hidden) + self.bi)
     32         forget_gate = nn.Sigmoid(torch.mm(self.Wf,one_hot_input) + torch.mm(self.Uf,hidden) + self.bf)
     33         output_gate = nn.Sigmoid(torch.mm(self.Wo,one_hot_input) + torch.mm(self.Uo,hidden) + self.bo)

TypeError: __init__() takes 1 positional argument but 2 were given
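If it helps, the failing call seems to reproduce the same error entirely on its own, separate from the model (a minimal check; the input shape here is arbitrary):

import torch
import torch.nn as nn
from torch.autograd import Variable

x = Variable(torch.rand(3, 4))
# This single line raises the same error:
# TypeError: __init__() takes 1 positional argument but 2 were given
y = nn.Sigmoid(x)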
The full context: it's from a simple character-based LSTM, and for now I'm simply trying to print the loss at each step:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable

dtype = torch.FloatTensor
hidden_size = 64
class LSTMn(nn.Module):

    def __init__(self, hidden_size, vocab_size):
        super(LSTMn, self).__init__()
        self.Wi = Parameter(torch.rand(hidden_size, vocab_size).type(dtype), requires_grad=True)
        self.Ui = Parameter(torch.rand(hidden_size, hidden_size).type(dtype), requires_grad=True)
        self.bi = Parameter(torch.rand(hidden_size, 1).type(dtype), requires_grad=True)
        self.Wf = Parameter(torch.rand(hidden_size, vocab_size).type(dtype), requires_grad=True)
        self.Uf = Parameter(torch.rand(hidden_size, hidden_size).type(dtype), requires_grad=True)
        self.bf = Parameter(torch.rand(hidden_size, 1).type(dtype), requires_grad=True)
        self.Wo = Parameter(torch.rand(hidden_size, vocab_size).type(dtype), requires_grad=True)
        self.Uo = Parameter(torch.rand(hidden_size, hidden_size).type(dtype), requires_grad=True)
        self.bo = Parameter(torch.rand(hidden_size, 1).type(dtype), requires_grad=True)
        self.Wc = Parameter(torch.rand(hidden_size, vocab_size).type(dtype), requires_grad=True)
        self.Uc = Parameter(torch.rand(hidden_size, hidden_size).type(dtype), requires_grad=True)
        self.bc = Parameter(torch.rand(hidden_size, 1).type(dtype), requires_grad=True)
        self.W_soft = Parameter(torch.rand(vocab_size, hidden_size).type(dtype), requires_grad=True)
        self.b_soft = Parameter(torch.rand(vocab_size, 1).type(dtype), requires_grad=True)
        self.hidden_state = Variable(torch.zeros(hidden_size, batch_size).type(dtype), requires_grad=False)
        self.cell_state = Variable(torch.zeros(hidden_size, batch_size).type(dtype), requires_grad=False)
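
    # For reference, forward() below is intended to compute the standard LSTM
    # update (my understanding of what each line should produce):
    #   i_t  = sigmoid(Wi x_t + Ui h_{t-1} + bi)   -> input_gate
    #   f_t  = sigmoid(Wf x_t + Uf h_{t-1} + bf)   -> forget_gate
    #   o_t  = sigmoid(Wo x_t + Uo h_{t-1} + bo)   -> output_gate
    #   c'_t = tanh(Wc x_t + Uc h_{t-1} + bc)      -> update
    #   c_t  = f_t * c_{t-1} + i_t * c'_t          -> cell
    #   h_t  = o_t * tanh(c_t)                     -> hidden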
    def forward(self, one_hot_input, cell, hidden):
        input_gate = nn.Sigmoid(torch.mm(self.Wi,one_hot_input) + torch.mm(self.Ui,hidden) + self.bi)
        forget_gate = nn.Sigmoid(torch.mm(self.Wf,one_hot_input) + torch.mm(self.Uf,hidden) + self.bf)
        output_gate = nn.Sigmoid(torch.mm(self.Wo,one_hot_input) + torch.mm(self.Uo,hidden) + self.bo)
        update = nn.Tanh(torch.mm(self.Wc,one_hot_input) + torch.mm(self.Uc,hidden) + self.bc)
        cell = (forget_gate * cell) + (input_gate * update)
        hidden = output_gate * nn.Tanh(cell)
        output_prior = torch.mm(self.W_soft,hidden) + self.b_soft
        return cell, hidden, output_prior

    def save_state(self, hidden_state, cell_state):
        self.hidden_state = hidden_state
        self.cell_state = cell_state

    def load_state(self):
        return self.hidden_state, self.cell_state
lstm = LSTMn(hidden_size, vocab_size)
learning_rate = 0.0001
train_batches = BatchGenerator(train_text, batch_size, num_unrollings)
optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate)
def train():
    hidden, cell = lstm.load_state()
    batches = train_batches._next()
    optimizer.zero_grad()
    loss = 0
    for u in range(num_unrollings):
        one_hot_input = Variable(torch.from_numpy(batches[u].T).type(dtype), requires_grad=False)
        cell, hidden, output_prior = lstm(one_hot_input, cell, hidden)
        loss_function = nn.CrossEntropyLoss()
        logits = output_prior.view(output_prior.shape[1], -1).data
        labels = torch.from_numpy(np.argmax(batches[u+1], axis=1))
        loss += loss_function(logits, labels)
    lstm.save_state(hidden, cell)
    loss.backward()
    optimizer.step()
    return loss
num_iters = 1000

for i in range(num_iters):
    l = train()
    print(l)
Which __init__() could the error be referring to? The __init__() of my class looks fine to me.
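What adds to my confusion: constructing nn.Sigmoid with no arguments and then calling the result raises nothing (another minimal check, again with an arbitrary input shape):

import torch
import torch.nn as nn
from torch.autograd import Variable

sig = nn.Sigmoid()                    # no arguments here -- no error
y = sig(Variable(torch.rand(3, 4)))   # calling the constructed object also works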