Hi, I’m try to create a RNN that suggest next words, based on an input text. More accurate I give an String of lenth 30 in one-hot encoding into the network, and want the probabilitys what the next char is.
I get the following Error with the code below:
/home/marvin/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:46: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-96-d288d6b9bba7> in <module>()
11 for epoch in range(10):
---> 12 loss_train = train(rnn, epoch)
13 history['loss_train'].append(loss_train)
<ipython-input-89-4bb7b3fb6c43> in train(model, epoch)
---> 22 loss = criterion(output, target.long()) # check how far away the output is from the original data
23 loss.backward(retain_graph=True)
24
/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323 for hook in self._forward_pre_hooks.values():
324 hook(self, input)
--> 325 result = self.forward(*input, **kwargs)
326 for hook in self._forward_hooks.values():
327 hook_result = hook(self, input, result)
/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
599 _assert_no_grad(target)
600 return F.cross_entropy(input, target, self.weight, self.size_average,
--> 601 self.ignore_index, self.reduce)
602
603
/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce)
1138 >>> loss.backward()
1139 """
-> 1140 return nll_loss(log_softmax(input, 1), target, weight, size_average, ignore_index, reduce)
1141
1142
/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/functional.py in log_softmax(input, dim, _stacklevel)
784 if dim is None:
785 dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
--> 786 return torch._C._nn.log_softmax(input, dim)
787
788
RuntimeError: dimension out of range (expected to be in range of [-1, 0], but got 1)
Here is my model:
class LSTM_RNN(nn.Module):
def __init__(self, no_classes):
super(LSTM_RNN, self).__init__()
self.lstm = nn.LSTM(input_size = no_classes, hidden_size = args.hidden_size, num_layers = 3)
self.linear = nn.Linear(in_features = args.hidden_size, out_features = no_classes)
self.softmax = nn.Softmax()
self.linear.weight.data.normal_(0, 0.075**2)
self.linear.bias.data.normal_(0, 0.075**2)
for name, param in self.lstm.named_parameters():
if 'bias' in name:
nn.init.constant(param, 0.0)
elif 'weight' in name:
nn.init.xavier_normal(param)
nn.init.xavier_uniform(self.lstm.weight_hh_l0)
# LSTM needs hidden variable which is initialized in self.init_hidden(self)
self.hidden = self.init_hidden()
def init_hidden(self):
h0 = Variable(torch.zeros(args.num_layers, args.batch_size, args.hidden_size))
c0 = Variable(torch.zeros(args.num_layers, args.batch_size, args.hidden_size))
return (h0, c0)
def forward(self, x):
lstm_out, self.hidden = self.lstm(x, self.hidden) # (h0, c0 are set to default values)
lstm_out = lstm_out.view(-1, lstm_out.size(2))
linear_out = self.linear(lstm_out)
res = self.softmax(linear_out) # use only the output of the last layer of lstm
return res
My train function:
# Training loop (one epoch)
def train(model, epoch):
model.train()
criterion = nn.CrossEntropyLoss() # use the cross-entropy loss
total_loss = 0.0 # compute total loss over one epoch
for batch_idx, (data, target) in enumerate(train_loader):
data = data.view(data.size(1), data.size(0), data.size(2))
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target.long()) # check how far away the output is from the original data
loss.backward(retain_graph=True)
#torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
optimizer.step()
total_loss += loss.data[0]
relative_loss = total_loss/float(len(train_loader))
print('Relative loss over epoch %s: %s' %(epoch, relative_loss))
return relative_loss # return the relative loss for later analysis
Someone knows why I get this error and how to solve this?
Thanks in advance