Text Generator with RNN

marvv · January 28, 2018, 2:42pm

Hi, I’m try to create a RNN that suggest next words, based on an input text. More accurate I give an String of lenth 30 in one-hot encoding into the network, and want the probabilitys what the next char is.

I get the following Error with the code below:

/home/marvin/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:46: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-96-d288d6b9bba7> in <module>()
     11 for epoch in range(10):
---> 12     loss_train = train(rnn, epoch)
     13     history['loss_train'].append(loss_train)

<ipython-input-89-4bb7b3fb6c43> in train(model, epoch)
---> 22         loss = criterion(output, target.long()) # check how far away the output is from the original data
     23         loss.backward(retain_graph=True)
     24 

/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    323         for hook in self._forward_pre_hooks.values():
    324             hook(self, input)
--> 325         result = self.forward(*input, **kwargs)
    326         for hook in self._forward_hooks.values():
    327             hook_result = hook(self, input, result)

/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    599         _assert_no_grad(target)
    600         return F.cross_entropy(input, target, self.weight, self.size_average,
--> 601                                self.ignore_index, self.reduce)
    602 
    603 

/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce)
   1138         >>> loss.backward()
   1139     """
-> 1140     return nll_loss(log_softmax(input, 1), target, weight, size_average, ignore_index, reduce)
   1141 
   1142 

/home/marvin/anaconda3/lib/python3.5/site-packages/torch/nn/functional.py in log_softmax(input, dim, _stacklevel)
    784     if dim is None:
    785         dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
--> 786     return torch._C._nn.log_softmax(input, dim)
    787 
    788 

RuntimeError: dimension out of range (expected to be in range of [-1, 0], but got 1)

Here is my model:

class LSTM_RNN(nn.Module):
    
    def __init__(self, no_classes):
        super(LSTM_RNN, self).__init__()
        
        self.lstm = nn.LSTM(input_size = no_classes, hidden_size = args.hidden_size, num_layers = 3)
        self.linear = nn.Linear(in_features = args.hidden_size, out_features = no_classes)
        self.softmax = nn.Softmax()
        
        self.linear.weight.data.normal_(0, 0.075**2)
        self.linear.bias.data.normal_(0, 0.075**2)
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant(param, 0.0)
            elif 'weight' in name:
                nn.init.xavier_normal(param) 
        nn.init.xavier_uniform(self.lstm.weight_hh_l0)
        
        # LSTM needs hidden variable which is initialized in self.init_hidden(self)
        self.hidden = self.init_hidden()
    
    def init_hidden(self):
        h0 = Variable(torch.zeros(args.num_layers, args.batch_size, args.hidden_size))
        c0 = Variable(torch.zeros(args.num_layers, args.batch_size, args.hidden_size))
        return (h0, c0)
    
    def forward(self, x):
        lstm_out, self.hidden = self.lstm(x, self.hidden) # (h0, c0 are set to default values)
        lstm_out = lstm_out.view(-1, lstm_out.size(2))
        linear_out = self.linear(lstm_out)
        res = self.softmax(linear_out) # use only the output of the last layer of lstm
        return res

My train function:

# Training loop (one epoch)
def train(model, epoch):
    model.train()
    criterion = nn.CrossEntropyLoss() # use the cross-entropy loss
    total_loss = 0.0 # compute total loss over one epoch

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(data.size(1), data.size(0), data.size(2))
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target.long()) # check how far away the output is from the original data
        loss.backward(retain_graph=True)
        
        #torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.data[0]

    relative_loss = total_loss/float(len(train_loader))
    print('Relative loss over epoch %s: %s' %(epoch, relative_loss))
    return relative_loss # return the relative loss for later analysis

Someone knows why I get this error and how to solve this?
Thanks in advance