Here is my RNN class:

```
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size, hidden_size)    # input -> hidden
        self.h2o = nn.Linear(hidden_size, output_size)   # hidden -> output
        self.h2h = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        h = self.softmax(self.h2h(hidden) + self.i2h(input))
        o = self.softmax(self.h2o(h))
        return o, h

    def init_hidden(self):
        return Variable(torch.zeros(1, self.hidden_size))
```

I created an instance and trained it as follows:

```
rnn = RNN(n_chars, 90, n_chars)
criterion = nn.MSELoss()
learning_rate = 0.05
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
hidden = rnn.init_hidden()
epochs = 5

for epoch in range(epochs):
    for i in range(len(X)):
        for ele in X[i]:
            output, hidden = rnn(Variable(ele.t()), hidden)
        loss = criterion(output, Y[i])
        loss.backward(retain_graph=True)
        optimizer.zero_grad()
        optimizer.step()
        if i % 1000 == 0:
            print('Current loss is: ', loss)
```

The result is an extremely large loss on the first step; the loss then becomes Inf, and after that it is NaN until the end of training. Why is this happening, and how can I correct it?
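
For what it's worth, my current guess is that calling optimizer.zero_grad() between loss.backward() and optimizer.step() wipes the gradients before the update is applied, and that the hidden state is carried across the entire run without ever being reset or detached (which is also why I needed retain_graph=True). Below is a sketch of the loop I think I should be running instead (it reuses the RNN class, X, Y, and n_chars from above); the per-sequence hidden reset and the moved zero_grad() are assumptions on my part, not something I have verified:

```
import torch
import torch.nn as nn
from torch.autograd import Variable

rnn = RNN(n_chars, 90, n_chars)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.05)
epochs = 5

for epoch in range(epochs):
    for i in range(len(X)):
        hidden = rnn.init_hidden()   # fresh hidden state for each sequence
        optimizer.zero_grad()        # clear gradients from the previous step
        for ele in X[i]:
            output, hidden = rnn(Variable(ele.t()), hidden)
        loss = criterion(output, Y[i])
        loss.backward()              # graph is rebuilt per sequence, so no retain_graph
        optimizer.step()             # the update now sees the fresh gradients
        if i % 1000 == 0:
            print('Current loss is: ', loss)
```

Even with this ordering, I am not sure whether LogSoftmax is a sensible activation for the recurrent hidden state, or whether MSELoss is appropriate on log-probability outputs, so pointers on those choices would also help.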