Hi, I have a character-level LSTM model for word classification and it doesn't seem to be training: the error rate after each epoch stays almost the same (sometimes it is even higher than in the previous epoch!). The input tensor size is 16 x 250 x 63 (batch x seq length x alphabet size).
Each string is one-hot encoded into a 2D matrix of size 250 x 63, left-padded with zeros.
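For reference, the encoding step looks roughly like this (char_to_idx is a hypothetical name for my alphabet-to-index mapping; the real code differs only in details):

import torch

MAX_LEN = 250       # sequence length after padding
ALPHABET_SIZE = 63  # number of characters in the alphabet

def encode(word, char_to_idx):
    # one row per character position, one column per alphabet symbol
    mat = torch.zeros(MAX_LEN, ALPHABET_SIZE)
    offset = MAX_LEN - len(word)  # left padding: the leading rows stay all-zero
    for i, ch in enumerate(word):
        mat[offset + i][char_to_idx[ch]] = 1.0
    return mat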
nn.CrossEntropyLoss is used as the loss function.
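It is created before the training loop in the standard way (per the PyTorch docs, this criterion combines LogSoftmax and NLLLoss in a single class):

loss_function = nn.CrossEntropyLoss()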
The LSTM class is defined as follows (imports included for completeness):
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, nlayers, batch_size):
        super(CharLSTM, self).__init__()
        self.hidden_dim = hidden_size
        self.batch_size = batch_size
        self.nlayers = nlayers
        self.hidden = 0
        self.lstm = nn.LSTM(input_size, hidden_size)  # num_layers is left at its default of 1
        self.dense1 = nn.Linear(hidden_size, 2)       # two output classes

    def initHidden(self):
        # Fresh random initial hidden and cell states
        h0 = Variable(torch.randn(self.nlayers, self.batch_size, self.hidden_dim).cuda())
        c0 = Variable(torch.randn(self.nlayers, self.batch_size, self.hidden_dim).cuda())
        return (h0, c0)

    def forward(self, inputs):
        # (batch, seq, features) -> (seq, batch, features), the default layout nn.LSTM expects
        x = inputs.transpose(0, 1)
        self.hidden = self.initHidden()
        x, self.hidden = self.lstm(x, self.hidden)
        # Average the final hidden state and the final cell state
        x = (self.hidden[0] + self.hidden[1]) / 2
        x = x.squeeze(0)
        x = self.dense1(x)
        return F.softmax(x)
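The model is constructed with batch size 16 and alphabet size 63 to match the tensor shapes above (the hidden size of 128 here is just a placeholder, not necessarily my exact value):

model = CharLSTM(input_size=63, hidden_size=128, nlayers=1, batch_size=16).cuda()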
The model is trained as shown below:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

start = time.time()
losses = []
test_acc = []
train_loss = []
for epoch in range(5):
    print("Epoch %d" % epoch)
    total_loss = 0
    count = 0
    print(model.parameters())  # note: this only prints a generator object, not the weights
    for X, Y in train_loader:
        optimizer.zero_grad()
        Y = Y.squeeze(1)  # labels arrive as (batch, 1); squeeze to (batch,)
        probs = model(Variable(X.cuda()))
        loss = loss_function(probs, Variable(Y.cuda()))
        loss.backward()
        optimizer.step()
        total_loss += loss.data[0]
        count += 1
    losses.append(total_loss / count)  # average training loss for the epoch
    accuracy = 0
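(The rest of the epoch loop just measures test accuracy; I've trimmed it above, but it is roughly the following sketch, where test_loader is my test DataLoader and the exact details may differ:)

    correct = 0
    total = 0
    for X, Y in test_loader:
        Y = Y.squeeze(1)
        probs = model(Variable(X.cuda()))
        _, predicted = torch.max(probs.data, 1)  # index of the larger of the two class scores
        correct += (predicted.cpu() == Y).sum()
        total += Y.size(0)
    accuracy = correct / float(total)
    test_acc.append(accuracy)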
Please help me out here