Hi,
I’ve tried to implement a simple RNN cell from scratch for sentiment classification (positive/negative) and to overfit a small sample of data as a sanity check, but the model doesn’t seem to learn anything: the loss never decreases. Do you have any clue what could cause this? (A minimal sketch of how I drive the training is included after the model code below.)
Thanks for your help!
Here is the model:
import torch
import torch.nn as nn
import torch.nn.functional as F


class SentimentClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=2):
        super(SentimentClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.whh = nn.Linear(hidden_size, hidden_size)  # hidden-to-hidden
        self.wxh = nn.Linear(input_size, hidden_size)   # input-to-hidden
        self.fc = nn.Linear(hidden_size, output_size)   # hidden-to-output

    def init_hidden_state(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)

    def forward(self, x, hidden_state):  # x shape: (batch_size, input_size)
        hidden_state = torch.sigmoid(self.whh(hidden_state) + self.wxh(x))
        output = F.softmax(self.fc(hidden_state), dim=1)
        return output, hidden_state

    def fit(self, dataset, batch_size, epochs, lr=0.001):
        optimizer = torch.optim.SGD(self.parameters(), lr=lr)
        criterion = torch.nn.CrossEntropyLoss()
        for epoch in range(epochs):
            total_loss = 0
            for idx, (texts, sentiments) in enumerate(dataset):
                hidden_state = self.init_hidden_state(batch_size=batch_size)
                # forward: unroll over the sequence, keep only the last output
                for i in range(texts.shape[0]):  # texts.shape[0] = sequence length
                    output, hidden_state = self.forward(texts[i], hidden_state)
                loss = criterion(output, sentiments)
                # backward
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1)
                # parameter update (SGD step)
                optimizer.step()
                total_loss += loss
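For completeness, here is a rough sketch of how I exercise the class on a single batch; the sizes, the random one-hot inputs, and the labels are just stand-ins for my real preprocessing, which I’ve left out:

# Minimal driver: try to overfit one fake batch (all shapes/data are placeholders).
vocab_size, hidden_size, batch_size, seq_len = 50, 32, 8, 12

model = SentimentClassifier(input_size=vocab_size, hidden_size=hidden_size)

# one batch of fake data: (seq_len, batch_size, vocab_size) one-hot inputs
token_ids = torch.randint(0, vocab_size, (seq_len, batch_size))
texts = F.one_hot(token_ids, num_classes=vocab_size).float()
sentiments = torch.randint(0, 2, (batch_size,))  # 0 = negative, 1 = positive

# "dataset" is just this single batch, since I'm only trying to overfit it
dataset = [(texts, sentiments)]
model.fit(dataset, batch_size=batch_size, epochs=100, lr=0.001)

Even with this setup, the loss stays essentially flat across epochs.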