Building a RNN/LSTM/GRU I am trying to overfit it on random data and I can’t get it to learn (accuracy won’t increase past 0.2). It keeps getting stuck at local optima. Idea of using random data comes fromUNDERSTANDING DEEP LEARNING REQUIRES RETHINKING GENERALIZATION . Any advice on what to change.
Input is (100 * 4 * 60) output is (100 * 10 ).
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# self.lstm1 = nn.LSTM(input_size = 60, hidden_size = 32, num_layers = 1, bidirectional= True)
# self.lstm1 = nn.RNN(input_size = 60, hidden_size = 32, num_layers = 1, bidirectional= True)
self.lstm1 = nn.GRU(input_size = 60, hidden_size = 32, bidirectional= True)
self.fcn1 = nn.Linear(4 * 64, 512)
self.fcn2 = nn.Linear(512, 512)
self.fcn3 = nn.Linear(512, 10)
self.softmax = nn.Softmax(dim=1)
def forward(self, x):
x, _ = self.lstm1(x)
# print(x.shape)
x = x.view(100, 4 * 64)
# print(x.shape)
x = self.fcn1(x)
x = F.dropout(x, p = 0.5)
x = self.fcn2(x)
x = F.dropout(x, p = 0.5)
x = self.fcn3(x)
x = F.dropout(x, p = 0.9)
x =x.squeeze(1)
x = self.softmax(x)
return x
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)
I’ve tried different architectures, layers , drop-out, learning rates, learning rules, etc. Perhaps I should change the Loss function?
Training Code
for epoch in range(5000):
optimizer.zero_grad()
running_loss = 0.0
outputs = net(X)
loss = criterion(outputs, y)
loss.backward()
optimizer.step()
running_loss += loss.item()
if epoch % 100 == 0:
acc = accuracy_score(np.array(outputs.detach()).argmax(1), y)
# print(np.array(outputs.detach()).argmax(1))
print("Epoch: ", epoch, "Loss: ",running_loss, "Accuracy:", acc)