I am building a model to predict a continuous variable from an input signal of a mixture of encoded categorical and continuous variables.
I am using some linear layers with LeakyReLUs and dropouts in between.
The problem I’m facing with this model is that it is learning very slowly and I’m not sure why. Not that it just requires many epochs to train but that even then it plateaus and gets somewhat stuck. Further, since it requires extensive training, it overfits and validation losses increase.
I have followed some guides to improve model complexity (hence added layers, batchnorm, dropouts and leaky relus), and while they have helped my loss is still not as low as I would like as it doesn’t decrease enough each iteration.
Although I have a large dataset, I thought this model was relatively simple and so I’m confused as to why it isn’t learning effectively.
I’d really appreciate a second eye as I may have made a mistake somewhere. I have attached the relevant code below.
Thanks in advance!
class FundedDateNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size=1):
super().__init__()
self.fc1 = nn.Linear(input_size, 2048)
self.fc2 = nn.Linear(2048, 1024)
self.fc3 = nn.Linear(1024, hidden_size)
self.fc4 = nn.Linear(hidden_size, output_size)
self.relu = nn.LeakyReLU()
self.dropout = nn.Dropout(p=0.4)
self.norm1 = nn.BatchNorm1d(2048)
self.norm2 = nn.BatchNorm1d(1024)
self.norm3 = nn.BatchNorm1d(hidden_size)
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.dropout(x)
x = self.norm1(x)
x = self.fc2(x)
x = self.relu(x)
x = self.dropout(x)
x = self.norm2(x)
x = self.fc3(x)
x = self.relu(x)
x = self.norm3(x)
x = self.fc4(x)
return x
def predict(self, x):
return self.forward(x)
Train and validation function.
def sub_train_(model, dataloader):
model.train()
losses = list()
for idx, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
optimizer.zero_grad()
out = model(X)
loss = criterion(out.squeeze(), y)
loss.backward()
optimizer.step()
losses.append(loss.item())
return np.mean(losses), model
def sub_valid_(model, dataloader):
model.eval()
losses = list()
with torch.no_grad():
for idx, (seq, label) in enumerate(dataloader):
seq, label = seq.to(device), label.to(device)
output = model(seq)
test_loss = criterion(output.squeeze(), label)
losses.append(test_loss.item())
test_loss = np.mean(losses)
return test_loss
Initiate the model
model = FundedDateNN(input_size, hidden_size).to(device)
criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
print(model)
def train(model, trainloader, testloader, num_epochs):
best_model = model
best_loss = math.inf
ts = time.time()
losses = list()
for epoch in range(num_epochs):
train_loss, model = sub_train_(model, trainloader)
test_loss = sub_valid_(model, testloader)
losses.append(train_loss)
if test_loss < best_loss:
best_loss = test_loss
best_model = model
if epoch % 20 == 0:
print('Epoch: {}, train_loss: {}, test_loss: {}'.format(
epoch, train_loss, test_loss))
te = time.time()
# Plot Losses
fig, ax = plt.subplots()
ax.plot(range(num_epochs), losses)
plt.show()
mins = int((te-ts) / 60)
secs = int((te-ts) % 60)
return losses, best_model
losses, model = train(model, trainloader, testloader, n_epochs)