# Validation loss not moving with MLP in Regression

Given input features as such, just raw numbers:

``````tensor([0.2153, 0.2190, 0.0685, 0.2127, 0.2145, 0.1260, 0.1480, 0.1483, 0.1489,
0.1400, 0.1906, 0.1876, 0.1900, 0.1925, 0.0149, 0.1857, 0.1871, 0.2715,
0.1887, 0.1804, 0.1656, 0.1665, 0.1137, 0.1668, 0.1168, 0.0278, 0.1170,
0.1189, 0.1163, 0.2337, 0.2319, 0.2315, 0.2325, 0.0519, 0.0594, 0.0603,
0.0586, 0.0067, 0.0624, 0.2691, 0.0617, 0.2790, 0.2805, 0.2848, 0.2454,
0.1268, 0.2483, 0.2454, 0.2475], device='cuda:0')
``````

And the expected output is a single real number output, e.g.

``````tensor(-34.8500, device='cuda:0')
``````

I’ve tried creating a simple 2 layer network with:

``````class MLP(nn.Module):
def __init__(self, input_size, output_size, hidden_size):
super(MLP, self).__init__()
self.linear = nn.Linear(input_size, hidden_size)
self.classifier = nn.Linear(hidden_size, output_size)

def forward(self, inputs, hidden=None, dropout=0.5):
inputs = F.dropout(inputs, dropout) # Drop-in.
# First Layer.
output = F.relu(self.linear(inputs))

# Matrix manipulation magic.
batch_size, sequence_len, hidden_size = output.shape
# Technically, linear layer takes a 2-D matrix as input, so more manipulation...
output = output.contiguous().view(batch_size * sequence_len, hidden_size)
# Apply dropout.
output = F.dropout(output, dropout)

# Put it through the classifier
# And reshape it to [batch_size x sequence_len x vocab_size]
output = self.classifier(output).view(batch_size, sequence_len, -1)

return output
``````

And training as such:

``````# Training routine.
def train(num_epochs, dataloader, valid_dataset, model, criterion, optimizer):
losses = []
valid_losses = []
learning_rates = []
plt.ion()
x_valid, y_valid = valid_dataset
for _e in range(num_epochs):
#print(batch)
this_x = torch.tensor(batch['x'].view(len(batch['x']), 1, -1)).to(device)
this_y = torch.tensor(batch['y'].view(len(batch['y']), 1, 1)).to(device)

# Feed forward.
output = model(this_x)

prediction, _ = torch.max(output, dim=1)
loss = criterion(prediction, this_y.view(len(batch['y']), -1))
loss.backward()
optimizer.step()
losses.append(torch.sqrt(loss.float()).data)

output = model(x_valid.view(len(x_valid), 1, -1))
prediction, _ = torch.max(output, dim=1)
loss = criterion(prediction, y_valid.view(len(y_valid), -1))
valid_losses.append(torch.sqrt(loss.float()).data)

clear_output(wait=True)
plt.plot(losses, label='Train')
plt.plot(valid_losses, label='Valid')
plt.legend()
plt.pause(0.05)
``````

Tuning several hyperparameters, it looks like the model doesn’t train well, the validation loss doesn’t move at all e.g.

``````hyperparams = Hyperparams(input_size=train_dataset.x.shape[1],
output_size=1,
hidden_size=150,
loss_func=nn.MSELoss,
learning_rate=1e-8,
Currently it looks like e.g. the batch dimension might be missing and thus an internal broadcasting might happen (in the latest version we’ve added a warning to `nn.MSELoss` in case this happens).
Also note, that you should pass the `training` argument to `F.dropout`, since in your current code dropout will be used during training and evaluation.