Hi
I have made a simple feed-forward network, and I’m having problems with overfitting.
I am trying to implement dropout; however, it doesn’t behave the way I would expect, so I suspect I may be using it incorrectly.
I have tried 3, 4 and 5 layers, and learning rates from 0.00001 to 0.001. I have tried placing dropout at every layer and at only one layer, with dropout probabilities of 0.2 and 0.4.
So, is the following incorrect?
class Feedforward(torch.nn.Module):
    """Feed-forward regressor with four hidden layers and dropout.

    Architecture: input -> [Linear -> Dropout(p=0.2) -> ReLU] x 4 -> Linear -> 1.

    Note: applying Dropout before ReLU is mathematically equivalent to applying
    it after ReLU (dropout zeroes/rescales, and ReLU commutes with both), so the
    ordering here is fine. Remember to call model.train() during training and
    model.eval() for validation/inference so dropout is toggled correctly.
    """

    def __init__(self, input_size, hidden_1_size, hidden_2_size,
                 hidden_3_size, hidden_4_size):
        # BUG FIX: the constructor must be named __init__ (double underscores)
        # and must call super().__init__(). With a plain `init` method, the
        # inherited nn.Module.__init__ (which takes no extra arguments) runs
        # instead, so Feedforward(...) raises a TypeError and no submodules
        # are ever registered.
        super().__init__()
        self.input_size = input_size
        self.hidden_1_size = hidden_1_size
        self.hidden_2_size = hidden_2_size
        self.hidden_3_size = hidden_3_size
        self.hidden_4_size = hidden_4_size

        self.layer_1 = torch.nn.Linear(self.input_size, self.hidden_1_size)
        self.dropout1 = torch.nn.Dropout(0.2)
        self.relu_1 = torch.nn.ReLU()

        self.layer_2 = torch.nn.Linear(self.hidden_1_size, self.hidden_2_size)
        self.dropout2 = torch.nn.Dropout(0.2)
        self.relu_2 = torch.nn.ReLU()

        self.layer_3 = torch.nn.Linear(self.hidden_2_size, self.hidden_3_size)
        self.dropout3 = torch.nn.Dropout(0.2)
        self.relu_3 = torch.nn.ReLU()

        self.layer_4 = torch.nn.Linear(self.hidden_3_size, self.hidden_4_size)
        self.dropout4 = torch.nn.Dropout(0.2)
        self.relu_4 = torch.nn.ReLU()

        # Final projection to a single regression output.
        self.layer_6 = torch.nn.Linear(self.hidden_4_size, 1)

    def forward(self, x):
        """Map a batch of inputs (batch, input_size) to predictions (batch, 1)."""
        h = self.relu_1(self.dropout1(self.layer_1(x)))
        h = self.relu_2(self.dropout2(self.layer_2(h)))
        h = self.relu_3(self.dropout3(self.layer_3(h)))
        h = self.relu_4(self.dropout4(self.layer_4(h)))
        return self.layer_6(h)
# Instantiate the network: input width taken from the test data, three
# 300-unit hidden layers followed by a 100-unit hidden layer.
model = Feedforward(x_test.shape[1], 300, 300, 300, 100)

# Training hyperparameters and objective.
lr = 1e-3  # 1e-5 was also tried
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)