Hi!
I am trying to develop my first ANN, if I am correct.
In short: I am trying to predict a coefficient for a flight from 10 parameters:
month - month of the flight
day - day of the flight
week_day - day of the week
classname - class of the flight (like UberX or Select)
departure: country, city and airport
arrival: country, city and airport
Out of 377333 flights available, 368904 have a coefficient of 0.05 (basically, just 1 flight of that class is expected on the route).
With the data this heavily skewed towards 0.05, my model obviously predicts 0.05 +/- some margin.
(I have an example of Moscow-Dubai on the 7th of November that should give a coeff of around 0.6, but I get 0.0534…)
My code looks like this:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)
class Net(torch.nn.Module):
    """Fully connected network: three ReLU hidden layers and a sigmoid output.

    The final sigmoid confines every prediction to the interval (0, 1).

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        hidden_size3: Width of the third hidden layer.
    """

    def __init__(self, input_size, hidden_size, hidden_size2, hidden_size3):  # input size 10
        super(Net, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.hidden_size2 = hidden_size2
        self.hidden_size3 = hidden_size3
        self.fc1 = torch.nn.Linear(self.input_size, self.hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(self.hidden_size, self.hidden_size2)
        self.relu2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(self.hidden_size2, self.hidden_size3)
        self.relu3 = torch.nn.ReLU()
        self.fc4 = torch.nn.Linear(self.hidden_size3, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1, holding sigmoid-activated predictions.
        """
        hidden = self.fc1(x)
        relu = self.relu(hidden)
        hidden2 = self.fc2(relu)
        relu2 = self.relu2(hidden2)
        # fc3 must consume the *activated* tensor. Feeding the raw `hidden2`
        # here skips the second ReLU, so fc2 and fc3 would collapse into one
        # linear map and the network would lose a non-linearity.
        hidden3 = self.fc3(relu2)
        relu3 = self.relu3(hidden3)
        output = self.fc4(relu3)
        output = self.sigmoid(output)
        return output
# Convert the train/test splits (via their `.values` arrays) to float32 tensors.
training_input = torch.FloatTensor(x_train.values)
training_output = torch.FloatTensor(y_train.values)
test_input = torch.FloatTensor(x_test.values)
test_output = torch.FloatTensor(y_test.values)

# Move everything to the compute device the model will live on.
training_input = training_input.to(device)
training_output = training_output.to(device)
test_input = test_input.to(device)
test_output = test_output.to(device)

# number of features selected
input_size = training_input.size()[1]
# number of nodes/neurons in each hidden layer
hidden_size = 200
hidden_size2 = 100
hidden_size3 = 50

model = Net(input_size, hidden_size, hidden_size2, hidden_size3)  # create the model
model = model.to(device)
criterion = torch.nn.MSELoss()
# Adam optimizer with a fixed learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Baseline: test loss of the untrained model. Autograd is disabled because no
# backward pass follows; this avoids building a graph over the whole test set.
# NOTE(review): `.squeeze()` assumes the targets are 1-D (one value per row);
# confirm that `y_train` / `y_test` are not 2-D, or MSELoss will broadcast.
model.eval()
with torch.no_grad():
    y_pred = model(test_input)
    before_train = criterion(y_pred.squeeze(), test_output)
print('Test loss before training', before_train.item())

# Full-batch training: every epoch is a single step over all training rows.
model.train()
epochs = 5000
errors = []  # per-epoch training loss history
for epoch in range(epochs):
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(training_input)
    # Compute Loss
    loss = criterion(y_pred.squeeze(), training_output)
    errors.append(loss.item())
    # Progress report on epochs 1, 501, 1001, ...
    if epoch % 500 == 1:
        print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
        print(y_pred)
        print(training_output)
    # Backward pass
    loss.backward()
    optimizer.step()

# Final evaluation on the held-out test set, again without gradient tracking.
model.eval()
with torch.no_grad():
    y_pred = model(test_input)
    after_train = criterion(y_pred.squeeze(), test_output)
print('Test loss after Training', after_train.item())
I am not asking for a solution, but rather suggestions. If you see something stupid or a mistake in my code, maybe overuse of hidden layers, please let me know.
Any advice on how to train on such data would also be appreciated.
My current hypothesis is that I should build one model that makes a binary prediction of whether the coeff is 0.05 or not, and then, if it is not, train a second model to predict the coeff for those ‘max’ cases (but that model is only going to have around 8k rows of data to train on).
Thanks!