I am trying to reproduce scikit-learn's MLPRegressor with the LBFGS solver in PyTorch for a regression problem, but there seems to be a problem with my code, as the MAE I get is different from the one scikit-learn's MLP produces.
Below are my scikit-learn MLP configuration and my PyTorch implementation.
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(7, 3), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=40, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=None, shuffle=True, solver='lbfgs', tol=0.0001,
             validation_fraction=0.1, verbose=False, warm_start=False)
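For context, that model is fit and scored roughly like this (a sketch only: the fit/predict calls and the use of sklearn's mean_absolute_error are my description of the workflow, not the exact evaluation cell; train_features, train_labels, test_features, and test_labels are the same arrays used in the PyTorch code below):

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error

# Sketch: fit the MLPRegressor configured above and report the test MAE
mlp = MLPRegressor(hidden_layer_sizes=(7, 3), solver='lbfgs', max_iter=40)
mlp.fit(train_features, train_labels)
sk_pred = mlp.predict(test_features)
print('sklearn MAE:', mean_absolute_error(test_labels, sk_pred))

The PyTorch code below is my attempt to reproduce this.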
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utils_data
from torch.optim import LBFGS

# fix the seeds so both runs are repeatable
np.random.seed(29)
torch.manual_seed(29)
torch.cuda.manual_seed(29)
def error_analysis_torch(estimates, actual, title=''):
    # error metrics: MAE, error std, and a MAPE-based "accuracy"
    arr = estimates - actual
    err_per = arr / actual
    mae = torch.abs(arr).mean()
    std = torch.std(arr)
    err_per_std = torch.std(err_per)
    mape = 100 * (torch.abs(arr) / actual)
    accuracy = 100 - torch.mean(mape)
    print('Results :')
    print(accuracy, mae)
features_Pytorch = np.array(train_features)
labels_Pytorch = np.array(train_labels)
inputs = torch.from_numpy(features_Pytorch)#.cuda()
targets = torch.from_numpy(labels_Pytorch)#.cuda()
features_Pytorch_test = np.array(test_features)
labels_Pytorch_test = np.array(test_labels)
inputs_test = torch.from_numpy(features_Pytorch_test)#.cuda()
targets_test = torch.from_numpy(labels_Pytorch_test)#.cuda()
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(MLP, self).__init__()
        # same architecture as hidden_layer_sizes=(7, 3) in scikit-learn
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        out = self.fc3(F.relu(self.fc2(F.relu(self.fc1(x)))))
        return out
input_size = inputs.size()[1]
hidden_size1 = 7
hidden_size2 = 3
output_size = 1
num_epoch = 20
model = MLP(input_size = input_size, hidden_size1 = hidden_size1, hidden_size2 = hidden_size2,
output_size = output_size)
optimizer = LBFGS(model.parameters(), lr=0.1)
criterion = nn.MSELoss()
training_samples = utils_data.TensorDataset(inputs, targets)
data_loader_trn = utils_data.DataLoader(training_samples, batch_size=32, drop_last=False, shuffle=False)
# train
for epoch in range(num_epoch):
    print('STEP: ', epoch)
    for batch_idx, (data, target) in enumerate(data_loader_trn):
        tr_x, tr_y = data.float(), target.float()

        # LBFGS needs a closure that re-evaluates the model and returns the loss
        def closure():
            optimizer.zero_grad()
            out = model(tr_x)
            loss = criterion(out, tr_y.unsqueeze(1))
            print('loss:', loss.item())
            loss.backward()
            return loss

        optimizer.step(closure)
# evaluate on the test set
with torch.no_grad():
    pred = model(inputs_test.float())
    loss = criterion(pred, targets_test.float())
    print('test loss:', loss.item())
    y = pred.detach().numpy()

pred_y = model(inputs_test.float())
error_analysis_torch(pred_y, targets_test.float())
The input has shape (1103, 61), where the 61 columns are 2 numeric columns and 59 columns of one-hot encoded data.
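That matrix is assembled roughly along these lines (a hypothetical sketch only: the DataFrame df and the column names num_a, num_b, and category are illustrative, not my actual preprocessing code):

import numpy as np
import pandas as pd

# Sketch: 2 numeric columns plus a categorical column that one-hot
# encodes into the remaining columns (59 in my real data).
df = pd.DataFrame({'num_a': [1.0, 2.0], 'num_b': [3.0, 4.0],
                   'category': ['x', 'y']})
one_hot = pd.get_dummies(df['category'])                    # one column per category
feature_df = pd.concat([df[['num_a', 'num_b']], one_hot], axis=1)
train_features = feature_df.to_numpy(dtype=np.float64)      # my real matrix is (1103, 61)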
The MAE from scikit-learn = 928818
The MAE from PyTorch = 1883490
I am running both experiments on Google Colab, and I used the same random seed in both.