Difference between MLPRegressor in scikit-learn and PyTorch

I have a regression model that I train with scikit-learn's MLPRegressor, but when I try to duplicate the same model in PyTorch I get different results.

Scikit-Learn Code:

from sklearn.neural_network import MLPRegressor

mlp = MLPRegressor().fit(features, labels)
mlp.score(features, labels)  # returns R^2 on the training data
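For context, MLPRegressor() with no arguments falls back to its defaults, which are what the PyTorch model would have to reproduce. A quick way to see them (exact values can vary with the scikit-learn version):

from sklearn.neural_network import MLPRegressor

# Defaults in recent scikit-learn versions: one hidden layer of 100 units,
# ReLU activation, Adam with learning_rate_init=0.001, alpha=0.0001 (L2),
# batch_size=min(200, n_samples), max_iter=200, shuffle=True.
print(MLPRegressor().get_params())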

PyTorch Code:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as utils_data
from sklearn.model_selection import train_test_split

torch.manual_seed(29)

train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.1, shuffle=False)

# Note: the tensors below are built from the full data set (matching the
# scikit-learn snippet, which also fits on all of the data), not the train split.
features_Pytorch = np.array(features)
labels_Pytorch = np.array(labels)
inputs = torch.from_numpy(features_Pytorch).cuda()
targets = torch.from_numpy(labels_Pytorch).cuda()

features_Pytorch_test = np.array(test_features)
labels_Pytorch_test = np.array(test_labels)

# Min-max normalisation of the test labels (the result is never used below)
minfeat = np.amin(labels_Pytorch_test)
maxfeat = np.amax(labels_Pytorch_test)
features_n_iter = (labels_Pytorch_test - minfeat) / (maxfeat - minfeat)
features_n_iter = torch.from_numpy(features_n_iter)

inputs_test = torch.from_numpy(features_Pytorch_test)
targets_test = torch.from_numpy(labels_Pytorch_test)
# inputs = Variable(torch.Tensor(features.values))
# targets = Variable(torch.Tensor(labels.values))

class MLP(nn.Module):
    """Single hidden layer with ReLU and a linear output, i.e. the same
    architecture as MLPRegressor's default (one hidden layer of 100 units)."""

    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out = self.fc2(F.relu(self.fc1(x)))
        return out

# 100 hidden units, lr=1e-3 and 200 epochs match MLPRegressor's defaults;
# weight_decay=1e-4 approximates (but is not identical to) its alpha=1e-4 L2 penalty.
input_size = inputs.size()[1]
hidden_size = 100
output_size = 1
num_epoch = 200
learning_rate = 1e-3

model = MLP(input_size=input_size, hidden_size=hidden_size,
            output_size=output_size)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
loss_fct = nn.MSELoss()

training_samples = utils_data.TensorDataset(inputs, targets)
data_loader_trn = utils_data.DataLoader(training_samples, batch_size=200, drop_last=False, shuffle=True)

# train
for epoch in range(num_epoch):
    cum_loss = 0
    for batch_idx, (data, target) in enumerate(data_loader_trn):

        tr_x, tr_y = data.float(), target.float()
        # Reshape the targets to (batch, 1) so they match the prediction shape;
        # without this, MSELoss broadcasts (batch, 1) against (batch,) and the
        # loss is computed over the wrong pairs (assuming labels is 1-D).
        tr_y = tr_y.view(-1, 1)

        pred = model(tr_x)
        loss = loss_fct(pred, tr_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cum_loss += loss.item()

    if epoch % 10 == 0:
        print('Epoch [%d/%d], Loss: %.4f'
              % (epoch + 1, num_epoch, cum_loss))
# Evaluate on the full data set without tracking gradients
with torch.no_grad():
    pred_y = model(inputs.float())
# print("predict (after training)", 4, our_model(inputs_test.cuda().float()).data[0][0])
# error_analysis_torch(pred_y, targets_test.float().cuda())
final_pred_np = pred_y.cpu().numpy()
# Pearson correlation between predictions and targets; note that
# mlp.score() above returns R^2, which is a different metric
np.corrcoef(final_pred_np.squeeze(), targets.cpu().numpy())[0, 1]
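One thing worth checking before comparing numbers: mlp.score returns the R² of the fit, while the last line above reports the Pearson correlation, so the two values are not directly comparable. A minimal sketch to put both models on the same metric, assuming labels is a 1-D array-like of targets:

from sklearn.metrics import r2_score

# R^2 of the PyTorch predictions, comparable to mlp.score(features, labels)
pytorch_r2 = r2_score(np.asarray(labels), final_pred_np.squeeze())
print("PyTorch R^2:", pytorch_r2)
print("sklearn R^2:", mlp.score(features, labels))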

How different are these results?
That is, over, say, 10 runs, what are the mean and standard deviation of the final score for the scikit-learn model and for the PyTorch model?
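
A minimal sketch of that comparison, assuming a hypothetical helper train_and_score_pytorch(seed) that wraps the PyTorch training loop above and returns an R² on the same data:

import numpy as np
from sklearn.neural_network import MLPRegressor

# Ten scikit-learn fits with different random seeds
sk_scores = [MLPRegressor(random_state=s).fit(features, labels).score(features, labels)
             for s in range(10)]
print("sklearn : mean %.4f, std %.4f" % (np.mean(sk_scores), np.std(sk_scores)))

# Same idea for PyTorch, using the hypothetical wrapper described above
# pt_scores = [train_and_score_pytorch(seed=s) for s in range(10)]
# print("PyTorch : mean %.4f, std %.4f" % (np.mean(pt_scores), np.std(pt_scores)))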