Hello, I am new to deep learning and PyTorch. I am trying to use a DNN to predict an output value, but the loss saturates during training.

Any comment will be very helpful.

All the code is listed below:

```
import time
import torch
import numpy as np
import pandas as pd
import torch.nn.functional as F
from torch import nn
from sklearn.utils import shuffle
from torch.autograd import Variable
from torch.optim import lr_scheduler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
# Load the whitespace-delimited data file into a DataFrame.
# The separator is a raw string so that "\s" reaches pandas as a regex token
# instead of being parsed as an (invalid) string escape sequence.
Dataset = pd.read_csv('./Input Data.txt', sep=r"\s+")
# NOTE(review): the preview returned by head() is not bound or printed here,
# so it is only visible when this runs as a notebook cell.
Dataset.head()

# Columns 0 .. Target_Column-1 are the input features; column Target_Column
# is the value to predict.
Target_Column = 9
X = Dataset.iloc[:, 0:Target_Column].values
Y = Dataset.iloc[:, Target_Column].values
# NOTE(review): the features go into the split exactly as loaded, with no
# scaling step. If the columns differ widely in magnitude, standardizing them
# may help with the loss plateau -- confirm against the data.

# Hold out 30% of the rows for testing; random_state pins the split.
X_Train, X_Test, Y_Train, Y_Test = train_test_split(X, Y, test_size=0.3, random_state=0)

# Define hyperparameters.
Batch_Size = 2048
Num_Epochs = 2000
Size_Hidden_1 = 128
Size_Hidden_2 = 64
Size_Hidden_3 = 32
Learning_Rate = 0.001
Batch_No = len(X_Train) // Batch_Size  # Number of full batches (floor division)
Num_Columns_Input = X_Train.shape[1]  # Number of columns in input matrix
Num_Columns_Output = 1

# Using GPU when one is available, otherwise fall back to the CPU.
Use_CUDA = torch.cuda.is_available()
Device = torch.device("cuda" if Use_CUDA else "cpu")
print("Executing the model on :", Device)
#Define model and optimizer
class Net(torch.nn.Module):
    """Fully connected network with three ReLU hidden layers and a linear output.

    Args:
        n_feature: Number of input features per sample.
        size_hidden_1: Width of the first hidden layer.
        size_hidden_2: Width of the second hidden layer.
        size_hidden_3: Width of the third hidden layer.
        n_output: Number of values produced per sample.
    """

    def __init__(self, n_feature, size_hidden_1, size_hidden_2, size_hidden_3, n_output):
        super().__init__()
        # Layer sizes are taken from the constructor arguments (not from
        # module-level globals) so the class can be built with any sizes.
        self.hidden1 = torch.nn.Linear(n_feature, size_hidden_1)      # Hidden layer 1
        self.hidden2 = torch.nn.Linear(size_hidden_1, size_hidden_2)  # Hidden layer 2
        self.hidden3 = torch.nn.Linear(size_hidden_2, size_hidden_3)  # Hidden layer 3
        self.predict = torch.nn.Linear(size_hidden_3, n_output)       # Output layer

    def forward(self, x):
        """Run ``x`` through the three ReLU hidden layers and the output layer.

        No activation is applied after the final layer, so the returned
        values are unbounded.
        """
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        return self.predict(x)
# Build the network from the hyperparameters defined above.
net = Net(Num_Columns_Input, Size_Hidden_1, Size_Hidden_2, Size_Hidden_3, Num_Columns_Output)

# Wrap the model for multi-GPU execution only when more than one CUDA device
# is present.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)

# Move the parameters to `Device`, the device object selected earlier in the
# script. A separate lowercase `device` name was previously used here and was
# only assigned when CUDA was available.
net.to(Device)

# Adam updates every parameter of the network; the loss is mean squared error.
optimizer = torch.optim.Adam(net.parameters(), lr=Learning_Rate)
criterion = torch.nn.MSELoss()
# Training model
# Running_Loss accumulates the sum of the per-batch losses within one epoch.
Running_Loss = 0.0
start_time = time.time()
for Epoch in range(Num_Epochs):
    # Reshuffle features and targets together before slicing mini-batches.
    X_Train, Y_Train = shuffle(X_Train, Y_Train)

    # Mini batch learning: step over sample offsets so that the final,
    # possibly shorter, batch is also trained on and a training set smaller
    # than Batch_Size still yields one batch.
    for start in range(0, len(X_Train), Batch_Size):
        end = start + Batch_Size
        # Build float32 tensors directly on the target device (the Variable
        # wrapper is deprecated and no longer needed for autograd).
        inputs = torch.tensor(X_Train[start:end], dtype=torch.float32, device=Device)
        labels = torch.tensor(Y_Train[start:end], dtype=torch.float32, device=Device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        # Add an axis at dim 1 to the labels before they are compared with
        # the network output in the loss.
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()

        # accumulate statistics
        Running_Loss += loss.item()

    # Report the summed batch loss every 100th epoch.
    if Epoch % 100 == 0:
        print('Epoch {}'.format(Epoch), "loss: ", Running_Loss)
    # NOTE: no learning-rate scheduler is created in this script, so no
    # scheduler step is taken here.
    Running_Loss = 0.0
print("Calculation time :", time.time() - start_time)
```

The training output shows that the loss saturates and stops decreasing:

Epoch 0 loss: 82637.44604492188

Epoch 100 loss: 3913.1080932617188

Epoch 200 loss: 3164.8107986450195

Epoch 300 loss: 3010.6801147460938

Epoch 400 loss: 2929.7017517089844

Epoch 500 loss: 2904.999656677246

Epoch 600 loss: 2887.5707092285156

Epoch 700 loss: 2891.483169555664

Epoch 800 loss: 2877.9163970947266

Epoch 900 loss: 2891.381019592285

Epoch 1000 loss: 2870.423141479492

Epoch 1100 loss: 2887.0635833740234

Epoch 1200 loss: 2889.669761657715

Epoch 1300 loss: 2891.597194671631

Epoch 1400 loss: 2881.264518737793

Epoch 1500 loss: 2884.085250854492

Epoch 1600 loss: 2883.3774032592773

Epoch 1700 loss: 2883.196922302246

Epoch 1800 loss: 2891.262664794922

Epoch 1900 loss: 2888.922218322754

How can I fix this problem?

Any comments are highly appreciated!