Outputs of the model are sometimes nan, sometimes [0.]

I am training a simple model with three numerical input features and one numerical output. The printed outputs are sometimes nan, sometimes [0.] for every training example. I adjusted the number of layers and nodes, but it didn't help. I tried the model on another, random dataset and it produced reasonable outputs, so maybe the data is not being imported correctly?
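A quick way to rule out an import problem is to inspect the loaded DataFrame directly; a minimal sketch, assuming the same file:

import pandas as pd

df = pd.read_excel('nn_CDFarm_torch_profit_train.xlsx', header=0)
print(df.dtypes)        # all four columns should be numeric
print(df.isna().sum())  # missing cells become NaN inputs and poison the loss
print(df.describe())    # check the scale of the three prices and of the profit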


import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

class DatasetCDFarm(Dataset): 

    def __init__(self, file_path):
        df = pd.read_excel(file_path, header = 0)
        df_array = df.to_numpy() # transform df to a numpy array
        self.len = df_array.shape[0]
        self.x = torch.from_numpy(df_array[:, 0:3]) # prices of barley, rapeseed, wheat 
        self.y = torch.from_numpy(df_array[:, [3]]) # profit

    def __getitem__(self, index):  
        return self.x[index], self.y[index]

    def __len__(self): 
        return self.len

train_dataset = DatasetCDFarm('nn_CDFarm_torch_profit_train.xlsx')
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=16,
                          shuffle=True)

class Net(nn.Module): 

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size): 
        super(Net, self).__init__() 
        self.fc1 = nn.Linear(input_size, hidden1_size) # first layer
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden1_size, hidden2_size) # second layer
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden2_size, output_size) # output layer
        self.relu3 = nn.ReLU()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out) 
        out = self.relu3(out)
        return out

net = Net(3, 60, 60, 1)  # avoid shadowing the class name

criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

def train(epoch):
    for batch_id, data in enumerate(train_loader):

        inputs, labels = data

        # the Excel data arrives as float64; the model's weights are float32
        inputs = inputs.float()
        labels = labels.float()

        print(epoch, batch_id, "inputs", inputs.data, "labels", labels.data)
        out = net(inputs)
        print('out', type(out), out)
        print('labels', type(labels), labels)

        loss = criterion(out, labels)
        print(epoch, batch_id, loss.data)

        # standard update step: clear old gradients, backprop, apply the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

for epoch in range(1, 100):
    train(epoch)

Answer: remove the last ReLU layer and try it again. A regression output must be unbounded: profit can be negative, and a ReLU on the output both clamps those values to zero and kills the gradient whenever its input is negative.
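A sketch of the suggested change, ending the forward pass at the last linear layer:

class Net(nn.Module):

    def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden1_size)
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)
        self.fc3 = nn.Linear(hidden2_size, output_size)

    def forward(self, x):
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        return self.fc3(out)  # no ReLU here: the regression output stays unbounded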

Reply: I still get out: nan, and some of the weights are nan; the biases are all nan.
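One way to confirm exploding gradients, or to locate the first NaN, is to inspect the gradients right after the backward pass; a minimal sketch based on the training loop above:

loss.backward()
for name, p in net.named_parameters():
    if p.grad is not None:
        print(name,
              'grad norm:', p.grad.norm().item(),
              'has nan:', torch.isnan(p.grad).any().item())
optimizer.step()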

Update: problem solved by switching to another optimizer, e.g. the Adam optimizer.
The reason was that I did not standardize the target variable, which caused exploding gradients; the SGD optimizer did not perform well on this problem.
And of course the last ReLU had to be removed.
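For completeness, a sketch of the two fixes on top of the model without the final ReLU: Adam in place of SGD, and a standardized profit target (the scaling constants come from the training data, so predictions must be un-scaled afterwards):

# standardize the target so the MSE loss and its gradients stay well-scaled
y = train_dataset.y.float()
y_mean, y_std = y.mean(), y.std()
train_dataset.y = (y - y_mean) / y_std

optimizer = optim.Adam(net.parameters(), lr=0.001)

# to report real profits at prediction time, undo the scaling:
# profit = net(x) * y_std + y_mean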