Hello all,
First all, full disclosure, I have very limited experience using PyTorch; I have largely reverse-engineered the code I’m using from examples I found on the internet.
With that said, I’ve used PyTorch to build a linear regression model to predict a single metric from a series of data points (1499 of them, to be precise), with the format:
| Metric | Data points |
|---|---|
| 10.70685 | [0.0019238, 0.00176722, 0.001642…] |
| 10.56528 | [0.000376129, 0.0010622, 0.000149819…] |
| 10.93116 | [0.0025905, 0.00237781, 0.0022083…] |
However, when I run the code, it comes back with the error ValueError: Input contains NaN. Using torch.autograd.set_detect_anomaly(True)yields the message RuntimeError: Function ‘MseLossBackward0’ returned nan values in its 0th output. Furthermore, the output of the regression model (y_predin the code below) itself appears to be made up of nothing but nan values.
I’ve cleaned the training data to ensure there aren’t any NaNs, and reduced the learning rate (which I’d seen suggested elsewhere), but to no avail. What’s the best way to solve this problem? Thanks!
My code:
import pandas as pd import torch from torch import nn from torch.utils.data import TensorDataset, DataLoader import torch.optim as optim from sklearn.model_selection import train_test_split from sklearn.metrics import r2_score import numpy as np class Regression(nn.Module): def __init__(self): super().__init__() self.layers = nn.Sequential( nn.Linear(1499, 64), nn.ReLU(), nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1) ) def forward(self, x): ''' Forward pass ''' return self.layers(x) def training_loop(n_epochs, train_loader): for epoch in range(n_epochs): # Set current loss value current_loss = 0.0 # Iterate over the DataLoader for training data for i, data in enumerate(train_loader, 0): # Get and prepare inputs inputs, targets = data inputs, targets = inputs.float(), targets.float() targets = targets.reshape((targets.shape[0],1)) # Zero the gradients optimizer.zero_grad() # Perform forward pass outputs = model(inputs) # Compute loss loss = loss_function(outputs, targets) # Perform backward pass loss.backward() # Perform optimization optimizer.step() return model data_file="data_file.csv" raw_data=pd.read_csv(raw_data) input_size = raw_data.shape[1] batch_size = 10 model=Regression() n_epochs=100 # loss function and optimizer loss_function =nn.MSELoss() # mean square error optimizer = torch.optim.Adam(model.parameters(), lr=1e-14) # input data data_metric=raw_data[metric] raw_data_points=raw_data['Data Points'].to_list() data_points=[] for l in range(len(raw_data_points)): # data sanitation and conversion from str to list of list of floats data_values=raw_data_points[l].split(',') for idx in range(len(raw_data_points)): data_values[idx]=data_values[idx].replace('[','') data_values[idx]=data_values[idx].replace(']','') data_values[idx]=float(data_values[idx]) data_points.append(data_values) exo_spectra=np.array(exo_spectra) metrics=['metric'] scores=[] for metric in metrics: #split and format data data_target=metrics[metric] X_train, X_test, y_train, y_test =train_test_split(data_points,data_target, test_size=0.2, random_state=23) X_train_tensor = torch.from_numpy(X_train) X_train_tensor = torch.tensor(X_train_tensor,dtype=torch.float32) y_train_tensor = torch.from_numpy(y_train.values) y_train_tensor = torch.tensor(y_train_tensor,dtype=torch.float32) X_test_tensor = torch.from_numpy(X_test) X_test_tensor = torch.tensor(X_test_tensor,dtype=torch.float32) y_test_tensor= torch.from_numpy(y_test.values) y_test_tensor = torch.tensor(y_test_tensor,dtype=torch.float32) # load data train_dataset = TensorDataset(X_train_tensor, y_train_tensor) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # train data and assess accuracy model=training_loop(n_epochs,train_loader) y_pred = model(X_test_tensor) print(y_pred) print(y_pred.detach().numpy()) model_score=r2_score(y_test_tensor.detach().numpy(),y_pred.detach().numpy()) scores.append(model_score) r2_scores=[] for score in scores: r2_scores.append(score) print(r2_scores)