Hi! I am completely new to PyTorch and neural networks. I am trying to solve a simple exercise that asks for training, validating and testing an FNN model on sea surface temperature data. However, the accuracy results are very jumpy and reach 100% quickly. That can somewhat be expected with a relatively small dataset, but I was wondering whether the jumpiness is alright, or whether my code has a large fault somewhere?
This is my complete code (several blocks in Jupyter Notebook):
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.font_manager
import sys
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional
from torch import nn
import random
from sklearn.metrics import confusion_matrix, classification_report
# Load the preprocessed ERSSTv5 SST anomaly dataset (deseasonalized,
# detrended, 5-month running mean, 1950-2021).
filename = "ERSSTv5_deseasoneddetrended_5monthrunningmean_1950-2021.nc"
sstds = xr.open_dataset(filename)
sst = sstds.sst
time = sstds.time
# Nino 3.4 box: 5S-5N and 190E-240E (i.e. 170W-120W).
ninolat1 = -5
ninolat2 = 5
ninolon1 = 190
ninolon2 = 240
# Extract the box as a plain (time, lat, lon) numpy array.
sstnino = np.asarray(sst.sel(lat=slice(ninolat1,ninolat2),lon=slice(ninolon1,ninolon2)))
# Nino 3.4 index: NaN-ignoring area mean of the box at each time step.
nino34 = np.nanmean(sstnino,axis=(1,2))
# Monthly time axis; starts at 1950 + 5/12 — presumably to account for the
# 5-month running mean window; TODO confirm len(timevec) == len(nino34).
timevec = np.arange(1950+(5/12),2022,1/12)
X, y, dates = [], [], []
# Keep only unambiguous events: El Nino months (index >= 0.5, class 1) and
# La Nina months (index <= -0.5, class 0). Neutral (and NaN) months are dropped.
for month, index in enumerate(nino34):
    if index >= 0.5:
        label = 1
    elif index <= -0.5:
        label = 0
    else:
        continue  # neutral month — skip
    X.append(sstnino[month])
    y.append(label)
    dates.append(timevec[month])
X_train, X_valid, X_test = [], [], []
y_train, y_valid, y_test = [], [], []
# Chronological split: train on (1950, 2000), validate on [2000, 2010),
# test on [2010, ...). Dates at or before 1950 fall through untouched.
for i, date in enumerate(dates):
    if 1950 < date < 2000:
        bucket_X, bucket_y = X_train, y_train
    elif 2000 <= date < 2010:
        bucket_X, bucket_y = X_valid, y_valid
    elif date >= 2010:
        bucket_X, bucket_y = X_test, y_test
    else:
        continue
    bucket_X.append(X[i])
    bucket_y.append(y[i])
# Per-grid-cell standardization using TRAINING statistics only (so no
# information from the validation/test periods leaks into the scaling),
# followed by flattening each 2-D map into a 1-D feature vector.
X_arr = np.stack(X_train)
mean_map = np.nanmean(X_arr, axis=0)
std_map = np.nanstd(X_arr, axis=0)
X_train_std = [(x - mean_map) / std_map for x in X_train]
X_valid_std = [(x - mean_map) / std_map for x in X_valid]
X_test_std = [(x - mean_map) / std_map for x in X_test]
X_train_arr = np.array([x.flatten() for x in X_train_std])
X_valid_arr = np.array([x.flatten() for x in X_valid_std])
X_test_arr = np.array([x.flatten() for x in X_test_std])
y_train_arr, y_valid_arr, y_test_arr = map(np.array, (y_train, y_valid, y_test))
-------------------------------
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class ENSO_ds(Dataset):
    """Map-style Dataset wrapping numpy arrays as float32 features / int64 labels."""

    def __init__(self, data, labels):
        self.data = torch.from_numpy(data).float()
        self.labels = torch.from_numpy(labels).long()

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]
# Wrap each split as a Dataset and a DataLoader.
train_ds = ENSO_ds(X_train_arr, y_train_arr)
valid_ds = ENSO_ds(X_valid_arr, y_valid_arr)
test_ds = ENSO_ds(X_test_arr, y_test_arr)

b = 32  # batch size
# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(dataset=train_ds, batch_size=b, shuffle=True)
valid_loader = DataLoader(dataset=valid_ds, batch_size=b, shuffle=False)
test_loader = DataLoader(dataset=test_ds, batch_size=b, shuffle=False)
class ENSO_FNN(nn.Module):
    """Fully connected feedforward classifier.

    Parameters:
    - input_dim: number of input features (flattened SST map size)
    - n_h: hidden-layer width / number of neurons (default=12)
    - h: number of hidden layers, any h >= 1 (default=2)
    - n_o: number of output logits / classes (default=2)
    - A: activation module applied after every hidden layer
         (default None -> a fresh nn.ReLU per instance)

    Raises:
        ValueError: if h < 1.
    """

    def __init__(self, input_dim, n_h=12, h=2, n_o=2, A=None):
        super().__init__()
        if h < 1:
            raise ValueError("Hidden layer structure not defined for this value of h")
        # Default is created here rather than in the signature so each network
        # gets its own activation module (avoids a shared mutable default).
        self.act = A if A is not None else nn.ReLU()
        # input -> n_h, then (h - 1) hidden-to-hidden layers; output layer last.
        self.hidden = nn.ModuleList(
            [nn.Linear(input_dim, n_h)]
            + [nn.Linear(n_h, n_h) for _ in range(h - 1)]
        )
        self.out = nn.Linear(n_h, n_o)

    def forward(self, x):
        for layer in self.hidden:
            x = self.act(layer(x))
        # Raw logits — CrossEntropyLoss applies log-softmax internally.
        return self.out(x)
# --- FUNCTIONS --- #
def evaluate(network, data_loader, lossf, device):
    """Evaluate `network` on every batch of `data_loader`.

    Returns:
        (avg_loss, accuracy_pct, preds, labels) — mean per-sample loss,
        accuracy in percent, and 1-D numpy arrays of predictions / true
        labels over the whole loader.
    """
    network.eval()
    correct = 0
    total = 0
    total_loss = 0.0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            inputs = inputs.view(inputs.shape[0], -1)  # flatten per sample
            outputs = network(inputs)
            loss = lossf(outputs, labels)
            # Weight by batch size so the final mean is per-sample, not per-batch.
            total_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            all_preds.extend(predicted.cpu().numpy())
            # BUG FIX: labels were moved to `device`; .numpy() raises on a CUDA
            # tensor, so bring them back to the CPU first (as done for preds).
            all_labels.extend(labels.cpu().numpy())
    avg_loss = total_loss / len(data_loader.dataset)
    accuracy = 100 * correct / total
    return avg_loss, accuracy, np.array(all_preds), np.array(all_labels)
def train_and_validate(network, train_loader, valid_loader, test_loader, lossf, n, eta, beta, device):
    """Train `network` for `n` epochs with Adam(lr=eta, weight_decay=beta),
    evaluating on the validation and test loaders after every epoch.

    Returns:
        (train_loss_history, val_loss_history, val_accuracy_history,
         test_accuracy_history, all_preds, all_labels) — per-epoch histories
        plus final test-set predictions and true labels as numpy arrays.

    NOTE(review): evaluating the test set every epoch invites implicit
    test-set tuning; normally the test set is touched once, at the end.
    """
    optimizer = torch.optim.Adam(network.parameters(), lr=eta, weight_decay=beta)
    train_loss_history = []
    val_loss_history = []
    val_accuracy_history = []
    test_accuracy_history = []
    for epoch in range(n):
        ## TRAIN ##
        network.train()
        current_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            inputs = inputs.view(inputs.shape[0], -1)
            optimizer.zero_grad()
            output = network(inputs)
            loss = lossf(output, labels)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch average is per-sample.
            current_loss += loss.item() * inputs.size(0)
        train_loss = current_loss / len(train_loader.dataset)
        train_loss_history.append(train_loss)
        ## VALIDATE ##
        val_loss, val_accuracy, _, _ = evaluate(network, valid_loader, lossf, device)
        val_loss_history.append(val_loss)
        val_accuracy_history.append(val_accuracy)
        ## TEST (monitoring only) ##
        _, test_accuracy, _, _ = evaluate(network, test_loader, lossf, device)
        test_accuracy_history.append(test_accuracy)
        print(f"Epoch {epoch+1}/{n} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.2f}% | Test Acc: {test_accuracy:.2f}%")
    # Final pass to collect the test-set predictions/labels for reporting.
    test_loss_final, test_accuracy_final, all_preds, all_labels = evaluate(network, test_loader, lossf, device)
    print(f"Final Test Accuracy: {test_accuracy_final:.2f}%, Test Loss: {test_loss_final:.4f}")
    print("")
    return train_loss_history, val_loss_history, val_accuracy_history, test_accuracy_history, all_preds, all_labels
def set_seed(seed=42):
    """Seed every RNG in play (python `random`, numpy, torch CPU and all CUDA
    devices) and force deterministic cuDNN kernels for reproducible runs."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
eta = 0.001  # learning rate
beta = 1e-5  # ridge (L2) regularization via Adam weight_decay
n = 10       # number of epochs
# BUG FIX: nn.Softmax needs an explicit dim; dim=1 matches the legacy implicit
# choice for 2-D (batch, features) inputs and silences the deprecation warning.
activation_functions = {'Relu': nn.ReLU(), 'Sigmoid': nn.Sigmoid(), 'Softmax': nn.Softmax(dim=1)}
lossf = torch.nn.CrossEntropyLoss()
results = {}
set_seed(42)
for name, activation_func in activation_functions.items():
    print(f"Training with {name} activation function:")
    # BUG FIX: the model must live on the same device as the inputs —
    # without .to(device) this crashes as soon as a GPU is available.
    mlp = ENSO_FNN(input_dim=X_train_arr.shape[1], n_h=12, h=2, n_o=2, A=activation_func).to(device)
    train_loss, val_loss, val_accuracy, test_accuracy, preds, labels = train_and_validate(mlp, train_loader, valid_loader, test_loader, lossf, n, eta, beta, device)
    results[name] = {'train_loss_history': train_loss, 'val_loss_history': val_loss, 'val_accuracy_history': val_accuracy, 'test_accuracy_history': test_accuracy}
And the results:
Training with Relu activation function:
Epoch 1/10 | Train Loss: 0.6729 | Val Loss: 0.6758 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 2/10 | Train Loss: 0.5622 | Val Loss: 0.6028 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 3/10 | Train Loss: 0.4874 | Val Loss: 0.5460 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 4/10 | Train Loss: 0.4426 | Val Loss: 0.5116 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 5/10 | Train Loss: 0.4143 | Val Loss: 0.4832 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 6/10 | Train Loss: 0.3887 | Val Loss: 0.4498 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 7/10 | Train Loss: 0.3528 | Val Loss: 0.3949 | Val Acc: 81.97% | Test Acc: 87.84%
Epoch 8/10 | Train Loss: 0.2963 | Val Loss: 0.3090 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 9/10 | Train Loss: 0.2124 | Val Loss: 0.2090 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 10/10 | Train Loss: 0.1306 | Val Loss: 0.1214 | Val Acc: 100.00% | Test Acc: 100.00%
Final Test Accuracy: 100.00%, Test Loss: 0.1134
Training with Sigmoid activation function:
Epoch 1/10 | Train Loss: 0.6672 | Val Loss: 0.6255 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 2/10 | Train Loss: 0.6056 | Val Loss: 0.5844 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 3/10 | Train Loss: 0.5646 | Val Loss: 0.5480 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 4/10 | Train Loss: 0.5301 | Val Loss: 0.5148 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 5/10 | Train Loss: 0.4975 | Val Loss: 0.4824 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 6/10 | Train Loss: 0.4654 | Val Loss: 0.4494 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 7/10 | Train Loss: 0.4338 | Val Loss: 0.4166 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 8/10 | Train Loss: 0.4029 | Val Loss: 0.3849 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 9/10 | Train Loss: 0.3721 | Val Loss: 0.3543 | Val Acc: 100.00% | Test Acc: 100.00%
Epoch 10/10 | Train Loss: 0.3420 | Val Loss: 0.3251 | Val Acc: 100.00% | Test Acc: 100.00%
Final Test Accuracy: 100.00%, Test Loss: 0.3240
Training with Softmax activation function:
Epoch 1/10 | Train Loss: 0.6926 | Val Loss: 0.7022 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 2/10 | Train Loss: 0.6893 | Val Loss: 0.6985 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 3/10 | Train Loss: 0.6871 | Val Loss: 0.6955 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 4/10 | Train Loss: 0.6850 | Val Loss: 0.6918 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 5/10 | Train Loss: 0.6824 | Val Loss: 0.6894 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 6/10 | Train Loss: 0.6799 | Val Loss: 0.6858 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 7/10 | Train Loss: 0.6771 | Val Loss: 0.6827 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 8/10 | Train Loss: 0.6741 | Val Loss: 0.6792 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 9/10 | Train Loss: 0.6709 | Val Loss: 0.6760 | Val Acc: 40.98% | Test Acc: 41.89%
Epoch 10/10 | Train Loss: 0.6673 | Val Loss: 0.6722 | Val Acc: 40.98% | Test Acc: 41.89%
Final Test Accuracy: 41.89%, Test Loss: 0.6715
Any tips on how I can make my code better?