I’m working on a model that takes in some custom CSV data and is supposed to make predictions from a large number of input features; the labels are integers from 0 up to 30. My concern is that when I run the training, the loss doesn’t decrease.
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, SubsetRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
class HorseDataset(Dataset):
    def __init__(self, file_name):
        super().__init__()
        # Read the CSV and replace missing values with 0
        file_out = pd.read_csv(file_name, float_precision='round_trip').fillna(value=0)
        x = file_out.iloc[0:7292, 0:673]  # 673 feature columns
        y = file_out.iloc[0:7292, 673]    # label column (classes 0-30)
        x_train = torch.Tensor(x.values)
        y_train = y
        self.X_train = torch.tensor(x_train, dtype=torch.float32)
        self.Y_train = torch.tensor(y_train).type(torch.LongTensor)

    def __len__(self):
        return len(self.Y_train)

    def __getitem__(self, idx):
        return self.X_train[idx], self.Y_train[idx]
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(673, int(673*(2/3)//1))
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(int(673*(2/3)//1), int(673*(2/3)//1))
        self.relu = nn.ReLU()
        self.l3 = nn.Linear(int(673*(2/3)//1), 31)

    def forward(self, x):
        output = self.l1(x)
        output = self.relu(output)
        output = self.l2(output)
        output = self.relu(output)
        output = self.l3(output)
        return output
dataset = HorseDataset('./export_labelled_correct.csv')
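Right after building the dataset I can run a quick check on the label tensor (purely illustrative, just to convince myself the targets really are integer class indices between 0 and 30, i.e. 31 classes for the final layer):

# Illustrative check: label range and number of distinct classes.
print(dataset.Y_train.min().item(), dataset.Y_train.max().item())  # expect 0 and 30
print(dataset.Y_train.unique().numel())                            # expect at most 31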
batch_size = 64
testing_split = .2
loss_fn = torch.nn.CrossEntropyLoss()
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(testing_split * dataset_size))
train_indices, test_indices = indices[split:], indices[:split]
neural_net = NeuralNet()
neural_net = neural_net.to(device)
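As a basic sanity check on the architecture itself, a dummy forward pass with random input (purely illustrative) should produce raw logits of shape [batch, 31]:

# Illustrative only: 4 random rows with 673 features each; CrossEntropyLoss
# expects raw logits, so there is deliberately no softmax at the end of the network.
with torch.no_grad():
    dummy = torch.randn(4, 673).to(device)
    print(neural_net(dummy).shape)  # expect torch.Size([4, 31])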
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           sampler=train_sampler)
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                          sampler=test_sampler)
optimizer = torch.optim.SGD(neural_net.parameters(), lr=0.01, momentum=0.9)
scheduler = ReduceLROnPlateau(optimizer, 'min')
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.
    last_loss = 0.
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        neural_net.to(device)
        optimizer.zero_grad()
        outputs = neural_net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 10000 == 9999:
            last_loss = running_loss / batch_size  # loss per batch
            print(' batch {} loss: {}'.format(i + 1, last_loss))
            tb_x = epoch_index * len(train_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.
    return last_loss
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/oddsgenie_trainer_{}'.format(timestamp))
epoch_number = 0
EPOCHS = 10000
best_vloss = 1_000_000.
for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    neural_net.train(True)
    avg_loss = train_one_epoch(epoch_number, writer)

    running_vloss = 0.0
    neural_net.eval()
    with torch.no_grad():
        for i, vdata in enumerate(test_loader):
            vinputs, vlabels = vdata
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            voutputs = neural_net(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            scheduler.step(vloss)
            running_vloss += vloss

    avg_vloss = running_vloss / (i + 1)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    writer.add_scalars('Training vs. Validation Loss',
                       {'Training': avg_loss, 'Validation': avg_vloss},
                       epoch_number + 1)
    writer.flush()

    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'optimal_model'
        torch.save(neural_net.state_dict(), model_path)

    epoch_number += 1
I am quite new to PyTorch and only have a theoretical understanding of some basic machine learning concepts, so there may well be several bugs here, but I would appreciate it if somebody could point out why the output during training looks like this:
LOSS train 0.0 valid 3.0965657234191895
It seems like the avg_loss variable always comes back as 0, and the validation loss just sticks at around 3.08–3.09.
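In case it helps, this is the small diagnostic I was going to add next, to see how many batches one epoch actually contains and therefore whether the logging branch inside train_one_epoch is ever reached:

# Diagnostic sketch: batch counts per epoch for both loaders.
print('train batches per epoch:', len(train_loader))
print('test batches per epoch:', len(test_loader))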
Thanks in advance!