Why is the training error not changing? The loss is decreasing and the weights are changing

import torch
import torch.nn as nn
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

x, y = make_blobs(n_samples=1000, centers=2,random_state=0)
x, xtest, y, ytest = train_test_split(x, y, test_size=0.15, random_state=42)  
        
x_data = torch.Tensor(x)
y_data = torch.Tensor(y)
x_data.requires_grad_(True)
y_data.requires_grad_(True)

x_data_test = torch.Tensor(xtest)
y_data_test = torch.Tensor(ytest)
x_data_test.requires_grad_(True)
y_data_test.requires_grad_(True)


class Net(nn.Module):
  def __init__(self):
      super(Net, self).__init__()
      self.layer1 = torch.nn.Linear(2, 32)
      self.layer2 = torch.nn.Linear(32, 16)
      self.layer3 = torch.nn.Linear(16, 1)

  def forward(self,x):
      x = self.layer1(x)
      x = self.layer2(x)
      x = self.layer3(x)
      return x


net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
loss_fn = torch.nn.BCEWithLogitsLoss()


def error_criterion(outputs, labels):
    max_vals, max_indices = torch.max(outputs, 1)
    error = (max_indices != labels).float().sum() / max_indices.size()[0]
    return error


net.train()
y_data = y_data.unsqueeze(1)
for t in range(900):
    outputs = net(x_data)
    
    loss = loss_fn(outputs, y_data)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


    if t % 100 == 0:
        print("Loss:", loss.item())
        print("Error:", error_criterion(outputs, y_data.squeeze(1)).item())
      

Since you are using a single output neuron for your binary classification task, torch.max(outputs, 1) will always yield zeros: dim 1 has size 1, so index 0 is the only possible argmax, and the "predicted class" is therefore always 0.
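
You can see this on a small made-up [4, 1] logit tensor (the values here are just for illustration):

import torch

# torch.max along dim=1 of a [N, 1] tensor can only return index 0,
# because there is a single element per row to pick from.
outputs = torch.tensor([[-1.3], [0.7], [2.1], [-0.2]])  # hypothetical logits
max_vals, max_indices = torch.max(outputs, 1)
print(max_indices)  # tensor([0, 0, 0, 0])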

Instead you could use a threshold (e.g. 0, as you are dealing with logits) to calculate the error:

def error_criterion(outputs, labels):
    preds = outputs > 0.  # logit > 0 corresponds to a probability > 0.5
    error = (preds.float() != labels).float().mean()
    return error

for t in range(900):
    outputs = net(x_data)
    
    loss = loss_fn(outputs, y_data)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


    if t % 100 == 0:
        print("Loss:", loss.item())
        print("Error:", error_criterion(outputs, y_data).item()) # I removed the squeeze() here

Also, note that you are basically using a single linear layer, since you don’t apply any activation function after layer1 and layer2; stacking linear layers without a nonlinearity in between collapses to one affine transformation.
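
For example, a minimal sketch with ReLU nonlinearities added between the layers (keeping your layer sizes; ReLU is just one common choice here):

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = torch.nn.Linear(2, 32)
        self.layer2 = torch.nn.Linear(32, 16)
        self.layer3 = torch.nn.Linear(16, 1)
        self.act = torch.nn.ReLU()

    def forward(self, x):
        # Nonlinearities between the linear layers keep the stack from
        # collapsing into a single affine transformation.
        x = self.act(self.layer1(x))
        x = self.act(self.layer2(x))
        return self.layer3(x)  # raw logits for BCEWithLogitsLoss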
