Not learning/training after each epoch in regression

```python
import torch
import torch.nn as nn

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
    
        self.linear_relu_stack = nn.Sequential(
            # input of 2 features, hidden layer of 10 units
            nn.Linear(2,10),
            nn.ReLU(),
            nn.Linear(10,10),
            nn.ReLU(),
            # hidden layer of 10 units, single regression output
            nn.Linear(10,1)
        )
        
    def forward(self, x):
        # flatten any extra dimensions down to (batch, features)
        x = torch.flatten(x, start_dim=1)
        pred = self.linear_relu_stack(x)
        return pred

net = NeuralNetwork()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.00001)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    
    # model.train() sets the model into training mode, i.e.
    #   - BatchNorm layers use per-batch statistics
    #   - Dropout layers are activated
    model.train()
    for batch, (x,y) in enumerate(dataloader):
        # Compute prediction error
        pred = model(x)
        loss = loss_fn(pred, y)
        
        # PyTorch's autograd accumulates gradients for each model parameter,
        # so the existing gradients have to be cleared before each backward pass.
        optimizer.zero_grad()
        
        # backpropagation: compute the gradient of the loss w.r.t. all parameters
        loss.backward()
        
        # update the weights and biases using the computed gradients
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(x)
            print('Loss:', loss, '[', current, '/', size, ']')
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    # len(dataloader) is the number of batches, not the batch size
    num_batches = len(dataloader)
    
    # model.eval() Sets model into evaluation mode i.e. 
      # BatchNorm layers use running statistics 
      # Dropout layers are de-activated
    
    model.eval()
    
    test_loss = 0
    # operations within the block won't calculate a gradient
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    
    test_loss = test_loss / num_batches
    
    print('\nTest Error:', 'Avg Loss:', test_loss, '\n')
```
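
The two functions are called from an epoch loop roughly like this (a minimal sketch; `train_dataloader` and `test_dataloader` are placeholders for however the data is actually loaded):

```python
# Sketch of the driver loop; train_dataloader / test_dataloader stand in
# for whatever loaders are built elsewhere.
epochs = 2
for epoch in range(epochs):
    print(f'Epoch {epoch + 1}\n')
    train(train_dataloader, net, loss_fn, optimizer)
    test(test_dataloader, net, loss_fn)
```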

Epoch 1

Loss: 406.853515625 [ 0 / 3092 ]
Loss: 295.5381164550781 [ 1000 / 3092 ]
Loss: 307.9087219238281 [ 2000 / 3092 ]
Loss: 309.39630126953125 [ 3000 / 3092 ]

Test Error: Avg Loss: 297.10006644509053

Epoch 2

Loss: 406.853515625 [ 0 / 3092 ]
Loss: 295.5381164550781 [ 1000 / 3092 ]
Loss: 307.9087219238281 [ 2000 / 3092 ]
Loss: 309.39630126953125 [ 3000 / 3092 ]

Test Error: Avg Loss: 297.10006644509053


As shown in the output, the losses are identical in every epoch. Did I implement something wrong? I've already tried changing the learning rate and increasing the number of epochs.

I guess you might not be shuffling the dataset and, since the learning rate is quite low, you might see very similar losses.
Increase the learning rate and shuffle the dataset, and the results should differ:

```python
model = NeuralNetwork()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

dataset = torch.utils.data.TensorDataset(torch.randn(8000, 2), torch.randn(8000, 1))
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=False)

for epoch in range(2):
    print('epoch {}'.format(epoch))
    train(dataloader, model, loss_fn, optimizer)
```

Output, shuffle=False, lr=1e-3:

epoch 0
Loss: 0.42772895097732544 [ 0 / 8000 ]
Loss: 1.0073586702346802 [ 800 / 8000 ]
Loss: 1.467551350593567 [ 1600 / 8000 ]
Loss: 1.2312512397766113 [ 2400 / 8000 ]
Loss: 0.4189470708370209 [ 3200 / 8000 ]
Loss: 1.999730110168457 [ 4000 / 8000 ]
Loss: 1.1693495512008667 [ 4800 / 8000 ]
Loss: 2.068380355834961 [ 5600 / 8000 ]
Loss: 0.8423236608505249 [ 6400 / 8000 ]
Loss: 1.0693302154541016 [ 7200 / 8000 ]
epoch 1
Loss: 0.44811519980430603 [ 0 / 8000 ]
Loss: 0.9499561786651611 [ 800 / 8000 ]
Loss: 1.4340418577194214 [ 1600 / 8000 ]
Loss: 1.2619669437408447 [ 2400 / 8000 ]
Loss: 0.3990039527416229 [ 3200 / 8000 ]
Loss: 1.9583899974822998 [ 4000 / 8000 ]
Loss: 1.144278645515442 [ 4800 / 8000 ]
Loss: 2.0790889263153076 [ 5600 / 8000 ]
Loss: 0.8327335119247437 [ 6400 / 8000 ]
Loss: 1.074733853340149 [ 7200 / 8000 ]
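
For the shuffled run, the DataLoader is simply created with shuffle=True (the learning rate stays at 1e-3):

```python
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)
```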

Output, shuffle=True, lr=1e-3:

epoch 0
Loss: 1.971574068069458 [ 0 / 8000 ]
Loss: 0.45451757311820984 [ 800 / 8000 ]
Loss: 0.40237903594970703 [ 1600 / 8000 ]
Loss: 0.7783100008964539 [ 2400 / 8000 ]
Loss: 1.5645486116409302 [ 3200 / 8000 ]
Loss: 0.4626065790653229 [ 4000 / 8000 ]
Loss: 0.5110663175582886 [ 4800 / 8000 ]
Loss: 0.4680662751197815 [ 5600 / 8000 ]
Loss: 1.1759974956512451 [ 6400 / 8000 ]
Loss: 0.7698230743408203 [ 7200 / 8000 ]
epoch 1
Loss: 1.2638776302337646 [ 0 / 8000 ]
Loss: 1.5431848764419556 [ 800 / 8000 ]
Loss: 0.48873111605644226 [ 1600 / 8000 ]
Loss: 1.2282673120498657 [ 2400 / 8000 ]
Loss: 1.1738083362579346 [ 3200 / 8000 ]
Loss: 0.5108490586280823 [ 4000 / 8000 ]
Loss: 0.7704136371612549 [ 4800 / 8000 ]
Loss: 1.6635758876800537 [ 5600 / 8000 ]
Loss: 0.674902081489563 [ 6400 / 8000 ]
Loss: 0.47564896941185 [ 7200 / 8000 ]
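
A quick sanity check in either setup is to snapshot a weight tensor before and after an epoch and confirm it actually changes; a small sketch using the model defined above:

```python
# Sketch: verify the parameters are really being updated by one epoch of training.
before = model.linear_relu_stack[0].weight.detach().clone()
train(dataloader, model, loss_fn, optimizer)
after = model.linear_relu_stack[0].weight.detach().clone()
print('weights changed:', not torch.equal(before, after))
```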