Noob question: model doesn't update

I have tried running the following code on the CIFAR-10 dataset.
However, the test loss stays constant across epochs, which seems to indicate that no training is actually happening.
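
One way to confirm that the weights really aren't updating is to snapshot the parameters before and after a single optimizer step and compare. A minimal sanity check (assuming the model, optimizer, criterion, and loaders from the code below):

import torch

# snapshot every parameter, take one training step, then compare
snapshot = {name: p.detach().clone() for name, p in model_MLP.named_parameters()}
X, y = next(iter(train_loader))
optimizer.zero_grad()
loss = criterion(model_MLP(X.to(DEVICE)), y.to(DEVICE))
loss.backward()
optimizer.step()
for name, p in model_MLP.named_parameters():
    print(name, "changed" if not torch.equal(snapshot[name], p) else "UNCHANGED")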

Here is the code:

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") # assumed device setup

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32*32*3, 512) # 32*32 image with 3 channels = 3072 inputs
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = x.reshape(-1, 32*32*3) # flatten to (batch, 3072); .view(-1, 32*32*3) also works
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.softmax(x, dim = 1)
        return x

def train(model, train_loader, optim, criterion, scheduler = None):
    model.train() # put the model in training mode
    train_loss = 0
    correct = 0
    
    # progress bar over batches (`epoch` comes from the training loop at the bottom)
    tqdm_bar = enumerate(tqdm(train_loader, desc = "TRAIN | Epoch {} : ".format(epoch)))
    for batch_idx, (X, y) in tqdm_bar:
        optim.zero_grad() # zero the gradients for each batch
        X = X.to(DEVICE)  # move the batch to the device
        y = y.to(DEVICE)
        
        # forward pass, backprop, optimize
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optim.step()
        
        # accumulate train loss/accuracy
        train_loss += loss.item()
        prediction = output.max(1, keepdim = True)[1] # index of the max output = predicted class
        correct += prediction.eq(y.view_as(prediction)).sum().item()

    if scheduler is not None:
        scheduler.step() #lr scheduler decays the learning rate each epoch 
        
    # epoch-level train loss/acc
    train_loss /= len(train_loader.dataset)               # normalize by the size of the underlying dataset (total sample count), not the number of batches
    train_acc = 100. * correct/ len(train_loader.dataset) # same normalization here
    
    return train_loss, train_acc    


def evaluate(model, test_loader, criterion):
    model.eval() # put the model in evaluation mode
    test_loss = 0
    correct = 0
    
    with torch.no_grad():
        for X, y in tqdm(test_loader, desc = "TEST | Epoch {} : ".format(epoch)):
            X = X.to(DEVICE)
            y = y.to(DEVICE)
            
            output = model(X)
            test_loss += criterion(output, y).item()
            prediction = output.max(1, keepdim = True)[1]
            correct += prediction.eq(y.view_as(prediction)).sum().item() # summing the boolean matches counts correct samples (not batches)
        # average over the whole test set
        test_loss /= len(test_loader.dataset)
        test_acc = 100. * correct/len(test_loader.dataset)
    return test_loss, test_acc


# assumed CIFAR-10 data setup (not shown in the original post)
from torchvision import datasets, transforms
transform = transforms.ToTensor()
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10("./data", train=True, download=True, transform=transform),
    batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10("./data", train=False, download=True, transform=transform),
    batch_size=128, shuffle=False)

model_MLP = MLP().to(DEVICE)
optimizer = torch.optim.Adam(model_MLP.parameters(), lr = 0.01)
criterion = nn.CrossEntropyLoss()

print(model_MLP) #let's look at the model
#print(optimizer)

for epoch in range(10):
    train(model_MLP, train_loader, optimizer, criterion) # returns (train_loss, train_acc), unused here
    tst_loss, tst_acc = evaluate(model_MLP, test_loader, criterion)
    print("\n[EPOCH: {}], \tModel: MLP, \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, tst_loss, tst_acc))

`nn.CrossEntropyLoss` expects raw logits as the model output, so remove the `F.softmax` from your model.
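
In other words, `forward` should return the raw `fc3` output. `nn.CrossEntropyLoss` applies `log_softmax` internally, so with the extra `F.softmax` the loss effectively sees softmax applied twice, which compresses the outputs and can stall training. A minimal sketch of the corrected `forward` (same layers as above):

    def forward(self, x):
        x = x.reshape(-1, 32*32*3) # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x) # raw logits; CrossEntropyLoss handles the softmax

If you still want probabilities at inference time, apply `F.softmax(output, dim=1)` (or `output.argmax(1)` for hard predictions) outside the model.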
