I have been trying everything to fix this issue, but my results are still the same: my validation accuracy, train_loss, and val_loss are not improving. I have no idea what to do anymore.
Epoch [0], last_lr: 0.00100, train_loss: 1.7094, val_loss: 1.4315, val_acc: 0.6288
Epoch [1], last_lr: 0.00100, train_loss: 1.7140, val_loss: 1.4907, val_acc: 0.5649
Epoch [2], last_lr: 0.00070, train_loss: 1.6565, val_loss: 1.3885, val_acc: 0.6260
Epoch [3], last_lr: 0.00070, train_loss: 1.6562, val_loss: 1.4879, val_acc: 0.5438
Epoch [4], last_lr: 0.00070, train_loss: 1.6645, val_loss: 1.3985, val_acc: 0.5858
Epoch [5], last_lr: 0.00049, train_loss: 1.6250, val_loss: 1.3905, val_acc: 0.6210
Epoch [6], last_lr: 0.00049, train_loss: 1.6283, val_loss: 1.3860, val_acc: 0.6021
Epoch [7], last_lr: 0.00049, train_loss: 1.6253, val_loss: 1.3890, val_acc: 0.5866
Epoch [8], last_lr: 0.00034, train_loss: 1.6081, val_loss: 1.3763, val_acc: 0.6103
Epoch [9], last_lr: 0.00034, train_loss: 1.6017, val_loss: 1.3626, val_acc: 0.6220
Here is my training loop:
import torch
import torch.nn as nn
import torch.optim as optim

@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [validation_step(model, batch) for batch in val_loader]
    return validation_epoch_end(outputs)

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader):
    torch.cuda.empty_cache()
    history = []
    # Set up custom optimizer with weight decay
    optimizer = optim.Adam(model.parameters(), max_lr, weight_decay=0.1)
    # Set up step learning rate scheduler
    sched = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            optimizer.zero_grad()
            loss = training_step(model, batch)
            train_losses.append(loss.detach())  # detach so the graph can be freed
            loss.backward()
            # Gradient clipping
            nn.utils.clip_grad_value_(model.parameters(), 0.01)
            optimizer.step()
            lrs.append(get_lr(optimizer))
        sched.step()  # step the scheduler once per epoch
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        epoch_end(epoch, result)
        history.append(result)
    return history
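For completeness, this is roughly how I invoke the loop; the device setup here is just a sketch, and my actual data-loader construction is omitted:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
history = fit_one_cycle(10, 0.001, model, train_loader, val_loader)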
Here are the helper functions I use to train the model:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def training_step(model, batch):
    images, labels = batch
    images, labels = images.to(device), labels.to(device)
    out = model(images)                              # Generate predictions
    loss = nn.functional.cross_entropy(out, labels)  # Calculate loss
    return loss

def validation_step(model, batch):
    images, labels = batch
    images, labels = images.to(device), labels.to(device)
    out = model(images)                              # Generate predictions
    loss = nn.functional.cross_entropy(out, labels)  # Calculate loss
    acc = accuracy(out, labels)                      # Calculate accuracy
    return {'val_loss': loss.detach(), 'val_acc': acc}

def validation_epoch_end(outputs):
    batch_losses = [x['val_loss'] for x in outputs]
    epoch_loss = torch.stack(batch_losses).mean()    # Combine losses
    batch_accs = [x['val_acc'] for x in outputs]
    epoch_acc = torch.stack(batch_accs).mean()       # Combine accuracies
    return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

def epoch_end(epoch, result):
    print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
        epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))
I am currently fine-tuning a pre-trained ResNet-50 on the CIFAR-10 dataset. My normalization values are mean [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225] (the ImageNet statistics). I have frozen gradient computation for all layers except the last one, as I only need to fine-tune the final fully connected layer.
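For reference, here is a minimal sketch of that setup; it is not my exact code (which I haven't included), and it assumes torchvision's resnet50 and a standard transform pipeline:

from torchvision import models, transforms

# Normalize with the ImageNet statistics mentioned above
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False                      # freeze the backbone
model.fc = nn.Linear(model.fc.in_features, 10)       # new head for the 10 CIFAR-10 classes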
I trained the model for 10 epochs with a learning rate of 0.001, as shown in the log above. I have tried a learning rate scheduler, weight decay, gradient clipping, etc., but I can't seem to find the cause of this problem.