I have been trying everything to fix this issue, but my results are still the same: my validation accuracy, train_loss, and val_loss are not improving. I have no idea what to do anymore.
Epoch [0], last_lr: 0.00100, train_loss: 1.7094, val_loss: 1.4315, val_acc: 0.6288
Epoch [1], last_lr: 0.00100, train_loss: 1.7140, val_loss: 1.4907, val_acc: 0.5649
Epoch [2], last_lr: 0.00070, train_loss: 1.6565, val_loss: 1.3885, val_acc: 0.6260
Epoch [3], last_lr: 0.00070, train_loss: 1.6562, val_loss: 1.4879, val_acc: 0.5438
Epoch [4], last_lr: 0.00070, train_loss: 1.6645, val_loss: 1.3985, val_acc: 0.5858
Epoch [5], last_lr: 0.00049, train_loss: 1.6250, val_loss: 1.3905, val_acc: 0.6210
Epoch [6], last_lr: 0.00049, train_loss: 1.6283, val_loss: 1.3860, val_acc: 0.6021
Epoch [7], last_lr: 0.00049, train_loss: 1.6253, val_loss: 1.3890, val_acc: 0.5866
Epoch [8], last_lr: 0.00034, train_loss: 1.6081, val_loss: 1.3763, val_acc: 0.6103
Epoch [9], last_lr: 0.00034, train_loss: 1.6017, val_loss: 1.3626, val_acc: 0.6220
Here is my training loop:
import torch
import torch.nn as nn
import torch.optim as optim

@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [validation_step(model, batch) for batch in val_loader]
    return validation_epoch_end(outputs)

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader):
    torch.cuda.empty_cache()
    history = []
    # Set up custom optimizer with weight decay
    optimizer = optim.Adam(model.parameters(), max_lr, weight_decay=0.1)
    # Set up step learning rate scheduler
    sched = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            optimizer.zero_grad()
            loss = training_step(model, batch)
            train_losses.append(loss.detach())  # detach so the graph can be freed
            loss.backward()
            # Gradient clipping
            nn.utils.clip_grad_value_(model.parameters(), 0.01)
            optimizer.step()
            lrs.append(get_lr(optimizer))
        sched.step()  # step the scheduler once per epoch
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        epoch_end(epoch, result)
        history.append(result)
    return history
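For completeness, this is roughly how I invoke the loop; the device setup here is just a sketch, and my actual data-loader construction is omitted:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
history = fit_one_cycle(10, 0.001, model, train_loader, val_loader)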
Here are the helper functions I use to train the model:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def training_step(model, batch):
    images, labels = batch
    images, labels = images.to(device), labels.to(device)
    out = model(images)                              # Generate predictions
    loss = nn.functional.cross_entropy(out, labels)  # Calculate loss
    return loss

def validation_step(model, batch):
    images, labels = batch
    images, labels = images.to(device), labels.to(device)
    out = model(images)                              # Generate predictions
    loss = nn.functional.cross_entropy(out, labels)  # Calculate loss
    acc = accuracy(out, labels)                      # Calculate accuracy
    return {'val_loss': loss.detach(), 'val_acc': acc}

def validation_epoch_end(outputs):
    batch_losses = [x['val_loss'] for x in outputs]
    epoch_loss = torch.stack(batch_losses).mean()    # Combine losses
    batch_accs = [x['val_acc'] for x in outputs]
    epoch_acc = torch.stack(batch_accs).mean()       # Combine accuracies
    return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

def epoch_end(epoch, result):
    print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
        epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))
I am currently fine-tuning a pre-trained ResNet-50 on the CIFAR-10 dataset. My normalization values are mean [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225] (the ImageNet statistics). I have frozen gradient computation for all layers except the last one, as I only need to fine-tune the final fully connected layer.
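For reference, here is a minimal sketch of that setup; it is not my exact code (which I haven't included), and it assumes torchvision's resnet50 and a standard transform pipeline:

from torchvision import models, transforms

# Normalize with the ImageNet statistics mentioned above
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False                      # freeze the backbone
model.fc = nn.Linear(model.fc.in_features, 10)       # new head for the 10 CIFAR-10 classes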
I trained the model for 10 epochs with a learning rate of 0.001, as shown in the log above. I have tried a learning rate scheduler, weight decay, gradient clipping, etc., but I can't seem to find the cause of this problem.