I am trying to train a model to recognize attributes, such as the weather, in images. It seems to be quite accurate on the train and validation sets of the same dataset, but it performs poorly on a different dataset and returns different labels each time I run it.
For the network I used a pretrained ResNet. I cannot understand what went wrong, so I would appreciate any help or suggestions.
# Backbone: pretrained ResNet-50 whose classifier is replaced by a 40-way
# multi-label head (an independent sigmoid per output).
model_conv = torchvision.models.resnet50(pretrained=True)
# Read the feature width from the layer being replaced (2048 for ResNet-50)
# instead of hard-coding it.
model_conv.fc = nn.Sequential(
    nn.Linear(model_conv.fc.in_features, 40),
    nn.Sigmoid(),
)
model_conv = model_conv.to(device)

# Loss Function
# BCELoss expects probabilities in [0, 1]; the Sigmoid in the head provides them.
criterion = nn.BCELoss()

# Split the parameters by module, not by position. `parameters()[-1]` is only
# the bias of the new Linear layer (Sigmoid has no parameters), so a positional
# split would leave the head's weight matrix in the low-learning-rate group.
head_params = list(model_conv.fc.parameters())
head_param_ids = {id(p) for p in head_params}
backbone_params = [
    p for p in model_conv.parameters() if id(p) not in head_param_ids
]

optimizer_ft = optim.Adam(
    [
        # Pretrained backbone: fine-tuned with the default lr below (1e-4).
        {'params': backbone_params},
        # Freshly initialised head (weight and bias): larger lr.
        {'params': head_params, 'lr': 1e-3},
    ],
    lr=1e-4,
    weight_decay=0,
)

# Multiply every group's learning rate by 0.7 every 2 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=2, gamma=0.7)
And here is the training function:
# train_model: runs a 'train' phase and a 'val' phase over `dataloaders`,
# accumulating the criterion loss and a sample-averaged F1 score per phase,
# then reloads `best_model_wts` into the model and returns it together with
# `val_acc_history`.
# NOTE(review): this excerpt has lost its indentation, and the signature on the
# next line is cut off. The enclosing per-epoch loop and the definitions of
# `since`, `best_model_wts` and `val_acc_history` are not visible here.
def train_model(model, dataloaders, criterion, optimizer, scheduler, batch_size=5,
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
# Per-phase accumulators.
running_loss = 0.0
running_corrects = 0
average_precis = 0.001
loss_values = []
print('Iterating over data:')
for batch_idx, (inputs, labels) in enumerate(dataloaders[phase]):
inputs = inputs.to(device)
labels = labels.to(device).float()
# A CPU/NumPy copy of the labels is kept for the metric computations below.
gt_data = labels
gt_data = gt_data.to(device)
gt_data = gt_data.cpu().data.numpy()
average_precision_array = []
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
if phase == 'train':
# This sits under `if phase == 'train'`, so the condition is always True here.
with torch.set_grad_enabled(phase == 'train'):
outputs = model(inputs)
# Outputs are moved to the CPU, so the loss below is computed on CPU tensors.
outputs = outputs.cpu()#.data.numpy()
preds = outputs.cpu().data.numpy()
# Round to the nearest integer; presumably outputs are probabilities in
# [0, 1], making this a 0.5 threshold - TODO confirm.
preds = np.round(preds) #set a condition for binary
preds_int = preds.astype(int)
gt_data_np = np.round(gt_data)
gt_data_int = gt_data_np.astype(int)
gt_data = torch.from_numpy(gt_data_np)
loss = criterion(outputs, gt_data)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
# The loss is weighted by the batch size before being summed.
running_loss += loss.item() * inputs.size(0)
# NOTE(review): unlike running_loss, this score is added without multiplying by
# inputs.size(0), yet it is later divided by the dataset length. If f1_score
# returns a per-batch mean (presumably sklearn.metrics.f1_score - confirm),
# epoch_acc is scaled down by roughly the batch size.
running_corrects += f1_score(gt_data, preds, average="samples")
#Validation set
else:
# No gradients are tracked for the validation forward pass.
with torch.no_grad():
val_outputs = model(inputs)
val_outputs = val_outputs.cpu()#.data.numpy()
val_preds = val_outputs.cpu().data.numpy()
val_preds = np.round(val_preds) #set a condition for binary
val_preds = val_preds.astype(int)
val_gtdata_np = np.round(gt_data)
val_gtdata_int = val_gtdata_np.astype(int)
val_gtdata = torch.from_numpy(val_gtdata_np)
loss = criterion(val_outputs, val_gtdata)
# statistics
running_loss += loss.item() * inputs.size(0)
# NOTE(review): same batch-size weighting concern as in the training branch.
running_corrects += f1_score(val_gtdata, val_preds, average='samples')
if phase == 'train':
# NOTE(review): scheduler.step() is also called in the 'val' branch below. If
# both run every epoch, the learning-rate schedule advances twice per epoch.
scheduler.step()
# NOTE(review): `average_precis_train`, `gr_truth_array` and `preds_array` are
# not defined anywhere in the visible code.
average_precis_train += average_precision_score(gr_truth_array, preds_array, average= "macro")
print("Average precision Training:", average_precis_train)
# Average the accumulated sums over the number of samples in the phase dataset.
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects / len(dataloaders[phase].dataset)
epoch_acc = np.round(epoch_acc, decimals=4)
if phase == 'val':
scheduler.step()
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects / len(dataloaders[phase].dataset)
epoch_acc = np.round(epoch_acc, decimals=4)
# NOTE(review): `gr_truth_val` and `preds_val` are not defined anywhere in the
# visible code.
average_precis += average_precision_score(gr_truth_val, preds_val, average="macro")
print("Average precision Validation:", average_precis)
print('{} Loss: {:.4f}'.format(phase, epoch_loss))
print("Acc:", epoch_acc)
#Visualize a few images
if phase == 'val':
# Uses `inputs` and `val_outputs` as they were left by the last processed batch.
for jp in range(inputs.size()[0]):
# NOTE(review): subplot(1, 4, ...) provides four slots - confirm the batch has
# at most four images.
ax = plt.subplot(1, 4, jp + 1)
plt.figure(figsize=[5, 4])
ax.axis('off')
ax.set_title('Sample #{}'.format(jp))
# Reorder axes (1, 2, 0): presumably channel-first to channel-last for display.
img = inputs.cpu().data[jp].numpy().transpose((1, 2, 0))
# NOTE(review): the `break` below exits after the first element of val_outputs;
# the lost indentation makes the intended nesting of these loops unclear.
for value in val_outputs:
show_scores(img, value, attributes)
break
plt.show()
break
print()
time_elapsed = time.time() - since
# NOTE(review): the print( call below is missing its closing parenthesis as
# written, which is a syntax error.
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60)
# load best model weights
model.load_state_dict(best_model_wts)
return model, val_acc_history