I’m trying to solve a multi-label classification problem: Large-scale Video Classification with Convolutional Neural Networks.
Since every video can have more than one label, I treat this problem as a multi-label classification problem; thus, as suggested in other issues, the best loss function to use is nn.BCEWithLogitsLoss(pos_weight=torch.ones(n_class).to(device)).
The problem is that, right after the first training iteration, my loss becomes zero and stays at zero from that point on.
Furthermore, the values in the model's output tensors become increasingly negative as the training process progresses.
My training and evaluation procedure is the following:
class CNN_Architecture():
    """Training / evaluation harness around a ``torch.nn.Module``.

    The dataloaders are expected to yield 3-tuples ``(images, labels, paths)``;
    the third element is ignored. ``accuracy_fn(outputs, labels)`` must return
    an object with ``.item()`` (its value is reported as "f1").
    """

    def __init__(self, model: torch.nn.Module, train_dataloader: torch.utils.data.DataLoader,
                 val_dataloader: torch.utils.data.DataLoader, optimizer: torch.optim.Optimizer,
                 loss_fn: torch.nn.Module, accuracy_fn,
                 scheduler: torch.optim.lr_scheduler.ReduceLROnPlateau,
                 device: torch.device, save_check = False, max_grad_norm = None):
        """Store the training components and move ``model`` to ``device``.

        Args:
            model: Network to train; moved to ``device`` here.
            train_dataloader: Batches used by :meth:`fit`.
            val_dataloader: Batches used for validation at the end of each epoch.
            optimizer: Optimizer that updates ``model``'s parameters.
            loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
            accuracy_fn: Callable ``accuracy_fn(outputs, labels)`` returning a tensor.
            scheduler: Stepped once per epoch with the mean training loss, so it
                must accept a metric (presumably ``ReduceLROnPlateau`` - confirm
                against the caller).
            device: Device on which the model and every batch are placed.
            save_check: When true, :meth:`fit` writes a checkpoint whenever
                both the training loss and the training f1 improve.
            max_grad_norm: Optional upper bound for the global gradient norm.
                ``None`` (the default) disables clipping.
        """
        self.model = model.to(device)
        self.optimizer = optimizer
        self.train_dataloader = train_dataloader
        self.loss_fn = loss_fn
        self.val_dataloader = val_dataloader
        self.accuracy_fn = accuracy_fn
        self.scheduler = scheduler
        self.device = device
        self.save_check = save_check
        self.max_grad_norm = max_grad_norm

    def __save_checkpoint(self, train_loss, train_f1, epoch):
        """Write model/optimizer state plus the given metrics to disk.

        The file is named ``<model.typ>_checkpoint.pth.tar`` and is written
        relative to the current working directory.
        """
        filename = f'{self.model.typ}_checkpoint.pth.tar'
        print('=> Saving Checkpoint')
        checkpoint = {'state_dict': self.model.state_dict(), 'optimizer': self.optimizer.state_dict(), 'train_loss': train_loss, 'train_f1': train_f1, 'epoch': epoch}
        torch.save(checkpoint, filename)
        print(' DONE\n')

    def __load_checkpoint(self, checkpoint):
        """Restore model and optimizer state from a loaded checkpoint mapping.

        ``checkpoint`` must provide the ``'state_dict'`` and ``'optimizer'`` keys.
        """
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])

    def _display_name(self):
        """Return the model's class name, extended with the stream type for ``NoMultiresCNN``."""
        model_name = self.model.__class__.__name__
        if model_name == 'NoMultiresCNN':
            model_name = f'{model_name} - Stream Type: {self.model.CNN.stream_type}'
        return model_name

    def _batch_loss(self, images, labels):
        """Run the model on one batch and return ``(outputs, labels_on_device, loss)``."""
        images, labels = images.to(self.device), labels.to(self.device)
        outputs = self.model(images)
        loss_targets = labels
        # Floating-point targets are aligned with the logits' dtype: float64
        # labels would otherwise promote the loss (and its backward pass) to
        # float64. Integer targets (e.g. class indices) are left untouched.
        if torch.is_tensor(outputs) and labels.is_floating_point() and labels.dtype != outputs.dtype:
            loss_targets = labels.to(outputs.dtype)
        loss = self.loss_fn(outputs, loss_targets)
        return outputs, labels, loss

    def _train_step(self, images, labels):
        """Perform one optimisation step; return ``(detached loss tensor, metric value)``."""
        # zero_grad -> backward -> step
        self.optimizer.zero_grad()
        outputs, labels, loss = self._batch_loss(images, labels)
        loss.backward()
        if self.max_grad_norm is not None:
            # Bound the update size so one bad batch cannot blow up the weights.
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
        self.optimizer.step()
        f1 = self.accuracy_fn(outputs, labels).item()
        return loss.detach(), f1

    def evaluate(self, val_dataloader: torch.utils.data.DataLoader, epoch = 1, epochs = 1):
        """Compute the mean loss and mean metric of the model over ``val_dataloader``.

        Args:
            val_dataloader: Batches to evaluate on.
            epoch: Zero-based epoch index, used only for the progress-bar text.
            epochs: Total number of epochs, used only for the progress-bar text.

        Returns:
            Dict with keys ``'model_name'``, ``'model_loss'`` and ``'model_f1'``;
            the last two are averages over the batches.
        """
        val_loss, val_f1 = 0, 0
        # Evaluation phase
        self.model.eval()
        pbar = tqdm(enumerate(val_dataloader), total = len(val_dataloader), leave=False)
        with torch.inference_mode():
            for batch_idx, (images, labels, _) in pbar:  # there is a _ to ignore the paths
                outputs, labels, loss = self._batch_loss(images, labels)
                batch_loss = loss.item()
                batch_f1 = self.accuracy_fn(outputs, labels).item()
                val_loss += batch_loss
                val_f1 += batch_f1
                pbar.set_description(f'{self.model.__class__.__name__} EVALUATION Epoch [{epoch + 1} / {epochs}]')
                # Plain floats, as in fit(), rather than tensors.
                pbar.set_postfix(loss = batch_loss, f1 = batch_f1)
        val_loss /= len(val_dataloader)  # mean of the per-batch losses
        val_f1 /= len(val_dataloader)  # mean of the per-batch f1 values
        return {'model_name': self._display_name(),  # only works when model was created with a class
                'model_loss': val_loss,
                'model_f1': val_f1}

    def fit(self, epochs: int):
        """Train for ``epochs`` epochs, validating after each one.

        After every epoch the scheduler is stepped with the mean training loss
        and, when ``save_check`` is set, a checkpoint is written if both the
        training loss and the training f1 improved on their best values so far.

        Returns:
            Dict with ``'model_name'`` and ``'results'``; ``'results'`` maps
            ``'train_loss'``, ``'train_f1'``, ``'val_loss'`` and ``'val_f1'``
            to per-epoch lists.
        """
        results = {'train_loss': [], 'train_f1': [], 'val_loss': [], 'val_f1': []}
        best_train_loss, best_train_f1 = float('inf'), float('-inf')
        # Resolved up front so the return value is defined even when epochs == 0.
        model_name = self._display_name()
        for epoch in range(epochs):
            train_loss, train_f1 = 0, 0
            # Training phase
            self.model.train()
            pbar = tqdm(enumerate(self.train_dataloader), total = len(self.train_dataloader), leave=False)
            for batch_idx, (images, labels, _) in pbar:  # there is a _ to ignore the paths
                loss, f1 = self._train_step(images, labels)
                batch_loss = loss.item()
                train_loss += batch_loss
                train_f1 += f1
                pbar.set_description(f'{model_name} TRAIN Epoch [{epoch + 1} / {epochs}]')
                pbar.set_postfix(loss = batch_loss, f1 = f1)
            train_loss /= len(self.train_dataloader)
            train_f1 /= len(self.train_dataloader)
            self.scheduler.step(train_loss)
            if self.save_check and train_loss < best_train_loss and train_f1 > best_train_f1:
                self.__save_checkpoint(train_loss, train_f1, epoch + 1)
                best_train_loss, best_train_f1 = train_loss, train_f1
            # Validation phase; read by key rather than relying on dict order.
            val_metrics = self.evaluate(self.val_dataloader, epoch, epochs)
            model_name = val_metrics['model_name']
            val_loss, val_f1 = val_metrics['model_loss'], val_metrics['model_f1']
            results['train_loss'].append(train_loss)
            results['train_f1'].append(train_f1)
            results['val_loss'].append(val_loss)
            results['val_f1'].append(val_f1)
            print('Epoch [{}], train_loss: {:.4f}, train_f1: {:.4f}, val_loss: {:.4f}, val_f1: {:.4f} \n'.format(
                epoch + 1, train_loss, train_f1, val_loss, val_f1))
        return {'model_name': model_name, 'results': results}

    # still not used; displays the top-k predicted labels for a single image
    def evaluate_and_plot_image(self, image_path, class_names, transform=None,
                                mean=(0.4588, 0.4588, 0.4588), std=(0.4588, 0.4588, 0.4588), top_k=3):
        """Predict the ``top_k`` labels of one image and plot it with them in the title.

        Args:
            image_path: Path of the image to open.
            class_names: True labels, shown verbatim in the title.
            transform: Optional callable applied to the image; when ``None``,
                ``ToTensor`` followed by ``Normalize(mean, std)`` is used.
            mean: Per-channel mean for the default normalisation.
            std: Per-channel standard deviation for the default normalisation.
            top_k: Number of highest-scoring labels to display (capped at the
                number of model outputs).
        """
        img = Image.open(image_path)
        if transform is not None:
            image_transform = transform
        else:
            image_transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=mean, std=std),
            ])
        self.model.to(self.device)
        self.model.eval()
        with torch.inference_mode():
            transformed_image = image_transform(img).unsqueeze(dim=0)
            logits = self.model(transformed_image.to(self.device))
        # Independent per-label probabilities (multi-label setting); softmax
        # would force the labels to compete for a total probability of 1.
        probs = torch.sigmoid(logits)
        k = min(top_k, probs.shape[1])
        # topk returns (values, indices); only the indices address LABELS.
        top_probs, top_indices = torch.topk(probs, k=k, dim=1)
        label_pred_names = [LABELS[idx] for idx in top_indices[0].tolist()]
        pred_probs = top_probs[0].tolist()
        plt.figure()
        plt.imshow(img)
        plt.title(f"Model: {self.model.__class__.__name__} | True Labels: {class_names} | Pred: {label_pred_names} | Prob: {pred_probs}")
        plt.axis(False)
Here is a brief printout of the shapes and values of the ‘output’ and ‘loss’:
0 tensor([[-0.1324, -0.3337, -0.0672, ..., 0.0807, -0.1942, 0.0604],
[ 0.2858, 0.1114, -0.0895, ..., 0.0215, -0.1544, -0.0857],
[ 0.2030, 0.2486, 0.0848, ..., 0.2918, -0.1791, 0.1380],
...,
[ 0.0056, 0.3312, -0.0370, ..., 0.1495, -0.1622, 0.1832],
[ 0.4067, -0.1880, 0.0375, ..., 0.3299, -0.2620, 0.2810],
[ 0.2507, 0.1005, -0.1742, ..., 0.1452, -0.2513, 0.1491]],
device='cuda:0', grad_fn=<AddmmBackward0>)
0 tensor(0.6992, device='cuda:0', dtype=torch.float64,
grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
1 tensor([[-6.8007e+14, -6.7884e+14, -6.8033e+14, ..., -6.8114e+14,
-6.8070e+14, -6.8010e+14],
[-7.7220e+14, -7.7084e+14, -7.7253e+14, ..., -7.7344e+14,
-7.7293e+14, -7.7225e+14],
[-6.4728e+14, -6.4614e+14, -6.4758e+14, ..., -6.4832e+14,
-6.4791e+14, -6.4732e+14],
...,
[-8.2835e+14, -8.2691e+14, -8.2867e+14, ..., -8.2967e+14,
-8.2914e+14, -8.2844e+14],
[-8.5051e+14, -8.4895e+14, -8.5082e+14, ..., -8.5183e+14,
-8.5130e+14, -8.5055e+14],
[-8.3222e+14, -8.3071e+14, -8.3249e+14, ..., -8.3345e+14,
-8.3296e+14, -8.3224e+14]], device='cuda:0', grad_fn=<AddmmBackward0>)
1 tensor(0., device='cuda:0', dtype=torch.float64,
grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
2 tensor([[-1.9041e+16, -1.9020e+16, -1.9046e+16, ..., -1.9059e+16,
-1.9051e+16, -1.9042e+16],
[-1.8066e+16, -1.8047e+16, -1.8070e+16, ..., -1.8083e+16,
-1.8076e+16, -1.8067e+16],
[-1.8258e+16, -1.8238e+16, -1.8263e+16, ..., -1.8276e+16,
-1.8268e+16, -1.8259e+16],
...,
[-1.9322e+16, -1.9301e+16, -1.9327e+16, ..., -1.9340e+16,
-1.9332e+16, -1.9323e+16],
[-1.7275e+16, -1.7256e+16, -1.7278e+16, ..., -1.7291e+16,
-1.7284e+16, -1.7276e+16],
[-1.7924e+16, -1.7904e+16, -1.7927e+16, ..., -1.7940e+16,
-1.7934e+16, -1.7924e+16]], device='cuda:0', grad_fn=<AddmmBackward0>)
2 tensor(0., device='cuda:0', dtype=torch.float64,
grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
3 tensor([[-9.5671e+16, -9.5592e+16, -9.5688e+16, ..., -9.5739e+16,
-9.5712e+16, -9.5673e+16],
[-9.0492e+16, -9.0418e+16, -9.0509e+16, ..., -9.0556e+16,
-9.0530e+16, -9.0494e+16],
[-8.8861e+16, -8.8788e+16, -8.8876e+16, ..., -8.8924e+16,
-8.8898e+16, -8.8863e+16],
...,
[-9.2860e+16, -9.2783e+16, -9.2874e+16, ..., -9.2927e+16,
-9.2897e+16, -9.2864e+16],
[-8.9120e+16, -8.9045e+16, -8.9134e+16, ..., -8.9181e+16,
-8.9152e+16, -8.9119e+16],
[-9.1919e+16, -9.1839e+16, -9.1932e+16, ..., -9.1981e+16,
-9.1954e+16, -9.1920e+16]], device='cuda:0', grad_fn=<AddmmBackward0>)
3 tensor(0., device='cuda:0', dtype=torch.float64,
grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)