Hi, I’m trying to train a classifier on images that belong to 3 classes. Because a single image can belong to more than one class, I’m not sure which loss to train with and which metric to use to evaluate the model.
The output of my classifier is a sigmoid function on 3 neurons. My labels are arrays with three binary elements each, e.g. [[1, 1, 0], [0, 0, 0], [1, 0, 0]].
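To make the setup concrete, here is a minimal sketch with dummy tensors (the real model is a ResNet-style network; the values below are placeholders, not my actual data):

import torch
import torch.nn as nn

logits = torch.randn(4, 3)              # batch of 4 images, 3 classes
probs = torch.sigmoid(logits)           # one independent probability per class
targets = torch.tensor([[1., 1., 0.],   # an image can carry several labels at once
                        [0., 0., 0.],
                        [1., 0., 0.],
                        [0., 1., 1.]])
loss = nn.BCELoss()(probs, targets)     # binary cross entropy on the probabilities
print(loss.item())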
I decided to use the binary cross-entropy loss and mean average precision to evaluate my model’s predictions. However, while the training loss decreases (a little), the validation loss goes up during training, and the mean average precision of the predictions oscillates between 0.01 and 0.03. So I guess I am doing something wrong. Am I using the wrong loss?
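For the metric I’m calling sklearn’s average_precision_score. As far as I understand from the docs, its signature is average_precision_score(y_true, y_score), with the binary ground truth first and the raw (unthresholded) scores second, roughly like this:

from sklearn.metrics import average_precision_score
import numpy as np

y_true = np.array([1, 0, 1, 0])           # binary ground truth for one class
y_score = np.array([0.8, 0.3, 0.6, 0.4])  # raw sigmoid outputs, not thresholded
print(average_precision_score(y_true, y_score))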
This is my code for training and evaluation:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import average_precision_score

# ClfImg, flags, classes, device, trainloader and testloader are defined elsewhere

# load simple resnet classifier
net = ClfImg(flags, classes).to(flags.device)
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


def train_loop(m, epoch):
    running_loss = 0.0
    m.train()
    # for i, (inputs, labels) in tqdm(enumerate(trainloader, 0), total=len(trainloader)):
    for i, (inputs, labels) in enumerate(trainloader, 0):
        inputs, labels = inputs['PA'].to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = m(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
    return m


def eval_loop(m, epoch):
    running_loss = 0.0
    predictions = torch.Tensor()
    gts = torch.Tensor()
    m.eval()
    with torch.no_grad():
        # for i, (inputs, labels) in tqdm(enumerate(testloader, 0), total=len(testloader)):
        for i, (inputs, labels) in enumerate(testloader, 0):
            inputs, labels = inputs['PA'].to(device), labels.to(device)
            outputs = m(inputs)
            loss = criterion(outputs, labels)
            predictions = torch.cat((predictions, outputs.cpu()), 0)
            gts = torch.cat((gts, labels.cpu()), 0)
            running_loss += loss.item()
            if i % len(testloader) == len(testloader) - 1:    # print once at the end of the eval pass
                print('[%d, %5d] eval loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / len(testloader)))
                running_loss = 0.0

    for i in range(len(classes)):
        print(f'average precision score for label {classes[i]}:',
              average_precision_score((predictions[:, i].numpy().ravel() > 0.5) * 1,
                                      (gts[:, i].numpy().ravel() > 0.5) * 1))
    print('total average precision score: ',
          average_precision_score((predictions.numpy().ravel() > 0.5) * 1, (gts.numpy().ravel() > 0.5) * 1))


for epoch in range(5):
    net = train_loop(net, epoch)
    eval_loop(net, epoch)
which outputs this:
[1, 100] loss: 0.318
[1, 200] loss: 0.207
[1, 300] loss: 0.204
[1, 400] loss: 0.209
[1, 4] eval loss: 0.216
average precision score for label Lung Opacity: 0.02746288798920378
average precision score for label Pleural Effusion: 0.04696741854636591
/home/hendrik/miniconda3/envs/mimic/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:681: RuntimeWarning: invalid value encountered in true_divide
recall = tps / tps[-1]
average precision score for label Support Devices: nan
total average precision score: 0.02530319882546603
[2, 100] loss: 0.198
[2, 200] loss: 0.201
[2, 300] loss: 0.200
[2, 400] loss: 0.204
[2, 4] eval loss: 0.234
average precision score for label Lung Opacity: 0.012267206477732794
average precision score for label Pleural Effusion: 0.031197747455057396
average precision score for label Support Devices: nan
total average precision score: 0.014783662452835385
[3, 100] loss: 0.196
[3, 200] loss: 0.193
[3, 300] loss: 0.200
[3, 400] loss: 0.201
[3, 4] eval loss: 0.244
average precision score for label Lung Opacity: 0.019230769230769232
average precision score for label Pleural Effusion: 0.06400816856957207
average precision score for label Support Devices: nan
total average precision score: 0.024417337048915994
[4, 100] loss: 0.196
[4, 200] loss: 0.199
[4, 300] loss: 0.196
[4, 400] loss: 0.192
[4, 4] eval loss: 0.234
average precision score for label Lung Opacity: 0.012267206477732794
average precision score for label Pleural Effusion: 0.07982306192832508
average precision score for label Support Devices: nan
total average precision score: 0.030305291765393632
[5, 100] loss: 0.195
[5, 200] loss: 0.199
[5, 300] loss: 0.193
[5, 400] loss: 0.190
[5, 4] eval loss: 0.254
average precision score for label Lung Opacity: 0.01537883169462117
average precision score for label Pleural Effusion: 0.0471451355661882
average precision score for label Support Devices: nan
total average precision score: 0.01959257117151854