I’m currently working on a multi-label classification problem using a dataset containing images of Ocular Disease. I’ve written a custom PyTorch Dataset
implementation to create a loader that will, for every patient ID, return the left and the right image, as well as the associated 8D multi-hot target vector.
The following is my Dataset
class code:
class CustomDataset(torch.utils.data.Dataset):
    """Per-patient dataset: yields the (left, right) fundus image pair and
    the associated 8-dimensional multi-hot target vector.

    Parameters
    ----------
    IDs : iterable of str
        Patient IDs to include (one split of the full annotation sheet).
    data_dir : str
        Directory containing the image files. NOTE: filenames are appended
        by string concatenation, so this must end with a path separator.
    annotations : str
        Path to the Excel annotation file; expected to contain an 'ID'
        column, 'Left-Fundus'/'Right-Fundus' filename columns, and the
        eight label columns N, D, G, C, A, H, M, O.
    transforms : callable, optional
        Torchvision-style transform applied to each PIL image.
    """

    def __init__(self, IDs, data_dir, annotations, transforms=None):
        self.transforms = transforms
        self.annotations = annotations
        self.data_dir = data_dir
        # Read the first sheet of the Excel annotation file.
        mapper = pd.read_excel(self.annotations, sheet_name=0)
        mapper.ID = mapper.ID.astype(str)
        # 'ID' is the primary key; keep only the rows for this split.
        # .copy() detaches the subset from `mapper`, so the in-place
        # reset_index below cannot trigger a SettingWithCopyWarning.
        self.subset_df = mapper.loc[mapper['ID'].isin(IDs), :].copy()
        print("Found {} entries.".format(self.subset_df.shape[0]))
        self.subset_df.reset_index(drop=True, inplace=True)
        # Positional index -> patient ID lookup used by __getitem__.
        self.ix2id = dict(zip(self.subset_df.index, self.subset_df.ID))

    def __len__(self):
        # One sample per patient ID.
        return len(self.ix2id)

    def __getitem__(self, index):
        # Resolve the positional index to a patient ID, then fetch its row.
        sampled_id = self.ix2id[index]
        row = self.subset_df.loc[self.subset_df.ID == sampled_id, :]
        left_im_name = row['Left-Fundus'].values[0]
        right_im_name = row['Right-Fundus'].values[0]
        left_im = Image.open(self.data_dir + left_im_name, "r")
        right_im = Image.open(self.data_dir + right_im_name, "r")
        if self.transforms is not None:
            left_im = self.transforms(left_im)
            right_im = self.transforms(right_im)
        # Cast explicitly: pandas can return an object-dtype array here,
        # which the default collate_fn cannot stack into a tensor. float32
        # also matches the .float() cast applied to the target downstream.
        y = row[['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']].values.flatten().astype('float32')
        return (left_im, right_im), y
The class is instantiated with a list of IDs (split separately). The following is the instantiation:
# --- Transforms -------------------------------------------------------------
# Augmentation (random rotation) on the training split only; the validation
# pipeline is deterministic so evaluation results are reproducible.
train_transforms = transforms.Compose([
    transforms.RandomRotation(60),
    transforms.Resize((IM_HEIGHT, IM_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])
val_transforms = transforms.Compose([
    transforms.Resize((IM_HEIGHT, IM_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# --- Datasets & loaders -----------------------------------------------------
train = CustomDataset(train_split, data_dir='path_to_im', annotations='somepath', transforms=train_transforms)
test = CustomDataset(test_split, data_dir='path_to_im', annotations='somepath', transforms=val_transforms)
train_dataloader = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True, num_workers=5)
# shuffle=False for evaluation: ordering does not affect the metrics, and a
# deterministic order makes debugging (e.g. locating a NaN batch) far easier.
test_dataloader = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=False, num_workers=5)
The model training runs without a hitch. During evaluation, however, the model returns NaNs. The following is the eval() code:
# NOTE: `eval` shadows the Python builtin; consider renaming to `evaluate`
# (kept as-is here so existing callers keep working).
def eval(epoch_num):
    """Run one full evaluation pass over ``test_dataloader``.

    Returns the dict produced by ``custom_Metrics`` with the summed
    evaluation loss added under the ``'loss'`` key.

    Relies on the globals ``model``, ``criterion``, ``device``,
    ``test_dataloader`` and ``custom_Metrics``. ``epoch_num`` is
    currently unused.
    """
    model.eval()
    epoch_loss = 0.
    y_true = []
    y_pred = []
    # no_grad around the entire loop: no autograd graph is built for the
    # forward pass or the loss, saving memory and time. The deprecated
    # torch.autograd.Variable wrappers are gone -- plain tensors suffice.
    with torch.no_grad():
        for ix, ((left_im, right_im), target) in enumerate(test_dataloader):
            # Collect ground truth as numpy before moving target to GPU.
            y_true.append(target.numpy())
            # Push on GPU.
            left_im = left_im.to(device)
            right_im = right_im.to(device)
            target = target.to(device).float()
            # Pass both image batches through.
            preds = model(left_im, right_im)
            loss = criterion(preds, target)
            epoch_loss += loss.item()
            y_pred.append(preds.cpu().numpy())
    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)
    print("Eval loss: {}".format(epoch_loss / len(test_dataloader)))
    metrics = custom_Metrics(gt_data=y_true, pr_data=y_pred)
    print("Eval metrics: {}".format(metrics))
    # NOTE(review): this stores the *summed* loss while the print above
    # reports the per-batch mean -- confirm which one downstream expects.
    metrics['loss'] = epoch_loss
    return metrics
What seems to be wrong here?
The model is a simple CNN that passes both images through a ‘backbone’ (for shared parameters), concatenates the latent vectors, and passes it through a couple of Linear layers. I’m using the BCEWithLogitsLoss criterion and an Adam optimizer and an initial learning rate of 5e-4.