Hi everyone
I’m struggling with the triplet loss convergence. I’m trying to do a face verification (1:1 problem) with a minimum computer calculation (since I don’t have GPU).
So I’m using the facenet-pytorch model InceptionResnetV1 pretrained with vggface2 (casia-webface gives the same results).
I created a dataset with anchors, positives and negatives samples and I unfreezed the last linear layer (~900k parameters).
The training beginning well but the loss is completely stuck… After some investigations, it seems the loss is stuck at the value alpha (the margin of the Pytorch Triplet Loss)…
If we look at the loss equation, it says
max[ L2norm(f(A)-f(P)) - L2norm(f(A)-f(N)) + alpha, 0 ]
So it seems the condition below is always verified, which is weird…
- L2norm(f(A)-f(P)) = L2norm(f(A)-f(N))
I’ve tried many combinations (changing lr, release more layers…) but the loss still has the same behavior…
I’m working on Google Collab with around 8000 images (4k positives/anchors and 4k negatives)
Could someone help me on this please ?
class SiameseDataset2(Dataset):
def __init__(self, list_PIL_positive, list_PIL_negative, val_stride = 0, isValSet_bool = None, Transform=False, Normalize=False, mean=None, std=None):
self.Transform = Transform
self.Normalize = Normalize
self.mean = mean
self.std = std
self.A = random.sample(list_PIL_positive, len(list_PIL_positive))
self.P = random.sample(list_PIL_positive, len(list_PIL_positive))
self.N = random.sample(list_PIL_negative, len(list_PIL_negative))
self.PN = self.P + self.N
if isValSet_bool:
assert val_stride > 0, val_stride
self.A = self.A[::val_stride]
self.P = self.P[::val_stride]
self.N = self.N[::val_stride]
elif val_stride > 0:
del self.A[::val_stride]
del self.P[::val_stride]
del self.N[::val_stride]
def preprocess(self, img_PIL):
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((160,160)),
torchvision.transforms.ToTensor()])
if self.Normalize:
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((160,160)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize(mean=self.mean, std=self.std)])
img = transform(img_PIL)
return img
def __len__(self):
return len(self.P)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
PIL_imageA = self.A[idx]
PIL_imageP = random.choice(self.PN)
PIL_imageN = random.choice(self.PN)
if self.Transform:
imageA = self.preprocess(PIL_imageA)
imageP = self.preprocess(PIL_imageP)
imageN = self.preprocess(PIL_imageN)
return imageA, imageP, imageN
else :
return PIL_imageA, PIL_imageP, PIL_imageN
BATCH_SIZE = 32
siameseDataset_train = SiameseDataset2(PIL_imgs['PIL_positif'], PIL_imgs['PIL_negatif'], val_stride=10, isValSet_bool=False, Transform=True, Normalize=False)
siameseDataset_test = SiameseDataset2(PIL_imgs['PIL_positif'], PIL_imgs['PIL_negatif'], val_stride=10, isValSet_bool=True, Transform=True, Normalize=False)
train_dataloader = DataLoader(siameseDataset_train, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(siameseDataset_test, batch_size=BATCH_SIZE, shuffle=True)
model = InceptionResnetV1(pretrained='vggface2')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 3
model = InceptionResnetV1(pretrained='vggface2')
for param in model.parameters():
param.requires_grad = False
model.last_linear.weight.requires_grad = True
def model_loop(model, epochs, trainloader, validloader, batch_size, anchor_img_, optimizer, triplet_loss, device):
model.to(device)
train_loss_list = []
valid_loss_list = []
size_train = len(trainloader.dataset)
size_test = len(validloader.dataset)
last_batch_size_train = size_train % batch_size
last_batch_size_test = size_test % batch_size
for epoch in range(num_epochs):
print(f"Epoch {epoch+1} on {device} \n-------------------------------")
train_loss = 0.0
model.train()
for batch, (anch, pos, neg) in enumerate(trainloader):
# Transfer Data to GPU if available
anch, pos, neg = anch.to(device), pos.to(device), neg.to(device)
# Clear the gradients
optimizer.zero_grad()
# Make prediction & compute the mini-batch training loss
anch_embedding = model(anch)
pos_embedding = model(pos)
neg_embedding = model(neg)
anch_embedding = anch_embedding / torch.norm(anch_embedding)
pos_embedding = pos_embedding / torch.norm(pos_embedding)
neg_embedding = neg_embedding / torch.norm(neg_embedding)
loss = triplet_loss(anch_embedding, pos_embedding, neg_embedding)
# Compute the gradients
loss.backward()
# Update Weights
optimizer.step()
# Aggregate mini-batch training losses
train_loss += loss.item()
train_loss_list.append(train_loss)
if batch == 0 or batch%10 == 0:
loss, current = loss.item(), (batch+1) * len(pos)
if len(pos) < batch_size:
current = (batch) * batch_size + len(pos)
print(f"mini-batch loss for training : {loss:>7f} [{current:>5d}/{size_train:>5d}]")
# Compute the global training loss as the mean of the mini-batch training losses
# print(f"Training loss for epoch {Epoch+1} = {train_loss/size_train}")
valid_loss = 0.0
model.eval()
# Test part : no gradient update
with torch.no_grad():
for batch, (anch, pos, neg) in enumerate(validloader):
# Transfer Data to GPU if available
anch, pos, neg = anch.to(device), pos.to(device), neg.to(device)
anch_embedding = model(anch)
pos_embedding = model(pos)
neg_embedding = model(neg)
anch_embedding = anch_embedding / torch.norm(anch_embedding)
pos_embedding = pos_embedding / torch.norm(pos_embedding)
neg_embedding = neg_embedding / torch.norm(neg_embedding)
loss = triplet_loss(anch_embedding, pos_embedding, neg_embedding)
# Calculate Loss
valid_loss += loss.item()
valid_loss_list.append(valid_loss)
if batch == 0 or batch%1 == 0:
loss, current = loss.item(), (batch+1) * len(pos)
if len(pos) < batch_size:
current = (batch) * batch_size + len(pos)
print(f"mini-batch loss for validation : {loss:>7f} [{current:>5d}/{size_test:>5d}]")
# Compute the global training & validation loss as the mean of the mini-batch losses
train_loss /= len(trainloader)
valid_loss /= len(validloader)
print(f"--Fin Epoch {epoch+1}/{epochs} \n Training Loss: {train_loss:>7f} \n Validation Loss: {valid_loss:>7f}" )
print('\n')
return train_loss_list, valid_loss_list
train_loss, valid_loss = model_loop(model = model, model2=model2.to(device),
epochs = num_epochs,
trainloader = train_dataloader,
validloader = val_dataloader,
batch_size = BATCH_SIZE,
anchor_img_ = anchor_t,
optimizer = optimizer,
triplet_loss = nn.TripletMarginLoss(alpha=0.2),
device = device)