Hello all,
Similar issue to RuntimeError: element 0 of variables does not require grad and does not have a grad_fn.
I am training a pair of sentence-transformer models (learning a cosine similarity between two embeddings) using the code below.
My issue: the loss never changes, and I suspect it is not backpropagated (the gradients of the optimizer's parameters are all None). Something must be frozen here, but I really don't see what.
from io import BytesIO

import requests
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from sentence_transformers import SentenceTransformer

class CosineLoss(nn.Module):
    """MSE loss between the cosine similarity of two embeddings and a label."""
    def __init__(self):
        super(CosineLoss, self).__init__()
        self.loss = nn.MSELoss(reduction='sum')

    def forward(self, output1, output2, label):
        cos_sim = F.cosine_similarity(output1, output2)
        final_loss = self.loss(cos_sim, label)
        return final_loss

class TestModel(nn.Module):
    """Image encoder (modelA) and text encoder (modelB)."""
    def __init__(self):
        super().__init__()
        self.modelA = SentenceTransformer('clip-ViT-B-32')
        self.modelB = SentenceTransformer('clip-ViT-B-32-multilingual-v1')

    def embed_A(self, inputA):
        # inputA: list of image URLs; download each image and embed it
        x = self.modelA.encode(
            [Image.open(BytesIO(requests.get(filepath).content)).convert('RGB') for filepath in inputA],
            convert_to_tensor=True)
        return x

    def embed_B(self, inputB):
        # inputB: list of sentences
        x = self.modelB.encode(inputB, convert_to_tensor=True)
        return x

    def forward(self, inputA, inputB):
        x1 = self.embed_A(inputA)
        x2 = self.embed_B(inputB)
        return x1, x2

# lr, betas, eps, wd, maxepochs, device and the datamodule dm are defined elsewhere
mymodel = TestModel()
criterion = CosineLoss()
optimizer = optim.Adam(mymodel.parameters(), lr=lr, betas=betas, eps=eps, weight_decay=wd)

# Make sure no parameter is frozen
for param in mymodel.parameters():
    param.requires_grad = True

for epoch in range(maxepochs):
    print('Epoch:', epoch)
    mymodel.train()
    for batch in dm.train_dataloader():
        mymodel.zero_grad()
        list_A, list_B, ground_truth = batch
        outputA, outputB = mymodel(list_A, list_B)
        ground_truth = ground_truth.to(device)
        optimizer.zero_grad()
        loss = criterion(outputA, outputB, ground_truth)
        loss.requires_grad = True  # without this line I get the RuntimeError quoted below
        loss.backward()
        optimizer.step()
        print(optimizer.param_groups[0]['params'][0].grad)
    print("Saving model for epoch:", epoch)
    print("Total Loss for Epoch number {} is {}".format(epoch, loss))
The output of that code is:
Epoch: 0
None
None
None
None
None
Saving model for epoch: 0
Total Loss for Epoch number 0 is 0.4687237625608282
Epoch: 1
None
None
None
None
None
Saving model for epoch: 1
Total Loss for Epoch number 1 is 0.4687237625608282
Epoch: 2
None
None
None
None
None
Saving model for epoch: 2
Total Loss for Epoch number 2 is 0.4687237625608282
I believe the line loss.requires_grad = True is the issue, but removing it just brings back the error message "element 0 of tensors does not require grad and does not have a grad_fn".
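In case it helps with the diagnosis, here is a minimal check (a sketch only, reusing mymodel and one batch from the dataloader above) that should reveal whether the embeddings are attached to the autograd graph at all:

# Sketch: inspect whether the embeddings carry gradient information.
# Assumes mymodel and one (list_A, list_B, ground_truth) batch from above.
outputA, outputB = mymodel(list_A, list_B)
print(outputA.requires_grad, outputA.grad_fn)  # False / None would mean the
print(outputB.requires_grad, outputB.grad_fn)  # graph is already cut here

If both tensors come back with requires_grad == False and grad_fn None, the graph is broken before the loss is even computed, which would explain why setting loss.requires_grad = True only silences the error without making any gradients flow back to the parameters.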
I would love to get your help on this.
Thank you very much.
Belhal