Custom collate_fn DataLoader

I am trying to build a custom dataset and dataloader. The goal is to modify the default dataloader so that, along with each sample, I can also load a predefined list of related (similar) samples. These lists have different lengths per sample (they can also be empty). I then want to compute a pairwise similarity-based loss between each sample and every sample in its list, and add that to a standard supervised MSE loss.
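In rough terms, the per-sample loss I am after looks like this (a minimal sketch; the helper name `pairwise_cliff_loss` and its arguments are illustrative, not from my actual code):

```
import torch
import torch.nn.functional as F

def pairwise_cliff_loss(anchor_embed, mate_embeds, y_anchor, y_mates):
    # Similarity term of one sample (the anchor) against each of its mates:
    # 1 - cosine similarity, offset by the difference in activity values.
    loss = torch.zeros(1)
    for mate_embed, y_mate in zip(mate_embeds, y_mates):
        loss = loss + (1 - F.cosine_similarity(anchor_embed, mate_embed, dim=-1)
                       - (y_anchor - y_mate))
    return loss
```

This is how I am trying to build the dataset and collate function: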

```
import torch
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data

# Placeholder for samples whose list of similar samples is empty.
# Note that edge_index must have shape (2, num_edges) and dtype long.
null_graph = Data(x=torch.zeros((1, 1)),
                  edge_index=torch.zeros((2, 1), dtype=torch.long),
                  edge_attr=torch.zeros((1, 1)),
                  y=torch.zeros(1))

class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
   
class CustomDataset(Dataset):
    def __init__(self, dataframe, graphs):
        self.data = dataframe
        self.graphs = graphs

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # columns by position: 3 = activity, 4 = is_cliff flag, 5 = list of mate indices
        sample = self.data.iloc[idx]
        cmg = []     # graphs of the similar samples ("cliff mates")
        ac_cmg = []  # their activity values
        if sample[4] == 1:
            index_list = self.data['index'].to_list()
            for ind in sample[5]:
                if ind in index_list:
                    # look the mate up in self.data, not in a global dataframe
                    cmg.append(self.graphs[self.data.index[self.data['index'] == ind].item()])
                    ac_cmg.append(self.data.activity[self.data['index'] == ind].item())
        else:
            cmg.append(null_graph)
            ac_cmg.append(0)

        graph = self.graphs[idx]

        return {'y': torch.tensor(sample[3], dtype=torch.float32),
                'is_cliff': torch.tensor(sample[4], dtype=torch.int8),
                'py_cmg': cmg,
                'y_cm': torch.tensor(ac_cmg, dtype=torch.float32),
                'py_graphs': graph}
   
def custom_collate_fn(batch):
    y = torch.stack([sample['y'] for sample in batch])
    is_cliff = torch.stack([sample['is_cliff'] for sample in batch])

    # Concatenate the anchor graphs into one disjoint batch graph
    x = torch.cat([sample['py_graphs'].x for sample in batch], dim=0)
    edge_index = torch.cat([sample['py_graphs'].edge_index for sample in batch], dim=1)
    edge_attribute = torch.cat([sample['py_graphs'].edge_attr for sample in batch], dim=0)
    # Node-to-graph assignment vector: node n belongs to graph assignment[n]
    assignment = torch.cat([torch.full((sample['py_graphs'].x.shape[0],), i, dtype=torch.int64)
                            for i, sample in enumerate(batch)])

    cliff_mates_num = [len(sample['py_cmg']) for sample in batch]

    # The mate graphs stay in ragged Python lists, since each sample has a
    # different (possibly zero) number of them
    x_cm = [graph.x for sample in batch for graph in sample['py_cmg']]
    # Each mate graph is fed to the model on its own, so its batch vector is all zeros
    assignment2 = [torch.zeros(g.shape[0], dtype=torch.int64) for g in x_cm]
    edge_index_cm = [graph.edge_index.to(torch.int64) for sample in batch for graph in sample['py_cmg']]
    edge_attribute_cm = [graph.edge_attr for sample in batch for graph in sample['py_cmg']]
    y_cm = [sample['y_cm'] for sample in batch]

    data = dotdict({'x': x, 'edge_index': edge_index, 'edge_attr': edge_attribute, 'batch': assignment})

    return dotdict({'y': y, 'cliff_mates_num': cliff_mates_num, 'data': data,
                    'x_cm': x_cm, 'edge_index_cm': edge_index_cm,
                    'edge_attr_cm': edge_attribute_cm, 'batch2': assignment2,
                    'y_cm': y_cm, 'is_cliff': is_cliff})
```
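
For context, this is roughly how I wire it together (a minimal sketch; `df`, `graphs`, and `bs` stand in for my training dataframe, list of PyG graphs, and batch size):

```
# Minimal wiring sketch; df, graphs, and bs are stand-ins for my
# training dataframe, list of PyG graphs, and batch size.
train_set = CustomDataset(df, graphs)
train_loader = DataLoader(train_set, batch_size=bs, shuffle=True,
                          collate_fn=custom_collate_fn)
```

The custom collate_fn is needed because the default collation cannot stack the ragged `py_cmg` lists into a single tensor.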

However, there seem to be problems with the dataloader related to tensor size mismatches, and this also causes the backward method not to work properly. Here is my training function:

```
import torch.nn.functional as F

def train(model, train_loader, optimizer, criterion, device='cpu'):
    # bs (the batch size) and list1 (the per-sample graph inputs) are globals
    model.train()
    total_loss = 0
    for batch_num, data in enumerate(train_loader):
        cliff_total = 0
        optimizer.zero_grad()
        pred, graph_embed = model(data.data)
        for i in range(bs):
            if data.is_cliff[i] == 1:
                print(bs * batch_num + i)
                for idx in range(data.cliff_mates_num[i]):
                    mate = dotdict({'x': data.x_cm[i + idx],
                                    'edge_index': data.edge_index_cm[i + idx],
                                    'edge_attr': data.edge_attr_cm[i + idx],
                                    'batch': data.batch2[i + idx],
                                    'y': data.y_cm[i][idx]})
                    _, cliff_embed = model(mate)
                    _, anchor_embed = model(list1[batch_num * bs + i])
                    cliff_loss = 1 - F.cosine_similarity(anchor_embed, cliff_embed) - (data.y[i] - mate.y)
                    cliff_total = cliff_total + cliff_loss
        supervised_loss = criterion(pred, data.y.unsqueeze(1))
        loss_combined = supervised_loss + cliff_total
        loss_combined.backward()
        total_loss += supervised_loss.item() * bs
        optimizer.step()
    return total_loss / len(train_loader.dataset)
```
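
For completeness, this is roughly how I invoke it (a minimal sketch; `MyGNN` and `num_epochs` are placeholders for my actual model class and epoch count):

```
import torch
import torch.nn as nn

model = MyGNN()  # hypothetical model returning (prediction, graph_embedding)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

for epoch in range(num_epochs):  # num_epochs is a placeholder
    epoch_loss = train(model, train_loader, optimizer, criterion)
```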