RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed)

I have read other threads related to this problem, but I still cannot figure out what is triggering the error in my code or how to fix it. In my training script I am using a contrastive-loss proxy: the intention is to pull the embedding of an anchor tensor towards the embeddings at the indices given by positive_samples, which come from the nbrs dict.

I get the error:
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
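
From the other threads, my understanding is that this error shows up whenever backward() runs a second time through a graph whose saved tensors were already freed, e.g.:

    import torch

    x = torch.randn(3, requires_grad=True)
    y = (x * 2).sum()
    y.backward()  # saved tensors of the graph are freed here
    y.backward()  # RuntimeError: Trying to backward through the graph a second time

What I cannot see is where my code ends up calling backward() through the same graph twice.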

My code is as follows:

    # Imports used below (the full script defines these earlier; I assume
    # sklearn's shuffle here, since the return value is used)
    import pandas as pd
    import torch
    from sklearn.utils import shuffle

    prevs = []
    i = 0
    train_preds = []
    test_preds = []
    train_labls = []
    test_labls = []
    optimizer.zero_grad()
   
   for t in range(89):
        labels_t = labels[labels['timestamp'] == t]
        labels_l = P2id.merge(labels_t, on='user_id', how='left')
        labels_l = labels_l.dropna()
        labs = torch.tensor(labels_l['label'].to_numpy()).to(device)

        I_MD = torch.load('matrices/d_incidence_matrix_' + str(t) + '.pt')
        I_MD = I_MD.type(torch.FloatTensor).to(device)
        I_MM = torch.load('matrices/m_incidence_matrix_' + str(t) + '.pt')
        I_MM = I_MM.type(torch.FloatTensor).to(device)
        I_MR = torch.load('matrices/r_incidence_matrix_' + str(t) + '.pt')
        I_MR = I_MR.type(torch.FloatTensor).to(device)

        ft = feats[t].type(torch.FloatTensor).to(device)
       
        D_HE = model_spatial.step1(I_MD, ft)
        M_HE = model_spatial.step1(I_MM, ft)
        R_HE = model_spatial.step1(I_MR, ft)
        X_HE = model_spatial.step2(D_HE, M_HE, R_HE)
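        # Note: D_HE/M_HE/R_HE/X_HE all come from the same forward pass of
        # model_spatial, so every row of X_HE shares one autograd graph.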
        if t > 2:
            path = 'neighbours/ts_' + str(t) + '.csv'
            df = pd.read_csv(path)
            nbrs = {}
            for col in df.columns:
                nb = df[col].dropna().astype(int).tolist()
                if len(nb) > 0:
                    nbrs[int(col)] = nb
            simclr_dataset = SimCLRDataset(X_HE, nbrs)
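            # nbrs maps a node index to the list of its neighbour (positive) indices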
            for key, value in nbrs.items():
                anchor_samples = X_HE[key]
                positive_samples = X_HE[nbrs[key]]
                neg_len = 2 * len(nbrs[key])

                # Select negative samples randomly from the rest of the data
                negative_indices = [i for i in range(len(X_HE)) if i != key]
                negative_indices = shuffle(negative_indices)
                negative_indices = negative_indices[:neg_len]
                negative_samples = X_HE[negative_indices]

                negative_samples.detach_()
                positive_samples.detach_()
                cont_loss = torch.sum(torch.exp(torch.cosine_similarity(anchor_samples, positive_samples))) / torch.sum(torch.exp(torch.cosine_similarity(anchor_samples, negative_samples)))
                print(cont_loss)
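                # Is the problem here? Every cont_loss points back into the same
                # X_HE graph, so maybe the first backward() frees the saved
                # tensors that the next iteration's backward() still needs.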
                cont_loss.backward()
                optimizer.step()
                anchor_samples.detach()
                negative_samples.detach_()
                positive_samples.detach_()
                X_HE[key] = anchor_samples
                print(X_HE)
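
One restructuring I am considering is to accumulate the loss over all anchors and call backward() only once per timestep, roughly like the sketch below (same names as above; positives/negatives detached as in my current code, so gradients only flow through the anchor):

    # Rough sketch: one backward()/step() per timestep instead of per anchor,
    # so the shared graph behind X_HE is only traversed once.
    optimizer.zero_grad()
    total_loss = 0.
    for key in nbrs:
        anchor_samples = X_HE[key]
        positive_samples = X_HE[nbrs[key]].detach()
        neg_len = 2 * len(nbrs[key])
        negative_indices = shuffle([i for i in range(len(X_HE)) if i != key])[:neg_len]
        negative_samples = X_HE[negative_indices].detach()
        total_loss = total_loss + (
            torch.sum(torch.exp(torch.cosine_similarity(anchor_samples, positive_samples)))
            / torch.sum(torch.exp(torch.cosine_similarity(anchor_samples, negative_samples)))
        )
    total_loss.backward()  # single backward through the shared graph
    optimizer.step()

Is this the right way to avoid the error, or should I be using cont_loss.backward(retain_graph=True) instead? My worry with retain_graph=True is that optimizer.step() updates the weights in place between the backward calls, so I am not sure the retained graph would still be valid.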