Autograd for gradient ascent: One of the variables needed for gradient computation has been modified by an inplace operation

Hi Guys,
I’m trying to use autograd to compute the gradient of a complicated likelihood function that I need for gradient ascent, but I keep getting the runtime error that an in-place operation has been performed. I tried using .clone() (assuming I used it correctly), but to no avail. Here is my code:

import numpy as np
import torch
from torch.autograd import Variable

def like_gradient_mu(dim,alpha,beta,sigma,nu,e_p,e_k,Inv_Cov_p,Inv_Cov_k,LDS_p,LDS_k,x_prev,x_curr,u_prev,IQ,NumParents,NumKids,PerParent):
    
    #Variables we don't need gradient of 
    IC_p=Variable(torch.from_numpy(Inv_Cov_p).float()) #Covariance matrices of parents
    IC_k=Variable(torch.from_numpy(Inv_Cov_k).float()) #Covariance matrices of kids
    Theta_p=Variable(torch.from_numpy(LDS_p).float()) #LDS of parents
    Theta_k=Variable(torch.from_numpy(LDS_k).float()) #LDS of kids
    IQ=Variable(torch.from_numpy(IQ).float()) #Covariance matrix of state noise
    x_p=Variable(torch.from_numpy(x_prev).float()) #x_{t-1}
    x_c=Variable(torch.from_numpy(x_curr).float()) #x_t
    u=Variable(torch.from_numpy(u_prev).float()) #u=[x_{t-1};1]
    ep=Variable(torch.from_numpy(np.matrix(e_p)).float())
    ek=Variable(torch.from_numpy(np.matrix(e_k)).float())
    
    #Variables that we do need the gradients of 
    a=Variable(torch.from_numpy(np.matrix(alpha)).float(),requires_grad=True)
    a.retain_grad()
    b=Variable(torch.from_numpy(np.matrix(beta)).float(),requires_grad=True)
    b.retain_grad()
    s=Variable(torch.from_numpy(np.matrix(sigma)).float(),requires_grad=True)
    s.retain_grad()
    n=Variable(torch.from_numpy(np.matrix(nu)).float(),requires_grad=True)
    n.retain_grad()
    
    #Used to store weights
    weights_parents=Variable(torch.from_numpy(np.matrix(np.zeros(NumParents)).T).float())
    weights_kids=Variable(torch.from_numpy(np.matrix(np.zeros(NumKids)).T).float())
    x_temp_parents=Variable(torch.from_numpy(np.matrix(np.zeros((dim,NumParents)))).float())
    x_temp_kids=Variable(torch.from_numpy(np.matrix(np.zeros((dim,NumKids)))).float())


    #Weigh Clusters
    for p in range(0,NumParents):
        weights_parents[p,0] = torch.exp(
            torch.matmul(
                torch.matmul(-(x_p[:,0]-a[:,p]-s[:,p]*ep[:,p]).unsqueeze(0), IC_p[:,:,p]),
                (x_p[:,0]-a[:,p]-s[:,p]*ep[:,p])
            )
        ) + 1e-50
        x_temp_parents[:,p]=weights_parents[p,0]*torch.matmul(Theta_p[:,:,p],u)
        
    for k in range(0,NumKids):
        p=int(k/PerParent)
        weights_kids[k,0] = torch.exp(
            torch.matmul(
                torch.matmul(-(x_p[:,0]-a[:,p]-s[:,p]*ep[:,p]-b[:,k]-n[:,k]*ek[:,k]).unsqueeze(0), IC_k[:,:,k]),
                (x_p[:,0]-a[:,p]-s[:,p]*ep[:,p]-b[:,k]-n[:,k]*ek[:,k])
            )
        ) + 1e-50
        x_temp_kids[:,k]=weights_kids[k,0]*torch.matmul(Theta_k[:,:,k],u)
    

    #Normalize
    Z=torch.sum(weights_parents)+torch.sum(weights_kids)
    x_p_clone=x_temp_parents.clone()
    x_k_clone=x_temp_kids.clone()
    x_unnorm=torch.sum(x_p_clone,1)+torch.sum(x_k_clone.clone(),1)
    x_norm=x_unnorm.clone()/Z
    
    y=x_norm.clone().unsqueeze(1)
    z=x_c-y
    F=torch.matmul(torch.matmul((z).transpose(0,1),IQ),(z))
    print(F)
    L=-0.5*F

    L.backward()
    return a.grad.data,b.grad.data,s.grad.data,n.grad.data
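
As I understand it, the error means that a tensor autograd saved for the backward pass was modified in place afterwards (every in-place write bumps a tensor's version counter, and backward refuses to use a stale saved value). A minimal sketch that, I believe, raises the same message (the names here are made up and have nothing to do with my model):

import torch

a = torch.randn(3, requires_grad=True)
b = torch.exp(a)        # exp saves its output b for the backward pass
b[0] = 1.0              # in-place write bumps b's version counter
b.sum().backward()      # RuntimeError: one of the variables needed for gradient
                        # computation has been modified by an inplace operation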

Also, if I set L = Z (i.e. the normalizing constant) and comment out everything below it, then everything works fine. I’m totally lost.
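
In case it helps anyone debugging something similar: on recent PyTorch versions (I’m not sure about older ones) there is an anomaly-detection mode that attaches the traceback of the offending forward operation to the backward error, which makes it easier to see which saved tensor got clobbered. A sketch of how I would wrap the call:

import torch

# Hypothetical usage: run the whole forward + backward inside anomaly mode so the
# RuntimeError also prints where the failing forward op was created.
with torch.autograd.detect_anomaly():
    grads = like_gradient_mu(dim, alpha, beta, sigma, nu, e_p, e_k,
                             Inv_Cov_p, Inv_Cov_k, LDS_p, LDS_k,
                             x_prev, x_curr, u_prev, IQ,
                             NumParents, NumKids, PerParent)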

I found my error. It’s because of the way I index into weights_parents inside the loop: assigning into it with weights_parents[p,0] = ... is an in-place operation on a tensor that later parts of the graph still need, so backward fails.
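
For anyone who hits the same thing later: what should fix it is to avoid the in-place index assignments altogether, e.g. collect each weight and each column in a Python list and torch.stack them afterwards, so nothing in the graph is ever written in place. A rough sketch for the parent loop only (same names as above, shapes assumed to match my code; the kid loop is analogous):

w_list, col_list = [], []
for p in range(0, NumParents):
    d = x_p[:, 0] - a[:, p] - s[:, p] * ep[:, p]            # deviation from parent mean
    w = torch.exp(torch.matmul(torch.matmul(-d.unsqueeze(0), IC_p[:, :, p]), d)) + 1e-50
    w_list.append(w)                                         # weight, shape (1,)
    col_list.append(w * torch.matmul(Theta_p[:, :, p], u).reshape(-1))  # weighted column, shape (dim,)
weights_parents = torch.stack(w_list)                        # (NumParents, 1), built without in-place writes
x_temp_parents = torch.stack(col_list, dim=1)                # (dim, NumParents)

With this, the normalization and L.backward() below can stay as they are, and the extra .clone() calls should no longer be needed.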