Hi Guys,
I’m trying to use autograd to compute the gradient of a complicated likelihood function that I need for gradient ascent, but I keep getting a runtime error saying that an in-place operation has been performed. I tried using .clone() (assuming I used it correctly), but to no avail. Here is my code, followed by a small standalone example that I think hits the same error:
import numpy as np
import torch
from torch.autograd import Variable

def like_gradient_mu(dim, alpha, beta, sigma, nu, e_p, e_k, Inv_Cov_p, Inv_Cov_k, LDS_p, LDS_k, x_prev, x_curr, u_prev, IQ, NumParents, NumKids, PerParent):
    # Variables we don't need gradients of
    IC_p = Variable(torch.from_numpy(Inv_Cov_p).float())    # inverse covariance matrices of parents
    IC_k = Variable(torch.from_numpy(Inv_Cov_k).float())    # inverse covariance matrices of kids
    Theta_p = Variable(torch.from_numpy(LDS_p).float())     # LDS of parents
    Theta_k = Variable(torch.from_numpy(LDS_k).float())     # LDS of kids
    IQ = Variable(torch.from_numpy(IQ).float())             # covariance matrix of state noise
    x_p = Variable(torch.from_numpy(x_prev).float())        # x_{t-1}
    x_c = Variable(torch.from_numpy(x_curr).float())        # x_t
    u = Variable(torch.from_numpy(u_prev).float())          # u = [x_{t-1}; 1]
    ep = Variable(torch.from_numpy(np.matrix(e_p)).float())
    ek = Variable(torch.from_numpy(np.matrix(e_k)).float())
    # Variables we do need the gradients of
    a = Variable(torch.from_numpy(np.matrix(alpha)).float(), requires_grad=True)
    a.retain_grad()
    b = Variable(torch.from_numpy(np.matrix(beta)).float(), requires_grad=True)
    b.retain_grad()
    s = Variable(torch.from_numpy(np.matrix(sigma)).float(), requires_grad=True)
    s.retain_grad()
    n = Variable(torch.from_numpy(np.matrix(nu)).float(), requires_grad=True)
    n.retain_grad()
    # Used to store the weights and the weighted predictions
    weights_parents = Variable(torch.from_numpy(np.matrix(np.zeros(NumParents)).T).float())
    weights_kids = Variable(torch.from_numpy(np.matrix(np.zeros(NumKids)).T).float())
    x_temp_parents = Variable(torch.from_numpy(np.matrix(np.zeros((dim, NumParents)))).float())
    x_temp_kids = Variable(torch.from_numpy(np.matrix(np.zeros((dim, NumKids)))).float())
    # Weight the clusters
    for p in range(0, NumParents):
        weights_parents[p, 0] = torch.exp(torch.matmul(torch.matmul(-(x_p[:, 0] - a[:, p] - s[:, p] * ep[:, p]).unsqueeze(0), IC_p[:, :, p]), (x_p[:, 0] - a[:, p] - s[:, p] * ep[:, p]))) + 1e-50
        x_temp_parents[:, p] = weights_parents[p, 0] * torch.matmul(Theta_p[:, :, p], u)
    for k in range(0, NumKids):
        p = int(k / PerParent)
        weights_kids[k, 0] = torch.exp(torch.matmul(torch.matmul(-(x_p[:, 0] - a[:, p] - s[:, p] * ep[:, p] - b[:, k] - n[:, k] * ek[:, k]).unsqueeze(0), IC_k[:, :, k]), (x_p[:, 0] - a[:, p] - s[:, p] * ep[:, p] - b[:, k] - n[:, k] * ek[:, k]))) + 1e-50
        x_temp_kids[:, k] = weights_kids[k, 0] * torch.matmul(Theta_k[:, :, k], u)
    # Normalize
    Z = torch.sum(weights_parents) + torch.sum(weights_kids)
    x_p_clone = x_temp_parents.clone()
    x_k_clone = x_temp_kids.clone()
    x_unnorm = torch.sum(x_p_clone, 1) + torch.sum(x_k_clone.clone(), 1)
    x_norm = x_unnorm.clone() / Z
    y = x_norm.clone().unsqueeze(1)
    z = x_c - y
    F = torch.matmul(torch.matmul(z.transpose(0, 1), IQ), z)
    print(F)
    L = -0.5 * F
    L.backward()
    return a.grad.data, b.grad.data, s.grad.data, n.grad.data
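Here is a much smaller sketch of the pattern I think is involved: writing into a pre-allocated Variable inside a loop and then reusing the indexed entries. If I understand the error correctly, this should fail at backward() the same way my function does (I haven't checked that it is exactly the same mechanism):

import torch
from torch.autograd import Variable

a = Variable(torch.ones(3), requires_grad=True)
buf = Variable(torch.zeros(3))   # pre-allocated buffer, like my weights_parents
out = Variable(torch.zeros(3))   # like my x_temp_parents

for i in range(3):
    buf[i] = torch.exp(a[i])     # indexed (in-place) write into buf
    out[i] = buf[i] * a[i]       # buf[i] is a view of buf and gets saved for backward

loss = out.sum()
loss.backward()                  # I expect this to raise the same in-place RuntimeError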
Also, if I set L = Z (i.e. the normalizing constant) and comment out everything below it, then everything works fine. I’m totally lost.
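One thing I was wondering: would it help to drop the pre-allocated weights_parents / x_temp_parents Variables entirely and instead collect the per-cluster terms in Python lists and concatenate them afterwards? Something like this for the parent loop (just a sketch with the same names as in my function; I haven't checked that it gives the same numbers):

w_list = []
x_list = []
for p in range(NumParents):
    d = x_p[:, 0] - a[:, p] - s[:, p] * ep[:, p]   # deviation for parent p
    w = torch.exp(torch.matmul(torch.matmul(-d.unsqueeze(0), IC_p[:, :, p]), d)) + 1e-50
    w_list.append(w)
    x_list.append(w * torch.matmul(Theta_p[:, :, p], u))   # weighted prediction for parent p
weights_parents = torch.cat(w_list, 0)   # shape (NumParents,)
x_temp_parents = torch.cat(x_list, 1)    # assuming torch.matmul(Theta_p[:, :, p], u) is (dim, 1)

Is that the right way to avoid the in-place writes, or am I misunderstanding what autograd is complaining about?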