I am trying to define my own layer. However, I get "RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation" when running backward().
I found that if I comment out the second for loop 'for j in range(self.number_person):', or change the update to just 'u_i[:,j,:] = (1 - self.lumbda)*u_i[:,j,:]', then backward() runs fine.
Where is the in-place operation, and why does it break backward()? 'p_rnn_feature' and 'u_sum' have already been computed earlier in forward().
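To double-check what counts as "inplace" here, I put together a standalone sketch (an assumed minimal example, not taken from my network): exp() saves its output for backward(), so writing into a slice of that output trips the same check. I think the sliced assignments on u_i and alpha_i might be the same pattern, but I am not sure why that matters when the right-hand side has already been computed.

import torch
from torch.autograd import Variable

x = Variable(torch.ones(3, 2), requires_grad=True)
y = torch.exp(x)           # exp() saves its output y for the backward pass
y[:, 0] = 0.5 * y[:, 0]    # sliced assignment modifies y in place
y.sum().backward()         # RuntimeError: one of the variables needed for gradient
                           # computation has been modified by an inplace operation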
BTW, this code is running on PyTorch 0.19.7ad948f.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class myNet(nn.Module):
    def __init__(self):
        super(myNet, self).__init__()
        # do some init: self.lumbda, self.number_person, self.embedding_length, self.hyper_d,
        # self.u_i_linear, self.u_s_linear, self.alpha_i_linear,
        # self.pred_linear, self.pred_dropout, self.pred_linear_second, ...

    def forward(self, *inputs):
        # compute p_rnn_feature, u_sum, u_s, u_i, p_feature_sum, valid_person_num, batch_size (omitted);
        # for debugging, p_rnn_feature and u_sum are replaced with constant tensors here
        p_rnn_feature = Variable(torch.ones(p_rnn_feature.size())).cuda()
        u_sum = Variable(torch.ones(u_sum.size())).cuda()
        for i in range(self.embedding_length):
            u_s = u_s.clone()
            u_i = u_i.clone()
            for j in range(self.number_person):
                alpha_i = Variable(torch.zeros(batch_size, self.number_person, 1)).cuda()
                # mask out padded persons: j < valid_person_num
                comp_mask = Variable(j * torch.ones(valid_person_num.size())).cuda()
                comp_mask = torch.lt(comp_mask, valid_person_num)  # (batch_size, 1)
                comp_mask_ui = comp_mask.repeat(1, self.hyper_d)
                # size: (batch_size, 2*rnn_cell_size + hyper_d)
                tmp_x = torch.cat((p_rnn_feature[:, j, :], u_sum[:, j, :], u_s), 1)
                u_i[:, j, :] = (1 - self.lumbda) * u_i[:, j, :] + self.lumbda * F.relu(self.u_i_linear(tmp_x))
                u_i[:, j, :] = u_i[:, j, :] * comp_mask_ui.float()
                alpha_i[:, j, :] = F.tanh(self.alpha_i_linear(torch.cat((u_i[:, j, :], u_s), 1)))
                alpha_i[:, j, :] = alpha_i[:, j, :] * comp_mask.float()
            # normalize the attention weights and gate the per-person states
            alpha_sum = torch.sum(alpha_i, 1)
            alpha_sum = alpha_sum.repeat(1, self.number_person, 1)
            gate = alpha_i / Variable(torch.max(alpha_sum.data, torch.ones(alpha_sum.size()).cuda())).cuda()
            gate = gate.repeat(1, 1, self.hyper_d)
            gated_ui_sum = gate * u_i
            gated_ui_sum = torch.sum(gated_ui_sum, 1)
            gated_ui_sum = torch.squeeze(gated_ui_sum, dim=1)
            # size: (batch_size, hyper_d + rnn_cell_size + hyper_d)
            tmp_s = torch.cat((u_s, p_feature_sum, gated_ui_sum), 1)
            u_s = (1 - self.lumbda) * u_s + self.lumbda * F.relu(self.u_s_linear(tmp_s))
        pred_tmp = torch.cat((torch.squeeze(torch.sum(u_i, 1), dim=1), u_s), 1)
        pred = self.pred_dropout(self.pred_linear(pred_tmp))
        pred = self.pred_linear_second(pred)
        return pred
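If the sliced assignment really is the problem, one workaround I am considering (just a sketch under my own assumptions, names like new_u_i / u_ij / a_ij are made up, and I have not verified it in the full model) is to build the per-person results in Python lists and stack them afterwards, instead of writing into u_i[:, j, :] slice by slice. I assume torch.stack is available in my version; otherwise torch.cat over unsqueezed slices should do the same.

# inside the i loop, replacing the j loop above
new_u_i, new_alpha_i = [], []
for j in range(self.number_person):
    comp_mask = Variable(j * torch.ones(valid_person_num.size())).cuda()
    comp_mask = torch.lt(comp_mask, valid_person_num).float()   # (batch_size, 1)
    tmp_x = torch.cat((p_rnn_feature[:, j, :], u_sum[:, j, :], u_s), 1)
    u_ij = (1 - self.lumbda) * u_i[:, j, :] + self.lumbda * F.relu(self.u_i_linear(tmp_x))
    u_ij = u_ij * comp_mask.repeat(1, self.hyper_d)              # (batch_size, hyper_d)
    a_ij = F.tanh(self.alpha_i_linear(torch.cat((u_ij, u_s), 1))) * comp_mask
    new_u_i.append(u_ij)
    new_alpha_i.append(a_ij)
u_i = torch.stack(new_u_i, 1)          # (batch_size, number_person, hyper_d)
alpha_i = torch.stack(new_alpha_i, 1)  # (batch_size, number_person, 1)

Would this be the right way to avoid the in-place modification?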