Runtime Error: about the "in-place" operation and autograd

Sorry, I am a neophyte for PyTorch, coming across with some problems about the “in-place” operation
Here is my code

class lstm_7_Days(nn.Module):
    def __init__(self, input_size=16,hidden_size=16*4,output_size=2,num_layer=2):
        super(lstm_7_Days, self).__init__()
        self.layer0 = nn.LSTM(input_size,hidden_size,num_layer,batch_first=True)
        self.layer_arg = nn.ModuleList()
        self.layer_sel = nn.ModuleList()
        for i in range(2):
            self.layer_arg.append(nn.Linear(hidden_size,hidden_size))
            self.layer_sel.append(nn.Linear(hidden_size,hidden_size))
        self.layer_pro = nn.Linear(hidden_size*3,output_size)
    
    def forward(self,inseq):
        x = torch.zeros((BATCH_SIZE,9,4,64))
        N_adjacent_forest = torch.zeros((BATCH_SIZE));
        for i in range(0,9):
            out, (h_n, c_n) = self.layer0(inseq[:,i,:,:])
            x[:,i,0,:] =  x[:,i,0,:].clone() + h_n[-1,:,].clone()
        for i in range(BATCH_SIZE):
            square_id = (int)((inseq[i,0,0,10] - 37.4283)/latitude_step_unit) * 5 + (int)((inseq[i,0,0,11] +  122.08156)/longitude_step_unit)
            if(square_id == 0 or square_id == 4 or square_id == 20 or square_id == 24):
                N_adjacent_forest[i] = N_adjacent_forest[i].clone() + 3;
            elif(square_id == 5 or square_id == 10 or square_id == 25 or 
               square_id == 1 or square_id == 2  or square_id == 3  or
               square_id == 9 or square_id == 14 or square_id == 19 or
               square_id == 21 or square_id == 22 or square_id == 23 ):
                N_adjacent_forest[i] = N_adjacent_forest[i].clone() + 5;
            else:
                N_adjacent_forest[i] = N_adjacent_forest[i].clone() + 8;
        for a in range(BATCH_SIZE):
            for j in range(1,3):
                for i in range(0,(int)(N_adjacent_forest[a])+1):
                    x[a,i,j,:] = x[a,i,j,:].clone() + self.layer_sel[j-1](x[a,i,j-1,:]);
                    for cnt in range(0,(int)(N_adjacent_forest[a])+1):
                        if(cnt != i):
                            x[a,i,j,:] = x[a,i,j,:].clone() + (1/N_adjacent_forest[a])*self.layer_arg[j-1](x[a,cnt,j-1,:])
        h = torch.cat((x[:,0,0,:],x[:,0,1,:],x[:,0,2,:]),axis=1).squeeze().clone()
        print(h.size())
        h = self.layer_pro(h)
        return h;

And here is the RunTime Error
: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [1, 64]], which is output 0 of UnsqueezeBackward0, is at version 8129; expected version 8128 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

I know that I should avoid the in place operation, so I have added some calls clone() function, however, the ErrorTime Error still happens.
PS: I am really green about PyTorch, so there must be lots of problems in code :frowning: