One of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [12, 4096]], which is output 0 of EuclideanDistBackward, is at version 4096; expected version 0 instead

I get this error when I run total_loss.backward() in my main.py.
What can I do to fix it?
I do not use any -= or += operators, but I still get this error.

this is my code

    def forward(self):
        """Compute the matching-based loss (classification + regression).

        Reads (set elsewhere on the instance):
            self.point_cls        -- reshaped to (4, 4096, 2); column 1 is
                                     treated as a per-point confidence score.
            self.point_regression -- reshaped to (4, 4096, 2) predicted points.
            self.targets          -- per-batch ground-truth points; targets[b]
                                     has shape (gt_num, 2).

        Returns:
            Scalar tensor: the loss averaged over the batch.

        Note: `match` is an external matcher (presumably Hungarian matching on
        the cost matrix) returning (row_ind, col_ind) index arrays.
        """
        point_cls = torch.reshape(self.point_cls, (4, 4096, 2))
        point_regression = torch.reshape(self.point_regression, (4, 4096, 2))
        batchb = point_cls.shape[0]
        point_num = point_cls.shape[1]
        total_loss = 0
        for b in range(0, batchb):
            # Pairwise L2 cost matrix, shape (gt_num, point_num).
            l2 = torch.cdist(self.targets[b], point_regression[b, :, :], p=2)
            gt_num = self.targets[b].shape[0]
            # FIX: the original code mutated the cdist result in place, one
            # column at a time:
            #     for pn in range(0, point_num):
            #         l2[:, pn] = 0.05 * l2[:, pn] - point_cls[b, pn, 1]
            # Autograd saves the cdist output for EuclideanDistBackward, so
            # 4096 in-place writes raise "modified by an inplace operation
            # ... is at version 4096; expected version 0" on backward().
            # The same result is computed out-of-place via broadcasting:
            # point_cls[b, :, 1] has shape (point_num,) and broadcasts
            # across the gt_num rows of l2.
            l2 = 0.05 * l2 - point_cls[b, :, 1]
            # Matching runs on a detached numpy copy; gradients flow only
            # through the indexed selection below, not through the matcher.
            row_ind, match_res = match(l2.cpu().detach().numpy())
            point_cls_temp = point_cls[b, :, 1]
            regression_loss = l2[row_ind, match_res].sum() / gt_num

            for gt in range(0, gt_num):
                if gt == 0:
                    matched_cls = torch.unsqueeze(point_cls[b, match_res[gt], 1], 0)
                else:
                    matched_cls = torch.cat((matched_cls, point_cls[b, match_res[gt], 1].unsqueeze(0)), dim=0)
                # NOTE(review): this concatenation keeps index match_res[gt]
                # and drops the LAST element instead ([match_res[gt]:-1]
                # starts AT the matched index and stops before the end). If
                # the intent is to remove the matched entry it should be
                # point_cls_temp[match_res[gt] + 1:] — and even then the
                # indices of later matches would shift after each removal.
                # Kept byte-identical here to preserve behavior; confirm the
                # intended semantics before changing it.
                point_cls_temp = torch.cat([point_cls_temp[0:match_res[gt]], point_cls_temp[match_res[gt]:-1]])

            # First gt_num entries are matched scores, the rest are the
            # (down-weighted) unmatched scores.
            matched_cls = torch.cat((matched_cls, point_cls_temp))
            class_loss = torch.div(torch.sum(torch.log(matched_cls[:gt_num])) + 0.5 * torch.sum(torch.log(matched_cls[gt_num:])), point_num) * -1
            print(class_loss)
            print(regression_loss)
            total_loss = total_loss + class_loss + regression_loss * 2 * 0.0001
        total_loss = total_loss / batchb
        print(total_loss)
        print('a')
        return total_loss

I guess this line of code:

l2[:, pn] = 0.05 * l2[:, pn] - point_cls[b, pn, 1]

is causing the issue: slice assignment (`l2[:, pn] = ...`) is an in-place operation, and it modifies the `torch.cdist` output that autograd saved for the backward pass — each of the 4096 column writes bumps the tensor's version counter, which is why the error reports "version 4096; expected version 0". In-place ops are not limited to `-=`/`+=`; any indexed assignment counts.
As a fix, compute the result out-of-place instead — for example, replace the whole loop with the broadcast expression `l2 = 0.05 * l2 - point_cls[b, :, 1]` (or append the per-column results to a list and `torch.stack` them afterwards).

1 Like