Inplace operation: where?

import torch
import torch.nn as nn
# assumption: the original code calls vgg(pretrained=True), which is not a
# torchvision name; vgg16 is used here because the slice indices below
# (4 / 9 / 16 / 23 / 30) match vgg16().features
from torchvision.models import vgg16

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        vgg_pretrained_features = vgg16(pretrained=True).features
        self.enc_1 = nn.Sequential()
        self.enc_2 = nn.Sequential()
        self.enc_3 = nn.Sequential()
        self.enc_4 = nn.Sequential()
        self.enc_5 = nn.Sequential()
        # split the (assumed) VGG16 feature extractor into five encoder stages
        for x in range(4):
            self.enc_1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(4, 9):
            self.enc_2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(9, 16):
            self.enc_3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(16, 23):
            self.enc_4.add_module(str(x), vgg_pretrained_features[x])
        # note: this range starts at 22, so features[22] ends up in both
        # enc_4 and enc_5 (in vgg16 that index is a ReLU with inplace=True)
        for x in range(22, 30):
            self.enc_5.add_module(str(x), vgg_pretrained_features[x])

        # attention branch: bring the enc_3 / enc_4 / enc_5 outputs to a
        # common resolution (that of enc_4) before fusing them
        # downsample path for enc_3
        self.in_3 = nn.Sequential(
            Inception(in_channels=256),   # Inception is a custom block, definition not shown
            nn.Conv2d(512, 256, 2, 2),    # stride-2 conv halves the spatial size
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.in_4 = nn.Sequential(
            Inception(in_channels=512)
        )
        # upsample path for enc_5
        self.in_5 = nn.Sequential(
            Inception(in_channels=512),
            nn.ConvTranspose2d(512, 512, 4, 2, 1),  # doubles the spatial size
            nn.Conv2d(512, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        # 1x1 conv to fuse the concatenated branches (256 + 512 + 256 = 1024 channels)
        self.inception = nn.Sequential(
            nn.Conv2d(1024, 512, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )
        # decoder: three stride-2 transposed convs back to the input
        # resolution, then a 1x1 conv down to a 2-channel mask
        self.de_mask_inp = nn.Sequential(
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 2, 1, 1),
            nn.Sigmoid()
        )
    
    def forward(self, input):
        # encoder: keep every intermediate activation, since three of them
        # feed the attention branch below
        encoder = [input]
        for i in range(1, 6):
            encoder.append(getattr(self, 'enc_{:d}'.format(i))(encoder[-1]))
        # attention maps from stages 3-5, all at the enc_4 resolution
        tam = []
        for i in range(3, 6):
            tam.append(getattr(self, 'in_{:d}'.format(i))(encoder[i]))
        # fuse the branches and decode to the mask
        attention = self.inception(torch.cat(tam, 1))
        attention = self.de_mask_inp(attention)
        return attention
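
The custom Inception block is not included above, so the snippet is not runnable as posted. Below is a hypothetical stand-in (not my real block) that only reproduces the channel counts the rest of the network implies, namely that both Inception(256) and Inception(512) output 512 channels, plus a minimal forward call:

# hypothetical stand-in for the custom Inception block (the real definition
# is omitted from the post); it only matches the implied channel counts:
# Inception(256) and Inception(512) must both output 512 channels
class Inception(nn.Module):
    def __init__(self, in_channels):
        super(Inception, self).__init__()
        self.conv = nn.Conv2d(in_channels, 512, 3, 1, 1)

    def forward(self, x):
        return self.conv(x)

net = Network()
with torch.no_grad():  # no backward pass here, so no in-place error triggers
    mask = net(torch.randn(1, 3, 256, 256))  # spatial size must be divisible by 16
print(mask.shape)  # torch.Size([1, 2, 256, 256])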

I ran this code for a segmentation task successfully until I modified some layers in the __init__ function and the corresponding code in the forward loop.
It suddenly reports a fatal error: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation. I checked the code and there is no += operation, so now I don't know what happened and am turning to the forum.
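
As far as I understand, += is not the only kind of in-place operation: nn.ReLU(inplace=True) and any trailing-underscore tensor method (relu_, add_, clamp_, ...) also modify tensors in place. A tiny standalone example, unrelated to my model, that raises the exact same error:

import torch
import torch.nn as nn

x = torch.randn(4, requires_grad=True)
y = x.exp()               # exp() saves its output for the backward pass
nn.ReLU(inplace=True)(y)  # overwrites y in place (same as y.relu_())
y.sum().backward()        # RuntimeError: one of the variables needed for
                          # gradient computation has been modified by an
                          # inplace operation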
The only other change on my DL server is that I added 2 GPUs to it; nvidia-smi seems to work fine.
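
In the meantime I am trying to debug with torch.autograd.set_detect_anomaly(True), which, as I understand it, makes the backward error include a traceback into the forward op that produced the offending tensor (it adds a lot of overhead, so it is for debugging only):

import torch

torch.autograd.set_detect_anomaly(True)  # debugging only: significant slowdown

net = Network()
out = net(torch.randn(1, 3, 256, 256))
out.mean().backward()  # placeholder loss; the error should now point at the
                       # forward() line that created the modified tensor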