[torch.cuda.FloatTensor [..,..,..,..]], which is output 0 of ReluBackward1, is at version 2; expected version 1 instead

I use the following class as part of my VAE model.

class AxialAttentionUnet(nn.Module):

    def __init__(self, block, layers, num_classes=2, zero_init_residual=True,
                 groups=8, width_per_group=64, replace_stride_with_dilation=None,
                 s=0.125, img_size = 128, imgchan = 3 , features=[ 64, 128, 256, 512]):
        super(AxialAttentionUnet, self).__init__()
        self.features = features
        self.inplanes = int(features[0] * s)
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.relu = nn.ReLU(inplace=True)
        down_layers = nn.ModuleList()
        down_layers.append(nn.Conv2d( imgchan, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False))
        down_layers.append(nn.BatchNorm2d(self.inplanes))
        down_layers.append(self.relu)
        down_layers.append(nn.Conv2d(self.inplanes, features[1], kernel_size=3, stride=1, padding=1, bias=False))
        down_layers.append(nn.BatchNorm2d(features[1]))
        down_layers.append(self.relu)
        down_layers.append(nn.Conv2d(features[1], self.inplanes, kernel_size=3, stride=1, padding=1, bias=False))
        down_layers.append(nn.BatchNorm2d(self.inplanes))
        down_layers.append(self.relu)
        self.down = nn.Sequential(*down_layers)
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.encoder_layer = nn.ModuleList()
        ks  = img_size//2
        #three layers 
        for i in range(1,len(features)):
            if i==0:
               self.encoder_layer.append( self._make_layer(block, int(features[i] * s), layers[i-1], kernel_size= ks))
            else :
               self.encoder_layer.append( self._make_layer(block, int(features[i] * s), layers[i-1], stride=2, kernel_size=ks,
                                       dilate=replace_stride_with_dilation[i-2]))
               ks=ks//2
        
        # Decoder
        
        self.decoder_layer = nn.ModuleList()
        self.decoder_layer.append(nn.Conv2d(int(features[-1] *2*s), int(features[-1]*2*s), kernel_size=3, stride=2, padding=1))
        self.decoder_layer.append(nn.Conv2d(int(features[-1] *2*s), int(features[-1]*s), kernel_size=3, stride=1, padding=1))
        for idx in reversed( range(1, len(features))):
            if idx > 1:
               self.decoder_layer.append( nn.Conv2d(int(features[idx]*s) ,  int(features[idx-1]*s), kernel_size=3, stride=1, padding=1))
            else:
               self.decoder_layer.append(  nn.Conv2d(int(features[idx]*s) , num_classes, kernel_size=1, stride=1, padding=0))

        self.soft     = nn.Softmax(dim=1)
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(device=self.device)


    def _make_layer(self, block, planes, blocks, kernel_size=56, stride=1, dilate=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride =stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, groups=self.groups,
                            base_width=self.base_width, dilation=previous_dilation, 
                            kernel_size=kernel_size))
        self.inplanes = planes * block.expansion
        if stride != 1:
            kernel_size = kernel_size // 2

        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                kernel_size=kernel_size))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.down(x)
        # x = self.maxpool(x)
        # pdb.set_trace()
        x1 = self.encoder_layer[0](x)
        # print(x1.shape)
        x2 = self.encoder_layer[1](x1)
        # print(x2.shape)
        x3 = self.encoder_layer[2](x2)

        x = self.relu(F.interpolate(self.decoder_layer[0](x3) , scale_factor=(2,2), mode ='bilinear'))
        x = torch.add(x.clone(), x3)
        x = self.relu(F.interpolate(self.decoder_layer[1](x) , scale_factor=(2,2), mode ='bilinear'))
        x = torch.add(x.clone(), x2)
        x = self.relu(F.interpolate(self.decoder_layer[2](x) , scale_factor=(2,2), mode ='bilinear'))
        x = torch.add(x.clone(), x1)
        x = self.relu(F.interpolate(self.decoder_layer[3](x) , scale_factor=(2,2), mode ='bilinear'))
        x = self.decoder_layer[4](self.relu(x))
        # print(x.shape)
        
        # end of full image training 

        return x3, x

When I run the code, I get the following error:

File "/home/utils.py", line 799, in forward
    x = self.relu(F.interpolate(self.decoder_layer[3](x) , scale_factor=(2,2), mode ='bilinear'))
  File "/h/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/h/anaconda3/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 102, in forward
    return F.relu(input, inplace=self.inplace)
  File "/h/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 1296, in relu
    result = torch.relu_(input)
 (function _print_stack)
Traceback (most recent call last):
  File "Hierarchical_VAE.py", line 1586, in <module>
    average_epoch_loss, elbo, wasserstein_loss, latent_dimension_kld = train(epoch)
  File "Hierarchical_VAE.py", line 1421, in train
    loss_dict["WAE-GP"].backward()
  File "/h/anaconda3/lib/python3.8/site-packages/torch/_tensor.py", line 255, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/h/anaconda3/lib/python3.8/site-packages/torch/autograd/__init__.py", line 147, in backward
    Variable._execution_engine.run_backward(
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor 
[100, 16, 48, 48]], which is output 0 of ReluBackward1, is at version 2; expected version 1 instead. Hint: the backtrace further above
 shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

I would appreciate it if someone could point out how to fix this error. Thanks.

Replace self.relu = nn.ReLU(inplace=True) with self.relu = nn.ReLU(inplace=False).
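A self-contained toy example (just two stacked ReLUs, nothing to do with your model) that reproduces the same version-counter error when inplace=True, and runs cleanly with inplace=False:

import torch
import torch.nn as nn

relu = nn.ReLU(inplace=True)   # swap to inplace=False and backward() succeeds

x = torch.randn(4, requires_grad=True)
y = relu(x * 2)     # ReluBackward saves its output (y) for the backward pass
z = relu(y)         # second in-place relu bumps y's version counter from 1 to 2
z.sum().backward()  # RuntimeError: ... output 0 of ReluBackward..., is at version 2; expected version 1

Note that in your class the same self.relu module is reused everywhere (inside self.down and in the decoder part of forward), so flipping the inplace flag affects all of those call sites at once.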

@AlphaBetaGamma96 I tried your suggestion before and I got the same error.

If you have removed all inplace=True, then check whether F.interpolate has a backward function. It seems relu_ (the in-place version of relu) is being called internally, which may be the problem.
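For what it's worth, the last two lines of the posted forward apply self.relu twice in a row to the same tensor (once after decoder_layer[3], and again before decoder_layer[4]). With inplace=True the second call bumps the version counter of the output saved by the first ReluBackward from 1 to 2, which lines up with what the error reports. A sketch of just those two lines using the out-of-place functional relu (F.relu, with its default inplace=False; F is torch.nn.functional as in the posted code, everything else unchanged):

        x = F.relu(F.interpolate(self.decoder_layer[3](x), scale_factor=(2, 2), mode='bilinear'))
        x = self.decoder_layer[4](F.relu(x))  # redundant on an already-rectified tensor, but now harmless

If the error persists after that, the remaining in-place op is probably outside this class (e.g. in block or in the WAE-GP loss), and anomaly detection (torch.autograd.set_detect_anomaly(True), which looks to be enabled already given the forward-pass trace above) will keep pointing at the operation whose backward fails.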