RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation?

While I modified the unet, actually speaking, I just add a gaussian layer in the net, the code is as blow.
But when I run this code, it tells me that

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
So what is wrong with code?

# -*-coding:utf-8-*-
import torch
import torch.nn as nn
from torch.nn import init
import functools
import cv2
import numpy as np
from torch.optim import lr_scheduler

class unet256_2(nn.Module):
    def __init__(self, input_nc, output_nc, num_downs, ngf,
                 norm_layer=nn.BatchNorm2d, use_dropout=False):
        super(unet256_2, self).__init__()

        # construct unet structure
        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None,
                                             norm_layer=norm_layer, innermost=True)  # 2*2

        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block,
                                             norm_layer=norm_layer, use_dropout=use_dropout, sigma=1, size=4)  # 4*4
        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block,
                                             norm_layer=norm_layer, use_dropout=use_dropout, sigma=1, size=8)  # 8*8
        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block,
                                             norm_layer=norm_layer, use_dropout=use_dropout, sigma=1, size=16)  # 16*16

        unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) # 32*32
        unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)

        self.model = unet_block

    def forward(self, input):
        return self.model(input)


# Defines the submodule with skip connection.
# X -------------------identity---------------------- X
#   |-- downsampling -- |submodule| -- upsampling --|
class UnetSkipConnectionBlock(nn.Module):
    def __init__(self, outer_nc, inner_nc, input_nc=None,
                 submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False, sigma=None, size=None):
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost

        # 对高斯layer进行声明
        self.sigma = sigma
        self.size = size
        self.gaussian_layer=None
        if sigma is not None:
            self.gaussian_layer = Gaussian_filter(kernel_size=self.size, sigma=self.sigma)
            # self.gaussian_layer = nn.Sequential(*[self.gaussian_layer])


        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        if input_nc is None:
            input_nc = outer_nc

        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
                             stride=2, padding=1, bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:  # 如果是U-NET网络结构中的最外层部分
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:  # 如果是U-NET网络结构中的最低部
            upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:  # U-NET网络结构的中间部分
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
                                        kernel_size=4, stride=2,
                                        padding=1, bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)

    def forward(self, x):
        if self.outermost:
            return self.model(x)
        elif self.sigma is not None:
            y = self.gaussian_layer(x)
            z = self.model(x)
            return torch.cat([y, z], 1)  # 在第一个维度上将x与model(x)进行cat一起,向上up sampling
        else:
            return torch.cat([x, self.model(x)], 1)


class Gaussian_filter(nn.Module):
    def __init__(self, kernel_size=8, sigma=1):
        super(Gaussian_filter, self).__init__()
        # 进行高斯初始化
        k1 = cv2.getGaussianKernel(kernel_size, sigma)
        k2 = cv2.getGaussianKernel(kernel_size, sigma)
        n = int(kernel_size / 2)
        K = np.dot(k1, np.transpose(k2))
        N = np.ones((kernel_size, kernel_size))
        K2 = K[n, n] * N - K
        torch_data = torch.FloatTensor(K2)
        self.gaussian_filter = nn.Parameter(torch_data, requires_grad=True)
    def forward(self, x):
        # print(x)
        # x = self.test(x)
        # print(x)
        return torch.mul(x, self.gaussian_filter)

if __name__ =='__main__':

    data = torch.rand(1,3,256,256)
    label = torch.rand(1,3,256,256)
    net = unet256_2(input_nc=3, output_nc=3, num_downs=8, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=True)
    # net = Gaussian_filter()
    output = net(data)
    loss = torch.nn.L1Loss()
    tmp_loss = loss(output, data)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
    optimizer.zero_grad()
    tmp_loss.backward()
    optimizer.step()

Hi,

Could you try to define your relus not inplace by using uprelu = nn.ReLU(False). It is possible that one of the operations just before the relu needs its output to compute its gradient and so the relu operation change it inplace.

2 Likes

thank you very much! that is the problem! really amazing!!!