I was about to implement a network consisting of residual connections, and encountered an error RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
.
I read about this thread and dig deeper into the implementation of residual connections using the two implementations below.
# implementation 1
class ResidualUnitA(nn.Module):
def __init__(self):
super(ResidualUnitA, self).__init__()
self.conv_0 = nn.Conv2d(3, 24, 3, padding=1, bias=False)
self.bn_0 = nn.BatchNorm2d(24)
self.conv_1 = nn.Conv2d(24, 24, 3, padding=1, bias=False)
self.bn_1 = nn.BatchNorm2d(24)
def forward(self, x):
x = self.conv_0(x)
x = self.bn_0(x)
x = F.relu(x, inplace=True)
residual = self.conv_1(x)
residual = self.bn_1(residual)
x += residual
x = F.relu(x, inplace=True)
return x
# implementation 2
class ResidualUnitB(nn.Module):
def __init__(self):
super(ResidualUnitB, self).__init__()
self.conv_0 = nn.Conv2d(3, 24, 3, padding=1, bias=False)
self.bn_0 = nn.BatchNorm2d(24)
self.conv_1 = nn.Conv2d(24, 24, 3, padding=1, bias=False)
self.bn_1 = nn.BatchNorm2d(24)
def forward(self, x):
x = self.conv_0(x)
x = self.bn_0(x)
x = F.relu(x, inplace=True)
identity = x
x = self.conv_1(x)
x = self.bn_1(x)
x += identity
x = F.relu(x, inplace=True)
return x
I find that ResidualUnitA
will raise the RuntimeError, but ResidualUnitB
won’t. How to explain this using the restrictions of in-place operations?