I am trying to block the gradient of the ResNet shortcut (i.e., `residual` in the code below; when `self.downsample` is None, the shortcut is the identity, `residual = x`). When `self.downsample` is not None, I can register a backward hook `block_grad(module, grad_input, grad_output)` on the `self.downsample` layer and zero out `grad_input[0]` (a sketch of this hook is below). However, when `self.downsample` is None, there is no module on the shortcut path to attach a hook to. So my question is: how can I block the shortcut gradient in that case?
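For reference, the hook in the `downsample` case looks roughly like this (a minimal sketch; I register it with `register_full_backward_hook`):

```python
import torch

def block_grad(module, grad_input, grad_output):
    # Return a replacement grad_input tuple whose first entry is zeroed,
    # so no gradient flows back through this module's input.
    return tuple(
        torch.zeros_like(g) if i == 0 and g is not None else g
        for i, g in enumerate(grad_input)
    )

# registered on the shortcut's projection layer:
# block.downsample.register_full_backward_hook(block_grad)
```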
```python
import torch.nn as nn

def conv3x3(in_planes, out_planes, stride=1):
    # 3x3 convolution with padding, as in torchvision's ResNet
    return nn.Conv2d(in_planes, out_planes, kernel_size=3,
                     stride=stride, padding=1, bias=False)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x                       # identity shortcut when downsample is None

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)  # projection shortcut
        out += residual                    # the gradient path I want to block
        out = self.relu(out)
        return out
```
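To make the failing case concrete, here is a minimal usage sketch (the shapes are just an example):

```python
import torch

block = BasicBlock(inplanes=64, planes=64)  # downsample is None -> identity shortcut
x = torch.randn(1, 64, 32, 32, requires_grad=True)
out = block(x)
out.sum().backward()
# x.grad now mixes the conv-path gradient with the shortcut gradient;
# with no downsample module, there is nothing to attach block_grad to.
```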