I have a ResNet-18 working well. Now, I want to use InstanceNorm as normalization layer instead of BatchNorm, so I changed all the batchnorm layers in this way:
resnet18.bn1 = nn.InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer1[0].bn1 = nn.InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer1[0].bn2 = nn.InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer1[1].bn1 = nn.InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer1[1].bn2 = nn.InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer2[0].bn1 = nn.InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer2[0].bn2 = nn.InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer2[1].bn1 = nn.InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer2[1].bn2 = nn.InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer2[0].downsample[1] = nn.InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer3[0].bn1 = nn.InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer3[0].bn2 = nn.InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer3[1].bn1 = nn.InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer3[1].bn2 = nn.InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer3[0].downsample[1] = nn.InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer4[0].bn1 = nn.InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer4[0].bn2 = nn.InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer4[1].bn1 = nn.InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer4[1].bn2 = nn.InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.layer4[0].downsample[1] = nn.InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
resnet18.fc = nn.Linear(in_features=512, out_features=10, bias=True)
All the num_features
are equal to the BatchNorm2d ones, I just changed BatchNorm2d into InstanceNorm2d. So my ResNet-18 is this:
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(64, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(128, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(256, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): InstanceNorm2d(512, eps=1e-05, momentum=0.9, affine=True, track_running_stats=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=512, out_features=10, bias=True)
)
I have the error in title. Do you know how can I fix?