Not getting the same performance for the same actions

I am changing the VGG code from:

'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys

cfg = {
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

to:

'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()

        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.BatchNorm_64 = nn.BatchNorm2d(64)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.BatchNorm_128 = nn.BatchNorm2d(128)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.BatchNorm_256 = nn.BatchNorm2d(256)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.BatchNorm_512 = nn.BatchNorm2d(512)

        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.ReLUU = nn.ReLU(inplace=True)

        self.AvgPool = nn.AvgPool2d(kernel_size=1, stride=1)

        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        Out = self.ReLUU(self.BatchNorm_64(self.conv1_1(x)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_64(self.conv1_2(Out))))

        Out = self.ReLUU(self.BatchNorm_128(self.conv2_1(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_128(self.conv2_2(Out))))

        Out = self.ReLUU(self.BatchNorm_256(self.conv3_1(Out)))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_2(Out)))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_256(self.conv3_4(Out))))

        Out = self.ReLUU(self.BatchNorm_512(self.conv4_1(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_2(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_512(self.conv4_4(Out))))

        Out = self.ReLUU(self.BatchNorm_512(self.conv5_1(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_2(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_512(self.conv5_4(Out))))

        Out = self.AvgPool(Out)

        # I am not sure if I should use the nn.Sequential one anymore or not...
        # Out = nn.Sequential(Out)

        out = Out
        #     return nn.Sequential(*layers)

        out = out.view(out.size(0), -1)
        # print("outoutoutout", out.size())

        out = self.classifier(out)
        return out

I think the two versions above are identical. Am I right?
If not, what am I missing?

But when I run them, the first version converges very fast and reaches about 60% accuracy after 10 epochs, while the second one stays stuck at 10-15% even after 100 epochs.

I should also emphasize that the training accuracy is almost the same for both; it is the test accuracy that differs so much…

Can anyone tell me what I'm missing?

Thanks

You are reusing the same BatchNorm2d layers in your second approach, while in the first approach you create a new one for every conv layer:

Out = self.ReLUU(self.BatchNorm_64(self.conv1_1(x)))
Out = self.MaxPool(self.ReLUU(self.BatchNorm_64(self.conv1_2(Out))))
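As a rough sketch of the fix (only the first conv block is shown, and the names bn1_1, bn1_2 are just illustrative, not from your code): give every conv its own BatchNorm2d, so each one has its own learnable affine parameters and its own running statistics. ReLU and MaxPool2d have no state, so sharing those is fine.

    import torch
    import torch.nn as nn

    class FirstBlock(nn.Module):
        def __init__(self):
            super(FirstBlock, self).__init__()
            self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
            self.bn1_1 = nn.BatchNorm2d(64)    # dedicated to conv1_1
            self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
            self.bn1_2 = nn.BatchNorm2d(64)    # dedicated to conv1_2
            self.relu = nn.ReLU(inplace=True)             # stateless, safe to reuse
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # stateless, safe to reuse

        def forward(self, x):
            out = self.relu(self.bn1_1(self.conv1_1(x)))
            out = self.pool(self.relu(self.bn1_2(self.conv1_2(out))))
            return out

    # quick sanity check on a CIFAR-10-sized batch
    print(FirstBlock()(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 64, 16, 16])

The same pattern applies to the 128-, 256-, and 512-channel blocks.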

I'm so dumb :frowning:
I didn't know a BatchNorm2d layer is actually trained; I thought that, like max pool or ReLU, it is just a function without any trainable state!
Well, thanks a lot, that solved the issue…
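A small check that illustrates the point (just a sketch added for anyone reading later, not part of the code above): unlike MaxPool2d and ReLU, BatchNorm2d carries learnable parameters and running-statistics buffers, so sharing one instance across several conv layers shares all of that state.

    import torch.nn as nn

    bn = nn.BatchNorm2d(64)
    pool = nn.MaxPool2d(kernel_size=2, stride=2)

    # BatchNorm2d has learnable parameters ...
    print([name for name, _ in bn.named_parameters()])  # ['weight', 'bias']
    # ... and buffers updated during training (exact list depends on the PyTorch version)
    print([name for name, _ in bn.named_buffers()])     # e.g. ['running_mean', 'running_var', ...]

    # MaxPool2d (and ReLU) have neither parameters nor buffers.
    print(list(pool.named_parameters()), list(pool.named_buffers()))  # [] []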