I am changing the VGG code from:
```python
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys

# config: numbers are conv output channels, 'M' marks a 2x2 max-pool
cfg = {
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M',
              512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                # every conv gets its own BatchNorm2d instance here
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
```
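For context, both versions are trained and evaluated the same way; this is roughly how I build and smoke-test the model (a minimal sketch, assuming CIFAR-10-style 3x32x32 inputs, which is why the feature map is 512x1x1 by the time it reaches the linear layer):

```python
net = VGG('VGG19').to(device)              # uses the class and device defined above
x = torch.randn(2, 3, 32, 32).to(device)   # dummy CIFAR-10-sized batch
print(net(x).shape)                        # expect: torch.Size([2, 10])
```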
And this is what I am changing it to:
```python
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.BatchNorm_64 = nn.BatchNorm2d(64)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.BatchNorm_128 = nn.BatchNorm2d(128)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.BatchNorm_256 = nn.BatchNorm2d(256)
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.BatchNorm_512 = nn.BatchNorm2d(512)
        # note: the BatchNorm_*, MaxPool, ReLUU and AvgPool modules are each
        # created once here and reused across several layers in forward()
        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.ReLUU = nn.ReLU(inplace=True)
        self.AvgPool = nn.AvgPool2d(kernel_size=1, stride=1)
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        Out = self.ReLUU(self.BatchNorm_64(self.conv1_1(x)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_64(self.conv1_2(Out))))
        Out = self.ReLUU(self.BatchNorm_128(self.conv2_1(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_128(self.conv2_2(Out))))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_1(Out)))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_2(Out)))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_256(self.conv3_4(Out))))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_1(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_2(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_512(self.conv4_4(Out))))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_1(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_2(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_512(self.conv5_4(Out))))
        Out = self.AvgPool(Out)
        # I am not sure if I should use the nn.Sequential one anymore or not...
        # Out = nn.Sequential(Out)
        out = Out
        # return nn.Sequential(*layers)
        out = out.view(out.size(0), -1)
        # print("outoutoutout", out.size())
        out = self.classifier(out)
        return out
```
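One sanity check I have not run yet is whether the two definitions are structurally the same, e.g. comparing parameter and BatchNorm counts. A rough sketch (the file names vgg_cfg.py and vgg_flat.py for the two versions are hypothetical):

```python
import torch.nn as nn

# hypothetical file names for the two versions above
from vgg_cfg import VGG as VGGCfg    # cfg-based version
from vgg_flat import VGG as VGGFlat  # hand-written version

def n_params(model):
    # total number of learnable parameters
    return sum(p.numel() for p in model.parameters())

a, b = VGGCfg('VGG19'), VGGFlat('VGG19')
print(n_params(a), n_params(b))  # do the totals match?
# how many distinct BatchNorm2d modules does each model register?
print(sum(isinstance(m, nn.BatchNorm2d) for m in a.modules()),
      sum(isinstance(m, nn.BatchNorm2d) for m in b.modules()))
```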
I think the two versions above are identical. Am I right?
If not, what am I missing?
But when I run them, the first version converges very fast and reaches 60% accuracy after 10 epochs, whereas the second one stays stuck at 10-15% even after 100 epochs.
I should also emphasize that the training accuracy is almost the same for both; it is the testing accuracy that differs so much.
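Since the gap only shows up on the test set, I wonder if it is related to the switch from model.train() to model.eval(). One thing I plan to look at is the BatchNorm running statistics, roughly like this (a sketch; `model` is a placeholder for whichever trained network):

```python
import torch.nn as nn

# inspect the running statistics that BatchNorm falls back on in eval() mode
for name, m in model.named_modules():  # `model`: hypothetical, either network
    if isinstance(m, nn.BatchNorm2d):
        print(name, m.running_mean.mean().item(), m.running_var.mean().item())
```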
Can anyone tell me what I'm missing?
Thanks