I am changing the VGG code from:
```python
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys

# config: numbers are conv output channels, 'M' marks a 2x2 max-pool
cfg = {
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M',
              512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                # every conv gets its own BatchNorm2d instance here
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
```
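For context, both versions are trained and evaluated the same way; this is roughly how I build and smoke-test the model (a minimal sketch, assuming CIFAR-10-style 3x32x32 inputs, which is why the feature map is 512x1x1 by the time it reaches the linear layer):

```python
net = VGG('VGG19').to(device)              # uses the class and device defined above
x = torch.randn(2, 3, 32, 32).to(device)   # dummy CIFAR-10-sized batch
print(net(x).shape)                        # expect: torch.Size([2, 10])
```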
And this is what I am changing it to:
```python
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn
from torch.autograd import Variable
import sys

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.BatchNorm_64 = nn.BatchNorm2d(64)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.BatchNorm_128 = nn.BatchNorm2d(128)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_4 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.BatchNorm_256 = nn.BatchNorm2d(256)
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_4 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.BatchNorm_512 = nn.BatchNorm2d(512)
        # note: the BatchNorm_*, MaxPool, ReLUU and AvgPool modules are each
        # created once here and reused across several layers in forward()
        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.ReLUU = nn.ReLU(inplace=True)
        self.AvgPool = nn.AvgPool2d(kernel_size=1, stride=1)
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        Out = self.ReLUU(self.BatchNorm_64(self.conv1_1(x)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_64(self.conv1_2(Out))))
        Out = self.ReLUU(self.BatchNorm_128(self.conv2_1(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_128(self.conv2_2(Out))))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_1(Out)))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_2(Out)))
        Out = self.ReLUU(self.BatchNorm_256(self.conv3_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_256(self.conv3_4(Out))))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_1(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_2(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv4_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_512(self.conv4_4(Out))))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_1(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_2(Out)))
        Out = self.ReLUU(self.BatchNorm_512(self.conv5_3(Out)))
        Out = self.MaxPool(self.ReLUU(self.BatchNorm_512(self.conv5_4(Out))))
        Out = self.AvgPool(Out)
        # I am not sure if I should use the nn.Sequential one anymore or not...
        # Out = nn.Sequential(Out)
        out = Out
        # return nn.Sequential(*layers)
        out = out.view(out.size(0), -1)
        # print("outoutoutout", out.size())
        out = self.classifier(out)
        return out
```
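One sanity check I have not run yet is whether the two definitions are structurally the same, e.g. comparing parameter and BatchNorm counts. A rough sketch (the file names vgg_cfg.py and vgg_flat.py for the two versions are hypothetical):

```python
import torch.nn as nn

# hypothetical file names for the two versions above
from vgg_cfg import VGG as VGGCfg    # cfg-based version
from vgg_flat import VGG as VGGFlat  # hand-written version

def n_params(model):
    # total number of learnable parameters
    return sum(p.numel() for p in model.parameters())

a, b = VGGCfg('VGG19'), VGGFlat('VGG19')
print(n_params(a), n_params(b))  # do the totals match?
# how many distinct BatchNorm2d modules does each model register?
print(sum(isinstance(m, nn.BatchNorm2d) for m in a.modules()),
      sum(isinstance(m, nn.BatchNorm2d) for m in b.modules()))
```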
I think the two versions above are identical. Am I right?
If not, what am I missing?
But when I run them, the first version converges very fast and reaches 60% accuracy after 10 epochs, whereas the second one stays stuck at 10-15% even after 100 epochs.
I should also emphasize that the training accuracy is almost the same for both; it is the testing accuracy that differs so much.
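Since the gap only shows up on the test set, I wonder if it is related to the switch from model.train() to model.eval(). One thing I plan to look at is the BatchNorm running statistics, roughly like this (a sketch; `model` is a placeholder for whichever trained network):

```python
import torch.nn as nn

# inspect the running statistics that BatchNorm falls back on in eval() mode
for name, m in model.named_modules():  # `model`: hypothetical, either network
    if isinstance(m, nn.BatchNorm2d):
        print(name, m.running_mean.mean().item(), m.running_var.mean().item())
```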
Can anyone tell me what I'm missing?
Thanks