Hi, I have defined the following two architectures using some valuable suggestions from this forum. In my opinion they are equivalent, but I get very different performance after the same number of epochs. The only difference is that one of them uses nn.Sequential and the other doesn’t. Any ideas?
The first architecture is the following:
class Net(nn.Module):
    """Small CNN classifier built from two ``nn.Sequential`` stages.

    ``features`` applies Conv(1->32, kernel ``w``) -> ReLU -> Conv(32->64,
    kernel 3) -> ReLU -> 2x2 max-pool -> dropout(0.25). ``classifier`` maps
    the flattened features through Linear(512) -> ReLU -> Linear(n_output)
    and ends with ``LogSoftmax`` over dim 1, so the output is
    log-probabilities.

    Args:
        n_input: Side length of the square, single-channel input image; used
            to size the first linear layer.
        n_output: Number of output units (classes).
        w: Kernel size of the first convolution.
    """

    def __init__(self, n_input, n_output, w):
        super(Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, w),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Dropout(p=0.25),
        )
        # Number of features per sample after ``features``, found by probing.
        self.flat_fts = self.get_flat_fts(n_input, self.features)
        self.classifier = nn.Sequential(
            nn.Linear(self.flat_fts, 512),
            nn.ReLU(),
            nn.Linear(512, n_output),
            nn.LogSoftmax(dim=1),
        )

    def get_flat_fts(self, in_size, fts):
        """Return the per-sample feature count ``fts`` yields for an input.

        Args:
            in_size: Side length of the square probe image.
            fts: Module to probe; it receives a ``(1, 1, in_size, in_size)``
                tensor of ones.

        Returns:
            The product of all non-batch output dimensions, as an ``int``.
        """
        # Only the output shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            probe = fts(torch.ones(1, 1, in_size, in_size))
        return probe.numel() // probe.size(0)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, n_output)`` for ``x``."""
        fts = self.features(x)
        # Flatten per sample instead of ``view(-1, self.flat_fts)``: if the
        # input size differs from ``n_input`` the linear layer now raises a
        # shape error rather than silently folding features into the batch.
        flat_fts = fts.flatten(1)
        return self.classifier(flat_fts)
The second one is the following:
class Net(nn.Module):
    """Small CNN classifier with explicitly wired layers.

    The forward pass is Conv(1->32) -> ReLU -> Conv(32->64) -> ReLU ->
    2x2 max-pool -> dropout(0.25) -> Linear(512) -> ReLU -> Linear(n_output)
    -> Softmax over dim 1. Both convolutions use kernel size ``w``.

    NOTE(review): the output is probabilities (``Softmax``), not
    log-probabilities. A loss that expects log-probabilities (e.g.
    ``nn.NLLLoss``) or raw logits (e.g. ``nn.CrossEntropyLoss``) will not
    train as intended on this output -- confirm against the training loop.

    Args:
        n_input: Side length of the square, single-channel input image; used
            to size the first linear layer.
        n_output: Number of output units (classes).
        w: Kernel size shared by both convolutions.

    Raises:
        ValueError: If ``n_input`` is too small for the two convolutions and
            the pooling step to leave at least one output pixel.
    """

    def __init__(self, n_input, n_output, w):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, w)
        self.conv2 = nn.Conv2d(32, 64, w)
        # Width of the flattened feature map, derived from ``n_input`` rather
        # than hard-coded; equals 64 * 30 * 30 for n_input=64, w=3.
        self.flat_fts = self._flat_features(n_input)
        # Linear layers.
        self.fc1 = nn.Linear(self.flat_fts, 512)
        self.fc2 = nn.Linear(512, n_output)
        # Dropout layer (attribute name kept as-is for compatibility).
        self.droput = nn.Dropout(p=0.25)
        # Output layer.
        self.output = nn.Softmax(dim=1)

    def _flat_features(self, n_input):
        """Return the per-sample feature count after conv1, conv2 and pooling."""
        height = width = n_input
        for conv in (self.conv1, self.conv2):
            # Stride 1, no padding, no dilation: each conv trims kernel - 1.
            kernel_h, kernel_w = conv.kernel_size
            height -= kernel_h - 1
            width -= kernel_w - 1
        # The 2x2 max-pool halves each spatial dimension, rounding down.
        height //= 2
        width //= 2
        if height <= 0 or width <= 0:
            raise ValueError(
                "n_input=%r is too small for kernel size %r"
                % (n_input, self.conv1.kernel_size)
            )
        return self.conv2.out_channels * height * width

    def forward(self, x):
        """Return class probabilities of shape ``(batch, n_output)`` for ``x``."""
        x = F.relu(self.conv1(x))
        # Max-pool over a (2, 2) window after the ReLU activation.
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = self.droput(x)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.output(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        return x.size()[1:].numel()