Different outcomes with nn.Sequential and nn.functional

Hi, I have defined the following two architectures using some valuable suggestions from this forum. In my opinion they are the same, but I am getting very different performance after the same number of epochs. The only difference is that one of them uses nn.Sequential and the other builds the layers individually and calls the functional API in forward. Any ideas?

The first architecture is the following:

import numpy as np
import torch
import torch.nn as nn


class Net(nn.Module):

    def __init__(self, n_input, n_output, w):
        super(Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, w),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Dropout(p=0.25)
        )
        self.flat_fts = self.get_flat_fts(n_input, self.features)
        self.classifier = nn.Sequential(
            nn.Linear(self.flat_fts, 512),
            nn.ReLU(),
            nn.Linear(512, n_output),  # Always check n_input here.
            nn.LogSoftmax(dim=1)
        )

    def get_flat_fts(self, in_size, fts):
        # Run a dummy input through the feature extractor to infer the
        # flattened size expected by the first Linear layer.
        f = fts(torch.ones(1, 1, in_size, in_size))
        return int(np.prod(f.size()[1:]))

    def forward(self, x):
        fts = self.features(x)
        flat_fts = fts.view(-1, self.flat_fts)
        out = self.classifier(flat_fts)
        return out

The second one is the following:

import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self, n_input, n_output, w):
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, w)
        self.conv2 = nn.Conv2d(32, 64, w)

        # Linear layers
        self.fc1 = nn.Linear(64 * 30 * 30, 512)
        self.fc2 = nn.Linear(512, n_output)

        # Dropout layer
        self.dropout = nn.Dropout(p=0.25)

        # Output activation
        self.output = nn.Softmax(dim=1)

    def forward(self, x):

        x = F.relu(self.conv1(x))

        # Max pooling over a (2, 2) window, after a ReLU activation
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = self.dropout(x)

        x = x.view(-1, self.num_flat_features(x))

        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return self.output(x)

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

One has a Softmax for the output and the other has a LogSoftmax. Is that expected?
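The post doesn't show the training loop, but if the loss is nn.NLLLoss (a common pairing with LogSoftmax), the two models won't train the same way: NLLLoss expects log-probabilities, so feeding it Softmax probabilities distorts the loss and the gradients. A minimal sketch of the consistent pairings, assuming a hypothetical batch of raw logits standing in for the output of the last Linear layer:

import torch
import torch.nn as nn

# Hypothetical logits for a batch of 4 samples and 10 classes.
logits = torch.randn(4, 10)
targets = torch.tensor([1, 0, 3, 9])

# Pairing 1: LogSoftmax + NLLLoss (what the first architecture implies).
log_probs = nn.LogSoftmax(dim=1)(logits)
loss_a = nn.NLLLoss()(log_probs, targets)

# Pairing 2: raw logits + CrossEntropyLoss, which applies
# LogSoftmax + NLLLoss internally and gives the same value.
loss_b = nn.CrossEntropyLoss()(logits, targets)

print(loss_a, loss_b)  # identical up to floating-point error

# Feeding Softmax probabilities into NLLLoss (what the second
# architecture would effectively do) gives a different, poorly scaled loss.
probs = nn.Softmax(dim=1)(logits)
loss_c = nn.NLLLoss()(probs, targets)
print(loss_c)

So if the second model keeps nn.Softmax, the usual fix is to switch it to nn.LogSoftmax, or drop the final activation entirely and use nn.CrossEntropyLoss on the raw logits.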


No, it isn't. That might be it. Thank you.