Binary classification: loss stuck at 0.69

Please help me understand why the loss of the first code (MTNet) cannot be reduced.

With the first code (MTNet, which shares the first two convolutional blocks between the two branches), the loss does not drop; it stays at 0.69 (≈ ln 2, i.e., chance level for two classes). With the second code (LBNet, which shares only the first convolutional block), the loss drops to around 0.0x.

This is a method from a published paper: binary classification trained with CrossEntropyLoss, structured like a Siamese network. The paper reports that MTNet performs better.

import torch
import torch.nn as nn


class MTNet(nn.Module):
    def __init__(self):
        super(MTNet, self).__init__()
        self.convolutions = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(16, 64, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2)

        )
        self.c3 = nn.Sequential(
            # In LBNet this second conv block sits here (unshared); in MTNet it
            # has been moved up into the shared trunk above:
            # nn.Conv2d(16, 64, kernel_size=7, stride=1),
            # nn.ReLU(),
            # nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 256, kernel_size=7, stride=1),
        )
        self.Linears = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(21 * 21 * 256, 2),
            nn.LogSoftmax(dim=1),  # explicit dim; the implicit default is deprecated
        )

    def forward_once(self, x):
        # Shared trunk, applied separately to each branch.
        return self.convolutions(x)

    def forward(self, x):
        # Split the 2-channel input into two single-channel branch inputs.
        input1 = x[:, 0, :, :].unsqueeze(1)
        input2 = x[:, 1, :, :].unsqueeze(1)

        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        # Fuse the branches by element-wise addition, then classify.
        outputf = self.c3(output1 + output2)

        outputf = outputf.view(-1, 21 * 21 * 256)
        outputf = self.Linears(outputf)
        return outputf
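
For reference, the 21*21*256 flatten only works out at one specific input resolution. Here is a minimal shape check, assuming 128x128 single-channel image pairs (my assumption; 128x128 is the size that yields 21x21 feature maps with this stack of layers):

import torch

# Sanity check under the 128x128 assumption:
# 128 -> conv7 -> 122 -> pool2 -> 61 -> conv7 -> 55 -> pool2 -> 27 -> conv7 -> 21
model = MTNet()
dummy = torch.randn(4, 2, 128, 128)  # a batch of 4 image pairs
out = model(dummy)
print(out.shape)  # torch.Size([4, 2])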

CODE 2 (LBNet)

class LBNet(nn.Module):
    def __init__(self):
        super(LBNet, self).__init__()
        self.convolutions = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # In MTNet this second conv block is part of the shared trunk; in
            # LBNet it has been moved down into the unshared head c3:
            # nn.Conv2d(16, 64, kernel_size=7, stride=1),
            # nn.ReLU(),
            # nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            # nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.c3 = nn.Sequential(
            nn.Conv2d(16, 64, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 256, kernel_size=7, stride=1),
        )
        self.Linears = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(21 * 21 * 256, 2),
            nn.LogSoftmax(dim=1),  # explicit dim; the implicit default is deprecated
        )

    def forward_once(self, x):
        # Shared trunk, applied separately to each branch.
        return self.convolutions(x)

    def forward(self, x):
        # Split the 2-channel input into two single-channel branch inputs.
        input1 = x[:, 0, :, :].unsqueeze(1)
        input2 = x[:, 1, :, :].unsqueeze(1)

        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        # Fuse the branches by element-wise addition, then classify.
        outputf = self.c3(output1 + output2)

        outputf = outputf.view(-1, 21 * 21 * 256)
        outputf = self.Linears(outputf)
        return outputf
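
For completeness, this is roughly how I train both models; a minimal sketch in which random tensors stand in for my real dataset (the batch size, optimizer, and learning rate here are placeholders, not the paper's settings):

import torch
import torch.nn as nn

model = MTNet()  # or LBNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for step in range(100):
    x = torch.randn(8, 2, 128, 128)  # 8 image pairs (placeholder data)
    y = torch.randint(0, 2, (8,))    # binary labels
    optimizer.zero_grad()
    out = model(x)                   # note: the model already ends in LogSoftmax
    loss = criterion(out, y)
    loss.backward()
    optimizer.step()
    if step % 10 == 0:
        print(step, loss.item())     # with my real data, MTNet stays at ~0.69 (= ln 2)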


What is the question?

Why the loss of the first code (MTNet) cannot be reduced, while LBNet trains fine.
