Please help me understand why the loss of the first model (MTNet) cannot be reduced.
With the first code (MTNet, which shares two layers of parameters), the loss does not drop — it stays at 0.69.
With the second code (LBNet, which shares only the first layer's parameters), the loss can be reduced to around 0.0x.
This is a method from a published paper: binary classification using CrossEntropyLoss, structured like a Siamese network. The paper claims that MTNet performs better.
class MTNet(nn.Module):
    """Siamese-style binary classifier sharing the first TWO conv stages
    between the two input channels (more sharing than LBNet).

    Input:  x of shape (B, 2, 126, 126) -- two single-channel images stacked
            on the channel axis (126 is implied by the 21*21*256 flatten size
            -- TODO confirm against the data pipeline).
    Output: (B, 2) log-probabilities.

    NOTE(review): this head ends in LogSoftmax, so it must be trained with
    nn.NLLLoss, NOT nn.CrossEntropyLoss. CrossEntropyLoss applies log-softmax
    internally; stacking it on top of LogSoftmax squashes the logits twice,
    kills the gradient signal, and leaves the loss stuck near ln(2) ~= 0.69
    on a binary task -- exactly the symptom described. Either switch the
    criterion to NLLLoss or drop the LogSoftmax layer and keep
    CrossEntropyLoss (do one, not both).
    """

    def __init__(self):
        super(MTNet, self).__init__()
        # Shared twin branch: two (conv -> ReLU -> LRN -> maxpool) stages.
        self.convolutions = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 64, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Fusion stage applied after the two branch outputs are summed.
        self.c3 = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=7, stride=1),
        )
        self.Linears = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(21 * 21 * 256, 2),
            # FIX: dim was implicit (deprecated in PyTorch); make it explicit.
            # Same values as before for this 2-D input, but unambiguous.
            nn.LogSoftmax(dim=1),
        )

    def forward_once(self, x):
        """Run one (B, 1, H, W) image through the shared conv branch."""
        return self.convolutions(x)

    def forward(self, x):
        # Split the stacked pair into two single-channel inputs.
        input1 = x[:, 0, :, :].unsqueeze(1)
        input2 = x[:, 1, :, :].unsqueeze(1)
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        # Fuse the twin branches by elementwise sum, then the final conv.
        outputf = self.c3(output1 + output2)
        # Flatten keyed on the batch dimension: safer than hard-coding -1
        # first, which silently reshapes the batch if the spatial size is off.
        outputf = outputf.view(outputf.size(0), -1)
        return self.Linears(outputf)
Code 2 (LBNet):
class LBNet(nn.Module):
    """Siamese-style binary classifier sharing only the FIRST conv stage
    between the two input channels (less sharing than MTNet).

    Input:  x of shape (B, 2, 126, 126) -- two single-channel images stacked
            on the channel axis (126 is implied by the 21*21*256 flatten size
            -- TODO confirm against the data pipeline).
    Output: (B, 2) log-probabilities.

    NOTE(review): as with MTNet, the LogSoftmax head must be paired with
    nn.NLLLoss. Training with nn.CrossEntropyLoss applies log-softmax a
    second time, which weakens gradients; fix the criterion or remove the
    LogSoftmax layer (one or the other, not both).
    """

    def __init__(self):
        super(LBNet, self).__init__()
        # Shared twin branch: a single (conv -> ReLU -> LRN -> maxpool) stage.
        self.convolutions = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Fusion stages applied after the two branch outputs are summed;
        # same layer stack as MTNet overall, only the sharing point differs.
        self.c3 = nn.Sequential(
            nn.Conv2d(16, 64, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.LocalResponseNorm(5, 0.0001, 0.75, 2),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 256, kernel_size=7, stride=1),
        )
        self.Linears = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(21 * 21 * 256, 2),
            # FIX: dim was implicit (deprecated in PyTorch); make it explicit.
            nn.LogSoftmax(dim=1),
        )

    def forward_once(self, x):
        """Run one (B, 1, H, W) image through the shared conv branch."""
        return self.convolutions(x)

    def forward(self, x):
        # Split the stacked pair into two single-channel inputs.
        input1 = x[:, 0, :, :].unsqueeze(1)
        input2 = x[:, 1, :, :].unsqueeze(1)
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        # Fuse the twin branches by elementwise sum, then the deeper stack.
        outputf = self.c3(output1 + output2)
        # Flatten keyed on the batch dimension (robust to size mismatches).
        outputf = outputf.view(outputf.size(0), -1)
        return self.Linears(outputf)