Loss does not change

Tshzzz · March 14, 2018, 8:36am

I am trying to train a network to do object detection. After 2–3 epochs of training, the loss stops changing.

class my_test_net(nn.Module):
    """Detection network: a VGG16 base followed by extra conv layers and one FC head.

    ``forward`` returns a tensor of shape ``(N, 50, 25)``; each of the 50 rows
    presumably holds 21 class scores followed by 4 box values (the code only
    fixes the ``21 + 4`` width -- confirm the layout against the loss function).
    """

    input_size = 300

    def __init__(self):
        # The original called super(SSD300, self), which names a different
        # class and fails unless an unrelated SSD300 happens to be in scope.
        super(my_test_net, self).__init__()

        # Backbone; VGG16 is expected to be provided elsewhere on this class.
        self.base = self.VGG16()
        self.norm_1 = nn.BatchNorm2d(512)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1, dilation=1)

        self.norm_2 = nn.BatchNorm2d(512)

        # Dilated 3x3 conv (padding == dilation keeps the spatial size).
        self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
        self.conv7 = nn.Conv2d(1024, 1024, kernel_size=1)

        self.norm_3 = nn.BatchNorm2d(1024)

        # Single linear head producing all 50 * (21 + 4) outputs at once.
        self.fc = nn.Linear(1024 * 6 * 6, 50 * (21 + 4))
        # In-place initialiser; the non-underscore variant is deprecated.
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Run the network on ``x`` and return predictions shaped ``(N, 50, 25)``.

        The flatten step requires the feature map entering it to be
        1024 x 6 x 6 per sample; otherwise ``view`` silently regroups the
        batch dimension or raises.  NOTE(review): whether the base network
        produces that size for 300 x 300 inputs cannot be seen here -- confirm.
        """
        features = self.norm_1(self.base(x))

        features = F.max_pool2d(features, kernel_size=2, stride=2, ceil_mode=True)

        features = F.relu(self.conv5_1(features))
        features = F.relu(self.conv5_2(features))
        features = F.relu(self.conv5_3(features))
        # Stride-1 pool with padding 1 keeps the spatial size unchanged.
        features = F.max_pool2d(
            features, kernel_size=3, padding=1, stride=1, ceil_mode=True
        )

        features = F.relu(self.conv6(self.norm_2(features)))
        features = F.relu(self.conv7(features))
        # kernel 2 / padding 1 / stride 1 grows each spatial dim by one.
        features = F.max_pool2d(features, kernel_size=2, padding=1, stride=1)

        features = self.norm_3(features)

        flat = features.view(-1, 1024 * 6 * 6)

        # torch.sigmoid replaces the deprecated F.sigmoid.
        scores = torch.sigmoid(self.fc(flat))
        # Follow the module's train/eval mode.  The original forced
        # training=True, so dropout kept zeroing predictions after eval().
        # NOTE(review): dropout on the final sigmoid output zeroes/rescales
        # the predictions themselves during training; consider moving it
        # before self.fc or removing it.
        scores = F.dropout(scores, training=self.training)

        return scores.view(-1, 50, 21 + 4)

[1] batch_loss = 1.79685 epoch_loss = 59.99467
[2] batch_loss = 2.68952 epoch_loss = 54.00942
[3] batch_loss = 1.10251 epoch_loss = 54.00942
[4] batch_loss = 4.84961 epoch_loss = 54.00942
[5] batch_loss = 4.32710 epoch_loss = 54.00942

richard · March 14, 2018, 2:54pm

Have you checked your gradients? It’s possible that they’re 0 (and have vanished).

mukul54 · May 26, 2020, 2:35am

@Tshzzz did you solve this issue? I am facing the same issue with SSD.