The loss stays at a very low value, basically unchanged

Why does my loss stay at a very low value, basically unchanged, when I use a 1D U-Net to process 1D function data? I usually use MSE or KLD for my loss. Here is my network structure for processing a 40-dimensional input and a 200-dimensional output:

import torch
import torch.nn as nn
import torch.nn.functional as F


# define convolution block
def conv_block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True)
    )


class FCU(nn.Module):
    def __init__(self, in_channels=1, out_channels=1):
        super(FCU, self).__init__()

        # FC
        self.fc1 = nn.Linear(in_features=40, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=200)
        self.fc4 = nn.Linear(in_features=200, out_features=200)

        # encoder
        self.encoder1 = conv_block(in_channels, 32)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.encoder2 = conv_block(32, 64)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.encoder3 = conv_block(64, 128)
        self.pool3 = nn.MaxPool1d(kernel_size=2, stride=2)

        # middle
        self.center = conv_block(128, 256)

        # decoder
        self.upsample3 = nn.ConvTranspose1d(256, 128, kernel_size=2, stride=2)
        self.decoder3 = conv_block(256, 128)
        self.upsample2 = nn.ConvTranspose1d(128, 64, kernel_size=2, stride=2)
        self.decoder2 = conv_block(128, 64)
        self.upsample1 = nn.ConvTranspose1d(64, 32, kernel_size=2, stride=2)
        self.decoder1 = conv_block(64, 32)

        # output
        self.output = nn.ConvTranspose1d(32, out_channels, kernel_size=1)

    def forward(self, x):

        # FC
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.softmax(self.fc4(x), dim=1)

        x = x.unsqueeze(1)
        # encoder
        encode1 = self.encoder1(x)
        pool1 = self.pool1(encode1)
        encode2 = self.encoder2(pool1)
        pool2 = self.pool2(encode2)
        encode3 = self.encoder3(pool2)
        pool3 = self.pool3(encode3)

        # center
        center = self.center(pool3)

        # decoder
        upsample3 = self.upsample3(center)
        decode3 = self.decoder3(torch.cat([upsample3, encode3], dim=1))
        upsample2 = self.upsample2(decode3)
        decode2 = self.decoder2(torch.cat([upsample2, encode2], dim=1))
        upsample1 = self.upsample1(decode2)
        decode1 = self.decoder1(torch.cat([upsample1, encode1], dim=1))

        # output
        output = self.output(decode1)
        x = output.squeeze(1)
        x = F.softmax(x, dim=1)

        return x

I’ve tried adjusting the learning rate and other settings, and the dataset trains fine on a fully connected network.

Based on the docs for nn.KLDivLoss, the model output is expected to contain log probabilities:

To avoid underflow issues when computing this quantity, this loss expects the argument input in the log-space. The argument target may also be provided in the log-space if log_target = True.

while you are applying F.softmax on the output. Could you replace it with F.log_softmax and see if this would improve the training?
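
For reference, a minimal standalone sketch of the expected inputs to nn.KLDivLoss (the tensors here are made up for illustration, not taken from your code):

import torch
import torch.nn as nn
import torch.nn.functional as F

# hypothetical raw model output (logits) and a target given as probabilities
logits = torch.randn(8, 200, requires_grad=True)   # no softmax applied here
target = F.softmax(torch.randn(8, 200), dim=1)

criterion = nn.KLDivLoss(reduction='batchmean')    # expects log-probabilities as input
loss = criterion(F.log_softmax(logits, dim=1), target)
loss.backward()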

Thank you for answering my question, but my code for using KLD as the loss looks like this:

    # Forward pass
    Pred_A = model(batchG)
    DeAcost = F.kl_div(torch.log(Pred_A+1e-8), batchA, reduction='batchmean')

This should already be taking the logarithm of the output, and if I replace the loss function with mse_loss, the loss value also stays essentially unchanged.
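
As a side note, one reason F.log_softmax is usually preferred over torch.log(softmax + eps) is numerical range; a small standalone sketch (with made-up tensors) to illustrate the difference:

import torch
import torch.nn.functional as F

x = torch.randn(4, 200) * 50           # large logits make plain log(softmax) underflow
a = torch.log(F.softmax(x, dim=1) + 1e-8)
b = F.log_softmax(x, dim=1)
print(a.min(), b.min())                 # plain version saturates near log(1e-8) ≈ -18.4,
                                        # while log_softmax keeps the true, much more negative values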

When I use a convolutional network with MSELoss as the loss function, the loss value also remains largely unchanged, but when I use KLD as the loss function the loss decreases normally.

class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=2, padding=1)
        self.bn1 = nn.BatchNorm1d(num_features=32)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=2, padding=1)
        self.bn2 = nn.BatchNorm1d(num_features=64)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=2, padding=1)
        self.bn3 = nn.BatchNorm1d(num_features=128)
        self.pool3 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv4 = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=2, padding=1)
        self.bn4 = nn.BatchNorm1d(num_features=64)
        self.pool4 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 6, out_features=200)  # 64 channels * length 6 (for a length-40 input)

    def forward(self, x):
        x = F.elu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        x = F.elu(self.bn2(self.conv2(x)))
        x = F.elu(self.bn3(self.conv3(x)))
        x = self.pool3(x)
        x = F.elu(self.bn4(self.conv4(x)))
        x = self.pool4(x)

        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.softmax(x, dim=1)
        return x
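
For concreteness, this is roughly how the two losses described above would be computed on this model's output; the batch shapes are placeholders and this is only a sketch, not the original training code:

import torch
import torch.nn.functional as F

model = Decoder()

# placeholder batch: 16 samples, 1 channel, length-40 input; 200-dim target distribution
xb = torch.randn(16, 1, 40)
yb = F.softmax(torch.randn(16, 200), dim=1)

pred = model(xb)                       # (16, 200), already softmaxed in forward
mse = F.mse_loss(pred, yb)             # the MSE variant that stays flat
kld = F.kl_div(torch.log(pred + 1e-8), yb, reduction='batchmean')  # the KLD variant that decreases
print(mse.item(), kld.item())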