Constant prediction in classification task

Hi there,
I’m trying to use a neural network for classification into two classes. Since this did not work with my dataset (the model predicts a constant class for each batch), I wrote a simpler version of the code, but I still can’t find the problem.

Here’s a minimal version of the code:

import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score
from tqdm import trange

class Model(nn.Module):
    def __init__(self, input_size, hidden_sizes_fc=[100, 2]):
        super().__init__()

        self.fc_list = nn.ModuleList([nn.Linear(input_size, hidden_sizes_fc[0])])
        for hidden_size_fc_ind in range(0, len(hidden_sizes_fc)-1):
            self.fc_list.append(nn.Linear(hidden_sizes_fc[hidden_size_fc_ind],
                                          hidden_sizes_fc[hidden_size_fc_ind+1]))

    def forward(self, x):
        relu = nn.ReLU()
        for i, FC in enumerate(self.fc_list):
            x = FC(x)
            x = relu(x)
        return x

def train_std_nn(net, train, val, epochs, loss_fn):
    optimiser = torch.optim.Adam(net.parameters(), lr=0.0001)
    train_losses_epochs = []
    val_score_epochs = []
    net.train()
    for epoch in trange(epochs):
        train_loss = 0.0
        total_computations = 0
        for X, Y in train:
            output = net(X)
            loss = loss_fn(output, Y)
            loss.backward()
            optimiser.step()
            train_loss += loss.item()
            total_computations += Y.shape[0]
        train_losses_epochs.append(train_loss / total_computations)
    for X_val, Y_val in val:
        output = net(X_val)
        top_p, top_class = torch.topk(output, 1, dim=1)
        pred = torch.flatten(top_class).detach().numpy()
        val_score_epochs.append(roc_auc_score(Y_val.numpy(), pred))
    return net, train_losses_epochs, val_score_epochs


epochs = 10
batch_size = 128

hidden_layers_size = [16, 2]
net = Model(input_size=11, hidden_sizes_fc=hidden_layers_size).double()
loss_fn = nn.CrossEntropyLoss()

aaa = torch.Tensor(np.random.rand(15, 11)).double()#.type(torch.LongTensor)
bbb = torch.Tensor(np.random.randint(0, 2, (15))).type(torch.LongTensor)

net, train_losses_epochs, val_score_epochs = train_std_nn(net, [[aaa, bbb]], [[aaa, bbb]], epochs, loss_fn)

I’ve plotted some graphs of the training loss and the validation score (area under the ROC curve), but the model doesn’t seem to learn anything: the training loss behaves almost randomly (mostly decreasing, but it depends on the run) and the AUC is always 0.5.

Thanks for your help!

Try using softmax() instead of relu() for the output layer.

I also tried

def forward(self, x):
    relu = nn.ReLU()
    sm = nn.Softmax(dim=1)
    for i, FC in enumerate(self.fc_list):
        x = FC(x)
        x = relu(x)
    x = sm(x)
    return x

but that did not work either.

You are still applying relu to the output layer; try:

def forward(self, x):
    relu = nn.ReLU()
    sm = nn.Softmax(dim=1)
    x = self.fc_list[0](x)
    x = relu(x)
    x = self.fc_list[1](x)
    x = sm(x)
    return x

Yeah, sorry, what I meant is: I tried both

def forward(self, x):
    relu = nn.ReLU()
    sm = nn.Softmax(dim=1)
    for i, FC in enumerate(self.fc_list):
        x = FC(x)
        x = relu(x)
    x = sm(x)
    return x

and

def forward(self, x):
    relu = nn.ReLU()
    sm = nn.Softmax(dim=1)
    x = self.fc_list[0](x)
    x = relu(x)
    x = self.fc_list[1](x)
    x = sm(x)
    return x

but neither of them works.

Sorry, my bad, but I think you forgot to call `optimiser.zero_grad()` before `loss.backward()`.
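
For reference, a minimal sketch of the inner training step with the gradients zeroed before each backward pass, using the names from your code:

for X, Y in train:
    optimiser.zero_grad()      # clear gradients accumulated in the previous step
    output = net(X)
    loss = loss_fn(output, Y)
    loss.backward()            # compute fresh gradients for this batch
    optimiser.step()           # update the parameters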

Thanks, I inserted that, but I still get very bad predictions somehow…

nn.CrossEntropyLoss expects raw logits as the model output, so remove the softmax and relu and pass the output of the last linear layer to the loss function.
Also, as explained before, you are not zeroing out the gradients.
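
In other words, keep the softmax out of the model and apply it only when you need probabilities, e.g. for the AUC computation. A rough sketch with the names from your code (the argmax gives the same result with or without the softmax, since softmax is monotonic):

logits = net(X_val)                    # raw, unnormalised scores from the last nn.Linear
probs = torch.softmax(logits, dim=1)   # probabilities, only needed for metrics/reporting
preds = torch.argmax(logits, dim=1)    # hard class predictions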

Thanks! The code now looks like this:

class Model(nn.Module):
    def __init__(self, input_size, hidden_sizes_fc=[100, 2]):
        super().__init__()

        self.fc_list = nn.ModuleList([nn.Linear(input_size, hidden_sizes_fc[0])])
        for hidden_size_fc_ind in range(0, len(hidden_sizes_fc)-1):
            self.fc_list.append(nn.Linear(hidden_sizes_fc[hidden_size_fc_ind],
                                          hidden_sizes_fc[hidden_size_fc_ind+1]))

    def forward(self, x):
        relu = nn.ReLU()
        x = self.fc_list[0](x)
        x = relu(x)
        x = self.fc_list[1](x)
        return x

def train_std_nn(net, train, val, epochs, loss_fn):
    optimiser = torch.optim.Adam(net.parameters(), lr=0.0001)
    train_losses_epochs = []
    val_score_epochs = []
    net.train()
    for epoch in trange(epochs):
        train_loss = 0.0
        total_computations = 0
        for X, Y in train:
            output = net(X)
            loss = loss_fn(output, Y)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            train_loss += loss.item()
            total_computations += Y.shape[0]
        train_losses_epochs.append(train_loss / total_computations)
    for X_val, Y_val in val:
        output = net(X_val)
        top_p, top_class = torch.topk(output, 1, dim=1)
        pred = torch.flatten(top_class).detach().numpy()
        val_score_epochs.append(roc_auc_score(Y_val.numpy(), pred))
    return net, train_losses_epochs, val_score_epochs


epochs = 200
batch_size = 4

hidden_layers_size = [16, 2]
net = Model(input_size=11, hidden_sizes_fc=hidden_layers_size).double()
loss_fn = nn.CrossEntropyLoss()

aaa = torch.Tensor(np.random.rand(15, 11)).double()#.type(torch.LongTensor)
bbb = torch.Tensor(np.random.randint(0, 2, (15))).type(torch.LongTensor)

net, train_losses_epochs, val_score_epochs = train_std_nn(net, [[aaa, bbb]], [[aaa, bbb]], epochs, loss_fn)

With e.g. the target vector [1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0], the prediction after 200 epochs (which I would consider enough to overfit the data) is still [1 0 1 1 1 1 1 1 1 1 1 1 1 1 1]. I ran the code a few times and always get very bad predictions.

I can perfectly overfit random samples using your code, so you might want to increase the learning rate to let it converge faster (it still converges with your lr of 1e-4, but it takes more epochs):

class Model(nn.Module):
    def __init__(self, input_size, hidden_sizes_fc=[100, 2]):
        super().__init__()
       
        self.fc_list = nn.ModuleList([nn.Linear(input_size, hidden_sizes_fc[0])])
        for hidden_size_fc_ind in range(0, len(hidden_sizes_fc)-1):
            self.fc_list.append(nn.Linear(hidden_sizes_fc[hidden_size_fc_ind],
                                          hidden_sizes_fc[hidden_size_fc_ind+1]))
    
    
    def forward(self, x):
        relu = nn.ReLU()
        x = self.fc_list[0](x)
        x = relu(x)
        x = self.fc_list[1](x)
        return x

hidden_layers_size = [16, 2]
net = Model(input_size=11, hidden_sizes_fc=hidden_layers_size)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

data = torch.rand(15, 11)
target = torch.randint(0, 2, (15,))

for epoch in range(1000):
    optimizer.zero_grad()
    output = net(data)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()
    preds = torch.argmax(output, dim=1)
    print('epoch {}, loss {:.3f}, acc {}'.format(
        epoch, loss.item(), (preds==target).float().mean()))