Getting same output from model

Hello everyone,

I’m trying to use my neural network to classify between two labels.
I created my own dataset, normalized it to the range [-1, 1], and fed it to the network.
When I try to train the network, it produces the same output no matter which batch it iterates on.

I tried different batch sizes and learning rates, but there was no improvement.

The input dimension is 3 x 32 x 32 (specifically, the inputs are gradients computed on CIFAR images).
I tried both a convolutional net and a plain fully connected net, but both produce constant outputs.
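
For context, each sample is produced roughly like this (a simplified sketch; the source model and loss here are stand-ins, not my real setup):

import torch
import torch.nn as nn

# Hypothetical source model; the real one is not shown here.
source_model = nn.Sequential(
    nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
    nn.Flatten(), nn.Linear(8 * 32 * 32, 10),
)
criterion = nn.CrossEntropyLoss()

images = torch.randn(8, 3, 32, 32)   # stand-in for a CIFAR batch
labels = torch.randint(0, 10, (8,))

images.requires_grad_(True)          # track gradients w.r.t. the input
loss = criterion(source_model(images), labels)
loss.backward()

input_grads = images.grad.detach()   # shape 8 x 3 x 32 x 32, one sample per image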

Thanks!

This is the network:

import torch.nn as nn

class ConvDetector(nn.Module):
    def __init__(self):
        super(ConvDetector, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.relu4 = nn.ReLU()

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.reshape(-1, 16 * 5 * 5)  # flatten to (batch, 400)
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        x = self.fc3(x)
        return x

Your model itself works fine and can overfit a small random dataset, so the issue is most likely in the data or the training procedure:

import torch
import torch.nn as nn

class ConvDetector(nn.Module):
    def __init__(self):
        super(ConvDetector, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.relu3 = nn.ReLU()
        self.relu4 = nn.ReLU()

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten while keeping the batch dimension explicit
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        x = self.fc3(x)
        return x

model = ConvDetector()
data = torch.randn(64, 3, 32, 32)
target = torch.randint(0, 2, (64,))

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(20):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    
    pred = torch.argmax(output, dim=1)
    acc = (pred == target).float().mean()
    print("epoch {}, loss {}, acc {}".format(epoch, loss.item(), acc.item()))


# epoch 0, loss 0.6974315047264099, acc 0.484375
# epoch 1, loss 0.6893928647041321, acc 0.515625
# epoch 2, loss 0.6862733364105225, acc 0.515625
# ...
# epoch 17, loss 0.32708993554115295, acc 1.0
# epoch 18, loss 0.2823491096496582, acc 1.0
# epoch 19, loss 0.2445635348558426, acc 1.0

Thank you!
Well, my model is trained on non-random data; specifically, the inputs are the gradients of a model with respect to its input. The values of these gradients lie between 0.0 and roughly 0.0018, so I normalized them to [-1, 1], but there is still no learning.
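
The normalization is just min-max scaling to [-1, 1] (a minimal sketch; the single global min/max and the `grads` tensor here are stand-ins):

import torch

grads = torch.rand(1000, 3, 32, 32) * 0.0018  # stand-in for the raw input gradients

g_min, g_max = grads.min(), grads.max()
normalized = 2 * (grads - g_min) / (g_max - g_min) - 1  # maps [g_min, g_max] onto [-1, 1]
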
This is the training loop:

import torch.nn as nn
import torch.optim as optim

def train(net, n_epochs, trainloader, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)
    for epoch in range(n_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
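
For completeness, this is how the constant-output symptom shows up when I print the raw outputs for two unrelated batches (a sketch with random stand-in inputs; in practice I use two batches from trainloader):

import torch

net = ConvDetector()
batch_a = torch.randn(4, 3, 32, 32)
batch_b = torch.randn(4, 3, 32, 32)

with torch.no_grad():
    out_a = net(batch_a)  # logits for batch A
    out_b = net(batch_b)  # logits for batch B

print(out_a)
print(out_b)
# In my runs (on the real data, after training), the rows come out
# nearly identical, both within and across batches.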