CNN always produces the same output

Hello everyone,
I am currently facing the problem that my CNN always produces the same output, regardless of the input.
For a simple test I gave it 20 numpy 2D data arrays of size 373x171, which the CNN should classify into one of two categories (0 or 1). The training data contains 10 samples of each category.
The dataset has a maximum value of 1 and a minimum of -0.82; the mean is close to zero.
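For reference, this is roughly how I computed those statistics (a quick sketch; I'm assuming here that `training_data` is the stacked numpy array holding all 20 samples):

```python
import numpy as np

training_data = np.load("...")  # stacked samples, shape (20, 373, 171)
print(training_data.shape, training_data.dtype)
print(training_data.max(), training_data.min(), training_data.mean())
# -> max is 1, min is -0.82, mean is close to zero
```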
I suspect that the parameters I chose for the CNN are not a good fit.
I basically just adapted the code from the 60 Minute Blitz tutorial for my purposes and am otherwise not familiar with CNNs.
Maybe someone can point me in the right direction:
```python
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset


class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.targets = torch.LongTensor(labels)

    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]
        return x, y

    def __len__(self):
        return len(self.data)


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=16)
        self.pool = nn.MaxPool2d(5, 5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 13 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        # the DataLoader yields x of shape (batch, 373, 171); x[None, ...]
        # prepends a dimension so conv1 receives (1, batch, 373, 171), which
        # only has the correct (N, C, H, W) layout when batch_size == 1
        x = self.pool(F.relu(self.conv1(x[None, ...])))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def how_to_train_your_network():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    training_data = np.load("...")
    training_label = np.load("...")

    training_dataset = MyDataset(training_data, training_label)

    batch_size = 1

    trainloader = torch.utils.data.DataLoader(training_dataset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)

    net = Net()
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

    for epoch in range(50):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            # note: with only 20 samples per epoch this condition never
            # triggers, so no loss is ever printed
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0

    print('Finished Training')
    PATH = './cifar_net.pth'  # filename kept from the tutorial
    torch.save(net.state_dict(), PATH)
```
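In case the flatten size matters: this is how I arrived at `in_features=16 * 13 * 5` for `fc1`, verified with a dummy input through the same layers as above (so I don't think the shape arithmetic is the problem):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

conv1 = nn.Conv2d(1, 6, kernel_size=16)
pool = nn.MaxPool2d(5, 5)
conv2 = nn.Conv2d(6, 16, kernel_size=5)

x = torch.randn(1, 1, 373, 171)  # one dummy sample in (N, C, H, W) layout
x = pool(F.relu(conv1(x)))       # conv1: 358x156, pool: 71x31
x = pool(F.relu(conv2(x)))       # conv2: 67x27,  pool: 13x5
print(x.shape)                   # torch.Size([1, 16, 13, 5]) -> 16 * 13 * 5 = 1040
```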

I’m able to overfit 20 random samples using your specified input shapes and the posted model by playing around with some optimizers and their hyperparameters, so you might want to do the same:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=16)
        self.pool = nn.MaxPool2d(5, 5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 13 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=2)

    def forward(self, x):
        # here x already carries the channel dimension, so no x[None, ...] is needed
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# random stand-in data with your shapes: 20 samples, 1 channel, 373x171
training_dataset = TensorDataset(torch.randn(20, 1, 373, 171), torch.randint(0, 2, (20,)))
batch_size = 1
trainloader = torch.utils.data.DataLoader(training_dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
device = "cuda"  # assumes a GPU is available
net = Net().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

for epoch in range(20):  # loop over the dataset multiple times
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {loss.item():.3f}')


# check that the model now fits all 20 random samples
out = net(training_dataset.tensors[0].to(device))
preds = torch.argmax(out, dim=1)
preds.to("cpu") == training_dataset.tensors[1]
# tensor([True, True, True, True, True, True, True, True, True, True, True, True,
#         True, True, True, True, True, True, True, True])
```
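If you want to confirm that your real network has collapsed to predicting a single class, a quick diagnostic (just a sketch, assuming `net`, `trainloader`, and `device` from your code above) is to count the predicted classes over the whole training set:

```python
net.eval()
counts = torch.zeros(2, dtype=torch.long)
with torch.no_grad():
    for inputs, labels in trainloader:
        preds = torch.argmax(net(inputs.to(device)), dim=1)
        counts += torch.bincount(preds.cpu(), minlength=2)
print(counts)  # something like tensor([20, 0]) would confirm the collapse
```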

Thank you so much for your help.
I appreciate you taking the time to answer a question you've surely heard hundreds of times already.
Your suggested changes worked for me, so I will now start experimenting with the various parameters.
