Why does my Bayesian CNN predict only one class?

Hi, I have a Bayesian CNN, built by applying dropout between every layer, that should classify images of healthy vs. cancer patients. The problem is that the CNN predicts always, or nearly always, the same label for all samples in the test set. Parameters: lr = 0.01, epochs = 10, number of images per class in the training set = 124 (248 images in total) — just a few samples to try out the network.
Here is the code of my CNN:

class Net(nn.Module):
    """CNN with dropout after every layer (MC-dropout style), two output classes.

    Layer sizes are derived for 3 x 124 x 124 inputs. The class relies on two
    helper methods that are not part of this snippet: ``outputSize`` (spatial
    size after a conv/pool layer) and ``num_flat_features`` (feature count used
    to flatten the last feature map).
    """

    def __init__(self):
        super(Net, self).__init__()

        # self.outputSize receives (inputSize, kernelSize, stride, padding) and
        # is expected to return ((n + 2*p - f) / s) + 1 for one spatial side.

        # nn.Conv2d receives (#inputChannels, #outputChannels, kernelSize)
        # (3, 124, 124) -> Conv -> (8, outputSize_1, outputSize_1)
        self.conv1 = nn.Conv2d(3, 8, 3, stride=1, padding=1)
        outputSize_1 = self.outputSize(124, 3, 1, 1)

        # (8, outputSize_1, outputSize_1) -> MaxPool -> (8, outputSize_2, outputSize_2)
        # NOTE(review): this pool uses stride 1 while pool2..pool4 use stride 2,
        # so it barely reduces the spatial size - confirm this is intended.
        self.pool1 = nn.MaxPool2d(2, stride=1, padding=0)
        outputSize_2 = self.outputSize(outputSize_1, 2, 1, 0)

        # (8, outputSize_2, outputSize_2) -> Conv -> (32, outputSize_3, outputSize_3)
        self.conv2 = nn.Conv2d(8, 32, 3, stride=1, padding=1)
        outputSize_3 = self.outputSize(outputSize_2, 3, 1, 1)

        # (32, outputSize_3, outputSize_3) -> MaxPool -> (32, outputSize_4, outputSize_4)
        self.pool2 = nn.MaxPool2d(2, stride=2, padding=0)
        outputSize_4 = self.outputSize(outputSize_3, 2, 2, 0)

        # (32, outputSize_4, outputSize_4) -> Conv -> (56, outputSize_5, outputSize_5)
        self.conv3 = nn.Conv2d(32, 56, 3, stride=1, padding=1)
        outputSize_5 = self.outputSize(outputSize_4, 3, 1, 1)

        # (56, outputSize_5, outputSize_5) -> MaxPool -> (56, outputSize_6, outputSize_6)
        self.pool3 = nn.MaxPool2d(2, stride=2, padding=0)
        outputSize_6 = self.outputSize(outputSize_5, 2, 2, 0)

        # (56, outputSize_6, outputSize_6) -> Conv -> (72, outputSize_7, outputSize_7)
        self.conv4 = nn.Conv2d(56, 72, 3, stride=1, padding=1)
        outputSize_7 = self.outputSize(outputSize_6, 3, 1, 1)

        # (72, outputSize_7, outputSize_7) -> MaxPool -> (72, outputSize_8, outputSize_8)
        self.pool4 = nn.MaxPool2d(2, stride=2, padding=0)
        outputSize_8 = self.outputSize(outputSize_7, 2, 2, 0)

        # A single dropout module (p = 0.1) is reused after every layer.
        self.drop = nn.Dropout(0.1)

        # Fully connected head; fc5 produces one raw score per class.
        self.fc1 = nn.Linear(72 * outputSize_8 * outputSize_8, 4000)
        self.fc2 = nn.Linear(4000, 2000)
        self.fc3 = nn.Linear(2000, 500)
        self.fc4 = nn.Linear(500, 50)
        self.fc5 = nn.Linear(50, 2)

    def forward(self, x):
        """Run the network on a batch and return the raw class scores (logits).

        Dropout is applied to the input and after every hidden layer. The
        output of ``fc5`` is returned untouched: nn.CrossEntropyLoss expects
        unnormalized logits, and passing them through ReLU + dropout clamps
        and randomly zeroes the scores, which can collapse the predictions
        onto a single class.
        """
        x = self.drop(x)
        x = self.drop(self.pool1(self.drop(F.relu(self.conv1(x)))))
        x = self.drop(self.pool2(self.drop(F.relu(self.conv2(x)))))
        x = self.drop(self.pool3(self.drop(F.relu(self.conv3(x)))))
        x = self.drop(self.pool4(self.drop(F.relu(self.conv4(x)))))

        # Flatten every sample to a vector before the fully connected head.
        x = x.view(-1, self.num_flat_features(x))
        x = self.drop(F.relu(self.fc1(x)))
        x = self.drop(F.relu(self.fc2(x)))
        x = self.drop(F.relu(self.fc3(x)))
        x = self.drop(F.relu(self.fc4(x)))

        # No activation and no dropout on the final logits.
        return self.fc5(x)

And this is the code for the training:

def train_CNN(trainloader, lr, EPOCHS, log_every=1):
  """Train a freshly created ``Net`` with SGD (momentum 0.9) and return it.

  Args:
      trainloader: iterable yielding ``(inputs, labels)`` mini-batches.
      lr: learning rate passed to ``optim.SGD``.
      EPOCHS: number of passes over ``trainloader``.
      log_every: print the mean loss once every this many mini-batches.

  Returns:
      The trained ``Net`` instance, located on the module-level ``device``.

  Raises:
      ValueError: if ``log_every`` is smaller than 1.
  """
  if log_every < 1:
      raise ValueError("log_every must be >= 1")

  # create the CNN object and move its parameters to the target device
  net = Net()
  net.to(device)
  net.train()  # make sure dropout is active while training

  # define a loss function and optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)

  # train the model
  for epoch in range(EPOCHS):
      print("\n", '-' * 30, "Train epoch: %s" % str(epoch+1), '-' * 30, '\n')
      start_time = time.time()
      running_loss = 0.0
      for i, (inputs, labels) in enumerate(trainloader):
          # the batch must live on the same device as the model
          inputs = inputs.to(device)
          labels = labels.to(device)

          # zero the parameter gradients accumulated by the previous step
          optimizer.zero_grad()

          # forward pass, loss, backpropagation and weight update
          outputs = net(inputs)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          # print statistics: mean loss over the last `log_every` mini-batches
          running_loss += loss.item()
          if (i + 1) % log_every == 0:
              print('[Epoch: %d, Mini-Batch %5d] loss: %.3f' % (epoch+1, i+1, running_loss/log_every))
              running_loss = 0.0
      time_per_epoch = time.time() - start_time
      # epochs still to run after the one that just finished
      seconds_left = int((EPOCHS - epoch - 1) * time_per_epoch)
      print("Time per epoch: %s, Est. complete in: %s" % (
                                  str(timedelta(seconds=time_per_epoch)),
                                  str(timedelta(seconds=seconds_left))))
  print('Finished training')
  return net

What could be the problem? Thanks a lot to everyone who can help me.