Hi, I have a Bayesian CNN, built with dropout between every layer, that classifies images of healthy or cancer patients. The problem is that the CNN predicts the same label for all, or nearly all, samples of the test set. Parameters: lr = 0.01, epochs = 10, number of images per class in the training set = 124 (248 images in the whole training set); I am using only a few samples just to try out the network.
Here is the code of my CNN:
class Net(nn.Module):
    """CNN with dropout after every layer that emits two class scores.

    Four conv -> ReLU -> max-pool stages are followed by five fully connected
    layers. The last layer returns raw, unnormalised scores (logits) so the
    result can be fed directly to ``nn.CrossEntropyLoss``.

    Args:
        input_size: Height/width of the square 3-channel input images.
            Defaults to 124, the size the original layer comments assume.

    Raises:
        ValueError: If ``input_size`` is too small to survive the four
            pooling stages.
    """

    def __init__(self, input_size=124):
        super().__init__()

        # Track the spatial size through the stack so fc1 can be sized.
        # (3, size, size) -> Conv -> (8, size, size)
        self.conv1 = nn.Conv2d(3, 8, 3, stride=1, padding=1)
        size = self.outputSize(input_size, 3, 1, 1)
        # The first pooling uses stride 1, so it only shrinks the map by one.
        self.pool1 = nn.MaxPool2d(2, stride=1, padding=0)
        size = self.outputSize(size, 2, 1, 0)

        # (8, size, size) -> Conv -> MaxPool -> (32, size // 2, size // 2)
        self.conv2 = nn.Conv2d(8, 32, 3, stride=1, padding=1)
        size = self.outputSize(size, 3, 1, 1)
        self.pool2 = nn.MaxPool2d(2, stride=2, padding=0)
        size = self.outputSize(size, 2, 2, 0)

        # (32, size, size) -> Conv -> MaxPool -> (56, size // 2, size // 2)
        self.conv3 = nn.Conv2d(32, 56, 3, stride=1, padding=1)
        size = self.outputSize(size, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(2, stride=2, padding=0)
        size = self.outputSize(size, 2, 2, 0)

        # (56, size, size) -> Conv -> MaxPool -> (72, size // 2, size // 2)
        self.conv4 = nn.Conv2d(56, 72, 3, stride=1, padding=1)
        size = self.outputSize(size, 3, 1, 1)
        self.pool4 = nn.MaxPool2d(2, stride=2, padding=0)
        size = self.outputSize(size, 2, 2, 0)

        if size < 1:
            raise ValueError(
                "input_size=%r is too small for four pooling stages" % (input_size,)
            )

        self.drop = nn.Dropout(0.1)

        self.fc1 = nn.Linear(72 * size * size, 4000)
        self.fc2 = nn.Linear(4000, 2000)
        self.fc3 = nn.Linear(2000, 500)
        self.fc4 = nn.Linear(500, 50)
        self.fc5 = nn.Linear(50, 2)

    @staticmethod
    def outputSize(in_size, kernel_size, stride, padding):
        """Return the spatial size produced by a conv/pool layer.

        Implements ``((n + 2*p - f) // s) + 1`` with floor division.
        """
        return (in_size + 2 * padding - kernel_size) // stride + 1

    @staticmethod
    def num_flat_features(x):
        """Return the number of values per sample (all dims except dim 0)."""
        count = 1
        for dim in x.size()[1:]:
            count *= dim
        return count

    def forward(self, x):
        """Map a batch of images to a ``(batch, 2)`` tensor of raw logits."""
        x = self.drop(x)
        x = self.drop(self.pool1(self.drop(F.relu(self.conv1(x)))))
        x = self.drop(self.pool2(self.drop(F.relu(self.conv2(x)))))
        x = self.drop(self.pool3(self.drop(F.relu(self.conv3(x)))))
        x = self.drop(self.pool4(self.drop(F.relu(self.conv4(x)))))

        x = x.view(-1, self.num_flat_features(x))

        x = self.drop(F.relu(self.fc1(x)))
        x = self.drop(F.relu(self.fc2(x)))
        x = self.drop(F.relu(self.fc3(x)))
        x = self.drop(F.relu(self.fc4(x)))

        # No ReLU and no dropout on the output layer: ReLU would clamp every
        # negative logit to 0 and dropout would randomly zero/rescale class
        # scores, which makes the two classes tie and the prediction collapse
        # onto a single label.
        return self.fc5(x)
And this is the code for the training:
def train_CNN(trainloader, lr, EPOCHS):
    """Train a freshly constructed ``Net`` with SGD and return it.

    Args:
        trainloader: Iterable yielding ``(inputs, labels)`` mini-batches.
        lr: Learning rate passed to the SGD optimizer.
        EPOCHS: Number of passes over ``trainloader``.

    Returns:
        The trained ``Net`` instance, moved to the module-level ``device``.
    """
    # Number of mini-batches between loss reports; the running loss is
    # averaged over exactly this many batches.
    log_every = 1

    # create the CNN object and move its parameters to the target device
    net = Net()
    net.to(device)

    # define a loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)

    # train the model
    for epoch in range(EPOCHS):
        print("\n", '-' * 30, "Train epoch: %s" % str(epoch+1), '-' * 30, '\n')
        start_time = time.time()
        running_loss = 0.0

        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            # The batch must live on the same device as the model,
            # otherwise the forward pass fails when `device` is CUDA.
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()  # clear gradients left from the last step

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                print('[Epoch: %d, Mini-Batch %5d] loss: %.3f' % (epoch+1, i+1, running_loss / log_every))
                running_loss = 0.0

        time_per_epoch = time.time() - start_time
        # `epoch` is zero-based and this epoch has just finished.
        seconds_left = int((EPOCHS - epoch - 1) * time_per_epoch)
        print("Time per epoch: %s, Est. complete in: %s" % (
            str(timedelta(seconds=time_per_epoch)),
            str(timedelta(seconds=seconds_left))))

    print('Finished training')
    return net
What could be the problem? Thanks a lot to all of you for your help.