3D CNN accuracy and loss stay almost constant

I used PyTorch to create a 3D CNN with two conv layers.
I trained for 1000 epochs, as shown in the curve, but the accuracy and loss values stay almost constant.
Can you explain the reason to me, please?

class CNNModel(nn.Module):
    def __init__(self): 
        super(CNNModel, self).__init__()  # inheritance from nn.Module
        
        self.conv_layer1 = self._conv_layer_set(3, 32)
        self.conv_layer2 = self._conv_layer_set(32, 64) 
        self.fc1 = nn.Linear(64*28*28*28, 2) 
        self.fc2 = nn.Linear(1404928, num_classes) 
        self.relu = nn.LeakyReLU()
        self.batch = nn.BatchNorm1d(2)
        self.drop = nn.Dropout(p=0.15, inplace=True)
        
    def _conv_layer_set(self, in_c, out_c):
        conv_layer = nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
        )
        return conv_layer
    

    def forward(self, x):
        # Set 1
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = out.view(out.size(0), -1) 
        out = self.fc1(out)
        out = self.relu(out)
        out = self.batch(out)
        out = self.drop(out)
        out = F.softmax(out, dim=1) 
        return out

# Create CNN
model = CNNModel()
model.cuda()  # use the GPU
print(model)
# Cross Entropy Loss 
for param in model.parameters():
    param.requires_grad = True  # parameters require gradients by default, so this is redundant
error = nn.CrossEntropyLoss()
# SGD Optimizer
learning_rate = 0.001 
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
################################### accuracy function ###################################
def accuracyCalc(predicted, targets):
    correct = 0
    p = predicted.tolist()
    t = targets.flatten().tolist()
    for i in range(len(p)):
        if p[i] == t[i]:
            correct += 1
    accuracy = 100 * correct / targets.shape[0]
    return accuracy
#######################################################################################################
print(" build model --- %s seconds ---" % (time.time() - start_time))
################################### training ###################################
print('data preparation ')
training_data = np.load("/content/drive/My Drive/brats6G/train/training_data.npy", allow_pickle=True)
training_data = training_data[:2]
targets = np.load("/content/drive/My Drive/brats6G/train/targets.npy", allow_pickle=True)
targets = targets[:2]
from sklearn.utils import shuffle
training_data, targets = shuffle(training_data, targets)

training_data = changechannel(training_data, 1, 5)  # channels ordering: first channel ==> last channel
training_data = resize3Dimages(training_data)  # resize images
training_data = channel1to3(training_data)  # 1 channel to 3 channels ==> RGB
training_data = changechannel(training_data, 4, 1)  # last channel back to first

#Definition of hyperparameters
num_epochs = 5
loss_list_train = []
accuracy_list_train = []
for epoch in range(num_epochs): 
    outputs = []
    outputs = torch.tensor(outputs).cuda()
    for fold in range(0, len(training_data), 4):
        xtrain = training_data[fold : fold+4]
        xtrain = torch.tensor(xtrain).float().cuda()
        xtrain = xtrain.view(2, 3, 120, 120, 120)
        # Clear gradients
        optimizer.zero_grad()
        # Forward propagation
        v = model(xtrain)
        outputs = torch.cat((outputs,v.detach()),dim=0)
      
    targets = torch.Tensor(targets)
    labels = targets.cuda()
    outputs = torch.tensor(outputs,  requires_grad=True) 
    _, predicted = torch.max(outputs, 1)
    accuracy = accuracyCalc(predicted, targets)
    labels = labels.long() 
    labels=labels.view(-1) 
    loss = nn.CrossEntropyLoss()
    loss = loss(outputs, labels)    
    # Calculating gradients
    loss.backward()
    # Update parameters
    optimizer.step()
    loss_list_train.append(loss.data)  # store loss value
    accuracy_list_train.append(accuracy/100)
    np.save('/content/drive/My Drive/brats6G/accuracy_list_train.npy', np.array(accuracy_list_train))
    np.save('/content/drive/My Drive/brats6G/loss_list_train.npy', np.array(loss_list_train))
    print('Iteration: {}/{}  Loss: {}  Accuracy: {} %'.format(epoch+1,  num_epochs, loss.data, accuracy))
print('Model training  : Finished')

Remove the softmax at the end of the model, since nn.CrossEntropyLoss will internally apply F.log_softmax and nn.NLLLoss.

Also, probably unrelated to the training issue, but your linear layers are quite big.
I’m also a bit skeptical about using a dropout layer at the end of the model as it would mask logits.
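
For reference, a quick sketch of why the softmax should go (the tensor shapes below are arbitrary and only for illustration): nn.CrossEntropyLoss expects raw logits because it already combines F.log_softmax and nn.NLLLoss, so applying your own softmax first squashes the outputs twice and flattens the gradients.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Toy logits and targets, only to illustrate the equivalence.
logits = torch.randn(4, 2)           # raw model outputs, no softmax applied
target = torch.randint(0, 2, (4,))

loss_ce = nn.CrossEntropyLoss()(logits, target)
loss_manual = F.nll_loss(F.log_softmax(logits, dim=1), target)

print(torch.allclose(loss_ce, loss_manual))  # True: CrossEntropyLoss = log_softmax + NLLLoss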


Thank you for your reply @ptrblck
I set the linear layer in_features to 64 * 28 * 28 * 28 because the input image size is 120 * 120 * 120 and I have two conv layers. So how can I reduce the linear layer size and add another linear layer after the dropout layer?
Thank you in advance.

You could reduce the spatial size of the activation with pooling layers or a generally deeper architecture.
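
For example, something along these lines; the adaptive pooling target size, the hidden width of 128, and the model name are only illustrative assumptions, not taken from your code:

import torch
import torch.nn as nn

class SmallHeadModel(nn.Module):  # hypothetical name, just a sketch
    def __init__(self, num_classes=2):
        super().__init__()
        self.conv_layer1 = self._conv_layer_set(3, 32)
        self.conv_layer2 = self._conv_layer_set(32, 64)
        # Collapse the spatial size to 4x4x4 before flattening, so the first
        # linear layer sees 64*4*4*4 = 4096 features instead of 64*28*28*28.
        self.pool = nn.AdaptiveAvgPool3d((4, 4, 4))
        self.fc1 = nn.Linear(64 * 4 * 4 * 4, 128)
        self.relu = nn.LeakyReLU()
        self.fc2 = nn.Linear(128, num_classes)  # extra linear layer producing the logits

    def _conv_layer_set(self, in_c, out_c):
        return nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
        )

    def forward(self, x):
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.pool(out)
        out = out.view(out.size(0), -1)
        out = self.relu(self.fc1(out))
        return self.fc2(out)  # raw logits for nn.CrossEntropyLoss

x = torch.randn(2, 3, 120, 120, 120)
print(SmallHeadModel()(x).shape)  # torch.Size([2, 2])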
However, I would focus on the other two points, i.e. the softmax layer and dropout at the end of the model.
Did you remove them and did anything change?

I removed them, but the accuracy stays at 0.5 for all 200 epochs.

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__() 
        
        self.conv_layer1 = self._conv_layer_set(3, 32)
        self.conv_layer2 = self._conv_layer_set(32, 64)
        self.fc1 = nn.Linear(64*28*28*28, 2)
        self.relu = nn.LeakyReLU()
        self.batch = nn.BatchNorm1d(2)
  
        
    def _conv_layer_set(self, in_c, out_c):
        conv_layer = nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
        )
        return conv_layer
    

    def forward(self, x):
        # Set 1
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.batch(out) 
        return out

In that case you could try to overfit a small dataset, e.g. just 10 samples, and make sure your model is able to do so by playing around with hyperparameters.
If that’s still not working, there might be other issues I haven’t seen yet.

I am only using 4 images (2 as the training set and 2 as the validation set).

I have not found a solution to this problem so far.

Your model works fine with 10 random samples and achieves a perfect accuracy after a few steps:

device = 'cuda'
model = CNNModel().to(device)
data = torch.randn(10, 3, 120, 120, 120).to(device)
target = torch.randint(0, 2, (10,)).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(100):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    acc = (torch.argmax(output, 1) == target).float().mean()
    print('epoch {}, loss {}, acc {}'.format(epoch, loss.item(), acc))

epoch 0, loss 0.707537829875946, acc 0.5
epoch 1, loss 0.25594037771224976, acc 0.9000000357627869
epoch 2, loss 0.18612369894981384, acc 1.0
epoch 3, loss 0.18332494795322418, acc 1.0
epoch 4, loss 0.18075349926948547, acc 1.0
epoch 5, loss 0.17838451266288757, acc 1.0

so I would still recommend playing around with some hyperparameters and making sure your model can overfit the tiny dataset.


Thank you very much @ptrblck