CIFAR-10 classification accuracy different on PyTorch and Keras

cyrus_raptor · September 9, 2019, 12:31pm

Greetings,

I’ve made a custom CNN in PyTorch for classifying 10 classes in the CIFAR-10 dataset. My classification accuracy on the test dataset is 45.739%, this is very low and I thought it’s because my model is not very deep but I implemented the same model in Keras and the classification accuracy is 78.92% on test dataset. I have used the same model architecture, strides, padding, dropout rate, optimizer, loss function, learning rate, batch size, number of epochs on both PyTorch and Keras and despite that, the difference in the classification accuracy is still huge thus I’m not able to decide how I should debug further.

Surely I believe I’m missing something in my PyTorch program but I’m unable to spot the problem. I have attached both my programs below. Yes this is a PyTorch forum however I think researchers and developers here can relate to other libraries as well, will be grateful to any suggestions .

My code in PyTorch:

#========DEFINE THE CNN MODEL=====
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3,1,1)#SAME PADDING
        self.conv2 = nn.Conv2d(32,32,3,1,0)#VALID PADDING
        self.pool1 = nn.MaxPool2d(2,2) #VALID PADDING
        self.drop1 = nn.Dropout(0.25) #DROPOUT OF 0.25

        self.conv3 = nn.Conv2d(32,64,3,1,1)#SAME PADDING
        self.conv4 = nn.Conv2d(64,64,3,1,0)#VALID PADDING
        self.pool2 = nn.MaxPool2d(2,2)#VALID PADDING
        self.drop2 = nn.Dropout(0.25) #DROPOUT OF 0.25

        self.conv5 = nn.Conv2d(64,128,3,1,1)#SAME PADDING
        self.conv6 = nn.Conv2d(128,128,3,1,0)#VALID PADDING
        self.pool3 = nn.MaxPool2d(2,2)#VALID PADDING
        self.drop3 = nn.Dropout(0.25) #DROPOUT OF 0.25

        self.fc1 = nn.Linear(128*2*2, 512)#128*2*2 IS OUTPUT DIMENSION AFTER THE PREVIOUS LAYER 
        self.drop4 = nn.Dropout(0.25) #DROPOUT OF 0.25
        self.fc2 = nn.Linear(512,10) #2 output nodes initially
        

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = self.drop1(x)
        

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = self.drop2(x)

        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool3(x)
        x = self.drop3(x)


        x = x.view(-1,2*2*128) #FLATTENING OPERATION 2*2*128 IS OUTPUT AFTER THE PREVIOUS LAYER
        x = F.relu(self.fc1(x))
        x = self.drop4(x)
        x = self.fc2(x) #LAST LAYER DOES NOT NEED SOFTMAX BECAUSE THE LOSS FUNCTION WILL TAKE CARE OF IT
        return x

    

#=======FUNCTION TO CONVERT INPUT AND TARGET TO TORCH TENSORS AND LOADING INTO GPU======
def PrepareInputDataAndTargetData(device,images,labels,batch_size):

    #GET MINI BATCH OF TRAINING IMAGES AND RESHAPE THE TORCH TENSOR FOR CNN PROCESSING
    mini_batch_images = torch.tensor(images)
    mini_batch_images = mini_batch_images.view(batch_size,3,32,32)

    #GET MINI BATCH OF TRAINING LABELS, TARGET SHOULD BE IN LONG FORMAT SO CONVERT THAT TOO
    mini_batch_labels = torch.tensor(labels)
    mini_batch_labels = mini_batch_labels.long()
        
    #FEED THE INPUT DATA AND TARGET LABELS TO GPU
    mini_batch_images = mini_batch_images.to(device)
    mini_batch_labels = mini_batch_labels.to(device)

    return mini_batch_images,mini_batch_labels

#==========MAIN PROGRAM==========
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Images_train, Labels_train, Class_train = get_train_data(0,10)
Images_test, Labels_test, Class_test = get_test_data(0,10)

net = Net()
net = net.double() #https://discuss.pytorch.org/t/runtimeerror-expected-object-of-scalar-type-double-but-got-scalar-type-float-for-argument-2-weight/38961

#MAP THE MODEL ONTO THE GPU
net = net.to(device)

#CROSS ENTROPY LOSS FUNCTION AND ADAM OPTIMIZER
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4)

#PREPARE THE DATALOADER
dataset = TensorDataset( Tensor(Images_train), Tensor(Labels_train) )
trainloader = DataLoader(dataset, batch_size= 128, shuffle=True)

#START TRAINING THE CNN MODEL FOR 50 EPOCHS
for epoch in range(0,50):
	for i, data in enumerate(trainloader, 0):
		inputs, labels = data
		inputs = torch.tensor(inputs).double()
		inputs = inputs.view(len(inputs),3,32,32) #RESHAPE THE IMAGES
		labels = labels.long() #MUST CONVERT LABEL TO LONG FORMAT
		
		#MAP THE INPUT AND LABELS TO THE GPU
		inputs=inputs.to(device)
		labels=labels.to(device)

		#FORWARD PROP, BACKWARD PROP, PARAMETER UPDATE
		optimizer.zero_grad()
		outputs = net.forward(inputs)
		loss = criterion(outputs, labels)
		loss.backward()
		optimizer.step()


#CALCULATE CLASSIFICATION ACCURACY ON ALL 10 CLASSES
with torch.no_grad():
    Images_class,Labels_class = PrepareInputDataAndTargetData(device,Images_test,Labels_test,len(Images_test))
    network_outputs = net.forward(Images_class)
    correct = (torch.argmax(network_outputs.data,1) == Labels_class.data).float().sum()
    acc = float(100.0*(correct/len(Images_class)))
    print("Accuracy is: "+str(acc)+"\n")

print("Done\n")

My code in Keras:

#================Function that defines the CNN model===========
def CNN_model():
    model = Sequential()

    model.add(Conv2D(32,(3,3),activation='relu',padding='same', input_shape=(size,size,channels))) #SAME PADDING
    model.add(Conv2D(32,(3,3),activation='relu')) #VALID PADDING
    model.add(MaxPooling2D(pool_size=(2,2))) #VALID PADDING
    model.add(Dropout(0.25))

    model.add(Conv2D(64,(3,3),activation='relu', padding='same')) #SAME PADDING
    model.add(Conv2D(64,(3,3),activation='relu')) #VALID PADDING
    model.add(MaxPooling2D(pool_size=(2,2))) #VALID PADDING
    model.add(Dropout(0.25))

    model.add(Conv2D(128,(3,3),activation='relu', padding='same'))
    model.add(Conv2D(128,(3,3),activation='relu')) #VALID PADDING
    model.add(MaxPooling2D(pool_size=(2,2),name='feature_extractor_layer')) #VALID PADDING
    model.add(Dropout(0.25))

    model.add(Flatten())

    model.add(Dense(512, activation='relu', name='second_last_layer'))
    model.add(Dropout(0.25))
    model.add(Dense(10, activation='softmax', name='softmax_layer')) #first only add 5 nodes in softmax layer
    model.summary()
    return model

#=====Main program starts here========
images_train, labels_train, class_train = get_train_data(0,10)
images_test, labels_test, class_test = get_test_data(0,10)

model = CNN_model()

model.compile(loss='categorical_crossentropy', #loss function of the CNN
             optimizer=Adam(lr=1.0e-4), #Optimizer
              metrics=['accuracy'])#'accuracy' metric is to be evaluated

model.fit(images_train,class_train,
          batch_size=128,
          epochs=50,
          validation_data=(images_test,class_test),
          verbose=1)

scores=model.evaluate(images_test,class_test,verbose=0)
print("Accuracy: "+str(scores[1]*100)+"% \n")

Ali_Amiri · May 19, 2020, 10:50pm

did you find your answer ? I think I have same issue
i think since pytorch gives us a very root access to defining, training and … phases (compared to keras which is only model.compile, model.fit and model.predict)
the problem should be in keras!
maybe keras is doing something in it’s backend