Greetings,
I’ve made a custom CNN in PyTorch for classifying 10 classes in the CIFAR-10 dataset. My classification accuracy on the test dataset is 45.739%, this is very low and I thought it’s because my model is not very deep but I implemented the same model in Keras and the classification accuracy is 78.92% on test dataset. I have used the same model architecture, strides, padding, dropout rate, optimizer, loss function, learning rate, batch size, number of epochs on both PyTorch and Keras and despite that, the difference in the classification accuracy is still huge thus I’m not able to decide how I should debug further.
Surely I believe I’m missing something in my PyTorch program but I’m unable to spot the problem. I have attached both my programs below. Yes this is a PyTorch forum however I think researchers and developers here can relate to other libraries as well, will be grateful to any suggestions .
My code in PyTorch:
#========DEFINE THE CNN MODEL=====
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 32, 3,1,1)#SAME PADDING
self.conv2 = nn.Conv2d(32,32,3,1,0)#VALID PADDING
self.pool1 = nn.MaxPool2d(2,2) #VALID PADDING
self.drop1 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.conv3 = nn.Conv2d(32,64,3,1,1)#SAME PADDING
self.conv4 = nn.Conv2d(64,64,3,1,0)#VALID PADDING
self.pool2 = nn.MaxPool2d(2,2)#VALID PADDING
self.drop2 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.conv5 = nn.Conv2d(64,128,3,1,1)#SAME PADDING
self.conv6 = nn.Conv2d(128,128,3,1,0)#VALID PADDING
self.pool3 = nn.MaxPool2d(2,2)#VALID PADDING
self.drop3 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.fc1 = nn.Linear(128*2*2, 512)#128*2*2 IS OUTPUT DIMENSION AFTER THE PREVIOUS LAYER
self.drop4 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.fc2 = nn.Linear(512,10) #2 output nodes initially
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = self.pool1(x)
x = self.drop1(x)
x = F.relu(self.conv3(x))
x = F.relu(self.conv4(x))
x = self.pool2(x)
x = self.drop2(x)
x = F.relu(self.conv5(x))
x = F.relu(self.conv6(x))
x = self.pool3(x)
x = self.drop3(x)
x = x.view(-1,2*2*128) #FLATTENING OPERATION 2*2*128 IS OUTPUT AFTER THE PREVIOUS LAYER
x = F.relu(self.fc1(x))
x = self.drop4(x)
x = self.fc2(x) #LAST LAYER DOES NOT NEED SOFTMAX BECAUSE THE LOSS FUNCTION WILL TAKE CARE OF IT
return x
#=======FUNCTION TO CONVERT INPUT AND TARGET TO TORCH TENSORS AND LOADING INTO GPU======
def PrepareInputDataAndTargetData(device,images,labels,batch_size):
#GET MINI BATCH OF TRAINING IMAGES AND RESHAPE THE TORCH TENSOR FOR CNN PROCESSING
mini_batch_images = torch.tensor(images)
mini_batch_images = mini_batch_images.view(batch_size,3,32,32)
#GET MINI BATCH OF TRAINING LABELS, TARGET SHOULD BE IN LONG FORMAT SO CONVERT THAT TOO
mini_batch_labels = torch.tensor(labels)
mini_batch_labels = mini_batch_labels.long()
#FEED THE INPUT DATA AND TARGET LABELS TO GPU
mini_batch_images = mini_batch_images.to(device)
mini_batch_labels = mini_batch_labels.to(device)
return mini_batch_images,mini_batch_labels
#==========MAIN PROGRAM==========
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Images_train, Labels_train, Class_train = get_train_data(0,10)
Images_test, Labels_test, Class_test = get_test_data(0,10)
net = Net()
net = net.double() #https://discuss.pytorch.org/t/runtimeerror-expected-object-of-scalar-type-double-but-got-scalar-type-float-for-argument-2-weight/38961
#MAP THE MODEL ONTO THE GPU
net = net.to(device)
#CROSS ENTROPY LOSS FUNCTION AND ADAM OPTIMIZER
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4)
#PREPARE THE DATALOADER
dataset = TensorDataset( Tensor(Images_train), Tensor(Labels_train) )
trainloader = DataLoader(dataset, batch_size= 128, shuffle=True)
#START TRAINING THE CNN MODEL FOR 50 EPOCHS
for epoch in range(0,50):
for i, data in enumerate(trainloader, 0):
inputs, labels = data
inputs = torch.tensor(inputs).double()
inputs = inputs.view(len(inputs),3,32,32) #RESHAPE THE IMAGES
labels = labels.long() #MUST CONVERT LABEL TO LONG FORMAT
#MAP THE INPUT AND LABELS TO THE GPU
inputs=inputs.to(device)
labels=labels.to(device)
#FORWARD PROP, BACKWARD PROP, PARAMETER UPDATE
optimizer.zero_grad()
outputs = net.forward(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
#CALCULATE CLASSIFICATION ACCURACY ON ALL 10 CLASSES
with torch.no_grad():
Images_class,Labels_class = PrepareInputDataAndTargetData(device,Images_test,Labels_test,len(Images_test))
network_outputs = net.forward(Images_class)
correct = (torch.argmax(network_outputs.data,1) == Labels_class.data).float().sum()
acc = float(100.0*(correct/len(Images_class)))
print("Accuracy is: "+str(acc)+"\n")
print("Done\n")
My code in Keras:
#================Function that defines the CNN model===========
def CNN_model():
model = Sequential()
model.add(Conv2D(32,(3,3),activation='relu',padding='same', input_shape=(size,size,channels))) #SAME PADDING
model.add(Conv2D(32,(3,3),activation='relu')) #VALID PADDING
model.add(MaxPooling2D(pool_size=(2,2))) #VALID PADDING
model.add(Dropout(0.25))
model.add(Conv2D(64,(3,3),activation='relu', padding='same')) #SAME PADDING
model.add(Conv2D(64,(3,3),activation='relu')) #VALID PADDING
model.add(MaxPooling2D(pool_size=(2,2))) #VALID PADDING
model.add(Dropout(0.25))
model.add(Conv2D(128,(3,3),activation='relu', padding='same'))
model.add(Conv2D(128,(3,3),activation='relu')) #VALID PADDING
model.add(MaxPooling2D(pool_size=(2,2),name='feature_extractor_layer')) #VALID PADDING
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu', name='second_last_layer'))
model.add(Dropout(0.25))
model.add(Dense(10, activation='softmax', name='softmax_layer')) #first only add 5 nodes in softmax layer
model.summary()
return model
#=====Main program starts here========
images_train, labels_train, class_train = get_train_data(0,10)
images_test, labels_test, class_test = get_test_data(0,10)
model = CNN_model()
model.compile(loss='categorical_crossentropy', #loss function of the CNN
optimizer=Adam(lr=1.0e-4), #Optimizer
metrics=['accuracy'])#'accuracy' metric is to be evaluated
model.fit(images_train,class_train,
batch_size=128,
epochs=50,
validation_data=(images_test,class_test),
verbose=1)
scores=model.evaluate(images_test,class_test,verbose=0)
print("Accuracy: "+str(scores[1]*100)+"% \n")