I am relatively new to PyTorch, I have trained a CNN model to perform classification on CIFAR-10 dataset. I’m aware when a CNN has layers like dropout or batch normalization, the model should be set to evaluation mode when computing the accuracy else the accuracy will be different but in my case it remains unchanged i.e. my final accuracy is always 64-66% (first 4 classes) with or without adding net.eval()
. I’ve also used nn.Dropout instead of nn.functional.Dropout but despite the results, I’m not very sure what’s causing this behavior.
Since training my model on all 10 classes from scratch takes time, that’s why for my own testing purposes I am only training my model on the first 4 classes. Any suggestions will be very helpful!, my program is as follows:
#========DEFINE THE CNN MODEL=====
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)#SAME PADDING
self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=0)#VALID PADDING
self.pool1 = nn.MaxPool2d(2,2) #VALID PADDING
self.drop1 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)#SAME PADDING
self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=0)#VALID PADDING
self.pool2 = nn.MaxPool2d(2,2)#VALID PADDING
self.drop2 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)#SAME PADDING
self.conv6 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=0)#VALID PADDING
self.pool3 = nn.MaxPool2d(2,2)#VALID PADDING
self.drop3 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.fc1 = nn.Linear(128*2*2, 512)#128*2*2 IS OUTPUT DIMENSION AFTER THE PREVIOUS LAYER
self.drop4 = nn.Dropout(0.25) #DROPOUT OF 0.25
self.fc2 = nn.Linear(512,4) #4 output nodes
#FORWARD PROPAGATION FUNCTION
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = self.pool1(x)
x = self.drop1(x)
x = F.relu(self.conv3(x))
x = F.relu(self.conv4(x))
x = self.pool2(x)
x = self.drop2(x)
x = F.relu(self.conv5(x))
x = F.relu(self.conv6(x))
x = self.pool3(x)
x = self.drop3(x)
x = x.view(-1,2*2*128) #FLATTENING OPERATION 2*2*128 IS OUTPUT AFTER THE PREVIOUS LAYER
x = F.relu(self.fc1(x))
x = self.drop4(x)
x = self.fc2(x) #LAST LAYER DOES NOT NEED SOFTMAX BECAUSE THE LOSS FUNCTION WILL TAKE CARE OF IT
return x
#=======FUNCTION TO CONVERT INPUT AND TARGET TO TORCH TENSORS AND LOADING INTO GPU======
def PrepareInputDataAndTargetData(device,images,labels,batch_size):
#GET MINI BATCH OF TRAINING IMAGES AND RESHAPE THE TORCH TENSOR FOR CNN PROCESSING
mini_batch_images = torch.tensor(images)
mini_batch_images = mini_batch_images.view(batch_size,3,32,32)
#GET MINI BATCH OF TRAINING LABELS, TARGET SHOULD BE IN LONG FORMAT SO CONVERT THAT TOO
mini_batch_labels = torch.tensor(labels)
mini_batch_labels = mini_batch_labels.long()
#FEED THE INPUT DATA AND TARGET LABELS TO GPU
mini_batch_images = mini_batch_images.to(device)
mini_batch_labels = mini_batch_labels.to(device)
return mini_batch_images,mini_batch_labels
#==========MAIN PROGRAM==========
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#get_train_data() and get_test_data() are my own custom functions to get CIFAR-10 dataset
Images_train, Labels_train, Class_train = get_train_data(0,10)
Images_test, Labels_test, Class_test = get_test_data(0,10)
#TRAINING AND TEST DATA FOR FIRST 4 CLASSES
Images_train = Images_train[0:20000]
Labels_train = Labels_train[0:20000]
Images_test = Images_test[0:4000]
Labels_test = Labels_test[0:4000]
net = Net()
net = net.double() #https://discuss.pytorch.org/t/runtimeerror-expected-object-of-scalar-type-double-but-got-scalar-type-float-for-argument-2-weight/38961
#MAP THE MODEL ONTO THE GPU
net = net.to(device)
#CROSS ENTROPY LOSS FUNCTION AND ADAM OPTIMIZER
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4)
#PREPARE THE DATALOADER
#Images_train contains images and Labels_trains contains indices i.e. 0,1,...,9
dataset = TensorDataset( Tensor(Images_train), Tensor(Labels_train) )
trainloader = DataLoader(dataset, batch_size= 128, shuffle=True)
#START TRAINING THE CNN MODEL FOR 50 EPOCHS
for epoch in range(0,20):
for i, data in enumerate(trainloader, 0):
inputs, labels = data
inputs = torch.tensor(inputs).double()
inputs = inputs.view(len(inputs),3,32,32) #RESHAPE THE IMAGES
labels = labels.long() #MUST CONVERT LABEL TO LONG FORMAT
#MAP THE INPUT AND LABELS TO THE GPU
inputs=inputs.to(device)
labels=labels.to(device)
#FORWARD PROP, BACKWARD PROP, PARAMETER UPDATE
optimizer.zero_grad()
outputs = net.forward(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
#PUT THE MODEL IN EVALUATION MODE
net.eval()
#CALCULATE CLASSIFICATION ACCURACY ON ALL 10 CLASSES
with torch.no_grad():
Images_class,Labels_class = PrepareInputDataAndTargetData(device,Images_test,Labels_test,len(Images_test)) #MAP TEST IMAGES AND LABELS TO GPU
network_outputs = net.forward(Images_class) #FORWARD PASS ON THE MODEL
correct = (torch.argmax(network_outputs.data,1) == Labels_class.data).float().sum()
acc = float(100.0*(correct/len(Images_test)))
print("Accuracy is: "+str(acc)+"\n")