I’m trying to improve the validation accuracy of my network on the CIFAR-10 dataset. Without data augmentation I currently achieve a validation accuracy of 82%; however, once I introduce data augmentation, the validation accuracy drops to 76%.
The following is my network:
class NaiveNet(nn.Module):
    """Convolutional classifier for 3x32x32 images producing 10 logits.

    Five Conv2d -> BatchNorm2d -> ReLU blocks, interleaved with three
    stride-1 max-pooling layers, reduce the input to a 512x5x5 feature map
    (12800 features), which is classified by three fully connected layers
    with dropout in between.

    Keyword Args:
        spatial_batchnorm_momentum: momentum of every ``BatchNorm2d`` layer
            (default 0.1).
        linear_batchnorm_momentum: momentum of every ``BatchNorm1d`` layer
            (default 0.1).
        Any other keyword argument is forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, **kwargs):
        # Take our own options out of kwargs *before* delegating to the base
        # class: nn.Module.__init__ does not know these names and raises
        # TypeError when it receives them.
        spatial_momentum = kwargs.pop("spatial_batchnorm_momentum", 0.1)
        linear_momentum = kwargs.pop("linear_batchnorm_momentum", 0.1)
        super().__init__(**kwargs)
        self.twoD_batchnorm_momentum = spatial_momentum
        self.oneD_batchnorm_momentum = linear_momentum

        # 32x32 -> 16x16
        self.block_1 = nn.Sequential(
            # Convolutional layer
            nn.Conv2d(3, 64, kernel_size=5, stride=2, padding=2),
            # BatchNorm layer
            nn.BatchNorm2d(num_features=64, momentum=self.twoD_batchnorm_momentum),
            # Activation layer
            nn.ReLU(inplace=True),
        )
        # 16x16 -> 15x15
        self.pooling_1 = nn.MaxPool2d(kernel_size=2, stride=1)
        # 15x15 -> 13x13
        self.block_2 = nn.Sequential(
            # Convolutional layer
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0),
            # BatchNorm layer
            nn.BatchNorm2d(num_features=128, momentum=self.twoD_batchnorm_momentum),
            # Activation layer
            nn.ReLU(inplace=True),
        )
        # 13x13 -> 11x11
        self.block_3 = nn.Sequential(
            # Convolutional layer
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0),
            # BatchNorm layer
            nn.BatchNorm2d(num_features=256, momentum=self.twoD_batchnorm_momentum),
            # Activation layer
            nn.ReLU(inplace=True),
        )
        # 11x11 -> 10x10
        self.pooling_2 = nn.MaxPool2d(kernel_size=2, stride=1)
        # 10x10 -> 8x8
        self.block_4 = nn.Sequential(
            # Convolutional layer
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=0),
            # BatchNorm layer
            nn.BatchNorm2d(num_features=512, momentum=self.twoD_batchnorm_momentum),
            # Activation layer
            nn.ReLU(inplace=True),
        )
        # 8x8 -> 6x6
        self.block_5 = nn.Sequential(
            # Convolutional layer
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=0),
            # BatchNorm layer
            nn.BatchNorm2d(num_features=512, momentum=self.twoD_batchnorm_momentum),
            # Activation layer
            nn.ReLU(inplace=True),
        )
        # 6x6 -> 5x5
        self.pooling_3 = nn.MaxPool2d(kernel_size=2, stride=1)
        self.flatten_layer = nn.Flatten()

        # Fully connected layers; 12800 = 512 channels * 5 * 5 spatial positions.
        self.fcl_1 = nn.Sequential(
            nn.Linear(12800, 5000),
            nn.BatchNorm1d(5000, momentum=self.oneD_batchnorm_momentum),
            nn.ReLU(inplace=True),
        )
        self.dropout_1 = nn.Dropout(p=0.5)
        self.fcl_2 = nn.Sequential(
            nn.Linear(5000, 1000),
            nn.BatchNorm1d(1000, momentum=self.oneD_batchnorm_momentum),
            nn.ReLU(inplace=True),
        )
        self.dropout_2 = nn.Dropout(p=0.5)
        # Final layer emits raw scores for the 10 classes (no softmax here).
        self.fcl_4 = nn.Sequential(
            nn.Linear(1000, 10),
        )

    def forward(self, x):
        """Return the 10 class scores for a batch ``x`` of 3x32x32 images."""
        out = self.block_1(x)
        out = self.pooling_1(out)
        out = self.block_2(out)
        out = self.block_3(out)
        out = self.pooling_2(out)
        out = self.block_4(out)
        out = self.block_5(out)
        out = self.pooling_3(out)
        out = self.flatten_layer(out)
        out = self.fcl_1(out)
        out = self.dropout_1(out)
        out = self.fcl_2(out)
        out = self.dropout_2(out)
        out = self.fcl_4(out)
        return out
The following is my function to carry out the data augmentation :
def do_image_augmentation(image_array, image_labels, num_augmented=None, seed=42, transform=None):
    """Append randomly transformed copies of sampled images to a dataset.

    Images are sampled with replacement from ``image_array``, passed through
    ``transform`` as PIL images, and stacked after the original data.

    Args:
        image_array: array of images in (N, C, H, W) layout; pixel values are
            cast to ``uint8`` before being handed to PIL.
        image_labels: array of N labels aligned with ``image_array``.
        num_augmented: number of transformed samples to generate. Defaults to
            ``N // 2`` (20000 for a 40000-image input).
        seed: value passed to ``np.random.seed`` before sampling; ``None``
            leaves NumPy's global RNG untouched.
        transform: callable applied to each PIL image. Defaults to horizontal
            flip, rotation, elastic warp and colour jitter.

    Returns:
        Tuple ``(result_array, result_labels)`` holding the original samples
        followed by the ``num_augmented`` transformed ones.

    Raises:
        ValueError: if ``num_augmented`` is negative.
    """
    num_images = image_array.shape[0]
    if num_augmented is None:
        num_augmented = num_images // 2
    if num_augmented < 0:
        raise ValueError("num_augmented must be non-negative")

    # NOTE: this seeds NumPy only, i.e. the choice of *which* images get
    # transformed. torchvision draws its random transform parameters from
    # torch's RNG; call torch.manual_seed as well for a reproducible run.
    if seed is not None:
        np.random.seed(seed)

    if transform is None:
        # NOTE(review): rotations of up to 100 degrees and an elastic warp
        # with alpha=50 are strong distortions for 32x32 images; milder
        # settings are worth evaluating if accuracy drops after augmentation.
        transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(degrees=(0, 100)),
            transforms.ElasticTransform(alpha=50.0),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ])

    # Buffers are sized from the input rather than hard-coded, so the function
    # works for any split size and image shape.
    transformed_data_array = np.zeros(shape=(num_augmented,) + tuple(image_array.shape[1:]))
    transformed_data_labels = np.zeros(shape=(num_augmented,))

    if num_augmented:
        indices_to_transform = np.random.randint(0, num_images, num_augmented)
    else:
        # randint rejects an empty range, so skip sampling when nothing is requested.
        indices_to_transform = []

    for out_idx, src_idx in enumerate(indices_to_transform):
        # PIL expects (H, W, C) uint8 data; the dataset is stored as (C, H, W).
        hwc_image = np.transpose(image_array[src_idx, :, :, :], axes=(1, 2, 0)).astype('uint8')
        transformed_img = transform(Image.fromarray(hwc_image))
        transformed_data_array[out_idx, :, :, :] = np.transpose(np.array(transformed_img), axes=(2, 0, 1))
        transformed_data_labels[out_idx] = image_labels[src_idx]

    # Stretch the dataset: originals first, transformed samples after them.
    result_array = np.vstack((image_array, transformed_data_array))
    result_labels = np.hstack((image_labels, transformed_data_labels))
    return result_array, result_labels
I pass in my training data, which is the first 40000 images of the CIFAR-10 dataset, and randomly select 20000 images to transform. After transforming them, I append the results to the original numpy arrays, extending the dataset to 60000 training instances in total.
The following is my code block to obtain the augmented data:
# Build the augmented training set and convert it to float32 for the model.
aug_train_data_array, aug_train_data_labels = torch_utils.do_image_augmentation(train_data_array, train_data_labels)
aug_train_data_array = aug_train_data_array.astype('float32')
aug_train_data_labels = aug_train_data_labels.astype('float32')
cifar10Dataset_train = torch_utils.cifar10Dataset(aug_train_data_array, aug_train_data_labels)
# shuffle=True matters here: do_image_augmentation stacks the transformed
# samples after the originals, so without shuffling every epoch would end
# with batches made up solely of augmented images.
cifar10Dataloader_train = DataLoader(cifar10Dataset_train, batch_size=150, shuffle=True)
For the optimizer I have used Nesterov Adam, as my prior parameter tuning showed that this optimizer worked better. I have used a learning rate of 0.0005 with an L2 regularization parameter of 1e-5. I’m also using exponential learning-rate decay with a decay factor of 0.4.
I train it for 25 epochs in both cases.
Is there anything wrong in the way I’m doing my data augmentation? Any suggestions or advice is highly appreciated.