I am building a simple multi-class classifier on the CAER dataset that classifies images into 7 basic emotion categories. Instead of feeding in the entire image, I pass only the facial region: a face detector finds the face, which is then cropped and sent to the network. Initially I created my own model with 5 convolutional layers, trained with an SGD optimizer at a learning rate of 1e-3. While training, the loss decreases with each epoch, but no matter what, it always starts at about 1.9. I also tried a pretrained model with some layers frozen, but the loss still always starts around 1.9. (Note: ln(7) ≈ 1.946 is exactly the expected initial cross-entropy loss for a 7-class classifier predicting uniformly at random, so the starting value itself may be normal.) I also applied augmentation and standardized the input values, but saw no change. I don't know where I am going wrong — please suggest any further changes I should make, or point out my mistake.
Below is the code for custom dataset and model.
For the pretrained model, I tried AlexNet, VGG16, and ResNet18.
class CAERDataset(Dataset):
    """CAER face-emotion dataset.

    Loads images from ``root_dir/<class_index>/*.png``, detects a face with
    DSFD, crops to it, and returns ``(face_image, label)``.

    Args:
        root_dir: directory whose sub-directories are named by class index
            (the folder name is parsed as the label).
        transform: optional callable applied to the cropped face image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # NOTE(review): constructing the detector here means every DataLoader
        # worker carries its own copy of a heavy model; consider detecting and
        # cropping faces offline, once, instead of on every __getitem__ call.
        self.detector = DSFDDetector()
        self.image_list = glob.glob(self.root_dir + '/*/*png')

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            # Bug fix: torch.Tensor has .tolist(), not .to_list() — the
            # original raised AttributeError whenever a tensor index arrived.
            idx = idx.tolist()
        img_path = self.image_list[idx]
        # Force 3 channels so grayscale / RGBA files don't break Conv2d(3, ...).
        image = Image.open(img_path).convert('RGB')
        # Bug fix: the parent folder name is the class index. It must stay an
        # integer — the original converted it to float, which collates into a
        # float tensor and makes nn.CrossEntropyLoss raise at training time
        # (it requires integer class targets).
        img_label = int(float(img_path.split('/')[-2]))
        detections = self.detector.detect_face(
            np.array(image), confidence_threshold=.5, shrink=1.0)
        if len(detections) == 0:
            # No face found: fall back to the full frame rather than failing.
            face_image = image
        else:
            # The original loop broke after the first iteration, so only the
            # first detection was ever used; say so directly.
            # Assumes detections are [xmin, ymin, xmax, ymax, score], matching
            # PIL's crop box (left, upper, right, lower) — TODO confirm against
            # the DSFDDetector API.
            x1, y1, x2, y2 = detections[0][:4]
            face_image = image.crop((x1, y1, x2, y2))
        if self.transform:
            face_image = self.transform(face_image)
        return (face_image, np.array(img_label))
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 32, kernel_size=3, stride =1),
nn.ReLU(),
nn.BatchNorm2d(32),
nn.Conv2d(32, 64, kernel_size=3, stride =1),
nn.ReLU(),
nn.BatchNorm2d(64),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
nn.Conv2d(64, 128, kernel_size=3, stride =1),
nn.ReLU(),
nn.BatchNorm2d(128),
nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
nn.Conv2d(128, 256, kernel_size=3, stride =1),
nn.ReLU(),
nn.BatchNorm2d(256),
nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
nn.Conv2d(256, 256, kernel_size=3, stride =1),
nn.ReLU(),
nn.BatchNorm2d(256),
nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
#nn.Conv2d(256, 256, kernel_size=3, stride =1),
#nn.ReLU(),
#nn.BatchNorm2d(256),
#nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
)
self.avgpool = nn.AdaptiveAvgPool2d(output_size=(5, 5))
self.classifier = nn.Sequential(
nn.BatchNorm1d(6400),
nn.Dropout(0.5),
nn.Linear(6400,2048),
nn.ReLU(),
nn.Dropout(0.25),
nn.Linear(2048,512),
nn.ReLU(),
nn.Linear(512,7)
)
def forward(self,x):
out = self.features(x)
out = self.avgpool(out)
#out = torch.flatten(out,1)
out = out.view(out.size(0), -1)
out = self.classifier(out)
return out
main.py
# Standard ImageNet normalization stats (also what pretrained backbones expect).
_MEAN = [0.485, 0.456, 0.406]
_STD = [0.229, 0.224, 0.225]

# Training-time preprocessing with random augmentation.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_MEAN, std=_STD),
])

# Deterministic preprocessing for validation / test.
test_transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=_MEAN, std=_STD),
])

train_data = CAERDataset(root_dir='./data/train', transform=transform)
# Bug fix: the validation loader previously sampled from train_data and so
# applied random augmentation to validation batches. Use a second dataset
# view over the same files with the deterministic transform; the samplers
# below still keep the train/val index sets disjoint.
val_data = CAERDataset(root_dir='./data/train', transform=test_transform)

# 80/20 train/validation split over shuffled indices.
num_train = len(train_data)
indices = list(range(num_train))
split = int(np.floor(0.2 * num_train))
np.random.shuffle(indices)
train_idx, val_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = torch.utils.data.DataLoader(train_data, sampler=train_sampler, batch_size=32)
val_loader = torch.utils.data.DataLoader(val_data, sampler=val_sampler, batch_size=32)

test_data = CAERDataset(root_dir='./data/test', transform=test_transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=True, num_workers=1)

# NOTE(review): `model` is defined elsewhere (the custom Model or a pretrained
# net). An initial cross-entropy loss near 1.9 is expected, not a bug:
# ln(7) ≈ 1.946 is the loss of a uniform prediction over 7 classes.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)