Hello. I am very new to deep learning and recently I am going through tutorials on making neural network.
I have created my own custom set, which contains 10000 images of faces for training and roughly 3000 for validating, to classify age of people (79 classes, or ages). I use pretrain network from Resnet50 and freeze the CNN part and replace with my custom fully connected layers.
However, when training starts, the loss per batch (where I print it out everytime calculating the loss using cross entropy) is not decreasing. My GPU is kind of sucks so I use CPU for training. I just run for 10 batches where the loss per batch is always at 3.6-3.8, with no sign of lowering.
Hope someone can point out what is the exact underlying problems. Thank you.
train loss for 1 batch= 4.3613080978393555 train loss for 1 batch= 4.377458095550537 train loss for 1 batch= 4.360567092895508 train loss for 1 batch= 4.366171836853027 train loss for 1 batch= 4.372839450836182 train loss for 1 batch= 4.366669178009033 train loss for 1 batch= 4.3775248527526855 train loss for 1 batch= 4.3778791427612305 train loss for 1 batch= 4.360372066497803 train loss for 1 batch= 4.367254257202148 train loss for 1 batch= 4.349947929382324 train loss for 1 batch= 4.358979225158691 train loss for 1 batch= 4.36530065536499 train loss for 1 batch= 4.340318202972412 train loss for 1 batch= 4.3592658042907715 train loss for 1 batch= 4.3571014404296875 train loss for 1 batch= 4.34581995010376 train loss for 1 batch= 4.36361026763916 train loss for 1 batch= 4.362382888793945
import torch import torchvision.transforms as transforms import torchvision.models as models import torch.nn as nn import torch.optim as optim from torch.utils.data import dataset, DataLoader from CustomDataset import FaceDataset import time def num_correct(outputs, labels): correct_num=outputs.argmax(dim=1).eq(labels).sum().item() return correct_num # age= index+2 age_categories=79 iteration=20 learning_rate=0.001 device=torch.device('cpu') model = models.resnet50(pretrained=True) model.to(device) for paras in model.parameters(): paras.requires_grad = False fc_input = model.fc.in_features # 2048 new_fc_layers = nn.Sequential(nn.Linear(fc_input, 1024), nn.ReLU(), nn.Linear(1024, 512), nn.ReLU(), nn.Linear(512, age_categories), ) model.fc = new_fc_layers dataset = FaceDataset(csvFile='ImageInfo.csv', ImageFolder='aglined faces', transform=transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])) train_set, valid_set = torch.utils.data.random_split(dataset, [10000, 3322]) train_loader=DataLoader(dataset=train_set,batch_size=32,shuffle=True) valid_loader=DataLoader(dataset=valid_set,batch_size=1000,shuffle=True,drop_last=True) loss_function=nn.CrossEntropyLoss() optimizer=optim.SGD(model.parameters(), lr=learning_rate) time_start=time.time() for epoch in range(iteration): train_loss = 0 valid_loss = 0 model.train() for batch in train_loader: images, labels=batch images=images.to(device) labels=labels.to(device) output=model(images) loss=loss_function(output, labels) print('train loss for 1 batch= ',loss.item()) train_loss+=loss.item() optimizer.zero_grad() loss.backward() optimizer.step() print('total train loss for 1 epoch= ',train_loss) model.eval() with torch.no_grad(): for batch in valid_loader: number_Correct = 0 images, labels = batch images = images.to(device) labels = labels.to(device) output = model(images) loss = loss_function(output, labels) print('valid loss for 1 batch= ',loss.item()) valid_loss+=loss.item() number_Correct+=num_correct(output,labels) print('number correct for 1 batch= ',number_Correct) print('total valid loss for 1 epoch= ', valid_loss)