Here is my training, evaluation and hyperparameter script:
import torch
import torch.nn as nn
from affectnet_cpu import affectnet_cpu
from affectnet_gpu import affectnet_gpu
from evaluate import evaluate
import logger as logger
import models as models
# ---- Hyperparameters ----
num_epochs = 200
batch_size = 256
learning_rate = 0.001
momentum = 0.9
l2_reg = 0.0005   # L2 weight decay passed to SGD
datasize = 5000   # number of training rows the GPU dataset will cache

# BUG FIX: `device` was computed but never used — every tensor was moved with a
# hard-coded `.cuda()`, which crashes on CPU-only machines. Route everything
# through `device` instead. (NOTE(review): the model classes in models.py still
# call `.cuda()` internally, so a GPU is effectively required until those are
# updated too.)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

net = models.alexnet()

# Training data: the dataset pre-processes images and caches them on the GPU.
train_data = affectnet_gpu('../data/affectnet_images',
                           '../data/affectnet_labels/training.csv',
                           datasize)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True)

criterion = nn.CrossEntropyLoss().to(device)

# Only optimize trainable parameters (frozen pretrained backbones are skipped).
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                            lr=learning_rate,
                            momentum=momentum,
                            dampening=0,
                            weight_decay=l2_reg,  # try with zero!
                            nesterov=False)

# Decay the learning rate 10x at each milestone epoch.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[80, 120, 180],
                                                 gamma=0.1)

CE = []  # per-batch cross-entropy history, useful for plotting later
net.train()
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        inputs = images.to(device).float()
        labels = labels.to(device).long()

        optimizer.zero_grad()
        outputs = net(inputs)
        # Labels arrive one-hot encoded; CrossEntropyLoss expects class indices.
        ideal = labels.argmax(1)
        loss = criterion(outputs, ideal)
        CE.append(loss.item())

        # Backward and optimize
        loss.backward()
        optimizer.step()

        if (i + 1) % 20 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
    # Advance the LR schedule once per epoch, after the optimizer steps.
    scheduler.step()

# ---- Evaluation on the validation split ----
net.eval()
test_data = affectnet_gpu('../data/affectnet_images',
                          '../data/affectnet_labels/validation.csv')
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=12,
                                          shuffle=False)
correct, total = evaluate(net, test_data, test_loader)
print(correct, total)
accuracy = correct / total  # Python 3 true division; the float() chain was redundant
print("accuracy", accuracy)
The models.py file is the following. Note that I adjust the output layer to either 8 labels or 2, depending on whether I'm using expression or valence labels.
import torch
import torch.nn as nn
import torchvision.models as models
"""
Custom VGG11
"""
class VGG11(nn.Module):
    """VGG-11 backbone (ImageNet weights, frozen) with an 8-way linear head.

    The torchvision model's 1000-dim output feeds a new fully connected
    layer; both parts live on the GPU in half precision (FP16).
    """

    def __init__(self):
        super(VGG11, self).__init__()
        # Trainable classification head on top of the 1000 ImageNet logits.
        self.fc = nn.Linear(1000, 8)
        # Pretrained feature extractor, frozen so only `fc` is trained.
        self.net = models.vgg11(pretrained=True)
        for param in self.net.parameters():
            param.requires_grad_(False)
        self.net.cuda().half()
        self.fc.cuda().half()

    def forward(self, x):
        """Run the frozen backbone, then project its output to 8 classes."""
        return self.fc(self.net(x))
"""
Custom VGG19 with BN
"""
class VGG19BN(nn.Module):
    """VGG-19-BN backbone (ImageNet weights, frozen) with an 8-way linear head.

    Mirrors VGG11: the torchvision model's 1000-dim output feeds a new
    fully connected layer, with everything on the GPU in half precision.
    """

    def __init__(self):
        super(VGG19BN, self).__init__()
        self.layer1 = nn.Linear(1000, 8)
        self.net = models.vgg19_bn(pretrained=True)
        for p in self.net.parameters():
            p.requires_grad = False
        self.net.cuda().half()
        # BUG FIX: was `self.fc.cuda().half()` — this class has no `fc`
        # attribute (the head is `layer1`), so construction raised
        # AttributeError and the head was never moved to GPU/FP16.
        self.layer1.cuda().half()

    def forward(self, x):
        f = self.net(x)
        # BUG FIX: was `self.layer1(x1)` — `x1` is undefined; the backbone
        # output `f` is the intended input to the head.
        y = self.layer1(f)
        return y
"""
Custom AlexNet 2 label output
"""
class alexnet(nn.Module):
    """Pretrained AlexNet with a trainable 2-way linear head.

    Unlike the VGG variants above, the backbone is NOT frozen, and the
    whole model runs on the GPU in full FP32 precision.
    """

    def __init__(self):
        super(alexnet, self).__init__()
        # Head mapping the 1000 ImageNet logits down to 2 classes.
        self.fc = nn.Linear(1000, 2)
        self.net = models.alexnet(pretrained=True)
        self.net.cuda().float()
        self.fc.cuda().float()

    def forward(self, x):
        """Backbone logits -> 2-class output."""
        features = self.net(x)
        return self.fc(features)
"""
Custom SqueezeNet 2 label output
"""
class squeezenet(nn.Module):
    """Pretrained SqueezeNet 1.1 with a trainable 2-way linear head.

    Same shape as `alexnet`: backbone not frozen, FP32, on the GPU.
    """

    def __init__(self):
        super(squeezenet, self).__init__()
        # Head mapping the 1000 ImageNet logits down to 2 classes.
        self.fc = nn.Linear(1000, 2)
        self.net = models.squeezenet1_1(pretrained=True)
        self.net.cuda().float()
        self.fc.cuda().float()

    def forward(self, x):
        """Backbone logits -> 2-class output."""
        features = self.net(x)
        return self.fc(features)
I know I can do transfer learning; I've tried it with ImageNet-trained networks, and they all seem to produce worse accuracy, but I am willing to try again if you think it can work.
The actual affectnet script has two classes, one for CPU and one for GPU; I'm adding it here for clarity:
import torch
from torchvision import transforms
import pandas as pd
import os
import stat
from PIL import Image
from labels import labels
from random import shuffle
from torch.utils.data.dataset import Dataset
import threading
# Upper bound on GPU memory used for caching the dataset.
# NOTE(review): despite the "MB" suffix, torch.cuda.memory_allocated()
# returns BYTES, so this budget is ~10.98 GB — the name is misleading.
MAX_GPU_MB = 10980000000
class affectnet_gpu(Dataset):
    """AffectNet dataset that pre-processes every image up front and caches
    the (input, label) tensor pairs directly in GPU memory, stopping once
    the MAX_GPU_MB byte budget is reached.
    """

    def __init__(self, img_path, csv_path, limit=414798):
        """
        Args:
            img_path (string): directory containing the annotated images
            csv_path (string): CSV file (training or validation) with the labels
            limit (int): maximum number of label rows to load
        """
        self.img_path = img_path
        self.labels = labels(pd.read_csv(csv_path), img_path, limit)
        # *NOTE* the means and stds are on RGB 3 channel 224x224 images
        self.means = [0.54019716, 0.43742642, 0.38931704]
        self.stds = [0.24726599, 0.2232768, 0.21396481]
        normalize = transforms.Normalize(self.means, self.stds)
        self.preprocess = transforms.Compose([transforms.Resize(size=(224, 224)),
                                              transforms.ToTensor(),
                                              normalize])
        self.data = []
        print("Pre-processing and allocating data")
        for idx in range(len(self.labels.rows)):
            # Stop caching once the GPU memory budget (in bytes) is exhausted.
            if torch.cuda.memory_allocated() < MAX_GPU_MB:
                self.upload_pair(idx)
            else:
                break
        print("using affectnet set: ", len(self.data))

    def upload_pair(self, idx):
        """Upload one pre-processed (input, label) pair to the GPU as FP32.

        Args:
            idx (unsigned int): the item index in the dataset
        """
        pair = self.process_row(idx)
        in_tensor = pair[0].cuda(non_blocking=True).float()
        out_tensor = pair[1].cuda(non_blocking=True).float()
        self.data.append([in_tensor, out_tensor])

    def process_row(self, index):
        """Open an image and build its label tensor, returning both pinned in
        host memory so the subsequent GPU upload can be asynchronous.

        Args:
            index (unsigned int): the item index in the dataset
        """
        item = self.labels[index]
        file = self.img_path + "/" + item["file"]
        # BUG FIX: the file handle was never closed (leaks descriptors over
        # ~400k images), and non-RGB inputs (grayscale/RGBA) would break the
        # 3-channel Normalize; convert('RGB') is a no-op for RGB images.
        with Image.open(file) as img:
            tensor = self.preprocess(img.convert('RGB'))
        array = self.valence(index)
        # array = self.classes(index)
        return tensor.pin_memory(), array.pin_memory()

    def __getitem__(self, index):
        """Return a (input, label) pair already pre-processed and on the GPU."""
        return self.data[index]

    def __len__(self):
        """Dataset size = number of pairs that fit the GPU memory budget."""
        return len(self.data)

    def classes(self, index):
        """One-hot label over the 8 AffectNet expression classes."""
        item = self.labels[index]
        array = torch.zeros((8,), dtype=torch.long)
        array[item["expression"]] = 1
        return array

    def valence(self, index):
        """One-hot valence label: [1, 0] = positive, [0, 1] = negative.

        NOTE(review): a valence score of exactly 0.0 (neutral) returns
        [0, 0], whose argmax() downstream is 0 — i.e. neutral samples are
        silently counted as positive. Consider a third class or filtering
        neutral rows out.
        """
        # BUG FIX: the neutral fallback used the default float dtype while the
        # other branches return long tensors; keep all branches dtype-consistent.
        array = torch.zeros((2,), dtype=torch.long)
        item = self.labels[index]
        score = item["valence"]
        if score > 0.0:
            array = torch.tensor([1, 0], dtype=torch.long)
        elif score < 0.0:
            array = torch.tensor([0, 1], dtype=torch.long)
        return array
In general I've followed the PyTorch tutorials, searched Stack Overflow and these forums, worked through all the examples, etc. I am surprised that the 8-label classification fails so miserably with AlexNet — and I guess a top-1 accuracy of 62% on the 2/3-label valence classification is to be expected?
PS: I haven't included the labels function; the evaluate function is:
import torch
import torch.nn as nn
from affectnet_cpu import affectnet_cpu
from affectnet_gpu import affectnet_gpu
def evaluate(model, test_data, test_loader):
    """Count top-1 correct predictions over a test loader.

    Args:
        model: network mapping an image batch to per-class scores
        test_data: unused; kept for call-site compatibility
        test_loader: DataLoader yielding (images, one-hot labels) batches

    Returns:
        (correct, total): number of correct predictions and samples seen.
    """
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            batch = images.cuda(non_blocking=True).float()
            # One-hot labels -> class indices.
            targets = labels.cuda(non_blocking=True).long().argmax(1)
            # compute output
            scores = model(batch)
            # Predicted class = index of the highest score per sample.
            predicted = scores.detach().max(1)[1]
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct, total