VGG transfer learning

Daniele_Ligato · January 4, 2019, 8:35pm

Hi there I’m trying to implement the pre-trained VGG net to my script, in order to recognize faces from my dataset in RGB [256,256], but I’m getting a “size mismatch, m1: [1 x 2622], m2: [4096 x 2]” even if im resizing my images, as you can see my code work with resnet and alexnet.

import argparse
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import scipy.misc
import imageio
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets
import matplotlib.pyplot as plt
import numpy as np
import VGG_FACE


num_epochs = 20
num_classes = 2
batch_size = 4

DATA_PATH1 = '/Users/danieleligato/Documents/FILE/Università/tesi/dataset/Train'
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH1, transform=trans)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
classes = ('coperte', 'scoperte')
dataiter = iter(train_loader)
# DATAITER SARà TI TIPO ITERATORE CON DENTRO I VALORI DI TRAINLOADER

def imshow(img):
    img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def training(model_conv, learning_rate, wd, net):

    criterion = nn.CrossEntropyLoss(weight= torch.FloatTensor([1,1]))
    optimizer = torch.optim.Adam(model_conv.fc.parameters(), lr=learning_rate, weight_decay = wd)
    total_step = len(train_loader)
    loss_list = []
    acc_list = []
    print("Inizio il training")

    for epoch in range(num_epochs):
        for i, (im, labels) in enumerate(train_loader):  #il TRAIN_LOADER ha dentro le immagini e le labels

            if net == "vgg":
                images = torch.nn.functional.interpolate(im, 224, mode='bilinear')

            if  net != "vgg":
                images = torch.nn.functional.interpolate(im, 224, mode = 'bilinear')

            outputs = model_conv(images)
            loss = criterion(outputs, labels)
            loss_list.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
    torch.save(model_conv, 'TrainedModel.pt')
    return images, labels

def main():

    net = "vgg"
    learning_rate = 10e-6
    wd = 10e-4

    if net == "vgg":
        print("Hai selezionato VGG")
        model_conv = VGG_FACE.vgg_face
        data = torch.load("VGG_FACE.pth")
        model_conv.load_state_dict(data) #carico i parametri nella mia rete

        # Modifica per classificazione:
        model_conv.fc = nn.Linear(4096, 2)
        model_conv[-1] = model_conv.fc


    if net == "resnet18":
        print("Hai selezionato ResNet18")
        model_conv = torchvision.models.resnet18(pretrained=True)
        for param in model_conv.parameters():
            param.requires_grad = False  #in_feature is the number of inputs for your linear layer:
        num_ftrs = model_conv.fc.in_features  #fc è il nome del layer che andremo a sostituire #512
        model_conv.fc = nn.Linear(num_ftrs, 2) #viene sostuito con un nn.linear

    if net == "resnet50":
        print("Hai selezionato ResNet50")
        model_conv = torchvision.models.resnet50(pretrained=True)
        for param in model_conv.parameters():
            param.requires_grad = False
        num_ftrs = model_conv.fc.in_features  # fc è il nome del layer che andremo a sostituire #512
        model_conv.fc = nn.Linear(num_ftrs, 2)  # viene sostuito con un nn.linear

    if net == "alexnet":
        print("Hai selezionato AlexNet")
        model_conv = torchvision.models.alexnet(pretrained=True)
        for param in model_conv.parameters():
            param.requires_grad = False
        model_conv.fc = nn.Linear(4096, 2)
        model_conv.classifier[-1] = model_conv.fc

    training(model_conv, learning_rate, wd, net)

if __name__ == '__main__':
    main()

And this is another code where I used correctly my VGG with some random images

import VGG_FACE
import torch
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import scipy.misc
import sys

def test():
    N=5
    net = VGG_FACE.vgg_face
    data = torch.load("VGG_FACE.pth")
    net.load_state_dict(data)
    net.eval()
    names = open("names.txt").read().split()

    with torch.no_grad():

        mean = np.array([93.5940, 104.7624, 129.1863])
        images = scipy.misc.imread("cooper2.jpg", mode="RGB")
        images = scipy.misc.imresize(images, [224, 224])
        images = images.astype(np.float32)
        images -= mean[np.newaxis, np.newaxis, :]
        images = np.transpose(images, (2, 0, 1))
        images = images[np.newaxis, ...]
        images = torch.tensor(images, dtype=torch.float32)


        y = net(images)
        y = torch.nn.functional.softmax(y, 1)
        rank = torch.topk(y[0, :], N)
        for i in range(N):
            index = rank[1][i].item()
            score = rank[0][i].item()
            print("{}) {} ({:.2f})".format(i + 1, names[index], score))
        print()

    # Modifica per classificazione:
    numero_classi = 2
    net[-1] = torch.nn.Linear(4096, numero_classi)


if __name__ == "__main__":
    test()

This is the VGG

from functools import reduce

import torch
import torch.nn as nn
from torch.autograd import Variable

class LambdaBase(nn.Sequential):
    def __init__(self, fn, *args):
        super(LambdaBase, self).__init__(*args)
        self.lambda_func = fn

    def forward_prepare(self, input):
        output = []
        for module in self._modules.values():
            output.append(module(input))
        return output if output else input

class Lambda(LambdaBase):
    def forward(self, input):
        return self.lambda_func(self.forward_prepare(input))

class LambdaMap(LambdaBase):
    def forward(self, input):
        return map(self.lambda_func,self.forward_prepare(input))

class LambdaReduce(LambdaBase):
    def forward(self, input):
        return reduce(self.lambda_func,self.forward_prepare(input))


vgg_face = nn.Sequential( # Sequential,
	nn.Conv2d(3,64,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(64,64,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
	nn.Conv2d(64,128,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(128,128,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
	nn.Conv2d(128,256,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
	nn.Conv2d(256,512,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
	nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
	nn.ReLU(),
	nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
	Lambda(lambda x: x.view(x.size(0),-1)), # View,
	nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(25088,4096)), # Linear,
	nn.ReLU(),
	nn.Dropout(0.5),
	nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,4096)), # Linear,
	nn.ReLU(),
	nn.Dropout(0.5),
	nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,2622)), # Linear,
)

You may think that using the transformation

mean = np.array([93.5940, 104.7624, 129.1863])
        images = scipy.misc.imread("cooper2.jpg", mode="RGB")
        images = scipy.misc.imresize(images, [224, 224])
        images = images.astype(np.float32)
        images -= mean[np.newaxis, np.newaxis, :]
        images = np.transpose(images, (2, 0, 1))
        images = images[np.newaxis, ...]
        images = torch.tensor(images, dtype=torch.float32)

As my second code the script should work but it doesn’t, that gives me another error
AttributeError: ‘Tensor’ object has no attribute ‘read’