How to implement a regression problem with VGG19

I have a 256 x 256 input image, and the label is a single value. I want to implement VGG19 for a regression problem. Can somebody please help me? I am using the VGG19 code below for classification; how do I change the classification layer to perform a regression task?

cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 2)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

model = VGG('VGG19')

Thanks in advance.

For a single output you should use self.classifier = nn.Linear(512, 1) and for the loss function you could use e.g. nn.MSELoss.
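As a minimal sketch (using random tensors purely to illustrate the shapes involved, so the sizes below are assumptions), the regression head just needs to produce a [batch_size, 1] output, and the target passed to nn.MSELoss should have the same shape:

import torch
import torch.nn as nn

# dummy feature batch standing in for the flattened VGG features of 4 samples
features = torch.randn(4, 512)

regressor = nn.Linear(512, 1)   # single regression output instead of class logits
criterion = nn.MSELoss()

output = regressor(features)    # shape [4, 1]
target = torch.randn(4, 1)      # regression targets with the same shape as the output
loss = criterion(output, target)
loss.backward()
print(output.shape, loss.item())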

from skimage import io
import scipy.io
import os
import argparse
import numpy as np
import torch
import warnings
import matplotlib.pyplot as plt
from pathlib import Path
import matplotlib
import torch.nn as nn
import torch.optim as optim
import datetime
import time
import copy


def str2bool(v):
    if isinstance(v, bool):
        return v
    if str(v).lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif str(v).lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

#project_root - root directory
#data_dir - test mosaic images directory
#stitched_output_dir - predicted vesselness mosaic image
      
config = dict()
config["project_root"] = os.getenv('PROJECT_ROOT', 'Q:\\data')
config["data_dir"] = os.getenv('DATA_DIR', 'MB')
config["debug"] = str2bool(os.getenv('DEBUG', True))  # If True, will set device logging and other useful debug outputs.
config["model_dir"] = os.getenv('MODEL_DIR', 'MB\\models')
config["model_file"] = os.getenv("MODEL_FILE", "vggnet_")

parser = argparse.ArgumentParser(description='Splitting mosaic images, predicting split images and stitching them')

for key, value in config.items():
    t = type(value)
    if t is list or t is tuple:
        parser.add_argument('--' + key, nargs='+',
            default=value, type=type(value[0]),
            help='override for ' + key)
    elif t is bool:
        parser.add_argument('--' + key, type=str2bool, nargs='?',
            const=True, default=value,
            help='override for ' + key)
    else:
        parser.add_argument('--' + key,
            default=value, type=t,
            help='override for ' + key)

options = parser.parse_args()
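# Example invocation (hypothetical values, only to illustrate how the defaults
# set via environment variables above can be overridden on the command line):
#   python CNN_data.py --project_root Q:\data --data_dir MB --debug false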


cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 1)
        #self.last=nn.Sigmoid()

    def forward(self, x):
        print(x.size())
        out = self.features(x)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 1
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

model = VGG('VGG19')



if __name__ == '__main__':
    if torch.cuda.is_available() and not options.debug:
        device_type = 'cuda'
    else:
        device_type = 'cpu'
    device = torch.device(device_type)


image_dir  = os.path.join(
    options.project_root, options.data_dir, 'CNN_vessel_width/patches/')

image_list = [Path(f).stem for f in os.listdir(image_dir) 
    if os.path.isfile(os.path.join(image_dir, f))]
    
num_imgs = len(image_list)  
print ('number of images', num_imgs)


label_dir = os.path.join(
     options.project_root, options.data_dir, 'CNN_vessel_width/labels/')

label_list = [Path(f).stem for f in os.listdir(label_dir) 
    if os.path.isfile(os.path.join(label_dir, f))]
    
num_labels = len(label_list)  
print ('number of labels', num_labels)


model_path = os.path.join(
            options.project_root, options.model_dir, options.model_file)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

since = time.time()

best_loss = float('inf')
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    epoch_loss = 0.0

    for i, patch_name in enumerate(image_list):
        image_name = image_dir + patch_name + '.png'
        patch = io.imread(image_name).astype('float32')
        # normalise the patch and add batch/channel dims -> [1, 1, H, W]
        patch = torch.from_numpy(
            np.expand_dims(np.expand_dims((patch - 185) / 38.7, axis=0), axis=0)).float()
        # read the single regression target and match the output shape [1, 1]
        with open(label_dir + patch_name + '.txt', "r") as objText:
            label = torch.tensor([[float(objText.read())]])

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(patch)
        print('output size is:', outputs.size())
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        epoch_loss += loss.item()

        if i % 128 == 127:    # print the running average every 128 patches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 128))
            running_loss = 0.0

    # average loss over the epoch; keep the weights from the best epoch so far
    epoch_loss /= max(num_imgs, 1)
    print('epoch %d average loss: %.3f' % (epoch + 1, epoch_loss))
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        best_model_wts = copy.deepcopy(model.state_dict())

time_elapsed = time.time() - since
print('Finished Training in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
model_name = model_path + datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
torch.save(model.state_dict(), model_name + '_final.pth')

# restore and save the best weights tracked during training
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), model_name + '_opt.pth')

After changing to self.classifier = nn.Linear(512, 1) and using nn.MSELoss, I am getting a size mismatch error:

Traceback (most recent call last):
  File "CNN_data.py", line 157, in <module>
    outputs = model(patch)
  File "/opt/apps/apps/binapps/pytorch/1.0.1/python3.6/gpu/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "CNN_data.py", line 78, in forward
    out = self.classifier(out)
  File "/opt/apps/apps/binapps/pytorch/1.0.1/python3.6/gpu/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/opt/apps/apps/binapps/pytorch/1.0.1/python3.6/gpu/lib/python3.6/site-packages/torch/nn/modules/linear.py", line 67, in forward
    return F.linear(input, self.weight, self.bias)
  File "/opt/apps/apps/binapps/pytorch/1.0.1/python3.6/gpu/lib/python3.6/site-packages/torch/nn/functional.py", line 1354, in linear
    output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [4096 x 8], m2: [512 x 1] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

With your current setup (a 256x256 single-channel input and five pooling stages), the activation coming out of self.features will have the shape [batch_size, 512, 8, 8], so you would need to

  • flatten the activation before passing it to the linear layer via out = out.view(out.size(0), -1)
  • set the number of input features of self.classifier to 512*8*8

This code should work:

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512 * 8 * 8, 1)
        #self.last=nn.Sigmoid()

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 1
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)

model = VGG('VGG19')
x = torch.randn(2, 1, 256, 256)
out = model(x)
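
If you change the input resolution, a quick way to confirm the right in_features value is to pass a dummy tensor through the feature extractor and read off the shape (a small sanity-check sketch, assuming the model defined above):

# sanity check: the spatial size for a single-channel 256x256 input
dummy = torch.randn(1, 1, 256, 256)
feat = model.features(dummy)
print(feat.shape)                          # torch.Size([1, 512, 8, 8])
print(feat.view(feat.size(0), -1).shape)   # flattened size -> in_features for the linear layer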