Redesign LossNet for custom dataset

KanZa · December 18, 2022, 10:39am

I wanted to fix this Net for a custom dataset that has 64 sizes and 4 classes. When I input this dataset into this net it gives me the following error:

return F.avg_pool2d(input, self.kernel_size, self.stride,
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got -3)

model = LossNet(feature_sizes=[64, 32, 16, 8]).cuda()

class LossNet(nn.Module):
    def __init__(self, feature_sizes=[32, 16, 8, 4], num_channels=[64, 128, 256, 512], interm_dim=128):
        super(LossNet, self).__init__()
        
        self.GAP1 = nn.AvgPool2d(feature_sizes[0])
        self.GAP2 = nn.AvgPool2d(feature_sizes[1])
        self.GAP3 = nn.AvgPool2d(feature_sizes[2])
        self.GAP4 = nn.AvgPool2d(feature_sizes[3])

        self.FC1 = nn.Linear(num_channels[0], interm_dim)
        self.FC2 = nn.Linear(num_channels[1], interm_dim)
        self.FC3 = nn.Linear(num_channels[2], interm_dim)
        self.FC4 = nn.Linear(num_channels[3], interm_dim)

        self.linear = nn.Linear(4 * interm_dim, 1)
    
    def forward(self, features):
        out1 = self.GAP1(features[0])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[1])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[2])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[3])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))
        return out

Output

<bound method Module.cuda of LossNet(
  (GAP1): AvgPool2d(kernel_size=32, stride=32, padding=0)
  (GAP2): AvgPool2d(kernel_size=16, stride=16, padding=0)
  (GAP3): AvgPool2d(kernel_size=8, stride=8, padding=0)
  (GAP4): AvgPool2d(kernel_size=4, stride=4, padding=0)
  (FC1): Linear(in_features=64, out_features=128, bias=True)
  (FC2): Linear(in_features=128, out_features=128, bias=True)
  (FC3): Linear(in_features=256, out_features=128, bias=True)
  (FC4): Linear(in_features=512, out_features=128, bias=True)
  (linear): Linear(in_features=512, out_features=1, bias=True)
)>

J_Johnson · December 18, 2022, 10:54am

If there are 4 classes, you need 4 out_features on the last layer.

What is the size of features that first enters the model? (i.e. print(features.size()) Or is that a list/tuple?

KanZa · December 18, 2022, 11:10am

do you mean in the forward()?
out of features[0]
tensor([-0.6650, -0.2847, -0.3537, -0.3787], device=‘cuda:0’)
torch.Size([32, 4])

J_Johnson · December 18, 2022, 12:32pm

Your self.GAP{n} layers are AvgPool2D. That expects to receive inputs with 4 dims of size (batch_size, channels, height, width). Your input is not the size it takes.

https://pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html

KanZa · December 22, 2022, 9:48am

Thank you for your reply, actually, the input of LossNet is out of the ResNet model. Still, I am not getting how to fix the error

lass ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=4):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        # self.linear2 = nn.Linear(1000, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out1 = self.layer1(out)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        out = F.avg_pool2d(out4, 4)
        outf = out.view(out.size(0), -1)            # -1
        # outl = self.linear(outf)
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

J_Johnson · December 22, 2022, 1:32pm

Everything, except out and outf, should have 4 dims.

Try this:

out, outf, outn = res_model(inputs)
print(out.size(), outf.size())
for _ in range(len(outn)):
    print(_.size())

First, confirm you’re getting the right sizes out.

Once you’ve done that, I would change the last line of the Resnet forward pass to:

return [out, outf, out1, out2, out3, out4]

Then change your Loss model forward pass to:

def forward(self, features):
        out1 = self.GAP1(features[2])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[3])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[4])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[5])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))
        return out

Or show me how you’re passing the outputs of the Resnet model to the Loss model. It could just be that you’re not giving it the right input.

Seems you’re doing:

out, outf, outn = res_model(inputs)
loss=loss_model(out)

When it should be:

out, outf, outn = res_model(inputs)
loss=loss_model(outn)

KanZa · December 23, 2022, 9:41am

hi, again, thank you so much for guiding me and for your time.

When I change the last line of ResNet

return [out, outf, out1, out2, out3, out4]

BTW the repository is calling the backbone model in the test function

def test(models, epoch, dataloaders, mode='val'):
    assert mode == 'val' or mode == 'test'
    models['backbone'].eval()
    models['module'].eval()
    total = 0 
    correct = 0
    with torch.no_grad():
        for (inputs, labels) in dataloaders[mode]:
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                inputs = inputs.cuda()
                labels = labels.cuda()
            #scores, _, _= models['backbone'](inputs)

            out, outf, outn = models['backbone'](inputs)
            _, preds = torch.max(out.data, 1)
            
            #scores = models['backbone'](inputs)
            #_, preds = torch.max(scores.data, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()

    return 100 * correct / total

ValueError: too many values to unpack (expected 3)

KanZa · December 23, 2022, 9:44am

As per your suggestion, I have changed the forward method of LossNet()
and here how I am calling the LossNet()

for cycle in range(CYCLES):

            if not total:
                random.shuffle(unlabeled_set)
                subset = unlabeled_set[:SUBSET]
            
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
               
                resnet18    = resnet.ResNet18(num_classes=NO_CLASSES).cuda()
                
                loss_module = LossNet().cuda()
                models      = {'backbone': resnet18}

                models = {'backbone': resnet18, 'module': loss_module}
                torch.backends.cudnn.benchmark = True

J_Johnson · December 23, 2022, 9:53am

That was if you were to also update the LossNet forward pass as I mentioned.

What’s not clear is how you are passing the outputs of the ResNet to the LossNet. Where is that occurring?

If you’re using the outputs of the ResNet elsewhere, best to not mess with the forward pass or it might break something else. Please show in training the code that passes the outputs of the Resnet to the loss_module(LossNet).

KanZa · December 23, 2022, 10:00am

So the output of ResNet forward() is

out: torch.Size([128, 64, 32, 32])
out1: torch.Size([128, 64, 32, 32])
out2: torch.Size([128, 128, 16, 16])
out3: torch.Size([128, 256, 8, 8])
out4: torch.Size([128, 512, 4, 4])
out: torch.Size([128, 512, 1, 1])
outf: torch.Size([128, 512])
Last out: torch.Size([128, 10])

def train(models, criterion, optimizers, schedulers, dataloaders, num_epochs, epoch_loss):
    print('>> Train a Model.')
    best_acc = 0.
    
    for epoch in range(num_epochs):

        best_loss = torch.tensor([0.5]).cuda()
        loss = train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss)

        schedulers['backbone'].step()
        schedulers['module'].step()

        if False and epoch % 20  == 7:
            acc = test(models, epoch, dataloaders, mode='test')
            # acc = test(models, dataloaders, mc, 'test')
            if best_acc < acc:
                best_acc = acc
                print('Val Acc: {:.3f} \t Best Acc: {:.3f}'.format(acc, best_acc))
    print('>> Finished.')

KanZa · December 23, 2022, 10:01am

Entire Code except the ResNet

import math
import torch
import os
import random
import numpy as np
import torch.nn as nn 
from tqdm import tqdm

import torch.nn.init as init
import torch.nn.functional as F 
from torch.nn.modules.module import Module
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as T

from load_dataset import load_dataset
import argparse

import models.resnet as resnet
from load_dataset import load_dataset
from config import *


class LossNet(nn.Module):
    def __init__(self, feature_sizes=[32, 16, 8, 4], num_channels=[64, 128, 256, 512], interm_dim=128):
        super(LossNet, self).__init__()
        
        self.GAP1 = nn.AvgPool2d(feature_sizes[0])
        self.GAP2 = nn.AvgPool2d(feature_sizes[1])
        self.GAP3 = nn.AvgPool2d(feature_sizes[2])
        self.GAP4 = nn.AvgPool2d(feature_sizes[3])

        self.FC1 = nn.Linear(num_channels[0], interm_dim)
        self.FC2 = nn.Linear(num_channels[1], interm_dim)
        self.FC3 = nn.Linear(num_channels[2], interm_dim)
        self.FC4 = nn.Linear(num_channels[3], interm_dim)

        self.linear = nn.Linear(4 * interm_dim, 1)
    
    '''
    def forward(self, features):
        out1 = self.GAP1(features[0])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[1])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[2])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[3])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))
    '''
    def forward(self, features):
        out1 = self.GAP1(features[2])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[3])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[4])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[5])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))
        return out

def test(models, epoch, dataloaders, mode='val'):
    assert mode == 'val' or mode == 'test'
    models['backbone'].eval()
    models['module'].eval()
    
    total = 0 
    correct = 0
    with torch.no_grad():
        for (inputs, labels) in dataloaders[mode]:
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                inputs = inputs.cuda()
                labels = labels.cuda()
            #scores, _, _= models['backbone'](inputs)

            out, outf, outn = models['backbone'](inputs)
            _, preds = torch.max(out.data, 1)
            
            #scores = models['backbone'](inputs)
            #_, preds = torch.max(scores.data, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()

    return 100 * correct / total

iters = 0

def train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss):
    models['backbone'].train()
    models['module'].train()
    
    global iters
    for data in tqdm(dataloaders['train'], leave=False, total=len(dataloaders['train'])):
        with torch.cuda.device(CUDA_VISIBLE_DEVICES):
            inputs = data[0].cuda()
            labels = data[1].cuda()

        iters += 1

        optimizers['backbone'].zero_grad()
        optimizers['module'].zero_grad()

        scores, _, features = models['backbone'](inputs) 
        #scores = models['backbone'](inputs) 
        target_loss = criterion(scores, labels)

        if epoch > epoch_loss:
            features[0] = features[0].detach()
            features[1] = features[1].detach()
            features[2] = features[2].detach()
            features[3] = features[3].detach()

        pred_loss = models['module'](features)
        pred_loss = pred_loss.view(pred_loss.size(0))
        m_module_loss   = LossPredLoss(pred_loss, target_loss, margin=MARGIN)
        m_backbone_loss = torch.sum(target_loss) / target_loss.size(0)        
        loss            = m_backbone_loss + WEIGHT * m_module_loss 
        loss.backward()
        optimizers['backbone'].step()   
        optimizers['module'].step()
    #return loss

def train(models, criterion, optimizers, schedulers, dataloaders, num_epochs, epoch_loss):
    print('>> Train a Model.')
    best_acc = 0.
    
    for epoch in range(num_epochs):

        best_loss = torch.tensor([0.5]).cuda()
        loss = train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss)

        schedulers['backbone'].step()
        schedulers['module'].step()

        if False and epoch % 20  == 7:
            acc = test(models, epoch, dataloaders, mode='test')
            # acc = test(models, dataloaders, mc, 'test')
            if best_acc < acc:
                best_acc = acc
                print('Val Acc: {:.3f} \t Best Acc: {:.3f}'.format(acc, best_acc))
    print('>> Finished.')

def LossPredLoss(input, target, margin=1.0, reduction='mean'):
    assert len(input) % 2 == 0, 'the batch size is not even.'
    assert input.shape == input.flip(0).shape
    
    input = (input - input.flip(0))[:len(input)//2] # [l_1 - l_2B, l_2 - l_2B-1, ... , l_B - l_B+1], where batch_size = 2B
    target = (target - target.flip(0))[:len(target)//2]
    target = target.detach()

    one = 2 * torch.sign(torch.clamp(target, min=0)) - 1 # 1 operation which is defined by the authors
    
    if reduction == 'mean':
        loss = torch.sum(torch.clamp(margin - one * input, min=0))
        loss = loss / input.size(0) # Note that the size of input is already halved
    elif reduction == 'none':
        loss = torch.clamp(margin - one * input, min=0)
    else:
        NotImplementedError()
    
    return loss

def get_uncertainty(models, unlabeled_loader):
    models['backbone'].eval()
    models['module'].eval()
    with torch.cuda.device(CUDA_VISIBLE_DEVICES):
        uncertainty = torch.tensor([]).cuda()

    with torch.no_grad():
        for inputs, _, _ in unlabeled_loader:
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                inputs = inputs.cuda()
            #_, _, features = models['backbone'](inputs)
            features = models['backbone'](inputs)
            pred_loss = models['module'](features) # pred_loss = criterion(scores, labels) # ground truth loss
            pred_loss = pred_loss.view(pred_loss.size(0))
            uncertainty = torch.cat((uncertainty, pred_loss), 0)
    
    return uncertainty.cpu()

class SubsetSequentialSampler(torch.utils.data.Sampler):
    r"""Samples elements sequentially from a given list of indices, without replacement.

    Arguments:
        indices (sequence): a sequence of indices
    """

    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        return (self.indices[i] for i in range(len(self.indices)))
    
    def __len__(self):
        return len(self.indices)

def query_samples(model, data_unlabeled, subset, labeled_set, cycle, args):
    unlabeled_loader = DataLoader(data_unlabeled, batch_size=BATCH, 
                                sampler=SubsetSequentialSampler(subset), 
                                pin_memory=True)

    # Measure uncertainty of each data points in the subset
    uncertainty = get_uncertainty(model, unlabeled_loader)
    arg = np.argsort(uncertainty)       



parser = argparse.ArgumentParser()
parser.add_argument("-d","--dataset", type=str, default="cifar10", help="")
args = parser.parse_args()

if __name__ == '__main__':

    total = False
    cycles = 5
    no_of_epochs = 20
    
    datasets = ['Satellite', 'COVID','cifar10']
    assert args.dataset in datasets, 'No dataset %s! Try options %s'%(args.dataset, datasets)
 
    results = open('results_'+"_"+args.dataset +'_main'+str(cycles)+'.txt','w')
    print("Dataset: %s"%args.dataset)
   
    CYCLES = cycles

    for trial in range(TRIALS):
        # Load training and testing dataset
        data_train, data_unlabeled, data_test, adden, NO_CLASSES, no_train = load_dataset(args.dataset)
        # Don't predefine budget size. Configure it in the config.py: ADDENDUM = adden
        NUM_TRAIN = no_train
        indices = list(range(NUM_TRAIN))
        random.shuffle(indices)

        labeled_set = indices[:ADDENDUM]
        unlabeled_set = [x for x in indices if x not in labeled_set]

        train_loader = DataLoader(data_train, batch_size=BATCH, 
                                    sampler=SubsetRandomSampler(labeled_set), 
                                    pin_memory=True, drop_last=True)
        test_loader  = DataLoader(data_test, batch_size=BATCH)
        dataloaders  = {'train': train_loader, 'test': test_loader}

        for cycle in range(CYCLES):

            if not total:
                random.shuffle(unlabeled_set)
                subset = unlabeled_set[:SUBSET]
            
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                
                #if datasets == 'Satellite':
                '''       
                backbone = ResNet.ResNet18(num_classes=NO_CLASSES).cuda()
                pretrained_dict = torch.load('./resnet18-5c106cde.pth')
                model_dict = backbone.state_dict()
                pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
                model_dict.update(pretrained_dict)
                backbone.load_state_dict(model_dict)
                ''' 
                resnet18    = resnet.ResNet18(num_classes=NO_CLASSES).cuda()
                
                loss_module = LossNet().cuda()
                models      = {'backbone': resnet18}

                models = {'backbone': resnet18, 'module': loss_module}
                torch.backends.cudnn.benchmark = True
            
            # Loss, criterion and scheduler (re)initialization
            criterion      = nn.CrossEntropyLoss(reduction='none')
            optim_backbone = optim.SGD(models['backbone'].parameters(), lr=LR, 
                momentum=MOMENTUM, weight_decay=WDECAY)
 
            sched_backbone = lr_scheduler.MultiStepLR(optim_backbone, milestones=MILESTONES)
            optimizers = {'backbone': optim_backbone}
            schedulers = {'backbone': sched_backbone}
            
            optim_module   = optim.SGD(models['module'].parameters(), lr=LR, 
                momentum=MOMENTUM, weight_decay=WDECAY)
            sched_module   = lr_scheduler.MultiStepLR(optim_module, milestones=MILESTONES)
            optimizers = {'backbone': optim_backbone, 'module': optim_module}
            schedulers = {'backbone': sched_backbone, 'module': sched_module}
            
            # Training and testing
            train(models, criterion, optimizers, schedulers, dataloaders, no_of_epochs, EPOCHL)
            acc = test(models, EPOCH, dataloaders, mode='test')
            print('Trial {}/{} || Cycle {}/{} || Label set size {}: Test acc {}'.format(trial+1, TRIALS, cycle+1, CYCLES, len(labeled_set), acc))
            np.array([trial+1, TRIALS, cycle+1, CYCLES, len(labeled_set), acc]).tofile(results, sep=" ")
            results.write("\n")

            if cycle == (CYCLES-1):
                # Reached final training cycle
                print("Finished.")
                break
            # Get the indices of the unlabeled samples to train on next cycle
            arg = query_samples(models, data_unlabeled, subset, labeled_set, cycle, args)

            # Update the labeled dataset and the unlabeled dataset, respectively
            labeled_set += list(torch.tensor(subset)[arg][-ADDENDUM:].numpy())
            listd = list(torch.tensor(subset)[arg][:-ADDENDUM].numpy()) 
            unlabeled_set = listd + unlabeled_set[SUBSET:]
            print(len(labeled_set), min(labeled_set), max(labeled_set))
            # Create a new dataloader for the updated labeled dataset
            dataloaders['train'] = DataLoader(data_train, batch_size=BATCH, 
                                            sampler=SubsetRandomSampler(labeled_set), 
                                            pin_memory=True)

    results.close()

J_Johnson · December 23, 2022, 10:02am

Need to see the train_epoch function.

KanZa · December 23, 2022, 10:03am

Yeah, you are right train_epoch

def train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss):
    models['backbone'].train()
    models['module'].train()
    
    global iters
    for data in tqdm(dataloaders['train'], leave=False, total=len(dataloaders['train'])):
        with torch.cuda.device(CUDA_VISIBLE_DEVICES):
            inputs = data[0].cuda()
            labels = data[1].cuda()

        iters += 1

        optimizers['backbone'].zero_grad()
        optimizers['module'].zero_grad()

        scores, _, features = models['backbone'](inputs) 
        #scores = models['backbone'](inputs) 
        target_loss = criterion(scores, labels)

        if epoch > epoch_loss:
            features[0] = features[0].detach()
            features[1] = features[1].detach()
            features[2] = features[2].detach()
            features[3] = features[3].detach()

        pred_loss = models['module'](features)
        pred_loss = pred_loss.view(pred_loss.size(0))
        m_module_loss   = LossPredLoss(pred_loss, target_loss, margin=MARGIN)
        m_backbone_loss = torch.sum(target_loss) / target_loss.size(0)        
        loss            = m_backbone_loss + WEIGHT * m_module_loss 
        loss.backward()
        optimizers['backbone'].step()   
        optimizers['module'].step()
    #return loss

J_Johnson · December 23, 2022, 11:07am

Your resnet18 = resnet.ResNet18(num_classes=NO_CLASSES).cuda() inside of __main__ doesn’t define block or num_blocks. Both of those arguments are required by the model code you copied earlier. Can you copy the exact ResNet model code you are using?

KanZa · December 23, 2022, 12:01pm

First of all extremely sorry for the difference in ResNet, actually, I was trying to use PreTrained ResNet for a domain-specific dataset. But I thought to first try on plain CIFAR10 first. because it is also giving the same error
return F.avg_pool2d(input, self.kernel_size, self.stride,
IndexError: Dimension out of range (expected to be in range of [-2, 1], but got -3)

So the plain ResNet without PreTrained thing is

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        # self.linear2 = nn.Linear(1000, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out1 = self.layer1(out)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        out = F.avg_pool2d(out4, 4)
        outf = out.view(out.size(0), -1)
        # outl = self.linear(outf)
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

def ResNet18(num_classes = 10):
    return ResNet(BasicBlock, [2,2,2,2], num_classes)

J_Johnson · December 23, 2022, 12:12pm

KanZa:

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        # self.linear2 = nn.Linear(1000, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out1 = self.layer1(out)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        out = F.avg_pool2d(out4, 4)
        outf = out.view(out.size(0), -1)
        # outl = self.linear(outf)
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

def ResNet18(num_classes = 10):
    return ResNet(BasicBlock, [2,2,2,2], num_classes)

I’m not getting any errors with this code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        # self.linear2 = nn.Linear(1000, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out1 = self.layer1(out)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        out = F.avg_pool2d(out4, 4)
        outf = out.view(out.size(0), -1)
        # outl = self.linear(outf)
        print(outf.size())
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

def ResNet18(num_classes = 10):
    return ResNet(BasicBlock, [2,2,2,2], num_classes)


class LossNet(nn.Module):
    def __init__(self, feature_sizes=None, num_channels=None, interm_dim=128):
        super(LossNet, self).__init__()

        feature_sizes = [32, 16, 8, 4]
        num_channels = [64, 128, 256, 512]

        self.GAP1 = nn.AvgPool2d(feature_sizes[0])
        self.GAP2 = nn.AvgPool2d(feature_sizes[1])
        self.GAP3 = nn.AvgPool2d(feature_sizes[2])
        self.GAP4 = nn.AvgPool2d(feature_sizes[3])

        self.FC1 = nn.Linear(num_channels[0], interm_dim)
        self.FC2 = nn.Linear(num_channels[1], interm_dim)
        self.FC3 = nn.Linear(num_channels[2], interm_dim)
        self.FC4 = nn.Linear(num_channels[3], interm_dim)

        self.linear = nn.Linear(4 * interm_dim, 1)

    def forward(self, features):
        out1 = self.GAP1(features[0])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[1])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[2])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[3])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))
        return out

res_model = ResNet18(num_classes=4)
loss_model =LossNet()

x = torch.rand(10, 3, 32, 32) #CIFAR10 sizes

with torch.no_grad():
    outs, outf, features = res_model(x)
    loss = loss_model(features)
    print(loss.size())

KanZa · December 26, 2022, 10:05am

Thank you for your time.
But the actual purpose was for fixing the ResNet for these dimensions (4, 3, 64, 64) which is still giving errors. I hope I did the changes correctly. Can you please check if I am not mistaking anything?
Error: mat1 and mat2 shapes cannot be multiplied (4x256 and 64x128)

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=4):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        # self.linear2 = nn.Linear(1000, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out1 = self.layer1(out)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        out = F.avg_pool2d(out4, 4)
        outf = out.view(out.size(0), -1)
        # outl = self.linear(outf)
        print(outf.size())
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

def ResNet18(num_classes = 4):
    return ResNet(BasicBlock, [2,2,2,2], num_classes)


class LossNet(nn.Module):
    def __init__(self, feature_sizes=None, num_channels=None, interm_dim=128):
        super(LossNet, self).__init__()

        feature_sizes = [32, 16, 8, 4]
        num_channels = [64, 128, 256, 512]

        self.GAP1 = nn.AvgPool2d(feature_sizes[0])
        self.GAP2 = nn.AvgPool2d(feature_sizes[1])
        self.GAP3 = nn.AvgPool2d(feature_sizes[2])
        self.GAP4 = nn.AvgPool2d(feature_sizes[3])

        self.FC1 = nn.Linear(num_channels[0], interm_dim)
        self.FC2 = nn.Linear(num_channels[1], interm_dim)
        self.FC3 = nn.Linear(num_channels[2], interm_dim)
        self.FC4 = nn.Linear(num_channels[3], interm_dim)

        self.linear = nn.Linear(4 * interm_dim, 1)

    def forward(self, features):
        out1 = self.GAP1(features[0])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[1])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[2])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[3])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))
        return out

import os
import random
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as T
import torchvision.models as models
from tqdm import tqdm
import argparse

# Custom
import resnet as resnet
from resnet import LossNet
#from load_dataset import load_dataset
from load_PreTrained import load_dataset
from config import *



class SubsetSequentialSampler(torch.utils.data.Sampler):
    r"""Samples elements sequentially from a given list of indices, without replacement.
    Arguments:
        indices (sequence): a sequence of indices
    """

    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        return (self.indices[i] for i in range(len(self.indices)))
    
    def __len__(self):
        return len(self.indices)
                
def get_uncertainty(models, unlabeled_loader):
    models['backbone'].eval()
    models['module'].eval()
    with torch.cuda.device(CUDA_VISIBLE_DEVICES):
        uncertainty = torch.tensor([]).cuda()

    with torch.no_grad():
        for inputs, _, _ in unlabeled_loader:
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                inputs = inputs.cuda()
            _, _, features = models['backbone'](inputs)
            pred_loss = models['module'](features) # pred_loss = criterion(scores, labels) # ground truth loss
            pred_loss = pred_loss.view(pred_loss.size(0))
            uncertainty = torch.cat((uncertainty, pred_loss), 0)
    
    return uncertainty.cpu()



# Select the indices of the unlablled data according to the methods
def query_samples(model, data_unlabeled, subset, labeled_set, cycle, args):

    # Create unlabeled dataloader for the unlabeled subset
    unlabeled_loader = DataLoader(data_unlabeled, batch_size=BATCH, 
                                sampler=SubsetSequentialSampler(subset), 
                                pin_memory=True)

    # Measure uncertainty of each data points in the subset
    uncertainty = get_uncertainty(model, unlabeled_loader)
    arg = np.argsort(uncertainty)        

    
    return arg

def LossPredLoss(input, target, margin=1.0, reduction='mean'):
    assert len(input) % 2 == 0, 'the batch size is not even.'
    assert input.shape == input.flip(0).shape
    
    input = (input - input.flip(0))[:len(input)//2] # [l_1 - l_2B, l_2 - l_2B-1, ... , l_B - l_B+1], where batch_size = 2B
    target = (target - target.flip(0))[:len(target)//2]
    target = target.detach()

    one = 2 * torch.sign(torch.clamp(target, min=0)) - 1 # 1 operation which is defined by the authors
    
    if reduction == 'mean':
        loss = torch.sum(torch.clamp(margin - one * input, min=0))
        loss = loss / input.size(0) # Note that the size of input is already halved
    elif reduction == 'none':
        loss = torch.clamp(margin - one * input, min=0)
    else:
        NotImplementedError()
    
    return loss


def test(models, epoch, dataloaders, mode='val'):
    assert mode == 'val' or mode == 'test'
    models['backbone'].eval()
    
    models['module'].eval()
    
    total = 0
    correct = 0
    with torch.no_grad():
        for (inputs, labels) in dataloaders[mode]:
            with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                inputs = inputs.cuda()
                labels = labels.cuda()

            scores, _, _ = models['backbone'](inputs)
            _, preds = torch.max(scores.data, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    
    return 100 * correct / total

iters = 0
def train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss):


    models['backbone'].train()
    models['module'].train()
    global iters
    for data in tqdm(dataloaders['train'], leave=False, total=len(dataloaders['train'])):
        with torch.cuda.device(CUDA_VISIBLE_DEVICES):
            inputs = data[0].cuda()
            labels = data[1].cuda()

        iters += 1

        optimizers['backbone'].zero_grad()
        
        optimizers['module'].zero_grad()

        scores, _, features = models['backbone'](inputs) 
        target_loss = criterion(scores, labels)
        
        if epoch > epoch_loss:
            features[0] = features[0].detach()
            features[1] = features[1].detach()
            features[2] = features[2].detach()
            features[3] = features[3].detach()

        pred_loss = models['module'](features)
        pred_loss = pred_loss.view(pred_loss.size(0))
        m_module_loss   = LossPredLoss(pred_loss, target_loss, margin=MARGIN)
        m_backbone_loss = torch.sum(target_loss) / target_loss.size(0)        
        loss            = m_backbone_loss + WEIGHT * m_module_loss 
        
        loss.backward()
        optimizers['backbone'].step()
        
        optimizers['module'].step()
    return loss

def train(models, criterion, optimizers, schedulers, dataloaders, num_epochs, epoch_loss):
    print('>> Train a Model.')
    best_acc = 0.
    
    for epoch in range(num_epochs):

        best_loss = torch.tensor([0.5]).cuda()
        loss = train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss)

        schedulers['backbone'].step()
        
        schedulers['module'].step()

        if False and epoch % 20  == 7:
            acc = test(models, epoch, method, dataloaders, mode='test')
            # acc = test(models, dataloaders, mc, 'test')
            if best_acc < acc:
                best_acc = acc
                print('Val Acc: {:.3f} \t Best Acc: {:.3f}'.format(acc, best_acc))
    print('>> Finished.')

parser = argparse.ArgumentParser()

parser.add_argument("-d","--dataset", type=str, default="Satellite",
                    help="")
parser.add_argument("-e","--no_of_epochs", type=int, default=2,
                    help="Number of epochs for the active learner")
parser.add_argument("-m","--method_type", type=str, default="lloss",
                    help="")
parser.add_argument("-c","--cycles", type=int, default=5,
                    help="Number of active learning cycles")
parser.add_argument("-t","--total", type=bool, default=False,
                    help="Training on the entire dataset")

args = parser.parse_args()

# Main
if __name__ == '__main__':

    datasets = ['cifar10', 'Satellite']
   
    assert args.dataset in datasets, 'No dataset %s! Try options %s'%(args.dataset, datasets)
    
    results = open('results_'+str(args.method_type)+"_"+args.dataset +'_main'+str(args.cycles)+str(args.total)+'.txt','w')
    print("Dataset: %s"%args.dataset)
    
    if args.total:
        TRIALS = 1
        CYCLES = 1
    else:
        CYCLES = args.cycles
    for trial in range(TRIALS):

        # Load training and testing dataset
        data_train, data_unlabeled, data_test, adden, NO_CLASSES, no_train = load_dataset(args.dataset)
        # Don't predefine budget size. Configure it in the config.py: ADDENDUM = adden
        NUM_TRAIN = no_train
        indices = list(range(NUM_TRAIN))
        random.shuffle(indices)

        if args.total:
            labeled_set= indices
        else:
            labeled_set = indices[:ADDENDUM]
            unlabeled_set = [x for x in indices if x not in labeled_set]

        train_loader = DataLoader(data_train, batch_size=BATCH, 
                                    sampler=SubsetRandomSampler(labeled_set), 
                                    pin_memory=True, drop_last=True)
        test_loader  = DataLoader(data_test, batch_size=BATCH)
        dataloaders  = {'train': train_loader, 'test': test_loader}

        for cycle in range(CYCLES):
            
            # Randomly sample 10000 unlabeled data points
            if not args.total:
                random.shuffle(unlabeled_set)
                subset = unlabeled_set[:SUBSET]

            # Model - create new instance for every cycle so that it resets
            #with torch.cuda.device(CUDA_VISIBLE_DEVICES):
                
                #resnet18    = resnet.ResNet18(num_classes=NO_CLASSES).cuda()
                #loss_module = LossNet().cuda()
                resnet18 = resnet.ResNet18(num_classes=4)
                loss_module=LossNet()

                #x = torch.rand(10, 3, 32, 32) #CIFAR10 sizes
                x = torch.rand(4, 3, 64, 64)

                with torch.no_grad():
                    outs, outf, features = resnet18(x)
                    loss = loss_module(features)
                    print(loss.size())

            models      = {'backbone': resnet18}
            models = {'backbone': resnet18, 'module': loss_module}
            torch.backends.cudnn.benchmark = True
            
            # Loss, criterion and scheduler (re)initialization
            criterion      = nn.CrossEntropyLoss(reduction='none')
            optim_backbone = optim.SGD(models['backbone'].parameters(), lr=LR, 
                momentum=MOMENTUM, weight_decay=WDECAY)
 
            sched_backbone = lr_scheduler.MultiStepLR(optim_backbone, milestones=MILESTONES)
            optimizers = {'backbone': optim_backbone}
            schedulers = {'backbone': sched_backbone}
            
            optim_module   = optim.SGD(models['module'].parameters(), lr=LR, 
                momentum=MOMENTUM, weight_decay=WDECAY)
            sched_module   = lr_scheduler.MultiStepLR(optim_module, milestones=MILESTONES)
            optimizers = {'backbone': optim_backbone, 'module': optim_module}
            schedulers = {'backbone': sched_backbone, 'module': sched_module}
        
            # Training and testing
            train(models, criterion, optimizers, schedulers, dataloaders, args.no_of_epochs, EPOCHL)
            acc = test(models, EPOCH, dataloaders, mode='test')
            print('Trial {}/{} || Cycle {}/{} || Label set size {}: Test acc {}'.format(trial+1, TRIALS, cycle+1, CYCLES, len(labeled_set), acc))
            np.array([trial+1, TRIALS, cycle+1, CYCLES, len(labeled_set), acc]).tofile(results, sep=" ")
            results.write("\n")


            if cycle == (CYCLES-1):
                # Reached final training cycle
                print("Finished.")
                break
            # Get the indices of the unlabeled samples to train on next cycle
            arg = query_samples(models, data_unlabeled, subset, labeled_set, cycle, args)

            # Update the labeled dataset and the unlabeled dataset, respectively
            labeled_set += list(torch.tensor(subset)[arg][-ADDENDUM:].numpy())
            listd = list(torch.tensor(subset)[arg][:-ADDENDUM].numpy()) 
            unlabeled_set = listd + unlabeled_set[SUBSET:]
            print(len(labeled_set), min(labeled_set), max(labeled_set))
            # Create a new dataloader for the updated labeled dataset
            dataloaders['train'] = DataLoader(data_train, batch_size=BATCH, 
                                            sampler=SubsetRandomSampler(labeled_set), 
                                            pin_memory=True)

    results.close()

J_Johnson · December 26, 2022, 1:38pm

I’m not really sure what you changed. Anyway, this code should allow for variable input sizes:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)
        # self.linear2 = nn.Linear(1000, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        N, C, H, W = x.size()  #new code
        out = F.relu(self.bn1(self.conv1(x)))
        if (H!=32)|(W!=32):  #new code
            Hpool=H//32
            Wpool=W//32
            out=F.max_pool2d(out, (Hpool, Wpool), (Hpool, Wpool))  #end new code
        out1 = self.layer1(out)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)
        out = F.avg_pool2d(out4, 4)
        outf = out.view(out.size(0), -1)
        # outl = self.linear(outf)
        print(outf.size())
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

def ResNet18(num_classes = 10):
    return ResNet(BasicBlock, [2,2,2,2], num_classes)


class LossNet(nn.Module):
    def __init__(self, feature_sizes=None, num_channels=None, interm_dim=128):
        super(LossNet, self).__init__()

        feature_sizes = [32, 16, 8, 4]
        num_channels = [64, 128, 256, 512]

        self.GAP1 = nn.AvgPool2d(feature_sizes[0])
        self.GAP2 = nn.AvgPool2d(feature_sizes[1])
        self.GAP3 = nn.AvgPool2d(feature_sizes[2])
        self.GAP4 = nn.AvgPool2d(feature_sizes[3])

        self.FC1 = nn.Linear(num_channels[0], interm_dim)
        self.FC2 = nn.Linear(num_channels[1], interm_dim)
        self.FC3 = nn.Linear(num_channels[2], interm_dim)
        self.FC4 = nn.Linear(num_channels[3], interm_dim)

        self.linear = nn.Linear(4 * interm_dim, 1)

    def forward(self, features):
        out1 = self.GAP1(features[0])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))

        out2 = self.GAP2(features[1])
        out2 = out2.view(out2.size(0), -1)
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[2])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[3])
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))
        return out

res_model = ResNet18(num_classes=4)
loss_model =LossNet()

x = torch.rand(10, 3, 64, 64) #CIFAR10 sizes

with torch.no_grad():
    outs, outf, features = res_model(x)
    loss = loss_model(features)
    print(loss.size())

I’ve marked the changes in the forward pass of the ResNet. You can use either maxpool2d or an avgpool2d. The benefit of this approach is it actually makes the model flexible for various-sized inputs, as long as they are divisible by 32 on the height and width.

KanZa · December 27, 2022, 11:30am

I cannot thank you enough for your help.

So The situation for my dataset which is x = torch.rand(4, 3, 64, 64)
in ResNet there was an error on this line out = F.avg_pool2d(out4, 4)
Error: mat1 and mat2 shapes cannot be multiplied (4x2048 and 512x4)
which I fixed by
out = F.avg_pool2d(out4, 8)
Is my correction allowed?
I have printed the output of CIFAR10 and then my Dataset

def forward(self, x):
        N, C, H, W = x.size()  #new code                3, 32, 10, 32                                                  
        out = F.relu(self.bn1(self.conv1(x)))
        if (H!=2)|(W!=2):  #new code
            Hpool=H//2                                # 16, 16
            Wpool=W//2
            out=F.max_pool2d(out, (Hpool, Wpool), (Hpool, Wpool))  #end new code
        out = F.relu(self.bn1(self.conv1(x)))       #([10, 64, 32, 32])   out             
        out1 = self.layer1(out)                     #([128, 64, 32, 32])    out1            ([4, 64, 64, 64])
        out2 = self.layer2(out1)                    #([128, 128, 16, 16])   out2            ([4, 128, 32, 32])
        out3 = self.layer3(out2)                    #([128, 256, 6, 6])   out3              ([4, 256, 16, 16])
        out4 = self.layer4(out3)                    #([128, 512, 4, 4])    out4             ([4, 512, 8, 8])    This one should be same

        out = F.avg_pool2d(out4, 8)                 #([10, 512, 1, 1])                      #([4, 512, 2, 2])  2 was giving error so I made 
                                                                                            #avg_pool2d(out4, 8) here 8
        outf = out.view(out.size(0), -1)            #torch.Size([128, 512]) outf            ([4, 2048])         with this one 
        # outl = self.linear(outf)
        #print(outf.size())
        out = self.linear(outf)
        return out, outf, [out1, out2, out3, out4]

Now I am still facing another error in LossNet forward()
Error: mat1 and mat2 shapes cannot be multiplied (4x256 and 64x128)
error is on out1

def forward(self, features):
        out1 = self.GAP1(features[0])
        out1 = out1.view(out1.size(0), -1)
        out1 = F.relu(self.FC1(out1))                  #([10, 64])    #<<<Error is on this line 

        out2 = self.GAP2(features[1])
        out2 = out2.view(out2.size(0), -1)          #([10, 128])
        out2 = F.relu(self.FC2(out2))

        out3 = self.GAP3(features[2])                #([10, 128])
        out3 = out3.view(out3.size(0), -1)
        out3 = F.relu(self.FC3(out3))

        out4 = self.GAP4(features[3])       
        out4 = out4.view(out4.size(0), -1)
        out4 = F.relu(self.FC4(out4))                  #([10, 128])

        out = self.linear(torch.cat((out1, out2, out3, out4), 1))       #([10, 1])
        return out

J_Johnson · December 27, 2022, 1:26pm

Did you change any of the arguments when making the ResNet? Because when I copy the code in my last response, it works for any size input image that is divisible by 32. num_classes can be changed, but the other parameters shouldn’t be changed.