Thank you for your time.
But the actual purpose was for fixing the ResNet for these dimensions (4, 3, 64, 64) which is still giving errors. I hope I did the changes correctly. Can you please check if I am not mistaking anything?
Error: mat1 and mat2 shapes cannot be multiplied (4x256 and 64x128)
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, num_blocks, num_classes=4):
super(ResNet, self).__init__()
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512*block.expansion, num_classes)
# self.linear2 = nn.Linear(1000, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out1 = self.layer1(out)
out2 = self.layer2(out1)
out3 = self.layer3(out2)
out4 = self.layer4(out3)
out = F.avg_pool2d(out4, 4)
outf = out.view(out.size(0), -1)
# outl = self.linear(outf)
print(outf.size())
out = self.linear(outf)
return out, outf, [out1, out2, out3, out4]
def ResNet18(num_classes = 4):
return ResNet(BasicBlock, [2,2,2,2], num_classes)
class LossNet(nn.Module):
def __init__(self, feature_sizes=None, num_channels=None, interm_dim=128):
super(LossNet, self).__init__()
feature_sizes = [32, 16, 8, 4]
num_channels = [64, 128, 256, 512]
self.GAP1 = nn.AvgPool2d(feature_sizes[0])
self.GAP2 = nn.AvgPool2d(feature_sizes[1])
self.GAP3 = nn.AvgPool2d(feature_sizes[2])
self.GAP4 = nn.AvgPool2d(feature_sizes[3])
self.FC1 = nn.Linear(num_channels[0], interm_dim)
self.FC2 = nn.Linear(num_channels[1], interm_dim)
self.FC3 = nn.Linear(num_channels[2], interm_dim)
self.FC4 = nn.Linear(num_channels[3], interm_dim)
self.linear = nn.Linear(4 * interm_dim, 1)
def forward(self, features):
out1 = self.GAP1(features[0])
out1 = out1.view(out1.size(0), -1)
out1 = F.relu(self.FC1(out1))
out2 = self.GAP2(features[1])
out2 = out2.view(out2.size(0), -1)
out2 = F.relu(self.FC2(out2))
out3 = self.GAP3(features[2])
out3 = out3.view(out3.size(0), -1)
out3 = F.relu(self.FC3(out3))
out4 = self.GAP4(features[3])
out4 = out4.view(out4.size(0), -1)
out4 = F.relu(self.FC4(out4))
out = self.linear(torch.cat((out1, out2, out3, out4), 1))
return out
import os
import random
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as T
import torchvision.models as models
from tqdm import tqdm
import argparse
# Custom
import resnet as resnet
from resnet import LossNet
#from load_dataset import load_dataset
from load_PreTrained import load_dataset
from config import *
class SubsetSequentialSampler(torch.utils.data.Sampler):
r"""Samples elements sequentially from a given list of indices, without replacement.
Arguments:
indices (sequence): a sequence of indices
"""
def __init__(self, indices):
self.indices = indices
def __iter__(self):
return (self.indices[i] for i in range(len(self.indices)))
def __len__(self):
return len(self.indices)
def get_uncertainty(models, unlabeled_loader):
models['backbone'].eval()
models['module'].eval()
with torch.cuda.device(CUDA_VISIBLE_DEVICES):
uncertainty = torch.tensor([]).cuda()
with torch.no_grad():
for inputs, _, _ in unlabeled_loader:
with torch.cuda.device(CUDA_VISIBLE_DEVICES):
inputs = inputs.cuda()
_, _, features = models['backbone'](inputs)
pred_loss = models['module'](features) # pred_loss = criterion(scores, labels) # ground truth loss
pred_loss = pred_loss.view(pred_loss.size(0))
uncertainty = torch.cat((uncertainty, pred_loss), 0)
return uncertainty.cpu()
# Select the indices of the unlablled data according to the methods
def query_samples(model, data_unlabeled, subset, labeled_set, cycle, args):
# Create unlabeled dataloader for the unlabeled subset
unlabeled_loader = DataLoader(data_unlabeled, batch_size=BATCH,
sampler=SubsetSequentialSampler(subset),
pin_memory=True)
# Measure uncertainty of each data points in the subset
uncertainty = get_uncertainty(model, unlabeled_loader)
arg = np.argsort(uncertainty)
return arg
def LossPredLoss(input, target, margin=1.0, reduction='mean'):
assert len(input) % 2 == 0, 'the batch size is not even.'
assert input.shape == input.flip(0).shape
input = (input - input.flip(0))[:len(input)//2] # [l_1 - l_2B, l_2 - l_2B-1, ... , l_B - l_B+1], where batch_size = 2B
target = (target - target.flip(0))[:len(target)//2]
target = target.detach()
one = 2 * torch.sign(torch.clamp(target, min=0)) - 1 # 1 operation which is defined by the authors
if reduction == 'mean':
loss = torch.sum(torch.clamp(margin - one * input, min=0))
loss = loss / input.size(0) # Note that the size of input is already halved
elif reduction == 'none':
loss = torch.clamp(margin - one * input, min=0)
else:
NotImplementedError()
return loss
def test(models, epoch, dataloaders, mode='val'):
assert mode == 'val' or mode == 'test'
models['backbone'].eval()
models['module'].eval()
total = 0
correct = 0
with torch.no_grad():
for (inputs, labels) in dataloaders[mode]:
with torch.cuda.device(CUDA_VISIBLE_DEVICES):
inputs = inputs.cuda()
labels = labels.cuda()
scores, _, _ = models['backbone'](inputs)
_, preds = torch.max(scores.data, 1)
total += labels.size(0)
correct += (preds == labels).sum().item()
return 100 * correct / total
iters = 0
def train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss):
models['backbone'].train()
models['module'].train()
global iters
for data in tqdm(dataloaders['train'], leave=False, total=len(dataloaders['train'])):
with torch.cuda.device(CUDA_VISIBLE_DEVICES):
inputs = data[0].cuda()
labels = data[1].cuda()
iters += 1
optimizers['backbone'].zero_grad()
optimizers['module'].zero_grad()
scores, _, features = models['backbone'](inputs)
target_loss = criterion(scores, labels)
if epoch > epoch_loss:
features[0] = features[0].detach()
features[1] = features[1].detach()
features[2] = features[2].detach()
features[3] = features[3].detach()
pred_loss = models['module'](features)
pred_loss = pred_loss.view(pred_loss.size(0))
m_module_loss = LossPredLoss(pred_loss, target_loss, margin=MARGIN)
m_backbone_loss = torch.sum(target_loss) / target_loss.size(0)
loss = m_backbone_loss + WEIGHT * m_module_loss
loss.backward()
optimizers['backbone'].step()
optimizers['module'].step()
return loss
def train(models, criterion, optimizers, schedulers, dataloaders, num_epochs, epoch_loss):
print('>> Train a Model.')
best_acc = 0.
for epoch in range(num_epochs):
best_loss = torch.tensor([0.5]).cuda()
loss = train_epoch(models, criterion, optimizers, dataloaders, epoch, epoch_loss)
schedulers['backbone'].step()
schedulers['module'].step()
if False and epoch % 20 == 7:
acc = test(models, epoch, method, dataloaders, mode='test')
# acc = test(models, dataloaders, mc, 'test')
if best_acc < acc:
best_acc = acc
print('Val Acc: {:.3f} \t Best Acc: {:.3f}'.format(acc, best_acc))
print('>> Finished.')
parser = argparse.ArgumentParser()
parser.add_argument("-d","--dataset", type=str, default="Satellite",
help="")
parser.add_argument("-e","--no_of_epochs", type=int, default=2,
help="Number of epochs for the active learner")
parser.add_argument("-m","--method_type", type=str, default="lloss",
help="")
parser.add_argument("-c","--cycles", type=int, default=5,
help="Number of active learning cycles")
parser.add_argument("-t","--total", type=bool, default=False,
help="Training on the entire dataset")
args = parser.parse_args()
# Main
if __name__ == '__main__':
datasets = ['cifar10', 'Satellite']
assert args.dataset in datasets, 'No dataset %s! Try options %s'%(args.dataset, datasets)
results = open('results_'+str(args.method_type)+"_"+args.dataset +'_main'+str(args.cycles)+str(args.total)+'.txt','w')
print("Dataset: %s"%args.dataset)
if args.total:
TRIALS = 1
CYCLES = 1
else:
CYCLES = args.cycles
for trial in range(TRIALS):
# Load training and testing dataset
data_train, data_unlabeled, data_test, adden, NO_CLASSES, no_train = load_dataset(args.dataset)
# Don't predefine budget size. Configure it in the config.py: ADDENDUM = adden
NUM_TRAIN = no_train
indices = list(range(NUM_TRAIN))
random.shuffle(indices)
if args.total:
labeled_set= indices
else:
labeled_set = indices[:ADDENDUM]
unlabeled_set = [x for x in indices if x not in labeled_set]
train_loader = DataLoader(data_train, batch_size=BATCH,
sampler=SubsetRandomSampler(labeled_set),
pin_memory=True, drop_last=True)
test_loader = DataLoader(data_test, batch_size=BATCH)
dataloaders = {'train': train_loader, 'test': test_loader}
for cycle in range(CYCLES):
# Randomly sample 10000 unlabeled data points
if not args.total:
random.shuffle(unlabeled_set)
subset = unlabeled_set[:SUBSET]
# Model - create new instance for every cycle so that it resets
#with torch.cuda.device(CUDA_VISIBLE_DEVICES):
#resnet18 = resnet.ResNet18(num_classes=NO_CLASSES).cuda()
#loss_module = LossNet().cuda()
resnet18 = resnet.ResNet18(num_classes=4)
loss_module=LossNet()
#x = torch.rand(10, 3, 32, 32) #CIFAR10 sizes
x = torch.rand(4, 3, 64, 64)
with torch.no_grad():
outs, outf, features = resnet18(x)
loss = loss_module(features)
print(loss.size())
models = {'backbone': resnet18}
models = {'backbone': resnet18, 'module': loss_module}
torch.backends.cudnn.benchmark = True
# Loss, criterion and scheduler (re)initialization
criterion = nn.CrossEntropyLoss(reduction='none')
optim_backbone = optim.SGD(models['backbone'].parameters(), lr=LR,
momentum=MOMENTUM, weight_decay=WDECAY)
sched_backbone = lr_scheduler.MultiStepLR(optim_backbone, milestones=MILESTONES)
optimizers = {'backbone': optim_backbone}
schedulers = {'backbone': sched_backbone}
optim_module = optim.SGD(models['module'].parameters(), lr=LR,
momentum=MOMENTUM, weight_decay=WDECAY)
sched_module = lr_scheduler.MultiStepLR(optim_module, milestones=MILESTONES)
optimizers = {'backbone': optim_backbone, 'module': optim_module}
schedulers = {'backbone': sched_backbone, 'module': sched_module}
# Training and testing
train(models, criterion, optimizers, schedulers, dataloaders, args.no_of_epochs, EPOCHL)
acc = test(models, EPOCH, dataloaders, mode='test')
print('Trial {}/{} || Cycle {}/{} || Label set size {}: Test acc {}'.format(trial+1, TRIALS, cycle+1, CYCLES, len(labeled_set), acc))
np.array([trial+1, TRIALS, cycle+1, CYCLES, len(labeled_set), acc]).tofile(results, sep=" ")
results.write("\n")
if cycle == (CYCLES-1):
# Reached final training cycle
print("Finished.")
break
# Get the indices of the unlabeled samples to train on next cycle
arg = query_samples(models, data_unlabeled, subset, labeled_set, cycle, args)
# Update the labeled dataset and the unlabeled dataset, respectively
labeled_set += list(torch.tensor(subset)[arg][-ADDENDUM:].numpy())
listd = list(torch.tensor(subset)[arg][:-ADDENDUM].numpy())
unlabeled_set = listd + unlabeled_set[SUBSET:]
print(len(labeled_set), min(labeled_set), max(labeled_set))
# Create a new dataloader for the updated labeled dataset
dataloaders['train'] = DataLoader(data_train, batch_size=BATCH,
sampler=SubsetRandomSampler(labeled_set),
pin_memory=True)
results.close()