AttributeError: 'Sequential' object has no attribute 'classifier'. I get this error while fusing BatchNorm params into a Linear layer, exactly at the point where I replace the old linear layer with the newly fused one.

  1. Code used for fusing
  • The error is raised at m.classifier[1].linear = bc in this code.
```
class DummyModule_1(nn.Module):
    def __init__(self):
        super(DummyModule_1, self).__init__()

    def forward(self, x):
        # print("Dummy, Dummy.")
        return x


def fuse_1(linear, bn):
    w = linear.weight
    print(w.size())
    mean = bn.running_mean
    var_sqrt = torch.sqrt(bn.running_var + bn.eps)

    beta = bn.weight
    gamma = bn.bias

    if linear.bias is not None:
        b = linear.bias
    else:
        b = mean.new_zeros(mean.shape)

    w = w.cuda()
    b = b.cuda()
    w = w * (beta / var_sqrt).reshape([4096, 1])
    b = (b - mean)/var_sqrt * beta + gamma
    fused_linear = nn.Linear(linear.in_features,
                         linear.out_features)
                                             
    fused_linear.weight = nn.Parameter(w)
    fused_linear.bias = nn.Parameter(b)
    return fused_linear


def fuse_module_1(m):
    children = list(m.named_children())
    c = None
    cn = None
    global c1
    global count
    global c18

    for name, child in children:
        print("name is",name,"child is",child)
       
         

        if name == 'linear':
          count = count+1 
          
          if count == 2:
            c18 = child
            print("c18 is",c18)

          else:
            fuse_module_1(child)

        if name =='2' and isinstance(child,nn.BatchNorm1d):
          print("child is",child)
          bc = fuse_1(c18,child)
          m.classifier[1].linear = bc
          m.classifier[2] = DummyModule_1()


        else:
            #fuse_module_1(child)
          fuse_module_1(child)
```
```
class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96, eps=1e-4, momentum=0.1, affine=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(256, 384, kernel_size=3, stride=1, padding=1),
            BinConv2d(384, 384, kernel_size=3, stride=1, padding=1, groups=1),
            BinConv2d(384, 256, kernel_size=3, stride=1, padding=1, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            BinConv2d(256 * 6 * 6, 4096, Linear=True),
            BinConv2d(4096, 4096, dropout=0.5, Linear=True),
            nn.BatchNorm1d(4096, eps=1e-3, momentum=0.1, affine=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
```
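
As an aside, the fusion math in fuse_1 can be checked independently of the model: in eval mode, the fused layer should reproduce bn(linear(x)). Below is a minimal sketch with made-up layer sizes (nothing here refers to the actual AlexNet; it just replays the same folding without the hard-coded 4096):

```
import torch
import torch.nn as nn

torch.manual_seed(0)

# toy sizes, unrelated to the model above
linear = nn.Linear(8, 4)
bn = nn.BatchNorm1d(4, eps=1e-3)

# give BN non-trivial statistics so the comparison is meaningful
with torch.no_grad():
    bn.running_mean.uniform_(-1, 1)
    bn.running_var.uniform_(0.5, 2.0)
    bn.weight.uniform_(0.5, 1.5)
    bn.bias.uniform_(-0.5, 0.5)

linear.eval()
bn.eval()

# fold BN into the Linear layer (same math as fuse_1)
with torch.no_grad():
    var_sqrt = torch.sqrt(bn.running_var + bn.eps)
    scale = bn.weight / var_sqrt                    # one scale per output feature
    fused = nn.Linear(linear.in_features, linear.out_features)
    fused.weight.copy_(linear.weight * scale.reshape(-1, 1))
    fused.bias.copy_((linear.bias - bn.running_mean) * scale + bn.bias)

x = torch.randn(5, 8)
print(torch.allclose(bn(linear(x)), fused(x), atol=1e-5))  # should print True
```

The same folding applies to the Conv2d case in fuse, with the scale reshaped to [out_channels, 1, 1, 1].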

I guess this line of code is raising this error, if classifier is undefined for m:

m.classifier[1].linear = bc

Your current code doesn’t call fuse_module_1 (besides recursively inside the method), so I’m unsure where this error is exactly raised.
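
One generic way to narrow this down (just a debugging sketch, using the names from your code) is to print what m actually is right before the failing assignment:

```
# hypothetical debugging lines inside fuse_module_1, just before the assignment
print(type(m))                      # e.g. <class 'torch.nn.modules.container.Sequential'>
print(hasattr(m, 'classifier'))     # False would mean m is not the top-level model
# m.classifier[1].linear = bc       # the line that raises the AttributeError
```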

  • I have passed the model as the argument m in my function call.

  • I am calling the function below

But I am able to get the weight size of that particular layer by using model.classifier[1].linear.weight.size().

My guess might be wrong, since I cannot execute the code and don’t know how the methods are used.
Feel free to post an executable code snippet, so that we could reproduce this issue and debug further. :slight_smile:

  • This is how my model looks after recursively calling fuse_module_1:
    Sequential(
    (0): BinConv2d(
    (bn): BatchNorm1d(9216, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
    (linear): Linear(in_features=9216, out_features=4096, bias=True)
    (relu): ReLU(inplace=True)
    )
    (1): BinConv2d(
    (dropout): Dropout(p=0.5, inplace=False)
    (bn): BatchNorm1d(4096, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
    (linear): Linear(in_features=4096, out_features=4096, bias=True)
    (relu): ReLU(inplace=True)
    )
    (2): BatchNorm1d(4096, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=1000, bias=True)

This is the state just before m.classifier[1].linear = bc. Here is the full code:

```
import os
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import argparse
import shutil
import time
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import numpy as np
import numpy
import util

# set the seed
torch.manual_seed(1)
torch.cuda.manual_seed(1)

import sys
import gc

parser = argparse.ArgumentParser(description='Alexnet')
parser.add_argument('--arch', '-a', metavar='ARCH', default='alexnet',
                    help='model architecture (default: alexnet)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('-m', '--trained_model', default='weights/alexnet.baseline.pytorch.pth.tar',
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--weight-decay', '--wd', default=1e-5, type=float,
                    metavar='W', help='weight decay (default: 1e-5)')  
parser.add_argument('--data', metavar='DATA_PATH', default='./data/',
                    help='path to imagenet data (default: ./data/)') 
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')  
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)') 
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')                                       

args = parser.parse_args()

__all__ = ['AlexNet', 'alexnet']

best_prec1 = 0
bin_op = None
c1 =0 
count =0 

class DummyModule(nn.Module):
    def __init__(self):
        super(DummyModule, self).__init__()

    def forward(self, x):
        # print("Dummy, Dummy.")
        return x


def fuse(conv, bn):
    
    w = conv.weight
    #print(w)
    mean = bn.running_mean
    var_sqrt = torch.sqrt(bn.running_var + bn.eps)

    beta = bn.weight
    gamma = bn.bias

    if conv.bias is not None:
        b = conv.bias
    else:
        b = mean.new_zeros(mean.shape)

    w = w * (beta / var_sqrt).reshape([conv.out_channels, 1, 1, 1])
    b = (b - mean)/var_sqrt * beta + gamma
    fused_conv = nn.Conv2d(conv.in_channels,
                         conv.out_channels,
                         conv.kernel_size,
                         conv.stride,
                         conv.padding,
                         bias=True)                    
    fused_conv.weight = nn.Parameter(w)
    fused_conv.bias = nn.Parameter(b)
    return fused_conv


def fuse_module(m):
    children = list(m.named_children())
    c = None
    cn = None

    for name, child in children:
        #print("name is",name,"child is",child)
        if isinstance(child, nn.BatchNorm2d):
            bc = fuse(c, child)
            m._modules[cn] = bc
            print('hi',m._modules['0'])
            m._modules[name] = DummyModule()
            c = None
        elif isinstance(child, nn.Conv2d):
            c = child
            cn = name
        elif isinstance(child,BinConv2d):
            break
        else:
            #print(child)
            fuse_module(child) 

class DummyModule_1(nn.Module):
    def __init__(self):
        super(DummyModule_1, self).__init__()

    def forward(self, x):
        # print("Dummy, Dummy.")
        return x


def fuse_1(linear, bn):
    w = linear.weight
    #print(w)
    mean = bn.running_mean
    #print('mean',mean)
    var_sqrt = torch.sqrt(bn.running_var + bn.eps)
    #print(var_sqrt)
    beta = bn.weight
    #print('beta',beta)
    gamma = bn.bias
    #print("gamma",gamma)

    if linear.bias is not None:
        b = linear.bias
    else:
        b = mean.new_zeros(mean.shape)

    w = w.cuda()
    b = b.cuda()
    # scale each row of the weight matrix; 4096 is linear.out_features here
    w = w * (beta / var_sqrt).reshape([4096, 1])

    #y = w.cpu().detach().numpy()
    #np.savetxt("weight.txt",y,fmt='%.6f')

    b = (b - mean)/var_sqrt * beta + gamma
    #print(b)
    j = b.cpu().detach().numpy()
    np.savetxt("bias.txt",j,fmt='%.6f')
    
    fused_linear = nn.Linear(linear.in_features,
                         linear.out_features)
                                             
    fused_linear.weight = nn.Parameter(w)
    #print(fused_linear.weight)
    fused_linear.bias = nn.Parameter(b)
    return fused_linear


def fuse_module_1(m):
    children = list(m.named_children())
    c = None
    cn = None
    global c1
    global count
    global c18

    for name, child in children:
        #print("name is",name,"child is",child)
        """if name == 'classifier':
          print("inside class")
          fuse_module_1(child)
          

        if name == '2' and isinstance(child,nn.BatchNorm1d):
          c = child
          cn = name

        elif name == '4' and isinstance(child,nn.Linear):
          print("children is",child)
          bc = fuse_1(child,c)
          m.classifier[4] = bc
          m._classifier[2] = DummyModule_1()

        if name == 'relu':
          c1 = c1+1"""
         

        if name == 'linear':
          count = count+1 
          
          if count == 2:
            c18 = child
            print("c18 is",c18)

          else:
            fuse_module_1(child)

        """elif c1 == 6:
          #print('c18 is',c18)
          fuse_module_1(child)
          break"""

        if name =='2' and isinstance(child,nn.BatchNorm1d):
          print("child is",child)
          bc = fuse_1(c18,child)
          print(m)
          m.classifier[1].linear = bc
          m.classifier[2] = DummyModule_1()

        else:
            #fuse_module_1(child)
          fuse_module_1(child)     

class BinActive(torch.autograd.Function):
    '''
    Binarize the input activations and calculate the mean across channel dimension.
    '''
    def forward(self, input):
        self.save_for_backward(input)
        size = input.size()
        input = input.sign()
        return input

    def backward(self, grad_output):
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input.ge(1)] = 0
        grad_input[input.le(-1)] = 0
        return grad_input

class BinConv2d(nn.Module): # change the name of BinConv2d
    def __init__(self, input_channels, output_channels,
            kernel_size=-1, stride=-1, padding=-1, groups=1, dropout=0,
            Linear=False):
        super(BinConv2d, self).__init__()
        self.layer_type = 'BinConv2d'
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dropout_ratio = dropout

        if dropout!=0:
            self.dropout = nn.Dropout(dropout)
        self.Linear = Linear
        if not self.Linear:
            self.bn = nn.BatchNorm2d(input_channels, eps=1e-4, momentum=0.1, affine=True)
            self.conv = nn.Conv2d(input_channels, output_channels,
                    kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
        else:
            self.bn = nn.BatchNorm1d(input_channels, eps=1e-4, momentum=0.1, affine=True)
            self.linear = nn.Linear(input_channels, output_channels)
        self.relu = nn.ReLU(inplace=True)
    
    def forward(self, x):
        x = self.bn(x)
        x = BinActive()(x)
        if self.dropout_ratio!=0:
            x = self.dropout(x)
        if not self.Linear:
            x = self.conv(x)
        else:
            x = self.linear(x)
        x = self.relu(x)
        return x

class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96, eps=1e-4, momentum=0.1, affine=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(256, 384, kernel_size=3, stride=1, padding=1),
            BinConv2d(384, 384, kernel_size=3, stride=1, padding=1, groups=1),
            BinConv2d(384, 256, kernel_size=3, stride=1, padding=1, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            BinConv2d(256 * 6 * 6, 4096, Linear=True),
            BinConv2d(4096, 4096, dropout=0.5, Linear=True),
            nn.BatchNorm1d(4096, eps=1e-3, momentum=0.1, affine=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

"""def printbn(self, input, output):
    print('Inside ' + self.__class__.__name__ + ' forward')
    mean = input[0].mean(dim=0)
    var = input[0].var(dim=0)
    print(mean)"""



def alexnet(pretrained=False, **kwargs):
    r"""AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = AlexNet(**kwargs)
    input_size = 227
    
    #model.bn_layer.register_forward_hook(printbn)
    #model.forward()


    if pretrained:
        model_path = 'model_list/alexnet.pth.tar'
        pretrained_model = torch.load(model_path)
        model.load_state_dict(pretrained_model['state_dict'])
    #return model

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                weight_decay=args.weight_decay)

    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 2.0/c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)
            m.bias.data = m.bias.data.zero_()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    #print("before fusing model",model)
    #print(model)
    fuse_module(model)
    fuse_module_1(model)
    #print(model._modules[0].weight.size())
    cudnn.benchmark = True

    """for key, value in model.named_parameters():
      np.set_printoptions(threshold=50000000,formatter={'float_kind':'{:f}'.format})
      str1=".txt"
      str3=".shape"
      str2='./'+'merged_params/'+'_'+key+str1
      str4='./'+'merged_params/'+'_'+key+str3+str1
      print(str2)
      print(str4)
          #array=np.asarray(value)
      s=value.cpu().detach().numpy()
        #print(s)
      file2=open(str2,"w")
      data=s[:]
      file2.write(str(data))
        #file2.write(s)
      file2.close()

      file3=open(str4,"w")
        #data=s[:]
      file3.write(str(value.shape))
        #file2.write(s)
      file3.close()"""
        
      

    print('==> Using Pytorch Dataset')
    import torchvision
    import torchvision.transforms as transforms
    import torchvision.datasets as datasets
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                std=[1./255., 1./255., 1./255.])

    torchvision.set_image_backend('PIL')

    val_loader = torch.utils.data.DataLoader(
                datasets.ImageFolder(valdir, transforms.Compose([
                    transforms.Resize((256, 256)),
                    transforms.CenterCrop(input_size),
                    transforms.ToTensor(),
                    normalize,
                    ])),
                batch_size=args.batch_size, shuffle=False,
                num_workers=args.workers, pin_memory=True)

    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return    

def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    
    model.eval()
    #print("after fusing model",model)
    
    end = time.time()
    bin_op.binarization()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda(non_blocking=True)
        with torch.no_grad():
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        values,indices=torch.max(output,1)
        #print("output is",output)
        print("maximum value and its indices is",values,indices)
        #print("target is",target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses,
                   top1=top1, top5=top5))
    bin_op.restore()

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = args.lr * (0.1 ** (epoch // 30))
    print('Learning rate:', lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
 

    #checkpoint = torch.load(args.resume)
```

Sequential(
(0): BinConv2d(
(bn): BatchNorm1d(9216, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
(linear): Linear(in_features=9216, out_features=4096, bias=True)
(relu): ReLU(inplace=True)
)
(1): BinConv2d(
(dropout): Dropout(p=0.5, inplace=False)
(bn): BatchNorm1d(4096, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
(linear): Linear(in_features=4096, out_features=4096, bias=True)
(relu): ReLU(inplace=True)
)
(2): BatchNorm1d(4096, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=1000, bias=True)

Thanks for the code.
The error is raised in fuse_module_1 in the children loop when name == 'classifier' and child is the nn.Sequential(...): this child is passed recursively into fuse_module_1, which then errors in:

        if name =='2' and isinstance(child,nn.BatchNorm1d):
          print("child is",child)
          bc = fuse_1(c18,child)
          print(m)
          m.classifier[1].linear = bc
          m.classifier[2] = DummyModule_1()

In this iteration m is already the nn.Sequential module and you should probably use:

        if name =='2' and isinstance(child,nn.BatchNorm1d):
          print("child is",child)
          bc = fuse_1(c18,child)
          print(m)
          m[1].linear = bc
          m[2] = DummyModule_1()

instead. Note that I haven’t checked the rest of the code, just this error message.
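
To illustrate why the indexing version works, here is a minimal sketch with a toy module (not the actual AlexNet): inside the recursive call, m is the classifier nn.Sequential itself, so it has no classifier attribute, but nn.Sequential supports item assignment by index.

```
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super(Toy, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(8, 8),
            nn.Linear(8, 8),
            nn.BatchNorm1d(8),
        )

model = Toy()
for name, child in model.named_children():
    # name == 'classifier', child is the nn.Sequential;
    # the recursion then calls fuse_module_1 with m = child
    m = child
    print(hasattr(m, 'classifier'))   # False -> m.classifier[...] raises AttributeError
    m[2] = nn.Identity()              # replacing by index works (same idea as DummyModule_1)
    print(m)
```

In the actual code, m[1].linear = bc and m[2] = DummyModule_1() do the same thing on the real classifier.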