IndexError: When performing advanced indexing the indexing objects must be LongTensors or convertible to LongTensors

Hi everyone,
I’m trying to run this script for pruning the network. I’m using pytorch0.2 for this, since version 0.4 produces alot of errors. Anyway, when I try to run the script, I face :

In shape: 3 Out shape:22
Traceback (most recent call last):
  File "prune.py", line 156, in <module>
    w = m0.weight.data[:, idx0, :, :].clone()
IndexError: When performing advanced indexing the indexing objects must be LongTensors or convertible to LongTensothis?

here is the script content:

import os
import argparse
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms

from vgg import vgg
import numpy as np

import models

model_names = sorted(name for name in models.__dict__
  if name.islower() and not name.startswith("__")
  and callable(models.__dict__[name]))


# Prune settings
parser = argparse.ArgumentParser(description='PyTorch Slimming CIFAR prune')
parser.add_argument('--dataset', type=str, default='cifar10',
                    help='training dataset (default: cifar10)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--percent', type=float, default=0.5,
                    help='scale sparse rate (default: 0.5)')
parser.add_argument('--model', default='', type=str, metavar='PATH',
                    help='path to raw trained model (default: none)')
parser.add_argument('--save', default='', type=str, metavar='PATH',
                    help='path to save prune model (default: none)')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

#model = vgg()
model = models.__dict__['simple'](10)
model = torch.nn.DataParallel(model, device_ids=list(range(1)))
if args.cuda:
    model.cuda()
if args.model:
    if os.path.isfile(args.model):
           print("=> loading checkpoint '{}'".format(args.model))
           checkpoint = torch.load(args.model)
           recorder = checkpoint['recorder']
           args.start_epoch = checkpoint['epoch']
           model.load_state_dict(checkpoint['state_dict'])
           #optimizer.load_state_dict(checkpoint['optimizer'])
           best_prec1 = 95.49
           print("=> loaded checkpoint '{}' (epoch {})" .format(args.model, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.model))

		

print(model)
total = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        total += m.weight.data.shape[0]

bn = torch.zeros(total)
index = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        size = m.weight.data.shape[0]
        bn[index:(index+size)] = m.weight.data.abs().clone()
        index += size

y, i = torch.sort(bn)
thre_index = int(total * args.percent)
thre = y[thre_index]

pruned = 0
cfg = []
cfg_mask = []
for k, m in enumerate(model.modules()):
    if isinstance(m, nn.BatchNorm2d):
        weight_copy = m.weight.data.clone()
        mask = weight_copy.abs().gt(thre).float().cuda()
        pruned = pruned + mask.shape[0] - torch.sum(mask)
        m.weight.data.mul_(mask)
        m.bias.data.mul_(mask)
        cfg.append(int(torch.sum(mask)))
        cfg_mask.append(mask.clone())
        print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
            format(k, mask.shape[0], int(torch.sum(mask))))
    elif isinstance(m, nn.MaxPool2d):
        cfg.append('M')

pruned_ratio = pruned/total

print('Pre-processing Successful!')


# simple test model after Pre-processing prune (simple set BN scales to zeros)
def test():
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.507, 0.486, 0.440), (0.2675, 0.2565, 0.2761))]),
            download=True
        ),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    model.eval()
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    print('\nTest set: Accuracy: {}/{} ({:.1f}%)\n'.format(
        correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
    return correct / float(len(test_loader.dataset))

test()


# Make real prune
print('cfg/simple')
newmodel = models.__dict__['simple'](10)#vgg(cfg=cfg)
newmodel.cuda()

layer_id_in_cfg = 0
start_mask = torch.ones(3)
end_mask = cfg_mask[layer_id_in_cfg]
for [m0, m1] in zip(model.modules(), newmodel.modules()):
    if isinstance(m0, nn.BatchNorm2d):
        idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
        print('type: ', type(idx1))
        m1.weight.data = m0.weight.data[idx1].clone()
        m1.bias.data = m0.bias.data[idx1].clone()
        m1.running_mean = m0.running_mean[idx1].clone()
        m1.running_var = m0.running_var[idx1].clone()
        layer_id_in_cfg += 1
        start_mask = end_mask.clone()
        if layer_id_in_cfg < len(cfg_mask):  # do not change in Final FC
            end_mask = cfg_mask[layer_id_in_cfg]
    elif isinstance(m0, nn.Conv2d):
        idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
        idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
        print('In shape: {:d} Out shape:{:d}'.format(idx0.shape[0], idx1.shape[0]))
        w = m0.weight.data[:, idx0, :, :].clone()
        w = w[idx1, :, :, :].clone()
        m1.weight.data = w.clone()
        # m1.bias.data = m0.bias.data[idx1].clone()
    elif isinstance(m0, nn.Linear):
        idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
        m1.weight.data = m0.weight.data[:, idx0].clone()


torch.save({'cfg': cfg, 'state_dict': newmodel.state_dict()}, args.save)

print(newmodel)
model = newmodel
test()

What is happening here and how can I fix this ?

Could you print the type and shape of idx0? Apparently, the numpy array is not convertible to a LongTensor.

1 Like

here they are :

idx0 values : [0 1 2] idx1 values :[ 0  1  3  5  8  9 18 20 21 26 27 31 33 34 41 46 50 51 54 57 59 65]
idx0 shape: (3,) idx1 shape:(22,)
In shape: 3 Out shape:22
Traceback (most recent call last):
  File "prune.py", line 148, in <module>
    w = m0.weight.data[:, idx0, :, :].clone()
IndexError: When performing advanced indexing the indexing objects must be LongTensors or convertible to LongTensors

From your posted code it looks like m0 is a nn.Conv2d layer.
This should usually work:

conv = nn.Conv2d(3, 6 ,3, 1, 1)
idx0 = np.array([0, 1, 2])
conv.weight.data[:, idx0, :, :].clone()

Could you wrap it into a torch.LongTensor and see, if it helps? (torch.from_numpy(idx0))
What PyTorch version are you using?

1 Like

I’m using pytorch 0.2 (installed it using :
!pip install http://download.pytorch.org/whl/cu80/torch-0.2.0.post3-cp36-cp36m-manylinux1_x86_64.whl )

Here is the new error :

idx0 values : [0 1 2] idx1 values :[ 0  1  3  5  8  9 18 20 21 26 27 31 33 34 41 46 50 51 54 57 59 65]
idx0 shape: (3,) idx1 shape:(22,)
In shape: 3 Out shape:22
Traceback (most recent call last):
  File "prune.py", line 148, in <module>
    w = m0.weight.data[:, torch.from_numpy(idx0), :, :].clone()
TypeError: indexing a tensor with an object of type torch.LongTensor. The only supported types are integers, slices, numpy scalars and torch.cuda.LongTensor or torch.cuda.ByteTensor as the only argument.

Try to cast your LongTensor to a CUDA tensor and run it again.

Unrelated, but is there a reason you are using 0.2.0?
It’s a bit old by now and the current stable release has a lot of bug fixes and new features.

The only reason I am trying v 0.2 is that the 0.4 version seems to produce more weird errors!: I though it would be a better idea to try a lower version in order to not face with these errors.
If I start using a 0.4 version, this is the error I get :

  File "prune.py", line 79, in <module>
    mask = weight_copy.abs().gt(thre).float().cuda()
RuntimeError: Expected object of type torch.cuda.FloatTensor but found type torch.FloatTensor for argument #2 'other'

It’s basically the same error stating that you need to push your tensor to the GPU (apparently thre).
In 0.4.0 you can do it with

tensor = tensor.to('cuda:0')
1 Like

Thanks, I changed it to

        thre=thre.to('cuda:0')
        mask = weight_copy.abs().gt(thre).float()

but started getting out of memory errors!, Then I used with torch.no_grad(): and that was solved. however I now get this error :

Pre-processing Successful!
Files already downloaded and verified
prune.py:112: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
  data, target = Variable(data, volatile=True), Variable(target)

Test set: Accuracy: 1000/10000 (10.0%)

cfg/simpnet
idx0 values : [0 1 2] idx1 values :[ 0  1  3  5  8  9 18 20 21 26 27 31 33 34 41 46 50 51 54 57 59 65]
idx0 shape: (3,) idx1 shape:(22,)
In shape: 3 Out shape:22
type:  <class 'numpy.ndarray'>
Traceback (most recent call last):
  File "prune.py", line 136, in <module>
    m1.weight.data = m0.weight.data[idx1].clone()
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __getattr__
    type(self).__name__, name))
AttributeError: 'ReLU' object has no attribute 'weight'

Your conditions should prevent this.
Did you add an else into your pruning code?
Currently you check the Modules for nn.Conv2d, nn.BatchNorm2d and nn.Linear, so I don’t understand, how the nn.ReLU can get into the pruning part.

No the code is intact, I didnt change anything other than what I indicated. here is the source again :

 import os
import argparse
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms

from vgg import vgg
import numpy as np

import models

model_names = sorted(name for name in models.__dict__
  if name.islower() and not name.startswith("__")
  and callable(models.__dict__[name]))


# Prune settings
parser = argparse.ArgumentParser(description='PyTorch Slimming CIFAR prune')
parser.add_argument('--dataset', type=str, default='cifar10',
                    help='training dataset (default: cifar10)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--percent', type=float, default=0.5,
                    help='scale sparse rate (default: 0.5)')
parser.add_argument('--model', default='', type=str, metavar='PATH',
                    help='path to raw trained model (default: none)')
parser.add_argument('--save', default='', type=str, metavar='PATH',
                    help='path to save prune model (default: none)')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
print(model_names)
#model = vgg()
model = models.__dict__['simpnet'](10)
model = torch.nn.DataParallel(model, device_ids=list(range(1)))
if args.cuda:
    model.cuda()
if args.model:
    if os.path.isfile(args.model):
           print("=> loading checkpoint '{}'".format(args.model))
           checkpoint = torch.load(args.model)
           recorder = checkpoint['recorder']
           args.start_epoch = checkpoint['epoch']
           model.load_state_dict(checkpoint['state_dict'])
           #optimizer.load_state_dict(checkpoint['optimizer'])
           best_prec1 = 95.49
           print("=> loaded checkpoint '{}' (epoch {})" .format(args.model, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.model))

        

print(model)
total = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        total += m.weight.data.shape[0]

bn = torch.zeros(total)
index = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        size = m.weight.data.shape[0]
        bn[index:(index+size)] = m.weight.data.abs().clone()
        index += size

y, i = torch.sort(bn)
thre_index = int(total * args.percent)
thre = y[thre_index]

pruned = 0
cfg = []
cfg_mask = []
for k, m in enumerate(model.modules()):
    if isinstance(m, nn.BatchNorm2d):
        weight_copy = m.weight.data.clone()
        thre=thre.to('cuda:0')
        mask = weight_copy.abs().gt(thre).float()
        pruned = pruned + mask.shape[0] - torch.sum(mask)
        m.weight.data.mul_(mask)
        m.bias.data.mul_(mask)
        cfg.append(int(torch.sum(mask)))
        cfg_mask.append(mask.clone())
        print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
            format(k, mask.shape[0], int(torch.sum(mask))))
    elif isinstance(m, nn.MaxPool2d):
        cfg.append('M')

pruned_ratio = pruned/total

print('Pre-processing Successful!')


# simple test model after Pre-processing prune (simple set BN scales to zeros)
def test():
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.507, 0.486, 0.440), (0.2675, 0.2565, 0.2761))]),
            download=True
        ),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data, volatile=True), Variable(target)
            output = model(data)
            pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

        print('\nTest set: Accuracy: {}/{} ({:.1f}%)\n'.format(
            correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
    return correct / float(len(test_loader.dataset))

test()


# Make real prune
print('cfg/simpnet')
newmodel = models.__dict__['simpnet'](10)#vgg(cfg=cfg)
newmodel.cuda()

layer_id_in_cfg = 0
start_mask = torch.ones(3)
end_mask = cfg_mask[layer_id_in_cfg]
for [m0, m1] in zip(model.modules(), newmodel.modules()):
    if isinstance(m0, nn.BatchNorm2d):
        idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
        #print('type: ', type(idx1))
        m1.weight.data = m0.weight.data[idx1].clone()
        m1.bias.data = m0.bias.data[idx1].clone()
        m1.running_mean = m0.running_mean[idx1].clone()
        m1.running_var = m0.running_var[idx1].clone()
        layer_id_in_cfg += 1
        start_mask = end_mask.clone()
        if layer_id_in_cfg < len(cfg_mask):  # do not change in Final FC
            end_mask = cfg_mask[layer_id_in_cfg]
    elif isinstance(m0, nn.Conv2d):
        idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
        idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
        print('idx0 values : {0} idx1 values :{1}'.format(idx0, idx1))
        print('idx0 shape: {0} idx1 shape:{1}'.format(idx0.shape, idx1.shape))
        print('In shape: {:d} Out shape:{:d}'.format(idx0.shape[0], idx1.shape[0]))
        w = m0.weight.data[:, torch.from_numpy(idx0), :, :].clone()
        w = w[idx1, :, :, :].clone()
        m1.weight.data = w.clone()
        # m1.bias.data = m0.bias.data[idx1].clone()
    elif isinstance(m0, nn.Linear):
        idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
        m1.weight.data = m0.weight.data[:, idx0].clone()


torch.save({'cfg': cfg, 'state_dict': newmodel.state_dict()}, args.save)

print(newmodel)
model = newmodel
test()

Thanks for the info.
It seems model.modules() and newmodel.modules() do not contain the same Modules, i.e.
m1 might be nn.ReLU instead of one of the appropriate classes.
Could you check if by looping the modules and printing them?

for m0, m1 in zip(model.modules(), newmodel.modules()):
    print(m0)
    print(m1)
1 Like

This is the result I get ,Here you are :

...
Pre-processing Successful!
Files already downloaded and verified
prune.py:112: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
  data, target = Variable(data, volatile=True), Variable(target)

Test set: Accuracy: 1000/10000 (10.0%)

cfg/simpnet
DataParallel(
  (module): simpnet(
    (features): Sequential(
      (0): Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (5): ReLU(inplace)
      (6): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (7): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (8): ReLU(inplace)
      (9): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (10): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (11): ReLU(inplace)
      (12): Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (13): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (14): ReLU(inplace)
      (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
      (16): Dropout2d(p=0.05)
      (17): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (18): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (19): ReLU(inplace)
      (20): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (21): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (22): ReLU(inplace)
      (23): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (24): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (25): ReLU(inplace)
      (26): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (27): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (28): ReLU(inplace)
      (29): Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (30): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (31): ReLU(inplace)
      (32): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
      (33): Dropout2d(p=0.05)
      (34): Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (35): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (36): ReLU(inplace)
      (37): Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (38): BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (39): ReLU(inplace)
      (40): Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
      (41): BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
      (42): ReLU(inplace)
    )
    (classifier): Linear(in_features=432, out_features=10, bias=True)
  )
)
simpnet(
  (features): Sequential(
    (0): Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (5): ReLU(inplace)
    (6): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (8): ReLU(inplace)
    (9): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (11): ReLU(inplace)
    (12): Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (14): ReLU(inplace)
    (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
    (16): Dropout2d(p=0.05)
    (17): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (18): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (19): ReLU(inplace)
    (20): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (21): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (22): ReLU(inplace)
    (23): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (24): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (25): ReLU(inplace)
    (26): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (27): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (28): ReLU(inplace)
    (29): Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (30): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (31): ReLU(inplace)
    (32): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
    (33): Dropout2d(p=0.05)
    (34): Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (35): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (36): ReLU(inplace)
    (37): Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (38): BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (39): ReLU(inplace)
    (40): Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (41): BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (42): ReLU(inplace)
  )
  (classifier): Linear(in_features=432, out_features=10, bias=True)
)
simpnet(
  (features): Sequential(
    (0): Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (5): ReLU(inplace)
    (6): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (8): ReLU(inplace)
    (9): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (10): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (11): ReLU(inplace)
    (12): Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (14): ReLU(inplace)
    (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
    (16): Dropout2d(p=0.05)
    (17): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (18): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (19): ReLU(inplace)
    (20): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (21): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (22): ReLU(inplace)
    (23): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (24): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (25): ReLU(inplace)
    (26): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (27): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (28): ReLU(inplace)
    (29): Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (30): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (31): ReLU(inplace)
    (32): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
    (33): Dropout2d(p=0.05)
    (34): Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (35): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (36): ReLU(inplace)
    (37): Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (38): BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (39): ReLU(inplace)
    (40): Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
    (41): BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
    (42): ReLU(inplace)
  )
  (classifier): Linear(in_features=432, out_features=10, bias=True)
)
Sequential(
  (0): Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (2): ReLU(inplace)
  (3): Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (5): ReLU(inplace)
  (6): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (7): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (8): ReLU(inplace)
  (9): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (10): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (11): ReLU(inplace)
  (12): Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (13): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (14): ReLU(inplace)
  (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
  (16): Dropout2d(p=0.05)
  (17): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (18): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (19): ReLU(inplace)
  (20): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (21): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (22): ReLU(inplace)
  (23): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (24): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (25): ReLU(inplace)
  (26): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (27): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (28): ReLU(inplace)
  (29): Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (30): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (31): ReLU(inplace)
  (32): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
  (33): Dropout2d(p=0.05)
  (34): Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (35): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (36): ReLU(inplace)
  (37): Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (38): BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (39): ReLU(inplace)
  (40): Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (41): BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (42): ReLU(inplace)
)
Sequential(
  (0): Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (2): ReLU(inplace)
  (3): Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (5): ReLU(inplace)
  (6): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (7): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (8): ReLU(inplace)
  (9): Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (10): BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (11): ReLU(inplace)
  (12): Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (13): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (14): ReLU(inplace)
  (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
  (16): Dropout2d(p=0.05)
  (17): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (18): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (19): ReLU(inplace)
  (20): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (21): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (22): ReLU(inplace)
  (23): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (24): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (25): ReLU(inplace)
  (26): Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (27): BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (28): ReLU(inplace)
  (29): Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (30): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (31): ReLU(inplace)
  (32): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
  (33): Dropout2d(p=0.05)
  (34): Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (35): BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (36): ReLU(inplace)
  (37): Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (38): BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (39): ReLU(inplace)
  (40): Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (41): BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
  (42): ReLU(inplace)
)
Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(3, 66, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(66, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(66, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(128, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(128, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
Dropout2d(p=0.05)
Dropout2d(p=0.05)
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(192, 192, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(192, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(192, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=False)
Dropout2d(p=0.05)
Dropout2d(p=0.05)
Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(288, 288, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(288, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(288, 355, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(355, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
Conv2d(355, 432, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
BatchNorm2d(432, eps=1e-05, momentum=0.05, affine=True, track_running_stats=True)
ReLU(inplace)
ReLU(inplace)
Linear(in_features=432, out_features=10, bias=True)
----------------------------------------
idx0 values : [0 1 2] idx1 values :[ 0  1  3  5  8  9 18 20 21 26 27 31 33 34 41 46 50 51 54 57 59 65]
idx0 shape: (3,) idx1 shape:(22,)
In shape: 3 Out shape:22
Traceback (most recent call last):
  File "prune.py", line 142, in <module>
    m1.weight.data = m0.weight.data[idx1].clone()
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 532, in __getattr__
    type(self).__name__, name))
AttributeError: 'ReLU' object has no attribute 'weight'

You could you try to check all modules at once with:

all([m0.__class__ == m1.__class__ for m0, m1 in zip(modelA.modules(), modelB.modules())])

If it returns False, you can remove the all op and see at which position it returned False.
Then try to find out, why these layers differ.

1 Like

Thanks alot, I did so, but It just reports False for just everything!:

status :  [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]`
--------------------------------------------------------------------------------


m0: <class 'torch.nn.parallel.data_parallel.DataParallel'>
m1: <class 'models.simpnet.simpnet'>
m0: <class 'models.simpnet.simpnet'>
m1: <class 'torch.nn.modules.container.Sequential'>
m0: <class 'torch.nn.modules.container.Sequential'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.pooling.MaxPool2d'>
m0: <class 'torch.nn.modules.pooling.MaxPool2d'>
m1: <class 'torch.nn.modules.dropout.Dropout2d'>
m0: <class 'torch.nn.modules.dropout.Dropout2d'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.pooling.MaxPool2d'>
m0: <class 'torch.nn.modules.pooling.MaxPool2d'>
m1: <class 'torch.nn.modules.dropout.Dropout2d'>
m0: <class 'torch.nn.modules.dropout.Dropout2d'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.conv.Conv2d'>
m0: <class 'torch.nn.modules.conv.Conv2d'>
m1: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m0: <class 'torch.nn.modules.batchnorm.BatchNorm2d'>
m1: <class 'torch.nn.modules.activation.ReLU'>
m0: <class 'torch.nn.modules.activation.ReLU'>
m1: <class 'torch.nn.modules.linear.Linear'>

This is weird!!

Your m0 classes seem to be wrapped into DataParallel, which adds a module to them.
That’s why everything is failing.
Could you try to use the first model without DataParallel?

1 Like

Yes, I did that. if I remove that part I get these errors:

=> loading checkpoint 'model_best.pth.tar'
Traceback (most recent call last):
  File "prune.py", line 46, in <module>
    model.load_state_dict(checkpoint['state_dict'])
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 721, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for simpnet:
	Missing key(s) in state_dict: "features.0.weight", "features.0.bias", "features.1.weight", "features.1.bias", "features.1.running_mean", "features.1.running_var", "features.3.weight", "features.3.bias", "features.4.weight", "features.4.bias", "features.4.running_mean", "features.4.running_var", "features.6.weight", "features.6.bias", "features.7.weight", "features.7.bias", "features.7.running_mean", "features.7.running_var", "features.9.weight", "features.9.bias", "features.10.weight", "features.10.bias", "features.10.running_mean", "features.10.running_var", "features.12.weight", "features.12.bias", "features.13.weight", "features.13.bias", "features.13.running_mean", "features.13.running_var", "features.17.weight", "features.17.bias", "features.18.weight", "features.18.bias", "features.18.running_mean", "features.18.running_var", "features.20.weight", "features.20.bias", "features.21.weight", "features.21.bias", "features.21.running_mean", "features.21.running_var", "features.23.weight", "features.23.bias", "features.24.weight", "features.24.bias", "features.24.running_mean", "features.24.running_var", "features.26.weight", "features.26.bias", "features.27.weight", "features.27.bias", "features.27.running_mean", "features.27.running_var", "features.29.weight", "features.29.bias", "features.30.weight", "features.30.bias", "features.30.running_mean", "features.30.running_var", "features.34.weight", "features.34.bias", "features.35.weight", "features.35.bias", "features.35.running_mean", "features.35.running_var", "features.37.weight", "features.37.bias", "features.38.weight", "features.38.bias", "features.38.running_mean", "features.38.running_var", "features.40.weight", "features.40.bias", "features.41.weight", "features.41.bias", "features.41.running_mean", "features.41.running_var", "classifier.weight", "classifier.bias". 
	Unexpected key(s) in state_dict: "module.features.0.weight", "module.features.0.bias", "module.features.1.weight", "module.features.1.bias", "module.features.1.running_mean", "module.features.1.running_var", "module.features.3.weight", "module.features.3.bias", "module.features.4.weight", "module.features.4.bias", "module.features.4.running_mean", "module.features.4.running_var", "module.features.6.weight", "module.features.6.bias", "module.features.7.weight", "module.features.7.bias", "module.features.7.running_mean", "module.features.7.running_var", "module.features.9.weight", "module.features.9.bias", "module.features.10.weight", "module.features.10.bias", "module.features.10.running_mean", "module.features.10.running_var", "module.features.12.weight", "module.features.12.bias", "module.features.13.weight", "module.features.13.bias", "module.features.13.running_mean", "module.features.13.running_var", "module.features.17.weight", "module.features.17.bias", "module.features.18.weight", "module.features.18.bias", "module.features.18.running_mean", "module.features.18.running_var", "module.features.20.weight", "module.features.20.bias", "module.features.21.weight", "module.features.21.bias", "module.features.21.running_mean", "module.features.21.running_var", "module.features.23.weight", "module.features.23.bias", "module.features.24.weight", "module.features.24.bias", "module.features.24.running_mean", "module.features.24.running_var", "module.features.26.weight", "module.features.26.bias", "module.features.27.weight", "module.features.27.bias", "module.features.27.running_mean", "module.features.27.running_var", "module.features.29.weight", "module.features.29.bias", "module.features.30.weight", "module.features.30.bias", "module.features.30.running_mean", "module.features.30.running_var", "module.features.34.weight", "module.features.34.bias", "module.features.35.weight", "module.features.35.bias", "module.features.35.running_mean", "module.features.35.running_var", "module.features.37.weight", "module.features.37.bias", "module.features.38.weight", "module.features.38.bias", "module.features.38.running_mean", "module.features.38.running_var", "module.features.40.weight", "module.features.40.bias", "module.features.41.weight", "module.features.41.bias", "module.features.41.running_mean", "module.features.41.running_var", "module.classifier.weight", "module.classifier.bias".

This post and thread might hepp get rid of the module. names in your state_dict.

1 Like

hey @ptrblck good to have you. you have helped me before, now i am following same code which is mention above and got this error. can you help me out .

Thanks

Traceback (most recent call last):
File “vggprune.py”, line 127, in
newmodel = vgg(dataset=args.dataset, cfg=cfg)
File “/home/ustc/akb/network-slimming/models/vgg.py”, line 22, in init
self.feature = self.make_layers(cfg, True)
File “/home/ustc/akb/network-slimming/models/vgg.py”, line 39, in make_layers
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
File “/home/ustc/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.py”, line 297, in init
False, _pair(0), groups, bias)
File “/home/ustc/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.py”, line 38, in init
self.reset_parameters()
File “/home/ustc/anaconda2/lib/python2.7/site-packages/torch/nn/modules/conv.py”, line 44, in reset_parameters
stdv = 1. / math.sqrt(n)
ZeroDivisionError: float division by zero
ustc@ustc-Z10PED16WS:~/akb/network-slimming$

It seems you are dividing by zero while trying to reset the parameters.
Did you write reset_parameters yourself or have you used some other implementation?
Could you post a code snippet to reproduce this issue?