Regarding Weights

if not self.Linear:
    self.bn = nn.BatchNorm2d(input_channels, eps=1e-4, momentum=0.1, affine=True)
    self.conv = nn.Conv2d(input_channels, output_channels,
            kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)

The weights I printed using self.conv.weight and the weights I obtained from model.named_parameters() are totally different. Can anyone please explain the difference between them, and which are the correct weight values?

Could you post a minimal, executable code snippet that shows the issue you are seeing?
It’s unclear how and when you are printing the weights based on your code snippet.
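For reference, such a snippet could look like this (the Toy module and its sizes are made up for illustration; in general both access paths reach the same Parameter object):

import torch.nn as nn

# toy stand-in model, hypothetical for this example
class Toy(nn.Module):
    def __init__(self):
        super(Toy, self).__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3)

model = Toy()
w_direct = model.conv.weight                              # direct attribute access
w_named = dict(model.named_parameters())['conv.weight']   # via named_parameters()
print(w_direct is w_named)          # True -- the very same Parameter object
print((w_direct == w_named).all())  # tensor(True)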

I am loading the pretrained model here:

if os.path.isfile(args.resume):
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(args.resume, checkpoint['epoch']))
    del checkpoint
else:
    print("=> no checkpoint found at '{}'".format(args.resume))

#print("before fusing model",model)

fuse_module(model)

print(model.state_dict())

cudnn.benchmark = True

for key, value in model.named_parameters():

this is where i am printing weight
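(For what it's worth, a hypothetical check around the fuse_module call above would reveal whether fusing rewrites the conv weights in place; model.features[4] assumes the features are not wrapped in nn.DataParallel, otherwise index through .module as discussed further down:)

# hypothetical check: snapshot one conv weight before fusing, compare after
w_before = model.features[4].conv.weight.detach().clone()
fuse_module(model)
w_after = model.features[4].conv.weight
print((w_before == w_after).all())  # tensor(False) would mean fusing changed the weights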

class BinConv2d(nn.Module): # change the name of BinConv2d
    def __init__(self, input_channels, output_channels,
            kernel_size=-1, stride=-1, padding=-1, groups=1, dropout=0,
            Linear=False):
        super(BinConv2d, self).__init__()
        self.layer_type = 'BinConv2d'
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dropout_ratio = dropout
        if dropout != 0:
            self.dropout = nn.Dropout(dropout)
        self.Linear = Linear
        if not self.Linear:
            self.bn = nn.BatchNorm2d(input_channels, eps=1e-4, momentum=0.1, affine=True)
            # this is the other place where I am printing the weights (self.conv.weight)
            self.conv = nn.Conv2d(input_channels, output_channels,
                    kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
        else:
            self.bn = nn.BatchNorm1d(input_channels, eps=1e-4, momentum=0.1, affine=True)
            self.linear = nn.Linear(input_channels, output_channels)
        self.relu = nn.ReLU(inplace=True)



    def forward(self, x):
        x = self.bn(x)
        x = BinActive()(x)
        if self.dropout_ratio != 0:
            x = self.dropout(x)
        if not self.Linear:
            x = self.conv(x)
        else:
            x = self.linear(x)
        x = self.relu(x)
        return x
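(For illustration, the conv weights of this module can be inspected directly; a minimal sketch, assuming the class definition above is in scope:)

layer = BinConv2d(96, 256, kernel_size=5, stride=1, padding=2)
print(layer.conv.weight.size())  # torch.Size([256, 96, 5, 5])
print([name for name, _ in layer.named_parameters()])
# ['bn.weight', 'bn.bias', 'conv.weight', 'conv.bias']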

class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96, eps=1e-4, momentum=0.1, affine=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(256, 384, kernel_size=3, stride=1, padding=1),
            BinConv2d(384, 384, kernel_size=3, stride=1, padding=1, groups=1),
            BinConv2d(384, 256, kernel_size=3, stride=1, padding=1, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            BinConv2d(256 * 6 * 6, 4096, Linear=True),
            BinConv2d(4096, 4096, dropout=0.5, Linear=True),
            nn.BatchNorm1d(4096, eps=1e-3, momentum=0.1, affine=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
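(For comparison, a quick sketch assuming the classes above: named_parameters() returns the very same Parameter objects that direct attribute access reaches:)

model = AlexNet()
params = dict(model.named_parameters())
print(model.features[4].conv.weight is params['features.4.conv.weight'])  # True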

I am loading the pretrained model, and I am printing the weights from model.named_parameters() and also from self.conv.weight, but the weights I am getting from these two sources are different.


I cannot reproduce it, as I’m getting the same values for the direct access and the state_dict:

model = AlexNet()
state_dict = model.state_dict()

print((model.features[4].conv.weight == state_dict['features.4.conv.weight']).all())
> tensor(True)

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. :wink:

print((model.features[4].conv.weight == state_dict['features.4.conv.weight']).all())
TypeError: 'DataParallel' object does not support indexing

Can you please provide a solution, since I am very new to Python and PyTorch?

You might have wrapped model.features into nn.DataParallel.
If that’s the case, you could add the .module attribute to the access:

model.features.module[4]
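(A minimal sketch of the effect, assuming features was wrapped like this: the Sequential moves one level down, under .module, and the state_dict keys gain a 'module' segment accordingly:)

model = AlexNet()
model.features = nn.DataParallel(model.features)
print(model.features.module[4].conv.weight.size())            # torch.Size([256, 96, 5, 5])
print('features.module.4.conv.weight' in model.state_dict())  # True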

If that doesn’t work, could you post the model architecture, so that we could have a look?

File "/content/XNOR-Net-PyTorch/ImageNet/networks/model_list/alexnet.py", line 871, in alexnet
    print((model.features.module[4] == state_dict['features.module.4.conv.weight']).all())
AttributeError: 'bool' object has no attribute 'all'
('after fusing model', AlexNet(
  (features): DataParallel(
    (module): Sequential(
      (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
      (1): DummyModule()
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): BinConv2d(
        (bn): BatchNorm2d(96, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (relu): ReLU(inplace=True)
      )
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): BinConv2d(
        (bn): BatchNorm2d(256, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (7): BinConv2d(
        (bn): BatchNorm2d(384, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (8): BinConv2d(
        (bn): BatchNorm2d(384, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (9): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (classifier): Sequential(
    (0): BinConv2d(
      (bn): BatchNorm1d(9216, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=9216, out_features=4096, bias=True)
      (relu): ReLU(inplace=True)
    )
    (1): BinConv2d(
      (dropout): Dropout(p=0.5, inplace=False)
      (bn): BatchNorm1d(4096, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=4096, out_features=4096, bias=True)
      (relu): ReLU(inplace=True)
    )
    (2): BatchNorm1d(4096, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=1000, bias=True)
  )
))

Could you print the type and the shape of both tensors you are trying to compare, please?

print(model.features.module[4].conv.weight.size())
(256, 96, 5, 5)
print(type(model.features.module[4]))
<class 'model_list.alexnet.BinConv2d'>

print(state_dict['features.module.4.conv.weight'].size())
(256, 96, 5, 5)
print(type(state_dict['features.module.4.conv.weight']))
<class 'torch.Tensor'>
print((model.features.module[4].conv.weight == state_dict['features.module.4.conv.weight']).all())
tensor(True, device='cuda:0')

@ptrblck, if you don't mind, could you please go through this and provide a solution?

This line of code seems to work now:

print((model.features.module[4].conv.weight == state_dict['features.module.4.conv.weight']).all())
> tensor(True, device='cuda:0')

while it seemed to create the AttributeError before, which is strange.
Anyway, good to see it's working now.
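(A likely explanation of the earlier failure, judging from the two snippets above: the line that raised the AttributeError compared the module itself, model.features.module[4], against the tensor rather than its .conv.weight; such a comparison falls back to plain Python equality and returns a bool, which has no .all(). A quick sketch, assuming the wrapped model above:)

result = model.features.module[4] == state_dict['features.module.4.conv.weight']
print(type(result))  # <class 'bool'> -- hence: 'bool' object has no attribute 'all'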

Thanks for your time