Regarding Weights

if not self.Linear:
    self.bn = nn.BatchNorm2d(input_channels, eps=1e-4, momentum=0.1, affine=True)
    self.conv = nn.Conv2d(input_channels, output_channels,
            kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)

The weights I printed using self.conv.weight and the weights I obtained from model.named_parameters() are totally different. Can anyone please explain the difference between them, and which are the correct weight values?

Could you post a minimal, executable code snippet that shows the issue you are seeing?
It’s unclear how and when you are printing the weights based on your code snippet.
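For reference, such a snippet could look like this (the Toy module and its sizes are made up for illustration; in general both access paths reach the same Parameter object):

import torch.nn as nn

# toy stand-in model, hypothetical for this example
class Toy(nn.Module):
    def __init__(self):
        super(Toy, self).__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3)

model = Toy()
w_direct = model.conv.weight                              # direct attribute access
w_named = dict(model.named_parameters())['conv.weight']   # via named_parameters()
print(w_direct is w_named)          # True -- the very same Parameter object
print((w_direct == w_named).all())  # tensor(True)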

I am loading the pretrained model here:

if os.path.isfile(args.resume):
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(args.resume, checkpoint['epoch']))
    del checkpoint
else:
    print("=> no checkpoint found at '{}'".format(args.resume))

#print("before fusing model",model)

fuse_module(model)

print(model.state_dict())

cudnn.benchmark = True

for key, value in model.named_parameters():

this is where i am printing weight
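(For what it's worth, a hypothetical check around the fuse_module call above would reveal whether fusing rewrites the conv weights in place; model.features[4] assumes the features are not wrapped in nn.DataParallel, otherwise index through .module as discussed further down:)

# hypothetical check: snapshot one conv weight before fusing, compare after
w_before = model.features[4].conv.weight.detach().clone()
fuse_module(model)
w_after = model.features[4].conv.weight
print((w_before == w_after).all())  # tensor(False) would mean fusing changed the weights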

class BinConv2d(nn.Module): # change the name of BinConv2d
    def __init__(self, input_channels, output_channels,
            kernel_size=-1, stride=-1, padding=-1, groups=1, dropout=0,
            Linear=False):
        super(BinConv2d, self).__init__()
        self.layer_type = 'BinConv2d'
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dropout_ratio = dropout
        if dropout != 0:
            self.dropout = nn.Dropout(dropout)
        self.Linear = Linear
        if not self.Linear:
            self.bn = nn.BatchNorm2d(input_channels, eps=1e-4, momentum=0.1, affine=True)
            # this is the other place where I am printing the weights (self.conv.weight)
            self.conv = nn.Conv2d(input_channels, output_channels,
                    kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
        else:
            self.bn = nn.BatchNorm1d(input_channels, eps=1e-4, momentum=0.1, affine=True)
            self.linear = nn.Linear(input_channels, output_channels)
        self.relu = nn.ReLU(inplace=True)



    def forward(self, x):
        x = self.bn(x)
        x = BinActive()(x)
        if self.dropout_ratio != 0:
            x = self.dropout(x)
        if not self.Linear:
            x = self.conv(x)
        else:
            x = self.linear(x)
        x = self.relu(x)
        return x
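(For illustration, the conv weights of this module can be inspected directly; a minimal sketch, assuming the class definition above is in scope:)

layer = BinConv2d(96, 256, kernel_size=5, stride=1, padding=2)
print(layer.conv.weight.size())  # torch.Size([256, 96, 5, 5])
print([name for name, _ in layer.named_parameters()])
# ['bn.weight', 'bn.bias', 'conv.weight', 'conv.bias']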

class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96, eps=1e-4, momentum=0.1, affine=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            BinConv2d(256, 384, kernel_size=3, stride=1, padding=1),
            BinConv2d(384, 384, kernel_size=3, stride=1, padding=1, groups=1),
            BinConv2d(384, 256, kernel_size=3, stride=1, padding=1, groups=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            BinConv2d(256 * 6 * 6, 4096, Linear=True),
            BinConv2d(4096, 4096, dropout=0.5, Linear=True),
            nn.BatchNorm1d(4096, eps=1e-3, momentum=0.1, affine=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
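(For comparison, a quick sketch assuming the classes above: named_parameters() returns the very same Parameter objects that direct attribute access reaches:)

model = AlexNet()
params = dict(model.named_parameters())
print(model.features[4].conv.weight is params['features.4.conv.weight'])  # True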

I am loading the pretrained model, and I am printing the weights from model.named_parameters() and also from self.conv.weight, but the weights I am getting from these two sources are different.


I cannot reproduce it, as I’m getting the same values for the direct access and the state_dict:

model = AlexNet()
state_dict = model.state_dict()

print((model.features[4].conv.weight == state_dict['features.4.conv.weight']).all())
> tensor(True)

PS: you can post code snippets by wrapping them into three backticks ```, which makes debugging easier. :wink:

print((model.features[4].conv.weight == state_dict['features.4.conv.weight']).all())
TypeError: 'DataParallel' object does not support indexing

Can you please provide a solution, since I am very new to Python and PyTorch?

You might have wrapped model.features into nn.DataParallel.
If that’s the case, you could add the .module attribute to the access:

model.features.module[4]
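(A minimal sketch of the effect, assuming features was wrapped like this: the Sequential moves one level down, under .module, and the state_dict keys gain a 'module' segment accordingly:)

model = AlexNet()
model.features = nn.DataParallel(model.features)
print(model.features.module[4].conv.weight.size())            # torch.Size([256, 96, 5, 5])
print('features.module.4.conv.weight' in model.state_dict())  # True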

If that doesn’t work, could you post the model architecture, so that we could have a look?

File "/content/XNOR-Net-PyTorch/ImageNet/networks/model_list/alexnet.py", line 871, in alexnet
    print((model.features.module[4] == state_dict['features.module.4.conv.weight']).all())
AttributeError: 'bool' object has no attribute 'all'
('after fusing model', AlexNet(
  (features): DataParallel(
    (module): Sequential(
      (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
      (1): DummyModule()
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): BinConv2d(
        (bn): BatchNorm2d(96, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        (relu): ReLU(inplace=True)
      )
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): BinConv2d(
        (bn): BatchNorm2d(256, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (7): BinConv2d(
        (bn): BatchNorm2d(384, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (8): BinConv2d(
        (bn): BatchNorm2d(384, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
        (conv): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (relu): ReLU(inplace=True)
      )
      (9): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (classifier): Sequential(
    (0): BinConv2d(
      (bn): BatchNorm1d(9216, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=9216, out_features=4096, bias=True)
      (relu): ReLU(inplace=True)
    )
    (1): BinConv2d(
      (dropout): Dropout(p=0.5, inplace=False)
      (bn): BatchNorm1d(4096, eps=0.0001, momentum=0.1, affine=True, track_running_stats=True)
      (linear): Linear(in_features=4096, out_features=4096, bias=True)
      (relu): ReLU(inplace=True)
    )
    (2): BatchNorm1d(4096, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=1000, bias=True)
  )
))

Could you print the type and the shape of both tensors you are trying to compare, please?

print(model.features.module[4].conv.weight.size())
(256, 96, 5, 5)
print(type(model.features.module[4]))
<class 'model_list.alexnet.BinConv2d'>

print(state_dict['features.module.4.conv.weight'].size())
(256, 96, 5, 5)
print(type(state_dict['features.module.4.conv.weight']))
<class 'torch.Tensor'>
print((model.features.module[4].conv.weight == state_dict['features.module.4.conv.weight']).all())
tensor(True, device='cuda:0')

@ptrblck, if you don't mind, could you please go through this and provide a solution?

This line of code seems to work now:

print((model.features.module[4].conv.weight == state_dict['features.module.4.conv.weight']).all())
> tensor(True, device='cuda:0')

while it seemed to create the AttributeError before, which is strange.
Anyway, good to see it's working now.
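(A likely explanation of the earlier failure, judging from the two snippets above: the line that raised the AttributeError compared the module itself, model.features.module[4], against the tensor rather than its .conv.weight; such a comparison falls back to plain Python equality and returns a bool, which has no .all(). A quick sketch, assuming the wrapped model above:)

result = model.features.module[4] == state_dict['features.module.4.conv.weight']
print(type(result))  # <class 'bool'> -- hence: 'bool' object has no attribute 'all'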

Thanks for your time