Understanding dimensions of hooked and filled weights

maria.solyanik · April 19, 2022, 8:08pm

I am trying to access and manipulate the weights in my NN. Here is the code:

import torch
from pylab import *

# Prefer the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

from torchvision import datasets
from torchvision.transforms import ToTensor
# MNIST training split, stored under ./data (downloaded if requested and
# missing); every image is converted with ToTensor().
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)

# MNIST test split from the same root; no download is requested here.
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

from torch.utils.data import DataLoader
# Number of samples per mini-batch.
batch_size = 32

# One shuffling, single-worker DataLoader per split, keyed by split name.
loaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
    )
    for split, dataset in (('train', train_data), ('test', test_data))
}
# Bare expression: shows the dict when run in a notebook/REPL cell.
loaders

import torch.nn as nn

class CNN(nn.Module):
    """Small image classifier: conv block -> hidden linear block -> softmax.

    The first linear layer is sized for ``16 * 14 * 14`` flattened features,
    which corresponds to inputs of shape ``(N, 1, 28, 28)``: the 5x5
    convolution with padding 2 keeps the 28x28 size and the 2x2 max-pool
    halves it to 14x14.

    Args:
        hidden_units: Width of the hidden linear layer. When ``None`` (the
            default), ``batch_size * 8`` is used, read from the module-level
            ``batch_size`` exactly as before this parameter existed.
    """

    def __init__(self, hidden_units=None):
        super().__init__()
        if hidden_units is None:
            # Backward-compatible default; pass ``hidden_units`` explicitly to
            # decouple the architecture from the data loader's batch size.
            hidden_units = batch_size * 8

        # Parameters of this block: Conv2d weight (16, 1, 5, 5) = 400 values
        # plus 16 biases, and 16 weight + 16 bias values in BatchNorm2d.
        # Its *output* (what a forward hook on the block receives) has shape
        # (N, 16 * 14 * 14) = (N, 3136); that is an activation, not a weight.
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(16),
            nn.Flatten(),
        )

        # Despite its name this is a fully connected block. The Linear weight
        # has shape (hidden_units, 3136); the block's output is
        # (N, hidden_units).
        self.conv2 = nn.Sequential(
            nn.Linear(16 * 14 * 14, hidden_units),
            nn.ReLU(),
            # No-op on the already 2-D activations; kept so that submodule
            # indices stay unchanged.
            nn.Flatten(),
        )

        # Final projection to 10 outputs, normalised to probabilities per row.
        self.out = nn.Sequential(nn.Linear(hidden_units, 10), nn.Softmax(dim=1))

        self.weights_initialization()

    def forward(self, x):
        """Return softmax probabilities of shape ``(N, 10)`` for batch ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        output = self.out(x)
        return output

    def weights_initialization(self):
        """Apply Xavier-normal initialisation to every ``nn.Linear`` weight.

        Biases and all non-linear layers keep PyTorch's default
        initialisation.
        """
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)

# Build the network with its default arguments. NOTE(review): it is not moved
# to `device` anywhere in the code shown here, so it presumably stays on CPU.
cnn = CNN()

# Latest captured output per hooked module, keyed by the name given to
# getActivation().
activation = {}


def getActivation(name):
    """Build a forward hook that stores the module's output under ``name``.

    The returned callable has the ``(module, inputs, outputs)`` forward-hook
    signature; on each call it writes a detached copy of the output into the
    module-level ``activation`` dict, replacing any previous entry.
    """
    def hook(module, inputs, outputs):
        # Detach so the stored tensor does not keep the autograd graph alive.
        detached = outputs.detach()
        activation[name] = detached

    return hook

# Attach one forward hook to each top-level block of the network, in order;
# the returned handles are kept in h1, h2 and h3.
h1, h2, h3 = (
    getattr(cnn, block_name).register_forward_hook(getActivation(block_name))
    for block_name in ('conv1', 'conv2', 'out')
)

from torch.autograd import Variable
import numpy as np

# Values intended as arguments for single_run(); that function takes
# parameters with these same names.
num_epochs = 1
fitness = 0
chromosome = []

# Pull a single (images, labels) batch from the training loader.
images, labels = next(iter(loaders['train']))

def single_run(images, labels, num_epochs, chromosome, fitness):
    """Run the training set through ``cnn`` and return the last batch's activations.

    Every batch of ``loaders['train']`` is passed forward through the
    module-level ``cnn``. The forward hooks registered on ``cnn`` fill the
    module-level ``activation`` dict, and after each batch the flattened
    ``conv1`` and ``conv2`` outputs are concatenated into one 1-D array.
    Only the array from the final batch is returned.

    These values are layer *outputs* (activations), not layer parameters;
    their leading dimension is the batch size.

    Args:
        images: Unused; batches are read from ``loaders['train']`` instead.
        labels: Unused, for the same reason.
        num_epochs: Unused; the loader is traversed exactly once.
        chromosome: Returned unchanged when the loader yields no batches;
            otherwise replaced by the computed array.
        fitness: Unused.

    Returns:
        A 1-D ``numpy.ndarray`` holding the flattened ``conv1`` activations
        followed by the flattened ``conv2`` activations of the last batch.
    """
    # Put the network in training mode before the forward passes.
    cnn.train()

    for batch_images, _batch_labels in loaders['train']:
        # Only the forward pass matters here: it triggers the hooks that
        # populate ``activation``. The returned prediction is not needed.
        cnn(batch_images)

        layer_1 = activation['conv1']
        layer_2 = activation['conv2']
        print('1=', layer_1.size())
        print('2=', layer_2.size())

        # .cpu() is a no-op for CPU tensors and makes .numpy() safe if the
        # model is ever moved to a GPU.
        layer_activations_1 = layer_1.cpu().numpy().flatten()
        layer_activations_2 = layer_2.cpu().numpy().flatten()

        chromosome = np.concatenate((layer_activations_1, layer_activations_2))

    return chromosome

Here I print the dims of layer_1 and layer_2 and get [32, 3136] and [32, 256], respectively.

# Overwrite every tensor in the model's state dict in place with the value 2.
with torch.no_grad():
    for layer, output in cnn.state_dict().items():
        output.data.fill_(2)

# Shape of the Conv2d weight inside the conv1 block.
print(cnn.state_dict()['conv1.0.weight'].size())

# Shape of the Linear weight inside the conv2 block.
output = cnn.state_dict()['conv2.0.weight']
print(output.size())

Here I get [16, 1, 5, 5] for 1st and [256, 3136] for 2nd layer.

Shouldn’t they match? It seems like the hooked output contains the weights after the whole nn.Sequential block in the case of conv1, and after the Linear layer in the case of conv2. Is this true? If not, what are [32, 3136] and [32, 256] then?

ptrblck · April 19, 2022, 10:39pm

No, these tensors shouldn’t match as you are comparing forward activations (i.e. the layer outputs) against the weights (i.e. the layer parameters).
If you want to check the weights, you can directly access them via model.layer.weight.