I am trying to access and manipulate the weights in my neural network. Here is the code:
import torch
from pylab import *
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
from torchvision import datasets
from torchvision.transforms import ToTensor
# MNIST training split, stored under ./data; `download=True` asks torchvision
# to fetch the files when they are missing.  Images are converted to tensors.
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)

# MNIST test split from the same directory.  No download flag is passed here,
# so the files are expected to be present already (the training split above
# requests the download).
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())
from torch.utils.data import DataLoader
# Mini-batch size shared by both loaders (and read by CNN for its hidden width).
batch_size = 32

# One DataLoader per split, keyed by split name.  Both splits are shuffled and
# each loader uses a single worker process.
loaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
    )
    for split, dataset in (('train', train_data), ('test', test_data))
}

# Bare expression: only has a visible effect in a notebook/REPL, where it
# displays the dict.
loaders
import torch.nn as nn
class CNN(nn.Module):
    """Small MNIST classifier: conv stage, hidden linear stage, softmax head.

    Activation shapes for a batch of N single-channel 28x28 images:

    * ``conv1`` -> (N, 16 * 14 * 14) = (N, 3136), the flattened feature maps
    * ``conv2`` -> (N, hidden_units)
    * ``out``   -> (N, 10), one probability per class

    These are *activations*, so their first dimension is the batch size.  The
    learnable weights do not depend on the batch size: ``conv1.0.weight`` is
    (16, 1, 5, 5) and ``conv2.0.weight`` is (hidden_units, 3136).

    Args:
        hidden_units: Width of the hidden linear layer.  ``None`` (default)
            keeps the historical value ``batch_size * 8``, read from the
            module-level ``batch_size``.
    """

    def __init__(self, hidden_units=None):
        super().__init__()
        if hidden_units is None:
            # Historical default: the width was tied to the global batch size.
            hidden_units = batch_size * 8

        # Conv weights: 16 * 1 * 5 * 5 = 400 (+ 16 biases); BatchNorm adds a
        # 16-element scale and shift.  For a 32-image batch this stage emits
        # 32 * 3136 = 100352 activation values -- that is an activation
        # count, not a weight count.
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,  # keeps the 28x28 spatial size with a 5x5 kernel
            ),
            nn.MaxPool2d(2, 2),  # 28x28 -> 14x14
            nn.BatchNorm2d(16),
            nn.Flatten(),  # (N, 16, 14, 14) -> (N, 3136)
        )

        # Linear weights: hidden_units * 3136 (+ hidden_units biases).  With
        # the default width of 256, a 32-image batch yields 32 * 256 = 8192
        # activation values.
        self.conv2 = nn.Sequential(
            nn.Linear(16 * 14 * 14, hidden_units),
            nn.ReLU(),
            # No-op on the already 2-D tensor; kept so the layout of the
            # Sequential is unchanged.
            nn.Flatten(),
        )

        # NOTE(review): the head already applies softmax.  If this model is
        # trained with nn.CrossEntropyLoss (which expects raw logits), the
        # softmax would be applied twice -- confirm against the training code.
        self.out = nn.Sequential(nn.Linear(hidden_units, 10), nn.Softmax(dim=1))

        self.weights_initialization()

    def forward(self, x):
        """Return class probabilities of shape (N, 10) for an image batch ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        output = self.out(x)
        return output

    def weights_initialization(self):
        """Apply Xavier-normal init to the weight of every ``nn.Linear`` layer.

        Biases and all non-linear layers keep PyTorch's default initialisation.
        """
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
# Build the network.  Nothing in the code shown here moves it to `device`,
# so its parameters stay wherever PyTorch creates them by default.
cnn = CNN()
# Most recent output of each hooked module, keyed by the name given to
# getActivation().  These are layer *outputs* (activations), not weights.
activation = {}


def getActivation(name):
    """Return a forward hook that records a module's output under ``name``.

    The returned callable has the forward-hook signature
    ``hook(module, inputs, output)``.  On every forward pass it stores a
    detached copy of ``output`` in the module-level ``activation`` dict,
    overwriting the previous entry for ``name``.

    Args:
        name: Key under which the output is stored in ``activation``.

    Returns:
        The hook function, suitable for ``module.register_forward_hook``.
    """

    def hook(module, inputs, output):
        # detach() drops the autograd history so the stored tensor does not
        # keep the graph alive.  The hook returns None, so the module's
        # output is passed on unchanged.
        activation[name] = output.detach()

    return hook
# Attach one recording hook to each top-level sub-module, in forward order.
# The returned handles are kept so the hooks can be detached later with
# handle.remove().
h1, h2, h3 = (
    getattr(cnn, block_name).register_forward_hook(getActivation(block_name))
    for block_name in ('conv1', 'conv2', 'out')
)
from torch.autograd import Variable
import numpy as np
# Starting values; presumably the arguments later handed to single_run().
num_epochs, chromosome, fitness = 1, [], 0

# Pull one shuffled mini-batch (images and their labels) from the training loader.
images, labels = next(iter(loaders['train']))
def single_run(images, labels, num_epochs, chromosome, fitness):
    """Flatten the recorded ``conv1`` and ``conv2`` outputs into one vector.

    Reads the module-level ``activation`` dict filled by the forward hooks,
    so a forward pass through the hooked model must have run before the call.

    NOTE(review): the original paste lost its indentation.  The nesting here
    is reconstructed in statement order: the loop belongs to the inner
    ``train`` helper, and everything after it belongs to ``single_run``.
    Confirm against the original file.

    NOTE(review): the values collected here are layer *activations* for one
    batch (shape ``(batch, features)``), not learnable weights.  Weights live
    in ``cnn.state_dict()`` / ``cnn.parameters()``.

    Args:
        images: Not used by the code shown; kept for interface compatibility.
        labels: Not used by the code shown; kept for interface compatibility.
        num_epochs: Not used by the code shown; kept for interface
            compatibility.
        chromosome: Ignored on entry; the name is rebound to the result.
        fitness: Not used by the code shown; kept for interface compatibility.

    Returns:
        1-D numpy array: the flattened ``conv1`` activations followed by the
        flattened ``conv2`` activations.

    Raises:
        KeyError: If the hooks have not recorded ``'conv1'`` / ``'conv2'`` yet.
    """

    def train(num_epochs, cnn, loaders):
        # Forward-only sweep over the training loader; each pass refreshes
        # `activation` through the registered hooks.
        # NOTE(review): nothing in the code shown calls this helper.
        cnn.train()
        for batch_images, _batch_labels in loaders['train']:
            cnn(batch_images)

    layer_1 = activation['conv1']
    layer_2 = activation['conv2']
    print('1=', layer_1.size())
    print('2=', layer_2.size())

    # .cpu() is a no-op for CPU tensors and makes .numpy() safe for CUDA ones.
    layer_weights_1 = layer_1.cpu().numpy().flatten()
    # The original read 'conv1' here a second time (copy-paste slip).
    layer_weights_2 = layer_2.cpu().numpy().flatten()

    chromosome = np.append(layer_weights_1, layer_weights_2)
    return chromosome
Here I print the dimensions of layer_1 and layer_2 and get [32, 3136] and [32, 256], respectively.
# Overwrite every state_dict entry in place with the constant 2.
# state_dict() holds buffers as well as learnable parameters, so for this
# model the BatchNorm running statistics are overwritten too.
# The dict is built once: its tensors share storage with the model, so the
# in-place fill_ changes the model itself.
with torch.no_grad():
    for tensor in cnn.state_dict().values():
        tensor.fill_(2)

# Parameter shapes are fixed by the layer definitions and do not depend on
# the batch size.
output = cnn.state_dict()['conv1.0.weight']
print(output.size())
output = cnn.state_dict()['conv2.0.weight']
print(output.size())
Here I get [16, 1, 5, 5] for the 1st layer and [256, 3136] for the 2nd layer.
Shouldn’t they match? It seems like the output contains the weights after the whole nn.Sequential block in the case of conv1, and after the Linear layer in the case of conv2. Is this true? What are [32, 3136] and [32, 256] then?