Pass a custom tensor as weights to a model

I see a lot of implementations along these lines:

with torch.no_grad():
    for layer in mask_model.state_dict():
        print(layer)
        mask_model.state_dict()[layer].data.fill_(const)  # fill every entry with the same constant

where the weights are filled with a constant or with values sampled from a distribution. I have a custom tensor I want to pass instead. How do I do that? Thanks!

I would guess .copy_(tensor) would work in this case.
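For example, something along these lines (a minimal sketch; my_tensors is a hypothetical dict holding one custom tensor per parameter):

with torch.no_grad():
    for name, param in mask_model.named_parameters():
        custom = my_tensors[name]   # hypothetical: your custom tensor for this parameter
        param.copy_(custom)         # shapes must match (or be broadcastable)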


For some reason I get an error. I convert a numpy array and pass it to .copy_ like this:

mask_model.state_dict()[layer].data.copy_(torch.from_numpy(elite_c1[1, 0:100352]))

and the error is attached:

Based on the error message, you are hitting a size mismatch in the tensor.
I don’t know what your actual use case is, but the state_dict contains the parameters and buffers of the stored model. You can manipulate them (and replace tensors if needed), but you would have to make sure the new model is able to load this state_dict based on the new shapes.
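For example, a rough sketch of that workflow (the key name is just an illustration):

sd = mask_model.state_dict()
sd['conv.weight'] = custom_tensor   # replace the stored tensor outright
mask_model.load_state_dict(sd)      # fails if the shape no longer matches the module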


I have reshaped the tensor to exactly the shape that fill_() fills, [16, 1, 5, 5], but the problem persists:

import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn as nn
from torch.utils.data import DataLoader

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_data = datasets.MNIST(
    root = 'data',
    train = True,                         
    transform = ToTensor(), 
    download = True,            
)
test_data = datasets.MNIST(
    root = 'data', 
    train = False, 
    transform = ToTensor()
)

batch_size = 32

loaders = {
    'train' : torch.utils.data.DataLoader(train_data, 
                                          batch_size=batch_size, 
                                          shuffle=True, 
                                          num_workers=1),
    
    'test'  : torch.utils.data.DataLoader(test_data, 
                                          batch_size=batch_size, 
                                          shuffle=True, 
                                          num_workers=1),
}

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(       # conv weight shape: [16, 1, 5, 5]
            nn.Conv2d(
                in_channels=1,              
                out_channels=16,            
                kernel_size=5,              
                stride=1,                   
                padding=2,
            ),                                                                    
            nn.MaxPool2d(2,2),
            nn.BatchNorm2d(16),
            nn.Flatten()  
        )
        self.conv2 = nn.Sequential(             # linear weight shape: [256, 3136]
            nn.Linear(16 * 14 * 14, batch_size * 8),  
            nn.ReLU(),                      
            nn.Flatten()               
        )
        self.out = nn.Sequential(nn.Linear(batch_size*8, 10), nn.Softmax(dim=1))

        self.weights_initialization()

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        output = self.out(x)
        return output

    def weights_initialization(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

cnn = CNN()

activation = {}
def getActivation(name):
  # the hook signature
  def hook(model, input, output):
    activation[name] = output.detach()
  return hook
  
h = cnn.out.register_forward_hook(getActivation('out'))

import numpy as np

images, labels = next(iter(loaders['train']))
num_epochs = 1

chromosome = []
fitness = 0

def single_run(images, labels, num_epochs, chromosome, fitness):

  cnn.train()
  for i, (images, labels) in enumerate(loaders['train']):

      b_x = images   # batch x
      b_y = labels   # batch y

      output = cnn(b_x)   # forward pass; the hook stores the output layer's activations

      out_ten = activation['out']

      layer_weights_1 = cnn.conv1[0].weight.detach().numpy().flatten()  # [16, 1, 5, 5]
      layer_weights_2 = cnn.conv2[0].weight.detach().numpy().flatten()  # [256, 3136]

      chromosome = np.append(layer_weights_1, layer_weights_2)

      # fitness counts correct predictions over the epoch
      for j in range(len(out_ten)):
        if out_ten[j].argmax().item() == labels[j].item():
          fitness += 1

  return fitness, chromosome

fitness1, chromosome1 = single_run(images, labels, num_epochs, chromosome, fitness)

# print(fitness1)
# print('______')
# print(chromosome1, len(chromosome1))

population_c = []
population_f = []

size=10

def populate(size, population_c, population_f):

  for i in range(size):
    # run once per individual and reuse both return values
    fitness_i, chromosome_i = single_run(images, labels, num_epochs, chromosome, fitness)
    population_f = np.append(population_f, fitness_i)
    if i == 0:
      population_c = np.append(population_c, chromosome_i)
    else:
      population_c = np.vstack((population_c, chromosome_i))

  return population_c, population_f

population_c1, population_f1 = populate(size, population_c, population_f)

import math

def selection(population_c, population_f):

  elite = math.ceil(len(population_f) * 0.2)  # keep the top 20% as elites

  elite_ind = np.argpartition(population_f, -elite)[-elite:]
  elite_c = np.take(population_c, elite_ind, 0)
  elite_f = np.take(population_f, elite_ind, 0)

  return elite_c, elite_f

elite_c1, elite_f1 = selection(population_c1, population_f1)

print(torch.reshape(torch.from_numpy(elite_c1[1, 0:400]),(16,1,5,5)).size())

with torch.no_grad():
    for layer in cnn.state_dict():
        # cnn.state_dict()[layer].data.fill_(1)
        cnn.state_dict()[layer].data.copy_(torch.reshape(torch.from_numpy(elite_c1[0, 0:400]),(16,1,5,5)))

output = cnn.state_dict()['conv1.0.weight']

print(output.size())

Thank you in advance! You have been extremely helpful!

In your code snippet you are hard-coding the shape to (16, 1, 5, 5), which is wrong for most of the layers.
Printing the shapes of all entries in the state_dict gives:

for layer in cnn.state_dict():
    print(cnn.state_dict()[layer].shape)

torch.Size([16, 1, 5, 5])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([])
torch.Size([256, 3136])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])

So the shape you are using to reshape elite_c1 is only valid for the first parameter and will fail for all the others.
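If the goal is to load one flat chromosome into every entry of the state_dict, a minimal sketch could look like this (assuming elite_c1[0] holds enough values, laid out in the same order as the state_dict; note that your current chromosome only contains the two weight matrices, not the biases and buffers):

with torch.no_grad():
    flat = torch.from_numpy(elite_c1[0]).float()
    offset = 0
    for name, param in cnn.state_dict().items():
        n = param.numel()
        # take the next n values and reshape them to this entry's own shape
        param.copy_(flat[offset:offset + n].reshape(param.shape))
        offset += n

This way each slice is reshaped to the shape of the entry it is copied into, instead of forcing (16, 1, 5, 5) everywhere.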
