How to transfer learned weight in the same model without last layer?

Hello all, I have my own network, trained as a binary classifier (2 classes). After 10k epochs, I saved the trained weights as 10000_model.pth. Now I want to use the same network for a 4-class classification problem. Thus, I want to copy all trained weights from the binary classifier to the 4-class model, except for the last layer, which should be randomly initialized. How could I do it? This is my model

 class Net(nn.Module):
     """Small convolutional classifier with a 1x1-convolution head.

     Two conv + ReLU + 2x2 max-pool stages are followed by ``conv_classify``,
     a 1x1 convolution with one output channel per class.

     Args:
         num_classes: number of output channels of ``conv_classify``.
             Defaults to 2, so ``Net()`` builds the original binary model and
             existing 2-class checkpoints still load; pass ``num_classes=4``
             to build the 4-class variant.
     """

     def __init__(self, num_classes=2):
         super(Net, self).__init__()
         self.conv1 = nn.Conv2d(1, 20, 5, 1)
         self.conv2 = nn.Conv2d(20, 50, 5, 1)
         # Classifier head: one output channel per class.
         self.conv_classify = nn.Conv2d(50, num_classes, 1, 1, bias=True)

     def forward(self, x):
         """Return the per-location class activations for input batch ``x``."""
         x = F.relu(self.conv1(x))
         x = F.max_pool2d(x, 2, 2)
         x = F.relu(self.conv2(x))
         x = F.max_pool2d(x, 2, 2)
         # NOTE(review): ReLU on the classifier output is kept as in the
         # original; if these values are used as logits for a cross-entropy
         # loss, consider removing it -- confirm against the training code.
         x = F.relu(self.conv_classify(x))
         return x

This is what I did

# Transfer every compatible tensor from the checkpoint into a fresh model and
# leave the remaining layers (e.g. a resized classifier) at their random init.
model = Net()
checkpoint_dict = torch.load('10000_model.pth')
pretrained_dict = checkpoint_dict['state_dict']
model_dict = model.state_dict()
# 1. filter out unnecessary keys: a key is only reusable if it exists in the
#    new model AND has the same shape. Checking the name alone keeps the old
#    classifier weights, because the layer name is unchanged when only the
#    number of classes differs.
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if k in model_dict and v.shape == model_dict[k].shape
}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
# 3. load the new state dict
model.load_state_dict(model_dict)
     

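For now, I have to manually delete the last layer's entries from pretrained_dict by name. It means pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} does not do anything, because every key in the checkpoint also exists in the new model (only the shapes of the last layer differ).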

Thanks

1 Like

The simpler approach would probably be to initialize the same pretrained model, load the state_dict, and reassign the last linear layer.
This would be the same workflow as if you would load a pretrained model from torchvision and just swap the last classifier for your own.
Using this approach you would avoid manipulating the state_dict.
Would that work for you?

Hi, I cannot use torchvision because I designed my own model. I just want to transfer my trained weights from the binary classifier to another task, such as a four-class problem. I just changed the number of classes from 2 to 4 in the new network Net()

self.conv_classify= nn.Conv2d(50, 2, 1, 1, bias=True) 

to

self.conv_classify= nn.Conv2d(50, 4, 1, 1, bias=True) 

Other layers are copied weight from 10000_model.pth

I just wanted to compare the workflow with loading a pre-trained model from torchvision. You should use your own model instead.
Here is some pseudo code:

# Build the network with the same architecture the checkpoint was saved from,
# so every parameter shape matches.
model = Net()
# Restore all pretrained parameters, including the old classifier.
model.load_state_dict(torch.load(PATH))
# Only now replace the classifier with a freshly initialised layer sized for
# the new task (arguments elided in this pseudo code).
model.conv_classify = nn.Conv2d(...)
...

Thanks. I have an error

RuntimeError: While copying the parameter named conv_class.weight, whose dimensions in the model are torch.Size([4, 50, 1, 1]) and whose dimensions in the checkpoint are torch.Size([2, 50, 1, 1]).

This is my complete code

        # `model` must still have the classifier shape stored in the
        # checkpoint at this point; otherwise load_state_dict() raises a size
        # mismatch before the layer below is replaced.
        checkpoint_dict = torch.load('./10000_model.pth')
        pretrained_dict = checkpoint_dict['state_dict']
        model.load_state_dict(pretrained_dict)
        # nn.Conv2d exposes `in_channels`; `in_features` only exists on
        # nn.Linear and would raise AttributeError here.
        num_ftrs = model.conv_classify.in_channels
        # New, randomly initialised 4-class head.
        model.conv_classify = nn.Conv2d(num_ftrs, 4, kernel_size=1, padding=0)

The error message sounds like you already changed the conv_class layer.
Could you check, if you are using two different layers as the output, since the error points to conv_class, while you are manipulating conv_classify.

Here is a small code snippet of what I was thinking about:

class MyModel(nn.Module):
    """Minimal two-layer conv net used to demonstrate swapping the last layer.

    ``conv1`` is a 3x3 convolution (1 -> 50 channels, padding 1) and
    ``conv_classify`` is a 1x1 convolution producing 2 output channels.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=50, kernel_size=3, stride=1, padding=1
        )
        self.conv_classify = nn.Conv2d(
            in_channels=50, out_channels=2, kernel_size=1, stride=1
        )

    def forward(self, x):
        """Apply conv1 + ReLU, then the classifier convolution (no activation)."""
        hidden = F.relu(self.conv1(x))
        return self.conv_classify(hidden)
    
# Init model and train
# (no training actually happens here; a single forward pass stands in for it)
model = MyModel()    
x = torch.randn(1, 1, 4, 4)    
output = model(x)    
# Output of the model as defined, before the classifier is replaced.
print(output.shape)

# Save "trained" state_dict
torch.save(model.state_dict(), 'mymodel.pth')

# Reload model and load state_dict
# The fresh instance has the same layer shapes as the saved one, so loading
# succeeds without any key or size mismatch.
model = MyModel()
model.load_state_dict(torch.load('mymodel.pth'))

# Change last conv layer
# Only conv_classify is re-created (randomly initialised, 4 output channels);
# conv1 keeps the weights that were just loaded.
model.conv_classify = nn.Conv2d(50, 4, 1, 1)
output = model(x)
print(output.shape)
2 Likes

Oh, I see. I had also changed the number of classes to 4 in the new model. So I have to use the same Net and change the number of classes in the training code with model.conv_classify = nn.Conv2d(50, 4, 1, 1). Thanks for making it clearer to me.

Hi,
I have an implementation of the C3D network as follows :-

import torch.nn as nn


class C3D(nn.Module):
    """C3D video classification network (3D conv stack + three FC layers).

    The attribute names and their registration order are kept exactly as in
    the original so that existing state dicts and code that slices
    ``children()`` by position keep working.

    ``forward`` flattens the ``pool5`` output to 8192 features per sample.
    A ``(N, 3, 16, 112, 112)`` input yields ``512 * 1 * 4 * 4 = 8192``
    features after ``pool5``.
    """

    def __init__(self):
        super(C3D, self).__init__()

        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))

        self.fc6 = nn.Linear(8192, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, 487)

        self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        # The logits are 2-D (batch, classes) after the flatten in forward(),
        # so normalise over dim 1. This is the dimension the implicit default
        # selects for 2-D input, made explicit to avoid the deprecation warning.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape ``(batch, 487)`` for clip batch ``x``."""
        h = self.relu(self.conv1(x))
        h = self.pool1(h)

        h = self.relu(self.conv2(h))
        h = self.pool2(h)

        h = self.relu(self.conv3a(h))
        h = self.relu(self.conv3b(h))
        h = self.pool3(h)

        h = self.relu(self.conv4a(h))
        h = self.relu(self.conv4b(h))
        h = self.pool4(h)

        h = self.relu(self.conv5a(h))
        h = self.relu(self.conv5b(h))
        h = self.pool5(h)

        # Flatten (C, D, H, W) into the 8192 features expected by fc6. This
        # step is not a registered module, so it is lost if the network is
        # rebuilt from children() alone.
        h = h.view(-1, 8192)
        h = self.relu(self.fc6(h))
        h = self.dropout(h)
        h = self.relu(self.fc7(h))
        h = self.dropout(h)

        logits = self.fc8(h)
        probs = self.softmax(logits)

        return probs
   

Now I want to remove the last 5 layers. Simply put, I want the 4096-dimensional vector output from the fc6 layer. I have the pretrained weights in a pickle file…

The following is my implementation. :-

import numpy as np
import torch.nn as nn
import torch
from torch.autograd import Variable

from C3D_Model_Main import C3D

class C3D_Model(nn.Module):
    """Feature extractor built from a pretrained ``C3D`` network.

    The retained layers are stored in ``self.new_model`` exactly as before,
    but ``forward`` re-applies the functional steps that are not registered
    modules and are therefore missing from ``children()``: a ReLU after every
    ``nn.Conv3d`` and a flatten in front of the first ``nn.Linear``. Without
    them the 5-D pooling output reaches the linear layer unflattened and
    raises a size mismatch.
    """

    # Outputs captured by hooks created with get_activation(), keyed by name.
    activation = {}

    def __init__(self):
        super(C3D_Model, self).__init__()
        net_c3d = C3D()
        # NOTE(security): torch.load unpickles the file; only load trusted
        # weight files here.
        net_c3d.load_state_dict(torch.load('c3d.pickle'))  # loading the pretrained weights pickle here
        # Drop the last five registered children. Presumably these are fc7,
        # fc8, dropout, relu and softmax, leaving everything up to fc6 --
        # verify against the C3D definition in C3D_Model_Main.
        modules = list(net_c3d.children())[:-5]

        self.new_model = nn.Sequential(*modules)

    def forward(self, x):
        """Extract feature vectors from input clips.

        Returns the raw output of the last retained layer; no activation is
        applied after a linear layer.
        """
        features = x
        for layer in self.new_model:
            if isinstance(layer, nn.Linear) and features.dim() > 2:
                # Collapse everything but the batch dimension before the FC layer.
                features = features.view(features.size(0), -1)
            features = layer(features)
            if isinstance(layer, nn.Conv3d):
                features = torch.relu(features)
        return features

    @staticmethod
    def get_activation(name):
        """Return a forward hook that stores a module's output under ``name``.

        The detached output is written to the class-level ``activation`` dict,
        e.g. ``module.register_forward_hook(C3D_Model.get_activation('fc6'))``.
        """
        def hook(model, input, output):
            C3D_Model.activation[name] = output.detach()

        return hook

def c3Dfeatures(vector, net=None):
    """Run one batch of clips through the feature network and return the output.

    Args:
        vector: array-like input batch; converted to a float32 tensor.
        net: optional, already constructed network to reuse. When omitted a
            new ``C3D_Model`` is built, which reloads the pretrained weights,
            so pass an instance when calling this in a loop.

    Returns:
        The network output as a tensor on the device that was used (CUDA when
        available, otherwise CPU).
    """
    # Fall back to the CPU instead of failing when no GPU is present.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    X = torch.as_tensor(vector, dtype=torch.float32).to(device)

    # get network pretrained model
    if net is None:
        net = C3D_Model()
    net = net.to(device)
    print(net)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = net(X)
    print("ouput type and shape : ", np.shape(output), type(output))
    return output
 
# Split a pre-processed video into fixed-length clips and run each clip
# through the feature extractor.
# assumes data_reshaped is (channels, frames, height, width) -- TODO confirm;
# axis 1 is the one that is sliced into clips below.
data_reshaped = np.load('112x112_numpy/Arson001_x264.npy')

# Number of frames fed to the network per clip.
no_of_frames = 16
# Number of complete clips; trailing frames that do not fill a clip are dropped.
no_of_groups = data_reshaped.shape[1] // no_of_frames

# Placeholder with the shape of a single network input; replaced in the loop.
new_frame_data = np.zeros([1, 3, no_of_frames, 112, 112])
cnt = 0
# Keep every clip's result instead of overwriting it on each iteration.
predictions = []
for i in range(0, no_of_groups * no_of_frames, no_of_frames):
    cnt = cnt + 1
    new_frame_data = data_reshaped[:, i:i + no_of_frames, :, :]
    # Add the leading batch dimension.
    new_frame_data = np.expand_dims(new_frame_data, axis=0)
    prediction = c3Dfeatures(new_frame_data)
    predictions.append(prediction)
    



the error I get is as follows

line 1354, in linear
output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [2048 x 4], m2: [8192 x 4096] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:266

I guess there is something wrong where I try to flatten/reshape the tensor after the pool5 layer. Any help/advice would be really nice, as I am stuck at this process.

hi,
did you solve this problem?

Intuitive solution, indeed.
If I wanna just remove the classify layer, what could be done? Thanks in advance!

You could replace it with an nn.Identity module (which would be the easiest approach) or override the forward and skip this layer.