Breaking up a pre-trained model (Solved)

Hello. I am trying out stuff with PyTorch. Basically, I want to know how I can divide my pretrained model into two parts and then use them to classify an image. For example, I want to break an AlexNet into two smaller models, send an image to the first sub-model, and then pass the output from that to the second sub-model. Please help, and thanks :slight_smile: :slight_smile:

I’m confused about what you mean by “splitting a model”, do you mean you just want to see an intermediate activation in AlexNet?

You can do that by modifying the AlexNet model example: https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py

I have written two small models (with some help): the first one contains the conv layers and the second contains the fully connected layers. But while transferring the weights, there is an error:
KeyError : ‘unexpected key “0.weight” in state_dict’

  class AlexNet(nn.Module):
    """AlexNet as a single module: a convolutional trunk (``features``)
    followed by a fully connected head (``classifier``).

    Args:
        num_classes: output width of the final ``nn.Linear`` layer.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()

        # Convolutional trunk: five conv layers, each followed by a ReLU,
        # with max-pooling after the 1st, 2nd and 5th conv.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fully connected head; its first Linear consumes the trunk output
        # flattened to 256 * 6 * 6 values per sample.
        fc_stack = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_stack)

    def forward(self, x):
        """Apply the trunk, flatten each sample to 256 * 6 * 6 values, and
        return the classifier output."""
        feature_maps = self.features(x)
        flat_width = 256 * 6 * 6
        flattened = feature_maps.view(feature_maps.size(0), flat_width)
        return self.classifier(flattened)


class AlexNet_conv(nn.Module):
    """Convolutional half of AlexNet.

    Holds only the ``features`` stack and returns its output flattened to
    256 * 6 * 6 values per sample.

    Args:
        num_classes: accepted but not used by this module.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet_conv, self).__init__()

        # Five conv layers, each followed by a ReLU, with max-pooling after
        # the 1st, 2nd and 5th conv.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

    def forward(self, x):
        """Return the conv features of ``x`` reshaped to (batch, 256 * 6 * 6)."""
        feature_maps = self.features(x)
        flat_width = 256 * 6 * 6
        return feature_maps.view(feature_maps.size(0), flat_width)

class AlexNet_classifier(nn.Module):
    """Fully connected half of AlexNet.

    Holds only the ``classifier`` stack, whose first ``nn.Linear`` takes
    256 * 6 * 6 input features per sample.

    Args:
        num_classes: output width of the final ``nn.Linear`` layer.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet_classifier, self).__init__()

        # Dropout precedes each of the two hidden Linear layers; the final
        # Linear maps to ``num_classes`` outputs.
        fc_stack = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_stack)

    def forward(self, x):
        """Return the classifier output for already-flattened features ``x``."""
        return self.classifier(x)

# Build the two halves and copy the pre-trained AlexNet weights into them.
model_1 = AlexNet_conv()
model_2 = AlexNet_classifier()

pre_trained = alexnet(pretrained=True)

# A sub-module's state dict is keyed relative to that sub-module
# ("0.weight", "0.bias", ...), while model_1 / model_2 keep their layers under
# an attribute and so expect "features.0.weight" / "classifier.1.weight".
# Loading into the wrapper raises KeyError: unexpected key "0.weight", so load
# into the matching nn.Sequential instead.
model_1.features.load_state_dict(pre_trained.features.state_dict())
model_2.classifier.load_state_dict(pre_trained.classifier.state_dict())

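I would guess that the names of your modules are different from the names of the pre-trained AlexNet’s modules: `pre_trained.features.state_dict()` has keys like `0.weight`, while `model_1.state_dict()` expects keys like `features.0.weight`.
There might be an easy way around that by changing your names (or by loading into the sub-module directly, e.g. `model_1.features.load_state_dict(...)`), or you could assign the weights manually.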