Using pretrained ResNet-50 bottleneck outputs as inputs to my model

I’m trying to use the global-pooled bottleneck outputs of ResNet as inputs to my model. Currently this is what I have:

import torch
import torch.nn as nn
import torch.nn.functional as F


class MyNet(nn.Module):
    def __init__(self, resnet, n_features=12):
        super().__init__()
        # conv1, bn1, relu, maxpool
        self.initial_layers = nn.Sequential(*list(resnet.children())[:4])
        # ModuleList so the bottlenecks are registered as submodules
        # (a plain Python list would hide them from .parameters()/.cuda())
        self.bottlenecks = nn.ModuleList()
        # 15104 = total channels across all 16 ResNet-50 bottleneck outputs
        self.attribute_weights = nn.Linear(15104, n_features)

        # Extract the bottleneck layers
        for i, mod in enumerate(list(resnet.children())):
            if isinstance(mod, nn.Sequential):
                for bn in mod:
                    self.bottlenecks.append(bn)

        # Set the resnet weights to not update
        # for param in resnet.parameters():
            # param.requires_grad = False

    def forward(self, inp):
        all_feature_maps = []
        output = self.initial_layers(inp)

        # Loop to extract the outputs of the bottleneck layers from resnet
        for bn in self.bottlenecks:
            output = bn(output)
            kernel_size = (output.size()[2], output.size()[3])
            feature_maps = F.avg_pool2d(output, kernel_size)
            all_feature_maps.append(feature_maps)

        # Global pool
        features = torch.cat(all_feature_maps, dim=1).squeeze()
        if len(features.size()) == 1:
            features = features.unsqueeze(0)

        # Use features to predict scores
        attribute_scores = self.attribute_weights(features)
        return attribute_scores

This works, and the model can be created and trained via:

from torchvision import models
resnet = models.resnet50(pretrained=True)
net = MyNet(resnet, n_features=12)
optimizer = torch.optim.Adam(params=net.attribute_weights.parameters())
# train code...
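
For reference, the 15104 in_features of attribute_weights is just the sum of the output channels of the 16 ResNet-50 bottlenecks (3×256 + 4×512 + 6×1024 + 3×2048 = 15104). A quick sanity check, assuming torchvision's resnet50, where each Bottleneck block exposes a conv3 layer:

# Sum the output channels over all bottleneck blocks in resnet50
n_pooled = sum(
    bn.conv3.out_channels
    for mod in resnet.children() if isinstance(mod, nn.Sequential)
    for bn in mod
)
print(n_pooled)  # 15104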

Now I want to take this further and not only train the attribute_weights but also fine-tune the weights of the pretrained ResNet. To achieve this I’m rebuilding ResNet and creating “transparent layers” where I global pool the feature maps of the bottleneck outputs.

class MyNet3(nn.Module):
    def __init__(self, resnet, n_features=12):
        super().__init__()
        # conv1, bn1, relu, maxpool
        self.model = nn.Sequential(*list(resnet.children())[:4])
        # A plain list is fine here: the PoolFeatures modules get registered on
        # self.model below, this just keeps handles to them for forward()
        self.all_pooled_features = []
        self.attribute_weights = nn.Linear(15104, n_features)

        count = 0
        for i, mod in enumerate(list(resnet.children())):
            # Extract the bottleneck layers
            if isinstance(mod, nn.Sequential):
                for bn in mod:
                    self.model.add_module(f"bn_{count}", bn)
                    
                    # Use a "Transparent layer" 
                    pooled_feature_map = PoolFeatures()
                    self.model.add_module(f"pooled_feature_{count}", pooled_feature_map)
                    self.all_pooled_features.append(pooled_feature_map)
                    count += 1

    def forward(self, inp):
        _ = self.model(inp)
        features = torch.cat([pool_fp.feature_maps for pool_fp in self.all_pooled_features], dim=1).squeeze()
        if len(features.size()) == 1:
            features = features.unsqueeze(0)

        # Use features to predict scores
        attribute_scores = self.attribute_weights(features)
        return attribute_scores

My Pooling Module is:

class PoolFeatures(nn.Module):
    """Identity-like layer that stashes a globally pooled copy of its input."""

    def __init__(self):
        super().__init__()

    def forward(self, inp):
        # Global average pool over the full spatial extent and keep a handle to it
        kernel_size = (inp.size()[2], inp.size()[3])
        self.feature_maps = F.avg_pool2d(inp, kernel_size)
        # Pass the input through unchanged so the next bottleneck sees it
        return inp

Again this seems (?) to be working and can be created and trained via:

resnet = models.resnet50(pretrained=True)
net = MyNet3(resnet, n_features=12)
# Optimize all parameters so the pretrained ResNet weights are fine-tuned too
optimizer = torch.optim.Adam(params=net.parameters())
# train code...
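
As a quick shape check (a minimal sketch, assuming 224×224 RGB inputs and the net created above), the forward pass should return one score per attribute:

x = torch.randn(2, 3, 224, 224)  # dummy batch of two images
scores = net(x)
print(scores.shape)              # expected: torch.Size([2, 12])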

The problem is that I get a memory error, and it seems like the memory usage increases after each training batch. I can't help but think this is a result of my transparent layer. How would I solve this problem, or what is the correct way to implement what I want? I've looked at the Neural Transfer tutorial and it seems like .detach and .clone might be useful, though I'm not that sure. Thanks :slight_smile:

Can I see your training loop? It might be the reason the memory keeps increasing.

Yes, I figured it out… It was because I defined a Variable outside the training loop, and as a result it kept retaining the gradients after every batch!
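
In case it helps anyone else, here is a minimal sketch of that kind of mistake (hypothetical loader/criterion names, not my actual training loop): an accumulator defined outside the loop quietly keeps every batch's graph alive unless you detach what you add to it.

running_loss = 0.0                         # defined outside the loop
for inputs, targets in loader:             # `loader`/`criterion` are placeholders
    optimizer.zero_grad()
    loss = criterion(net(inputs), targets)
    loss.backward()
    optimizer.step()

    # running_loss += loss                 # leaks: keeps each batch's graph alive
    running_loss += loss.item()            # or loss.detach(): frees the graph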
