How to make predictions using an ensemble of models in parallel on a single GPU

I am currently doing this in a for loop. Because the models run sequentially, it adds a lot of extra computation time when I run millions of single-point predictions over an ensemble of models rather than a single model.

If I have multiple models with the same architecture but different weight parameters, is there a way to run the forward passes of all the models at the same time on a single GPU?

You could check the vmap approach used for model ensembling.
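A minimal sketch of that approach, assuming PyTorch 2.x so that torch.func (stack_module_state, functional_call) and torch.vmap are available; the small MLP and the ensemble size are placeholders for your own models and weights:

import copy
import torch
from torch import nn
from torch.func import stack_module_state, functional_call

device = "cuda"

# Placeholder architecture; swap in your own model class with its trained weights.
def make_model():
    return nn.Sequential(nn.Linear(16, 64), nn.ReLU(), nn.Linear(64, 1)).to(device)

# Ensemble: same architecture, different weight parameters.
models = [make_model() for _ in range(10)]
for m in models:
    m.eval()

# Stack all parameters and buffers along a new leading "model" dimension.
params, buffers = stack_module_state(models)

# A stateless skeleton of the architecture, used only for its structure.
base = copy.deepcopy(models[0]).to("meta")

def call_single_model(p, b, x):
    return functional_call(base, (p, b), (x,))

x = torch.randn(1000, 16, device=device)

# One vectorized forward pass over all models; in_dims=None means the same
# input batch is shared by every ensemble member.
preds = torch.vmap(call_single_model, in_dims=(0, 0, None))(params, buffers, x)
print(preds.shape)  # (num_models, batch_size, 1)

This keeps everything on one GPU and replaces the Python-level for loop with a single vectorized forward pass over the stacked weights, which is usually much faster than calling each model sequentially.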


I have the following model architecture:


import logging
from dataclasses import dataclass
from typing import Dict, Optional

import torch
import torch.distributed as dist
from torch import nn, Tensor
from transformers import AutoModel
from transformers.file_utils import ModelOutput

class ConvEnc(nn.Module):
    
    def __init__(self,model_name,emb_dim=512,channels = 8):
        super().__init__()
        
        self.model = AutoModel.from_pretrained(model_name)
        self.conv2d_1 = torch.nn.Conv2d(in_channels=1,out_channels=channels,kernel_size=(1,11),stride=(5,3))
        self.bn_1 = torch.nn.BatchNorm2d(channels)
        self.relu_1 = torch.nn.ReLU()

        self.conv2d_2 = torch.nn.Conv2d(in_channels=channels,out_channels=2*channels,kernel_size=(3,3),stride=(3,3))
        self.bn_2 = torch.nn.BatchNorm2d(2*channels)
        self.relu_2 = torch.nn.ReLU()

        self.conv2d_3 = torch.nn.Conv2d(in_channels=2*channels,out_channels=4*channels,kernel_size=(3,3),stride=(3,3))
        self.bn_3 = torch.nn.BatchNorm2d(4*channels)
        self.relu_3 = torch.nn.ReLU()

        self.conv2d_4 = torch.nn.Conv2d(in_channels=4*channels,out_channels=8*channels,kernel_size=(3,3),stride=(3,3))
        self.bn_4 = torch.nn.BatchNorm2d(8*channels)
        self.relu_4 = torch.nn.ReLU()

        self.fc = torch.nn.Linear(2304,emb_dim)
        
    def forward(self,features):
        
        prev = self.model(**features,return_dict=True)

        cls_emb = prev.last_hidden_state[:,0]
        hidden_state = prev.last_hidden_state
        # print(hidden_state.shape)
        # Pad the sequence dimension (dim 1) up to length 512.
        padding_needed = max(0, 512 - hidden_state.size(1))
        padded_tensor = torch.nn.functional.pad(hidden_state, (0, 0, 0, padding_needed))
        # Add a channel dimension: (batch_size, 1, 512, hidden_dim).
        padded_tensor = padded_tensor.unsqueeze(0).transpose(1, 0)
        
        y1 = self.conv2d_1(padded_tensor)
        y1 = self.relu_1(y1)
        y1 = self.bn_1(y1)

        y1 = self.conv2d_2(y1)
        y1 = self.relu_2(y1)
        y1 = self.bn_2(y1)

        y1 = self.conv2d_3(y1)
        y1 = self.relu_3(y1)
        y1 = self.bn_3(y1)

        y1 = self.conv2d_4(y1)
        y1 = self.relu_4(y1)
        y1 = self.bn_4(y1)
        # print(y1.shape)  # (batch_size, 64 channels, 3, 12)
        y = self.fc(y1.view(y1.shape[0],-1))
        
        return cls_emb,y

When I save this model using torch.save(self.model.state_dict(), os.path.join(output_dir, "m.pth")), the model is saved, but after loading the weights the layer names are prefixed with an extra model.
For example:
layer name in the model: model.embeddings.word_embeddings.weight
layer name in the loaded weight dict: model.model.embeddings.word_embeddings.weight

What did I do wrong when saving the model?
Thanks

Make sure you are accessing and loading the state_dict from the same base model without accessing internal attributes.

Thanks, but can you elaborate on what you mean using the above example?

Sure, this will work:

model = ConvEnc("bert-base-uncased")
sd = model.state_dict()
model.load_state_dict(sd)
# <All keys matched successfully>
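
If you already have a checkpoint whose keys carry the extra model. prefix (as in your example), one possible workaround, assuming that prefix is the only mismatch and that m.pth is the file from your snippet, is to remap the keys before loading:

import torch

model = ConvEnc("bert-base-uncased")
sd = torch.load("m.pth", map_location="cpu")

# Drop one leading "model." from every key, e.g.
# "model.model.embeddings.word_embeddings.weight" -> "model.embeddings.word_embeddings.weight".
# str.removeprefix requires Python 3.9+.
fixed_sd = {k.removeprefix("model."): v for k, v in sd.items()}
model.load_state_dict(fixed_sd)

The cleaner fix is still the one above: call state_dict() on the same top-level module you later call load_state_dict() on, rather than on an internal attribute.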