Thanks, I saw a similar answer to a previous question here. Below is my implementation, which concatenates the encoders' outputs and feeds them as the input to a linear layer.
class Res50_test(nn.Module):
    """Linear classifier on top of three frozen, pretrained ResNet-50 encoders.

    Each encoder is loaded from the checkpoint ``st.trained_models[i]`` and
    has its parameters frozen. The encoders' feature vectors are concatenated
    along the feature dimension and passed to a single bias-free linear layer
    with 100 outputs.

    Args:
        st: settings object exposing ``trained_models``, an indexable
            collection with (at least) three entries accepted by
            ``torch.load``, each holding a ResNet-50 ``state_dict``.
    """

    def __init__(self, st):
        super().__init__()
        self.encoders = nn.ModuleList()
        for i in range(3):
            encoder = torchvision.models.resnet50(zero_init_residual=True)
            encoder.load_state_dict(torch.load(st.trained_models[i]))
            # torchvision's ResNet-50 ends in a 1000-way ``fc`` head, so
            # without this the encoder would emit 1000 logits instead of the
            # 2048-dim pooled feature the classifier below is sized for.
            # The head is removed only after loading so the checkpoint's
            # ``fc.*`` keys still match during ``load_state_dict``.
            encoder.fc = nn.Identity()
            self.encoders.append(encoder)

        # Freeze every encoder; only ``self.classifier`` stays trainable.
        # NOTE(review): this does not stop BatchNorm running statistics from
        # updating in train mode -- call ``.eval()`` on the encoders if they
        # must stay completely fixed.
        for param in self.encoders.parameters():
            param.requires_grad = False

        sizes = [2048 * 3, 100]
        self.classifier = nn.Linear(sizes[-2], sizes[-1], bias=False)

    def forward(self, x):
        """Return ``(logit, emb)`` for the input batch ``x``.

        ``emb`` is the concatenation (``dim=1``) of the encoder outputs in
        encoder order, so columns ``[2048*k : 2048*(k+1)]`` of ``emb`` -- and
        the same columns of ``self.classifier.weight`` -- belong to encoder
        ``k``. ``logit`` is ``self.classifier(emb)``.
        """
        emb = torch.cat([encoder(x) for encoder in self.encoders], dim=1)
        logit = self.classifier(emb)
        return logit, emb
I wanted to distinguish the contribution of each encoder's output to the classifier.