Hi, I am new to PyTorch and to computer vision in general.
I am trying to build a neural network that combines a pretrained model (MobileNet_v3_small) with an encoder (only an encoder, not an autoencoder). The network should take two inputs: an image and a vector of keypoints (from a pose estimator). The pretrained model processes the image, and the encoder processes the keypoint vector. Finally, I would like the output to be a vector with two parts: the prediction from the pretrained model (2 image classes) and the pose features.
The code is:
import torch
import torch.nn as nn
from torchvision import models

class mi_Net(nn.Module):
    def __init__(self, num_classes, input_shape):
        super(mi_Net, self).__init__()
        # Pretrained MobileNetV3-small, frozen as a fixed feature extractor
        self.model = models.mobilenet_v3_small(pretrained=True)
        for param in self.model.parameters():
            param.requires_grad = False

        # (these layers are currently not used in forward)
        self.fc1 = nn.Linear(2048, 2048)
        self.fc2 = nn.Linear(2048, num_classes)
        self.dropout = nn.Dropout(0.3)

        # Encoder for the keypoint vector
        self.encoder_hidden_layer = nn.Linear(
            in_features=input_shape, out_features=128
        )
        self.encoder_output_layer = nn.Linear(
            in_features=128, out_features=128
        )

    def forward(self, x, features):
        # MobileNet branch (image)
        x = self.model.features(x)
        x = self.model.avgpool(x)
        x = torch.flatten(x, 1)  # flatten before the classifier
        x = self.model.classifier(x)

        # Encoder branch (keypoints)
        activation = self.encoder_hidden_layer(features)
        activation = torch.relu(activation)
        code = self.encoder_output_layer(activation)
        code = torch.relu(code)

        # Concatenate the two branches along the feature dimension
        out = torch.cat((x, code), dim=1)
        return out
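For reference, here is a minimal sketch of how I expect to call the network (the batch size, the 224x224 input size, and the 34-dimensional keypoint vector are just assumptions for illustration; input_shape is the length of the keypoint vector passed to the constructor):

    model = mi_Net(num_classes=2, input_shape=34)
    model.eval()

    images = torch.randn(8, 3, 224, 224)  # dummy batch of 8 RGB images
    keypoints = torch.randn(8, 34)         # dummy keypoint vectors (e.g. 17 keypoints x 2 coordinates)

    with torch.no_grad():
        out = model(images, keypoints)

    print(out.shape)  # torch.Size([8, 1128]): 1000 logits from the pretrained classifier + 128 encoder features

Note that with the pretrained classifier kept as-is, the image branch still outputs 1000 ImageNet logits rather than my 2 classes.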
Thank you!!