Hi I used hugging face vision transofrmer to build my multitask model but encountered errors like:
TypeError: linear(): argument ‘input’ (position 1) must be Tensor, not ImageClassifierOutput
The code for the architecture looks like the following:
class multi_output_model(torch.nn.Module):
def __init__(self,categories,regression_out,frozen_layer=False):
super(multi_output_model,self).__init__()
#vit = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k')
self.vit = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k')
#self.features = torch.nn.ModuleList(vit.children())[:-1]
#self.features = torch.nn.Sequential(*self.features)
#in_features = vit.classifier.in_features
in_features = self.vit.config.hidden_size
if frozen_layer:
self.freeze_feature_layers()
self.classification = torch.nn.Linear(in_features,categories)
self.regression = torch.nn.Linear(in_features,regression_out)
def forward(self,input_imgs):
#output = self.features(input_imgs)
output = self.vit(input_imgs)
#output = output.logits.view(input_imgs.size(0), -1)
output_classification = self.classification(output)
output_regression = self.regression(output)
output_regression =torch.reshape(output_regression,[output_regression.size()[0],17,100])
#return [output_regression,output_classification]
return SequenceClassifierOutput(
logits=[output_regression,output_classification],
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def freeze_feature_layers(self):
for n,p in self.features.named_parameters():
p.requires_grad = False
Can any body help me with such problem?
Looking forward to hearing from you!