I am working on an action recognition task, and I want to predict the parent class ‘Diagnosis’ based on the predictions of the subclasses ‘action_name’ and ‘priority’.
The script I used was inspired by this reference.
I changed the model implementation as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
class MultiOutputModel(nn.Module):
    """MobileNetV2 feature extractor with three classification heads.

    The ``action`` and ``priority`` heads read the pooled backbone features.
    The ``diagnosis`` head reads only the concatenated outputs of those two
    heads, so the parent-class prediction is derived from the subclass
    predictions rather than from the image features directly.
    """

    def __init__(self, n_action_classes, n_priority_classes, n_diagnosis_classes):
        """Build the backbone, the pooling layer and the three heads.

        Args:
            n_action_classes: Output size of the ``action`` head.
            n_priority_classes: Output size of the ``priority`` head.
            n_diagnosis_classes: Output size of the ``diagnosis`` head.
        """
        super().__init__()
        # Build MobileNetV2 a single time and read both the feature stack and
        # ``last_channel`` from that one instance, instead of constructing a
        # second full network just to look up an integer.
        # NOTE(review): no weights argument is passed, so the backbone is
        # presumably randomly initialised -- confirm this is intended.
        backbone = models.mobilenet_v2()
        self.base_model = backbone.features  # the model without its classifier
        last_channel = backbone.last_channel  # size of the layer before the classifier

        # The input for the classifiers should be two-dimensional, but the
        # backbone yields [batch_size, channels, width, height], so average
        # spatially: reduce width and height to 1.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # Separate classifiers for the two subclass outputs.
        self.action = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=last_channel, out_features=n_action_classes),
        )
        self.priority = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=last_channel, out_features=n_priority_classes),
        )
        # The parent-class head consumes the concatenated subclass outputs,
        # hence its input width is the sum of the two subclass output widths.
        self.diagnosis = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(
                in_features=n_action_classes + n_priority_classes,
                out_features=n_diagnosis_classes,
            ),
        )

    def forward(self, x):
        """Run the backbone and all three heads.

        Args:
            x: Input batch passed straight to the backbone.

        Returns:
            Dict with keys ``'action'``, ``'priority'`` and ``'diagnosis'``
            holding the raw (un-normalised) output of each head.
        """
        x = self.base_model(x)
        x = self.pool(x)
        # Reshape from [batch, channels, 1, 1] to [batch, channels] so it can
        # be fed to the linear classifiers.
        x = torch.flatten(x, 1)

        # Subclass predictions.
        action = self.action(x)
        priority = self.priority(x)

        # Concatenate the subclass outputs for the parent-class prediction.
        combined_action_priority_outputs = torch.cat([action, priority], dim=1)
        diagnosis = self.diagnosis(combined_action_priority_outputs)

        return {
            'action': action,
            'priority': priority,
            'diagnosis': diagnosis,
        }

    def get_loss(self, net_output, ground_truth):
        """Compute the summed cross-entropy loss over the three heads.

        Args:
            net_output: Dict with keys ``'action'``, ``'priority'`` and
                ``'diagnosis'``, as returned by ``forward``.
            ground_truth: Dict with keys ``'action_labels'``,
                ``'priority_labels'`` and ``'diagnosis_labels'``.

        Returns:
            Tuple ``(loss, per_task)``: ``loss`` is the unweighted sum of the
            three cross-entropy terms and ``per_task`` maps ``'action'``,
            ``'priority'`` and ``'diagnosis'`` to the individual terms.
        """
        action_loss = F.cross_entropy(net_output['action'], ground_truth['action_labels'])
        priority_loss = F.cross_entropy(net_output['priority'], ground_truth['priority_labels'])
        diagnosis_loss = F.cross_entropy(net_output['diagnosis'], ground_truth['diagnosis_labels'])
        loss = action_loss + priority_loss + diagnosis_loss
        return loss, {'action': action_loss, 'priority': priority_loss, 'diagnosis': diagnosis_loss}
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
class MultiOutputModel(nn.Module):
    """MobileNetV2 feature extractor with three classification heads.

    The ``action`` and ``priority`` heads read the pooled backbone features.
    The ``diagnosis`` head reads only the concatenated outputs of those two
    heads, so the parent-class prediction is derived from the subclass
    predictions rather than from the image features directly.
    """

    def __init__(self, n_action_classes, n_priority_classes, n_diagnosis_classes):
        """Build the backbone, the pooling layer and the three heads.

        Args:
            n_action_classes: Output size of the ``action`` head.
            n_priority_classes: Output size of the ``priority`` head.
            n_diagnosis_classes: Output size of the ``diagnosis`` head.
        """
        super().__init__()
        # Build MobileNetV2 a single time and read both the feature stack and
        # ``last_channel`` from that one instance, instead of constructing a
        # second full network just to look up an integer.
        # NOTE(review): no weights argument is passed, so the backbone is
        # presumably randomly initialised -- confirm this is intended.
        backbone = models.mobilenet_v2()
        self.base_model = backbone.features  # the model without its classifier
        last_channel = backbone.last_channel  # size of the layer before the classifier

        # The input for the classifiers should be two-dimensional, but the
        # backbone yields [batch_size, channels, width, height], so average
        # spatially: reduce width and height to 1.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # Separate classifiers for the two subclass outputs.
        self.action = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=last_channel, out_features=n_action_classes),
        )
        self.priority = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=last_channel, out_features=n_priority_classes),
        )
        # The parent-class head consumes the concatenated subclass outputs,
        # hence its input width is the sum of the two subclass output widths.
        self.diagnosis = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(
                in_features=n_action_classes + n_priority_classes,
                out_features=n_diagnosis_classes,
            ),
        )

    def forward(self, x):
        """Run the backbone and all three heads.

        Args:
            x: Input batch passed straight to the backbone.

        Returns:
            Dict with keys ``'action'``, ``'priority'`` and ``'diagnosis'``
            holding the raw (un-normalised) output of each head.
        """
        x = self.base_model(x)
        x = self.pool(x)
        # Reshape from [batch, channels, 1, 1] to [batch, channels] so it can
        # be fed to the linear classifiers.
        x = torch.flatten(x, 1)

        # Subclass predictions.
        action = self.action(x)
        priority = self.priority(x)

        # Concatenate the subclass outputs for the parent-class prediction.
        combined_action_priority_outputs = torch.cat([action, priority], dim=1)
        diagnosis = self.diagnosis(combined_action_priority_outputs)

        return {
            'action': action,
            'priority': priority,
            'diagnosis': diagnosis,
        }

    def get_loss(self, net_output, ground_truth):
        """Compute the summed cross-entropy loss over the three heads.

        Args:
            net_output: Dict with keys ``'action'``, ``'priority'`` and
                ``'diagnosis'``, as returned by ``forward``.
            ground_truth: Dict with keys ``'action_labels'``,
                ``'priority_labels'`` and ``'diagnosis_labels'``.

        Returns:
            Tuple ``(loss, per_task)``: ``loss`` is the unweighted sum of the
            three cross-entropy terms and ``per_task`` maps ``'action'``,
            ``'priority'`` and ``'diagnosis'`` to the individual terms.
        """
        action_loss = F.cross_entropy(net_output['action'], ground_truth['action_labels'])
        priority_loss = F.cross_entropy(net_output['priority'], ground_truth['priority_labels'])
        diagnosis_loss = F.cross_entropy(net_output['diagnosis'], ground_truth['diagnosis_labels'])
        loss = action_loss + priority_loss + diagnosis_loss
        return loss, {'action': action_loss, 'priority': priority_loss, 'diagnosis': diagnosis_loss}
The results gave me an accuracy of 1. Am I addressing the problem the right way?