I have been working with efficient net models provided by the timm library
Using their efficient net models I was able to train that model on my custom dataset
the model code is as follows
#let's update the pretarined model:
for param in model.parameters():
param.requires_grad=False
#orginally, it was:
#(classifier): Linear(in_features=2048, out_features=1000, bias=True)
#we are updating it as a 2-class classifier:
"""
model.classifier = nn.Sequential(
nn.Linear(in_features=2048, out_features=625), #2048 is the orginal in_features
nn.ReLU(), #ReLu to be the activation function
nn.Dropout(p=0.3),
nn.Linear(in_features=625, out_features=256),
nn.ReLU(),
nn.Linear(in_features=256,out_features=128),
nn.ReLU(),
nn.Linear(in_features=128, out_features=3),
)
for classification and regression purposes I am able to get satisfactory training and val results with this current implementation.
The problem is when i put this model in a class,I end up getting errors of the form
RuntimeError: mat1 and mat2 shapes cannot be multiplied (393216x16 and 1536x625)
The new implementation in class based form is as follows
class AMT_Model(nn.Module):
def __init__(self):
super(AMT_Model,self).__init__()
self.model=timm.create_model('tf_efficientnet_b3_ns',pretrained=True)
for param in self.model.parameters():
param.requires_grad=False
self.classifer=nn.Sequential(
nn.Linear(in_features=1536,out_features=625),
nn.ReLU(),
nn.Dropout(p=0.3),
nn.Linear(in_features=625,out_features=256),
nn.ReLU(),
nn.Dropout(p=0.2),
nn.Linear(in_features=256,out_features=199)
)
def forward(self,x):
features=self.model.forward_features(x)
out=self.classifer(features)
return out
def build_model():
model=AMT_Model()
return model
Another strange thing is when I use the model returned by the class I also end up recieivng the following error
TypeError: linear(): argument 'input' (position 1) must be Tensor, not list
This issue only arises when I use the class based implementation shown below.
class AMT_Model(nn.Module):
def __init__(self):
super(AMT_Model,self).__init__()
self.model=timm.create_model('tf_efficientnet_b3_ns',features_only=True,pretrained=True)
for param in self.model.parameters():
param.requires_grad=False
self.classifer=nn.Sequential(
nn.Linear(in_features=1536,out_features=625),
nn.ReLU(),
nn.Dropout(p=0.3),
nn.Linear(in_features=625,out_features=256),
nn.ReLU(),
nn.Dropout(p=0.2),
nn.Linear(in_features=256,out_features=3)
)
def forward(self,x):
features=self.model(x)
out=self.classifer(features)
return out
def build_model():
model=AMT_Model()
return model
Model summary
(1): InvertedResidual(
(conv_pw): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(act1): SiLU(inplace=True)
(conv_dw): Conv2d(2304, 2304, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2304, bias=False)
(bn2): BatchNorm2d(2304, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(act2): SiLU(inplace=True)
(se): SqueezeExcite(
(conv_reduce): Conv2d(2304, 96, kernel_size=(1, 1), stride=(1, 1))
(act1): SiLU(inplace=True)
(conv_expand): Conv2d(96, 2304, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv_pwl): Conv2d(2304, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
)
(classifer): Sequential(
(0): Linear(in_features=1536, out_features=625, bias=True)
(1): ReLU()
(2): Dropout(p=0.3, inplace=False)
(3): Linear(in_features=625, out_features=256, bias=True)
(4): ReLU()
(5): Dropout(p=0.2, inplace=False)
(6): Linear(in_features=256, out_features=3, bias=True)
)
)
As such I would appreciate it if someone could point out where I am going wrong in when I implement the model as class.