I have trained model with layers stacks in nn.Sequential for classification problem.
The ConvNet architecture look like this:
class ConvNet(nn.Module):
def __init__(self,num_classes=8):
super(ConvNet,self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(1,64,kernel_size=7),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.AvgPool2d(kernel_size=2,stride=2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(64,128,kernel_size=7,stride=2),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.AvgPool2d(kernel_size=2,stride=2)
)
self.layer3 = nn.Sequential(
nn.Conv2d(128,256,kernel_size=3),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.AvgPool2d(kernel_size=2,stride=2)
)
self.layer4 = nn.Sequential(
nn.Conv2d(256,512,kernel_size=3),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.AvgPool2d(kernel_size=2,stride=2),
nn.BatchNorm2d(512)
)
self.hidden = nn.Linear(2*2*512,1024)
self.drop = nn.Dropout(0.6)
self.dense1 = nn.Sequential(
nn.Linear(1024,256),
nn.ReLU(),
nn.Dropout(0.25)
)
self.dense2 = nn.Sequential(
nn.Linear(256,64),
nn.ReLU()
)
self.fc1 = nn.Linear(64,32)
self.fc = nn.Linear(32,num_classes)
def forward(self,x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = out.reshape(out.size(0),-1)
out = F.relu(self.hidden(out))
out = self.drop(out)
out = self.dense1(out)
out = self.dense2(out)
out = F.relu(self.fc1(out))
out = self.fc(out)
return out
ConvNet(
(layer1): Sequential(
(0): Conv2d(1, 64, kernel_size=(7, 7), stride=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(layer2): Sequential(
(0): Conv2d(64, 128, kernel_size=(7, 7), stride=(2, 2))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(layer3): Sequential(
(0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(layer4): Sequential(
(0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(hidden): Linear(in_features=2048, out_features=1024, bias=True)
(drop): Dropout(p=0.6, inplace=False)
(dense1): Sequential(
(0): Linear(in_features=1024, out_features=256, bias=True)
(1): ReLU()
(2): Dropout(p=0.25, inplace=False)
)
(dense2): Sequential(
(0): Linear(in_features=256, out_features=64, bias=True)
(1): ReLU()
)
(fc1): Linear(in_features=64, out_features=32, bias=True)
(fc): Linear(in_features=32, out_features=8, bias=True)
)
Then, I have used this method delete the last layer in order to obtain the matrix 32:
model = ConvNet(8).to(device)
model.load_state_dict(torch.load('model.pt'))
removed = list(model.children())[:-1]
new_model= torch.nn.Sequential(*removed)
print(new_model)
Output:
Sequential(
(0): Sequential(
(0): Conv2d(1, 64, kernel_size=(7, 7), stride=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(1): Sequential(
(0): Conv2d(64, 128, kernel_size=(7, 7), stride=(2, 2))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(2): Sequential(
(0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(3): Sequential(
(0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(4): Linear(in_features=2048, out_features=1024, bias=True)
(5): Dropout(p=0.6, inplace=False)
(6): Sequential(
(0): Linear(in_features=1024, out_features=256, bias=True)
(1): ReLU()
(2): Dropout(p=0.25, inplace=False)
)
(7): Sequential(
(0): Linear(in_features=256, out_features=64, bias=True)
(1): ReLU()
)
(8): Linear(in_features=64, out_features=32, bias=True)
)
However, when I want to predict new picture to get matrix 32, I get an error:
RuntimeError: size mismatch, m1: [1024 x 2], m2: [2048 x 1024] at C:/w/1/s/tmp_conda_3.8_075429/conda/conda-bld/pytorch_1579852542185/work/aten/src\THC/generic/THCTensorMathBlas.cu:290
Something was wrong here?
How can i get the maxtrix 32 before last layer classifies?