I defined an image classification model in two different ways (CNN1 and CNN2), shown below. As far as I can tell, the two models should be identical (I also verified that they have the same number of parameters by calling self.count_parameters()). CNN1 trains normally, but CNN2 fails to reduce the loss. (Note: apart from the difference between these two definitions, all other training code is identical.)
So, what am I missing in my definition of CNN2? Why does the program not raise an error even though the model cannot be trained?
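For reference, every training step looks roughly like the sketch below, using the CNN1/CNN2 classes defined further down (this is simplified with dummy data; CrossEntropyLoss, Adam, the learning rate, and the random batch are placeholders for illustration, since the real data pipeline isn't relevant to the question):

import torch
import torch.nn as nn

model = CNN1()  # or CNN2() -- this is the only line that changes between runs
criterion = nn.CrossEntropyLoss()                          # placeholder loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # placeholder optimizer

xb = torch.randn(8, 3, 256, 256)  # dummy batch of images
yb = torch.randint(0, 2, (8,))    # dummy binary labels

optimizer.zero_grad()
loss = criterion(model(xb), yb)   # model outputs (batch, 2) logits
loss.backward()
optimizer.step()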
import torch.nn as nn

class CNN1(nn.Module):
    def __init__(self):
        super(CNN1, self).__init__()
        self.network = nn.Sequential(
            # (batch, 3, 256, 256)
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),  # (batch, 32, 256, 256)
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=2),  # (batch, 64, 128, 128)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # (batch, 64, 64, 64)
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),  # (batch, 128, 64, 64)
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=2),  # (batch, 256, 32, 32)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # (batch, 256, 16, 16)
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),  # (batch, 256, 16, 16)
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),  # (batch, 256, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # (batch, 256, 8, 8)
            nn.Flatten(),  # (batch, 256*8*8)
            nn.Linear(in_features=256*8*8, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1024),
            nn.ReLU(),
            nn.Linear(in_features=1024, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512, out_features=2)
        )

    def forward(self, xb):
        return self.network(xb)

    def count_parameters(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
Number of parameters: 73402690
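That number comes from a call like this (trivial usage sketch):

model = CNN1()
print("Number of parameters:", model.count_parameters())  # prints 73402690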
Output (the loss decreases over the training steps; training is normal):
val_loss: 0.5657 val_roc: 0.7292
val_loss: 0.5432 val_roc: 0.9091
val_loss: 0.6879 val_roc: 0.7308
val_loss: 0.6001 val_roc: 0.7292
val_loss: 0.6052 val_roc: 0.6970
val_loss: 0.5604 val_roc: 0.7500
val_loss: 0.5020 val_roc: 0.7747
val_loss: 0.6042 val_roc: 0.5500
…
class CNN2(nn.Module):
    def __init__(self):
        super(CNN2, self).__init__()
        self.conv1 = self.conv_block(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = self.conv_block(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=2, pool=True)
        self.conv3 = self.conv_block(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = self.conv_block(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=2, pool=True)
        self.conv5 = self.conv_block(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.conv6 = self.conv_block(in_channels=256, out_channels=256, kernel_size=3, padding=1, pool=True)
        self.ffn1 = self.FFN(in_features=256*8*8, out_features=4096, flatten=True)
        self.ffn2 = self.FFN(in_features=4096, out_features=1024)
        self.ffn3 = self.FFN(in_features=1024, out_features=512)
        self.ffn4 = self.FFN(in_features=512, out_features=2)

    def forward(self, xb):
        out = self.conv1(xb)   # (batch, 32, 256, 256)
        out = self.conv2(out)  # (batch, 64, 64, 64)
        out = self.conv3(out)  # (batch, 128, 64, 64)
        out = self.conv4(out)  # (batch, 256, 16, 16)
        out = self.conv5(out)  # (batch, 256, 16, 16)
        out = self.conv6(out)  # (batch, 256, 8, 8)
        out = self.ffn1(out)   # (batch, 4096)
        out = self.ffn2(out)   # (batch, 1024)
        out = self.ffn3(out)   # (batch, 512)
        out = self.ffn4(out)   # (batch, 2)
        return out

    def conv_block(self, in_channels, out_channels, kernel_size, padding, stride=1, pool=False):
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, padding=padding, stride=stride),
            nn.ReLU()
        ]
        if pool:
            layers.append(nn.MaxPool2d(2, 2))
        return nn.Sequential(*layers)

    def FFN(self, in_features, out_features, flatten=False):
        layers = [
            nn.Linear(in_features=in_features, out_features=out_features),
            nn.ReLU()
        ]
        if flatten:
            layers = [nn.Flatten()] + layers
        return nn.Sequential(*layers)
Number of parameters: 73402690
Output (the loss never decreases; note that 0.6931 ≈ ln 2, i.e. the model is stuck at chance level for two classes):
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
val_loss: 0.6931 val_roc: 0.5000
…
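If it helps with diagnosing this, here is a small sketch I use to compare the two definitions leaf module by leaf module (the flattening into leaves1/leaves2 is my own helper code, not part of either model):

model1, model2 = CNN1(), CNN2()

# Flatten each model into its leaf modules (Conv2d, ReLU, MaxPool2d, ...);
# registration order matches forward order for both definitions.
leaves1 = [m for m in model1.modules() if not list(m.children())]
leaves2 = [m for m in model2.modules() if not list(m.children())]

print(len(leaves1), "vs", len(leaves2))
for i, (a, b) in enumerate(zip(leaves1, leaves2)):
    mark = "" if type(a) is type(b) else "  <-- differs"
    print(f"{i:2d}: {type(a).__name__:9s} vs {type(b).__name__}{mark}")

# Any trailing modules that exist in only one of the two models:
for m in leaves1[len(leaves2):] + leaves2[len(leaves1):]:
    print("only in one model:", m)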