Hi there,
I have been struggling with this for a while. I converted my model from manual to nn.Sequential, which should be identical, but for some reason Sequential model does not learn. Here is how I did it:
Before conversion:
class OldModel(nn.Module):
def __init__(self):
super(OldModel, self).__init__()
dropout_p = 0.2
self.conv1 = nn.Conv1d(1, 16, 15, stride=1) # 16x 2034
self.batchNormC1 = nn.BatchNorm1d(16)
self.dropoutC1 = nn.Dropout(p=dropout_p) # 32512
self.conv2 = nn.Conv1d(16, 16, 15, stride=2) # 1010
self.batchNormC2 = nn.BatchNorm1d(16)
self.dropoutC2 = nn.Dropout(p=dropout_p)
self.conv3 = nn.Conv1d(16, 16, 15, stride=1) # 996
self.batchNormC3 = nn.BatchNorm1d(16)
self.dropoutC3 = nn.Dropout(p=dropout_p) # 15920
.....
def forward(self, x):
x = self.conv1(x)
x = self.batchNormC1(x)
x = F.relu(x)
x = self.dropoutC1(x)
x = self.conv2(x)
x = self.batchNormC2(x)
x = F.relu(x)
x = self.dropoutC2(x)
x = self.conv3(x)
x = self.batchNormC3(x)
x = F.relu(x)
x = self.dropoutC3(x)
To this:
class NewModel(nn.Module)
def __init__(self, input_length=2048, config=None):
super(NewModel, self).__init__()
dropout_p = 0.2
conv_layers = []
in_channels = 1
output_length = input_length
for layer in config["layers"]:
conv_layers.append(
nn.Conv1d(in_channels,
out_channels = layer["channels"],
kernel_size = layer["kernel"],
padding=padding,
stride=layer["stride"]))
conv_layers.append(nn.BatchNorm1d(layer["channels"]))
conv_layers.append(nn.Dropout(p=config["dropout"]))
in_channels = layer["channels"]
output_length, *rest = conv_output_shape((output_length, 1), kernel_size=layer["kernel"], stride=layer["stride"], pad=padding, dilation=1)
self.conv_layers = nn.Sequential(*conv_layers,
nn.Flatten(),
nn.Linear(output_length * in_channels, 30),
nn.BatchNorm1d(30),
nn.Dropout(p=0.7),
nn.Linear(30, 1))
.....
def forward(self, x):
x = self.conv_layers(x)
return(x)
config = {
"layers": [
{"channels": 16, "kernel": 15, "padding": "valid", "stride": 1},
{"channels": 16, "kernel": 15, "padding":"valid", "stride": 2},
{"channels": 16, "kernel": 15, "padding":"valid", "stride": 1},
OldModel trains very well, but NewModel does not. The model architecture is identical. The only difference is one is nested in sequential module, but the other one is regular.
I can post print_model_summary to show you if needed.
Any ideas why? I’m so confused