Conv1D - matrix size issue

I am moving from Keras to PyTorch and trying to port a Conv1D model that worked well in Keras.
The model takes an X_train matrix of shape (x, 42, 18) as input to a Conv1d network defined as follows:

class Conv1D(nn.Module):
    def __init__(self, input_dim, output_dim, num_features, seq_length, batch_size, dropout): 
        super(Conv1D, self).__init__()
        # initialize params
        self.input_dim = input_dim        # 18
        self.output_dim = output_dim      # 4 (number of classes for classification)
        self.num_features = num_features  # 64
        self.seq_length = seq_length      # 42
        self.batch_size = batch_size      # 128
        self.dropout = dropout            # nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.conv1 = nn.Conv1d(seq_length, num_features, kernel_size = 8, padding = 8)
        self.bn1 = nn.BatchNorm1d(num_features)
        self.conv2 = nn.Conv1d(num_features, num_features * 2, kernel_size = 5, padding = 5)
        self.bn2 = nn.BatchNorm1d(num_features * 2)
        self.conv3 = nn.Conv1d(num_features*2, num_features*2, kernel_size = 3, padding = 3)
        self.bn3 = nn.BatchNorm1d(num_features * 2)
        self.avgpool = nn.AvgPool1d(num_features * 2, stride = 1)
        self.linear = nn.Linear(num_features * 2, output_dim)
        
    def forward(self, x_t):
        x_t = self.conv1(x_t)
        x_t = self.bn1(x_t)
        x_t = self.relu(x_t)
        x_t = self.conv2(x_t)
        x_t = self.bn2(x_t)
        x_t = self.relu(x_t)
        x_t = self.conv3(x_t)
        x_t = self.bn3(x_t)
        x_t = self.relu(x_t)
        x_t = self.avgpool(x_t)
        x_t = x_t[:,-1,:]
        x_t = self.linear(x_t) 
        x_t = self.softmax(x_t)
        return x_t

Printing the model with print(model) gives the following:

Conv1D(
  (relu): ReLU()
  (softmax): Softmax(dim=1)
  (conv1): Conv1d(42, 64, kernel_size=(8,), stride=(1,), padding=(8,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(5,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,))
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avgpool): AvgPool1d(kernel_size=(128,), stride=(1,), padding=(0,))
  (linear): Linear(in_features=128, out_features=4, bias=True)
)

But I run into two problems: the average-pooling layer is rejected and, when I remove the avgpool, I get the following error message:

  File "D:\Anaconda3\envs\torchlight\lib\site-packages\torch\nn\functional.py", line 1692, in linear
    output = input.matmul(weight.t())

RuntimeError: mat1 dim 1 must match mat2 dim 0

I don't understand the issue. Could someone please explain what I am missing?
Thanks a lot!

Could you explain what error you are seeing in the pooling layer?

Also, I’m not sure what the issue with the Conv1d weight matrix is. Do you think its shape is unexpected somehow?

The shape mismatch error after removing the pooling layer is raised because the number of features of the incoming activation doesn't match the in_features of the linear layer.
You could print the activation with a custom module inside the nn.Sequential container and adapt the in_features. However, I think it would be better to solve the pooling issue first.
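
If it helps, here is a rough sketch of how the intermediate shapes could be printed with forward hooks instead of a custom module (hyperparameter values taken from the comments in your code; the dropout value is arbitrary). The forward pass will still fail at the avgpool, but the shapes printed before that show what is reaching it:

import torch
import torch.nn as nn

def print_shape(name):
    def hook(module, inp, out):
        # inp is a tuple of the positional inputs to the module
        print(f"{name}: in {tuple(inp[0].shape)} -> out {tuple(out.shape)}")
    return hook

model = Conv1D(input_dim=18, output_dim=4, num_features=64,
               seq_length=42, batch_size=128, dropout=0.2)
for name, module in model.named_modules():
    if isinstance(module, (nn.Conv1d, nn.AvgPool1d, nn.Linear)):
        module.register_forward_hook(print_shape(name))

x = torch.randn(128, 42, 18)  # the (x, 42, 18) layout from the Keras code
out = model(x)                # prints shapes up to conv3, then the avgpool raises

The printed shapes also show that conv1 treats the 42 time steps as channels and the 18 features as the sequence length, which is swapped compared to what the Keras Conv1D layers do with a (batch, 42, 18) input (18 channels convolved along the 42 steps).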


Solved :)

class Conv1D(nn.Module):
    def __init__(self, input_dim, output_dim, num_features, seq_length, batch_size, iter_dim, dropout): 
        super(Conv1D, self).__init__()
        self.input_dim = input_dim              # number of features of dataX = 6, 18 or 55
        self.output_dim = output_dim            # 4
        self.num_features = num_features        # 64
        self.seq_length = seq_length
        self.batch_size = batch_size            # 128
        self.iter_dim = iter_dim                # 0
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.conv1 = nn.Conv1d(input_dim, num_features, kernel_size = 9, padding = 4)  # padding = 8)
        self.bn1 = nn.BatchNorm1d(num_features)
        self.conv2 = nn.Conv1d(num_features, num_features, kernel_size = 5, padding = 2)  # padding = 5)
        self.bn2 = nn.BatchNorm1d(num_features)
        self.conv3 = nn.Conv1d(num_features, num_features * 2, kernel_size = 3, padding = 1)  # padding = 3)
        self.bn3 = nn.BatchNorm1d(num_features * 2)
        self.avgpool = nn.AvgPool1d(seq_length, padding = 0)
        self.linear1 = nn.Linear(num_features * 2, num_features)
        self.linear2 = nn.Linear(num_features, output_dim)
        
    def forward(self, x_t):
        x_t = self.conv1(x_t)
        x_t = self.bn1(x_t)
        x_t = self.relu(x_t)
        for layer in range(self.iter_dim-2):  # optional repeats of the middle conv block; skipped when iter_dim <= 2
            x_t = self.conv2(x_t)
            x_t = self.bn2(x_t)
            x_t = self.relu(x_t)
        x_t = self.conv3(x_t)
        x_t = self.bn3(x_t)
        x_t = self.relu(x_t)
        x_t = self.avgpool(x_t)
        x_t = x_t.view(x_t.shape[0], -1)  # flatten (batch, num_features*2, 1) -> (batch, num_features*2)
        #x_t = x_t.view(-1)
        x_t = self.linear1(x_t)
        x_t = self.dropout(x_t)
        x_t = self.relu(x_t)
        x_t = self.linear2(x_t)
        x_t = self.softmax(x_t)
        return x_t
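
For completeness: since conv1 now takes input_dim (the 18 features) as in_channels and the avgpool kernel equals seq_length, the input has to be channel-first, i.e. (batch, input_dim, seq_length), so a Keras-style batch needs a permute. A quick sanity check with dummy data (shapes taken from the comments above, dropout value arbitrary):

import torch

model = Conv1D(input_dim=18, output_dim=4, num_features=64,
               seq_length=42, batch_size=128, iter_dim=0, dropout=0.2)

x_keras = torch.randn(128, 42, 18)  # (batch, seq_length, features) as in Keras
x_torch = x_keras.permute(0, 2, 1)  # -> (batch, features, seq_length) for nn.Conv1d
out = model(x_torch)
print(out.shape)                    # torch.Size([128, 4])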