torchsummary fails with "'NoneType' object has no attribute 'size'" on a model using TransformerEncoder

I’m working on a two-class audio classification project and have run into an issue with TransformerEncoder when running torchsummary.

Here’s my model:

import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class MyConvTransformerNet(nn.Module):
    def __init__(self, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        super(MyConvTransformerNet, self).__init__()

        # Define convolutional layers
        self.conv1 = nn.Conv1d(in_channels=160, out_channels=320, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm1d(320)
        self.conv2 = nn.Conv1d(in_channels=320, out_channels=480, kernel_size=5, stride=2, padding=1)
        self.batchnorm2 = nn.BatchNorm1d(480)
        self.conv3 = nn.Conv1d(in_channels=480, out_channels=640, kernel_size=5, stride=2, padding=1)
        self.batchnorm3 = nn.BatchNorm1d(640)
        self.conv4 = nn.Conv1d(in_channels=640, out_channels=320, kernel_size=5, stride=2, padding=1)
        self.batchnorm4 = nn.BatchNorm1d(320)
        self.conv5 = nn.Conv1d(in_channels=320, out_channels=180, kernel_size=3, stride=1, padding=1)
        self.batchnorm5 = nn.BatchNorm1d(180)

        # Define Transformer encoder layers
        encoder_layers = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)

        # Define fully connected layers
        self.fc1 = nn.Linear(4680, 200)  # 4680 = 26 (seq length after the convs) * 180 (d_model)
        self.fc2 = nn.Linear(200, 20)
        self.fc3 = nn.Linear(20, 2)

        # Define activation function
        self.relu = nn.LeakyReLU()

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        # Convolutional blocks: Conv1d -> BatchNorm1d -> LeakyReLU -> Dropout
        x1 = self.relu(self.batchnorm1(self.conv1(x)))
        x1 = self.dropout(x1)
        
        x2 = self.relu(self.batchnorm2(self.conv2(x1)))
        x2 = self.dropout(x2)
        
        x3 = self.relu(self.batchnorm3(self.conv3(x2)))
        x3 = self.dropout(x3)
        
        x4 = self.relu(self.batchnorm4(self.conv4(x3)))
        x4 = self.dropout(x4)
        
        x5 = self.relu(self.batchnorm5(self.conv5(x4)))
        x5 = self.dropout(x5)

        # Rearrange the output for the Transformer encoder
        x = x5.permute(2, 0, 1)  # (batch, channels, seq) -> (sequence_length, batch_size, embedding_dim)

        # Transformer encoder layers
        print(x.shape)
        x = self.transformer_encoder(x)

        # Flatten each example for the fully connected layers
        x = x.permute(1, 0, 2)  # back to (batch_size, sequence_length, embedding_dim)
        x = x.reshape(x.size(0), -1)  # (batch_size, sequence_length * d_model) = (batch_size, 4680)

        # Fully connected layers with ReLU activation
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x

# Instantiate the model
d_model = 180
nhead = 4
num_layers = 2
dim_feedforward = 512
dropout = 0.1

model = MyConvTransformerNet(d_model, nhead, num_layers, dim_feedforward, dropout)
print(model)

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define optimizer (e.g., Adam optimizer with learning rate 0.001)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
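For reference, here is a quick hand check of what the conv stack does to the sequence length before the encoder. This is a throwaway nn.Sequential with the same Conv1d settings as the model above (not part of my actual code), just to confirm the shape that reaches the transformer:

import torch
import torch.nn as nn

# Same Conv1d settings as in MyConvTransformerNet, without the batchnorm/activation
# (they don't change shapes): L_out = floor((L_in + 2*padding - kernel_size) / stride) + 1
convs = nn.Sequential(
    nn.Conv1d(160, 320, kernel_size=3, stride=1, padding=1),  # 216 -> 216
    nn.Conv1d(320, 480, kernel_size=5, stride=2, padding=1),  # 216 -> 107
    nn.Conv1d(480, 640, kernel_size=5, stride=2, padding=1),  # 107 -> 53
    nn.Conv1d(640, 320, kernel_size=5, stride=2, padding=1),  # 53  -> 26
    nn.Conv1d(320, 180, kernel_size=3, stride=1, padding=1),  # 26  -> 26
)
print(convs(torch.randn(32, 160, 216)).shape)  # torch.Size([32, 180, 26])

So the tensor handed to self.transformer_encoder should be (26, 32, 180) after the permute, which matches d_model = 180, and 26 * 180 = 4680 matches fc1.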

When I run torchsummary with my input of shape (32, 160, 216), where 32 = batch_size, 160 = channels, and 216 = seq_length:

from torchsummary import summary

print(summary(model, (160, 216), 32, "cpu"))

I get the error:

Cell In[129], line 1
----> 1 print(summary(model, (160,216), 32, "cpu"))

File ~/miniconda3/envs/prog/lib/python3.10/site-packages/torchsummary/torchsummary.py:72, in summary(model, input_size, batch_size, device)
     68 model.apply(register_hook)
     70 # make a forward pass
     71 # print(x.shape)
---> 72 model(*x)
     74 # remove these hooks
     75 for h in hooks:

File ~/miniconda3/envs/prog/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[127], line 57
     55 # Transformer encoder layers
...
     24     ]
     25 else:
---> 26     summary[m_key]["output_shape"] = list(output.size())

AttributeError: 'NoneType' object has no attribute 'size'
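To narrow things down, something like the stripped-down check below (untested sketch, same d_model/nhead/dim_feedforward as above) should show whether the error already appears with just a TransformerEncoder under torchsummary, without any of my conv layers:

import torch.nn as nn
from torchsummary import summary

# Stand-alone encoder with the same hyperparameters as in my model
enc_layer = nn.TransformerEncoderLayer(d_model=180, nhead=4, dim_feedforward=512, dropout=0.1)
enc = nn.TransformerEncoder(enc_layer, num_layers=2)

# batch_first is False by default, so the encoder treats its input as (seq_len, batch, d_model)
print(summary(enc, (26, 180), 32, "cpu"))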

Can someone help me with this?