Stacked bidirectional GRU: TypeError: forward() takes 2 positional arguments but 3 were given

Hi,
Below is my code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BiGRU(nn.Module):
    def __init__(self, input_size, hidden_dim, dropout):
        super(BiGRU, self).__init__()
        self.BiGRU = nn.GRU(
            input_size=input_size, hidden_size=hidden_dim,
            num_layers=1, batch_first=True, bidirectional=True)
        self.layer_norm = nn.LayerNorm(input_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden):
        x = self.layer_norm(x)
        x = F.relu(x)
        print("Here")
        x, hidden = self.BiGRU(x, hidden)
        x = self.dropout(x)
        return x, hidden

class SpeechRecognitionDNN(nn.Module):
    """
    This class will be used to create a model with:
    - GRU (can be bidirectional)
    - LayerNorm
    - ReLU
    - Repeat the three blocks above 10 times
    - Fully connected network
    - ReLU
    - Dropout
    - Classifier
    - Softmax --> classification problem (29 characters to decode).
    """

    def __init__(self, gru_input_size, n_feats, output_size, hidden_dim, drop_prob=0.2):
        super(SpeechRecognitionDNN, self).__init__()
        """
        - Convolutional
        - GRU Layer --> input (batch, time, features)
        - GRU Layer --> hidden input & output (n_layers*2, batch_size, hidden_dim)
        - GRU Layer --> output (batch, time, hidden_dim*2)
        """
        # output_dim will be the alphabet + '' and space = 28 chars
        self.gru_input_size = gru_input_size
        self.hidden_dim = hidden_dim
        self.drop_prob = drop_prob
        self.output_dim = output_size
        self.n_layers = 1
        self.n_direction = 2
        # With n_layers=1 the GRU's internal dropout has to be 0
        
        # Input size = number of features = 128
        # 1 layer of convolutional for extracting the features
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=4, stride=2, padding=1)
        # 1. Conv2d --> depth=32, kernel size=(4,4), strides=(2,2), padding=1
        # Output size (batch, 32, 64 (features/2), time/2 )
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.fully_connected = nn.Linear((n_feats//2)*32, gru_input_size)
        # With batch first --> The input is (batch, sequence, features)
        layers = []
        for i in range(6):
            layers.append(BiGRU(input_size=gru_input_size, hidden_dim=hidden_dim, dropout=drop_prob))
        self.gru_layers = nn.Sequential(*layers)
                                       
        # (batch, channel, features, time)
        #Fully Connected 
        #if self.bidir:
        self.classifier = nn.Linear(2*hidden_dim, output_size )
        self.dropout = nn.Dropout(0.2)

        
    def forward(self, x, hidden):

        x = self.conv1(x)
        #print("After Conv 1 : {}".format(x.size()))
        # Activation Function ReLU
        x = F.relu(x)
        x = self.dropout(x)
        # Conv 2
        x = self.conv2(x)
        #print("After Conv 2 : {}".format(x.size()))
        x = F.relu(x)
        x = self.dropout(x)
        sizes = x.size()
        x = x.view(sizes[0], sizes[1]*sizes[2], sizes[3]) #(batch, features*channel, time)
        x = x.transpose(1,2)    #(batch, time, features*channel)
        #print("After View & Transpose : {}".format(x.size()))
        x = self.fully_connected(x)
        #print("After Fully Connected : {}".format(x.size()))
        x = F.relu(x)
        x = self.dropout(x)
        #First Layer --> Layer Normalization
        print("Before GRU1 : {}".format(x.size()))
        print("Before GRU1 : {}".format(hidden.size()))
        out, hidden = self.gru_layers(x, hidden)
        #print("After View & Transpose : {}".format(x.size()))
        #print("After View & Transpose : {}".format(hidden.size()))
        
        #print("After GRU : {}".format(out.size()))

        #out = out.contiguous().view(-1, self.hidden_dim)

        out = self.classifier(out)
        #out = self.dropout()
        
        out = F.log_softmax(out, dim=2)
        #print("After classifier {}".format(out.size()))
        return out, hidden
    
    def init_hidden(self, batch_size, device):
        ''' Initializes the hidden state '''
        # Create one tensor of size (n_layers * n_directions, batch_size, hidden_dim),
        # initialized to zero (a GRU has no cell state, unlike an LSTM)
        hidden = torch.zeros(self.n_layers * self.n_direction, batch_size, self.hidden_dim).to(device)

        return hidden

In the training function I call:

output, h = model(specs, h)
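
For context, the tensors are built roughly like this before that call (the sizes and hyperparameters below are placeholders, not the actual ones):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SpeechRecognitionDNN(gru_input_size=256, n_feats=128,
                             output_size=29, hidden_dim=128).to(device)

batch_size = 8
specs = torch.randn(batch_size, 1, 128, 200).to(device)  # (batch, channel, features, time)
h = model.init_hidden(batch_size, device)                # (n_layers*2, batch, hidden_dim)

output, h = model(specs, h)  # <-- this call raises the TypeError below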

I am getting the error when the sequential container calls the forward function. That forward also takes x and hidden, and I am passing exactly those two, so I am not sure why it says three were given:

TypeError: forward() takes 2 positional arguments but 3 were given

nn.Sequential accepts a single input in its forward, while you are trying to pass two (x and hidden) to it.

The error message mentions 2 and 3 arguments because the self argument is also counted: nn.Sequential.forward is defined as forward(self, input), i.e. 2 positional arguments, while your call supplies self, x, and hidden, i.e. 3.
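
For illustration, a minimal stand-alone snippet (nn.Identity is just a placeholder module) that triggers the same kind of TypeError:

import torch
import torch.nn as nn

seq = nn.Sequential(nn.Identity())
x, h = torch.zeros(1), torch.zeros(1)

seq(x)      # works: forward(self, input) gets self + one input
# seq(x, h) # TypeError: forward() takes 2 positional arguments but 3 were given,
#           # because self, x, and h are three positional arguments for a
#           # forward that only declares self and input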

You could change BiGRU.forward to accept a single list (or tuple) of inputs, unwrap it inside the method, and return a list of outputs, which should work in an nn.Sequential container.
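
Something along these lines (just a sketch, using a tuple and assuming the hidden state should simply be threaded through every BiGRU in the container):

import torch.nn as nn
import torch.nn.functional as F

class BiGRU(nn.Module):
    def __init__(self, input_size, hidden_dim, dropout):
        super(BiGRU, self).__init__()
        self.BiGRU = nn.GRU(
            input_size=input_size, hidden_size=hidden_dim,
            num_layers=1, batch_first=True, bidirectional=True)
        self.layer_norm = nn.LayerNorm(input_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs):
        # nn.Sequential passes a single object from module to module,
        # so pack/unpack x and hidden as one tuple
        x, hidden = inputs
        x = self.layer_norm(x)
        x = F.relu(x)
        x, hidden = self.BiGRU(x, hidden)
        x = self.dropout(x)
        return x, hidden

# and in SpeechRecognitionDNN.forward:
# out, hidden = self.gru_layers((x, hidden))

Each BiGRU then receives and returns the (x, hidden) tuple, so the whole stack can stay inside nn.Sequential.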


It worked! Thanks 🙂