forward() takes 2 positional arguments but 3 were given for nn.Sequential with linear layers

I tried to refactor a linear layer, for use in a neural network, so that it takes batch norm, dropout, and an activation function as optional arguments. The code for it is:

# Module-level in-place ReLU instance.
act_fn = nn.ReLU(inplace=True)

# Builds an nn.Sequential around a linear layer with optional batch norm,
# dropout and activation. This is the version that raises the TypeError
# discussed in this thread.
def linear_layer(n_in, n_out, drop=False, dropout_rate = 0.5, bn = False, zero_bn = False, act=True):
    lin = nn.Linear(n_in, n_out)
    dropout = nn.Dropout(p=dropout_rate)
    # NOTE(review): rebinding `bn` to a module discards the boolean flag the
    # caller passed, so the `if bn:` test below no longer reflects it.
    bn = nn.BatchNorm1d(n_out)
    # Batch-norm weight starts at 0 when zero_bn is set, otherwise at 1.
    nn.init.constant_(bn.weight, 0. if zero_bn else 1.)

    # NOTE: `lin` is already a constructed layer; `lin(n_in, n_out)` calls it
    # (a forward pass) with two arguments rather than creating a layer.
    layers = [lin(n_in, n_out)]
    # NOTE(review): the bodies of the three branches below are missing from
    # this listing.
    if bn:
    if drop:
    if act: 

    return nn.Sequential(*layers)

But when I tried to test it

# test linear layer

# 1-D input with 64*7*7 elements (no batch dimension).
tmp = torch.randn(64*7*7)
# Per the traceback below, the TypeError is raised by this call, i.e. while
# the layer is being built, not when `tmp` is passed through it.
a = linear_layer(64*7*7, 1024)

I get


TypeError                                 Traceback (most recent call last)

<ipython-input-61-1c1d696ee893> in <module>()
      2 tmp = torch.randn(64*7*7)
----> 3 a = linear_layer(64*7*7, 1024)
      4 print(a(tmp).shape)

1 frames

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

TypeError: forward() takes 2 positional arguments but 3 were given

However, it works fine if I define each layer separately and call each one in the forward() of a nn module

I even tried to reduce it down to

def linear_layer(n_in, n_out):
    """Reduced reproducer: wrap a single linear layer in nn.Sequential.

    As written this still raises the TypeError, because the already
    constructed layer is called instead of being placed in the list.
    """
    lin = nn.Linear(n_in, n_out)

    # NOTE: `lin(n_in, n_out)` invokes the layer's forward pass with two
    # arguments; it does not create a layer.
    layers = [lin(n_in, n_out)]

    return nn.Sequential(*layers)

But it still gives me the same error


This is a limitation of the Sequential module: it only works with single input / single output modules.


I seem to have fixed it by doing the following:

# linear layers
def linear(n_in, n_out):
    """Return an ``nn.Linear`` layer mapping ``n_in`` features to ``n_out``."""
    return nn.Linear(n_in, n_out)

def linear_layer(n_in, n_out, dp=False, dropout_rate = 0.5, bn = False, zero_bn = False, act=True):
    """Build ``Linear -> [BatchNorm1d] -> [Dropout] -> [ReLU]`` as an nn.Sequential.

    Args:
        n_in: number of input features of the linear layer.
        n_out: number of output features of the linear layer.
        dp: when true, append ``nn.Dropout(p=dropout_rate)``.
        dropout_rate: drop probability used when ``dp`` is true.
        bn: when true, append ``nn.BatchNorm1d(n_out)``.
        zero_bn: when true, the batch-norm weight is initialised to 0
            instead of 1 (only relevant when ``bn`` is true).
        act: when true, append an in-place ``nn.ReLU``.

    Returns:
        An ``nn.Sequential`` holding the selected modules in the order above.
    """
    layers = [linear(n_in, n_out)]
    if bn:
        # Keep the module under its own name so the boolean `bn` flag is not
        # overwritten (which would make this branch unconditionally true).
        norm = nn.BatchNorm1d(n_out)
        nn.init.constant_(norm.weight, 0. if zero_bn else 1.)
        layers.append(norm)
    if dp:
        layers.append(nn.Dropout(p=dropout_rate))
    if act:
        layers.append(nn.ReLU(inplace=True))

    return nn.Sequential(*layers)

In your original code you have:

   lin = nn.Linear(n_in, n_out)
   layers = [lin(n_in, n_out)]

The expression lin(n_in, n_out)
is not creating a linear layer (that already happens above!) – it’s trying to do a forward pass on the linear layer and failing because of the invalid arguments. Instead you should just write:

   layers = [lin]

The code in your second post is equivalent to this, but more complicated.

1 Like

I am facing a similar issue, though not while creating the layers but rather while calling forward.
Below is the code.

class BasicBlock(nn.Module):
    """Residual block: conv3x3 -> ReLU -> conv3x3, add identity, ReLU.

    Only ``groups=1``, ``base_width=64`` and ``dilation<=1`` are accepted.
    ``forward`` forwards an extra ``sample`` flag to both convolutions and to
    the optional ``downsample`` module.
    """
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1):
        super(BasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        # Optional module applied to the input before the residual addition.
        self.downsample = downsample
        self.stride = stride

    def forward(self,x,sample=False):
        """Run the block on ``x``, passing ``sample`` to every sub-module call."""
        identity = x

        # NOTE(review): conv3x3 is not defined in this snippet; if it returns a
        # plain nn.Conv2d, the extra `sample` argument will be rejected.
        out = self.conv1(x,sample)
        out = self.relu(out)
        out = self.conv2(out,sample)

        if self.downsample is not None:
            # NOTE(review): if downsample is an nn.Sequential it accepts only a
            # single input — confirm against how the block is constructed.
            identity = self.downsample(x,sample)

        # Residual connection followed by the final activation.
        out += identity
        out = self.relu(out)

        return out
# ResNet-style network whose stem convolution and classifier are
# BayesianConv / BayesianLinear layers and whose forward pass threads a
# `sample` flag through every stage.
class ResNet(nn.Module):

    # NOTE(review): the parameter list is cut off in this listing (no closing
    # parenthesis); `norm_layer` is used below but not declared here.
    def __init__(self, block, layers, init='xavierUniform', num_classes=7, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Running channel count and dilation, updated by _make_layer.
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        self.conv1 = BayesianConv(3,self.inplanes,kernel_size=7, stride=2, padding=3,bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        # NOTE(review): the three calls below are truncated in this listing.
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = BayesianLinear(512*block.expansion,num_classes)


        # NOTE(review): BasicBlock as shown in this thread defines no bn2
        # attribute, and Bottleneck is not shown — verify before enabling.
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    # Builds one stage of `blocks` blocks and returns it as an nn.Sequential.
    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # Trade the stride for dilation.
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # NOTE(review): this nn.Sequential(...) call is truncated in this listing.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),

        layers = []
        # Only the first block receives the stride and the downsample module.
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation))

        return nn.Sequential(*layers)
 def _forward_impl(self, x, sample):
        x = self.conv1(x,sample)
        x = self.relu(x)
        x = self.maxpool(x)

        # NOTE: layer1..layer4 are nn.Sequential containers (see _make_layer).
        # Sequential's forward takes a single input, so passing `sample` here
        # is where "forward() takes 2 positional arguments but 3 were given"
        # is raised (the traceback points at the layer1 call).
        x = self.layer1(x,sample)
        x = self.layer2(x,sample)
        x = self.layer3(x,sample)
        x = self.layer4(x,sample)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        # Log-probabilities over the class dimension.
        x = F.log_softmax(self.fc(x, sample), dim=1)
        return x

    # Public entry point; delegates to _forward_impl.
    def forward(self, x, sample=False):
        return self._forward_impl(x,sample)

def sample_elbo(self, input, target, NUM_BATCHES, samples=1):
    """Compute the ELBO-style loss averaged over ``samples`` stochastic passes.

    Args:
        input: batch passed to ``self(input, sample=True)``; its first
            dimension sizes the output buffer.
        target: targets handed to ``F.nll_loss``.
        NUM_BATCHES: divisor applied to the complexity term
            ``log_variational_posterior - log_prior``.
        samples: number of forward passes to average over.

    Returns:
        Tuple ``(loss, log_prior, log_variational_posterior,
        negative_log_likelihood)``.
    """
    # The trailing 7 is the size of the model output per example
    # (presumably the number of classes — keep in sync with the model).
    outputs = torch.zeros(samples, input.shape[0], 7).to(DEVICE)
    log_priors = torch.zeros(samples).to(DEVICE)
    log_variational_posteriors = torch.zeros(samples).to(DEVICE)
    for i in range(samples):
        # The prior/posterior terms are read right after each forward pass.
        outputs[i] = self(input, sample=True)
        log_priors[i] = self.log_prior()
        log_variational_posteriors[i] = self.log_variational_posterior()
    log_prior = log_priors.mean()
    log_variational_posterior = log_variational_posteriors.mean()
    # reduction='sum' is the non-deprecated equivalent of size_average=False.
    negative_log_likelihood = F.nll_loss(outputs.mean(0), target, reduction='sum')
    loss = (log_variational_posterior - log_prior)/NUM_BATCHES + negative_log_likelihood
    return loss, log_prior, log_variational_posterior, negative_log_likelihood
File "", line 141, in train
    loss, log_prior, log_variational_posterior, negative_log_likelihood = net.sample_elbo(data, target,NUM_BATCHES)
  File "/scratch/engn8536/project_data/u6724013/caer/", line 208, in sample_elbo
    outputs[i] = self(input, sample=True)
  File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/nn/modules/", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/scratch/engn8536/project_data/u6724013/caer/", line 177, in forward
    return self._forward_impl(x,sample)
  File "/scratch/engn8536/project_data/u6724013/caer/", line 166, in _forward_impl
    x = self.layer1(x,sample)
  File "/usr/local/anaconda3/lib/python3.6/site-packages/torch/nn/modules/", line 547, in __call__
    result = self.forward(*input, **kwargs)
TypeError: forward() takes 2 positional arguments but 3 were given


There seems to be a problem with conv3x3 of self.conv1 of BasicBlock. In the forward function of BasicBlock, you are passing two inputs to self.conv1 (out = self.conv1(x,sample)). Usually, conv3x3 is implemented in the following manner:

def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` whose padding equals its dilation."""
    conv_options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)

So it should not take your additional sample input/parameter. Otherwise, the code seems fine to me.

Hi, have you solved this problem? I have run into the same problem: I am trying to pass multiple inputs through a BasicBlock stage, as in x = self.layer1(x,sample), and it does not work.

Thank you for your time

I am facing a similar issue, though not while creating the layers but rather while calling forward.
Below is the code.

class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            # NOTE(review): this nn.Sequential(...) call is truncated in this listing.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)

    # forward takes three extra positional arguments (planes, w, h) that size
    # the state tensors below.
    def forward(self,x,planes,w,h):
        # NOTE(review): `batch_size` is not a parameter; it must come from an
        # enclosing/module scope that is not shown here.
        # The chained assignment binds both names to the same tensor object.
        c1_mem=c1_spike=torch.zeros(batch_size,planes,w, h)
        c2_mem=c2_spike=torch.zeros(batch_size,planes,w, h)
        out = self.conv1(self.bn1(x))
        # mem_update is defined elsewhere (not shown).
        c1_mem, c1_spike = mem_update(out, c1_mem, c1_spike)
        #shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv2(self.bn2(c1_spike))
        c2_mem, c2_spike = mem_update(out, c2_mem, c2_spike)
        # Residual add uses the raw input x; self.shortcut is not applied.
        out = x+c2_spike 
        return out

# NOTE(review): the definitions of self.layer1..layer4 are not shown. If they
# are nn.Sequential containers, their forward accepts a single input, which
# would explain "takes 2 positional arguments but 5 were given" — confirm.
out = self.layer1(c_spike,64,32,32)
out = self.layer2(out,128,16,16)
out = self.layer3(out,256,8,8)
out = self.layer4(out,512,4,4)
result = self.forward(*input, **kwargs)
TypeError: forward() takes 2 positional arguments but 5 were given

Could you give me suggestions?
Thank you for your time.

Your code snippet uses undefined modules such as mem_update and it’s also unclear how and where self.layerX are defined and used.
Could you post an executable code snippet, which would reproduce this issue?