RuntimeError: running_mean should contain 64 elements not 96

Hi,

I am trying to train pnasnet5large from scratch on my custom dataset, using the pretrainedmodels package. I have modified the input and final layers as suggested on this site:
https://github.com/Cadene/pretrained-models.pytorch.

My code snippet
# Load PNASNet-5-Large with ImageNet weights, then swap the stem convolution,
# the pooling layer, and the classifier head for the custom dataset.
model = pnasnet5large(pretrained="imagenet")
# NOTE(review): 64 output channels is the value as originally posted; the
# replies below resolve the reported running_mean error by using 96 here.
model.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
model.avgpool = nn.AdaptiveAvgPool2d(1)
model.last_linear = nn.Sequential(nn.BatchNorm1d(1024),nn.Dropout(0.5),nn.Linear(1024, config.num_classes))

My input has 4 channels and there are 28 classes; it is a multi-label classification problem.

but i am getting the following error
RuntimeError: running_mean should contain 64 elements not 96

What could be the issue?

Based on the error message, it looks like the BatchNorm layer after conv1 is using 96 input channels, while you are passing 64.
Try to change the number of kernels to 96 and try it again.

6 Likes

yes the issue was solved by specifying 96 filters in the conv1 layer

I have a similar error.

RuntimeError: running_mean should contain 64 elements not 32

in forward(self, x1)
31
32 x1 = self.conv1(x1)
---> 33 x1 = self.bn1(x1)
34 x1 = self.relu1(x1)
35 x1 = self.maxpool1(x1)

Initial 2 layers of my model.

class Net(nn.Module):
    """First two convolutional stages of the poster's model (snippet only).

    Only the constructor was posted; the ``forward`` method is not part of
    this excerpt.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Stage 1: 3 -> 64 channels, 3x3 conv with stride 2, then BN/ReLU/max-pool.
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=64,kernel_size=(3,3),stride=(2,2),padding=(1,1))
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.relu1=nn.ReLU()
        self.maxpool1= nn.MaxPool2d(kernel_size=(3,3),stride=(1,1),padding=(1,1))

        # Stage 2: 64 -> 64 channels.
        self.conv2 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=(3,3),stride=(2,2),padding=(1,1))
        # NOTE(review): bn1 / relu1 / maxpool1 are assigned a second time here,
        # replacing the modules created for stage 1. The names are kept exactly
        # as posted because the reply that follows discusses this reuse.
        self.bn1 = nn.BatchNorm2d(num_features=64)
        self.relu1=nn.ReLU()
        self.maxpool1= nn.MaxPool2d(kernel_size=(3,3),stride=(1,1),padding=(1,1))

The first two layers of your model are working fine:

class Net(nn.Module):
    """Minimal model holding only the first convolution and its batch norm."""

    def __init__(self):
        super(Net, self).__init__()
        # 3x3 convolution, stride 2, padding 1: 3 input channels -> 64 output channels.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
        # Batch norm over the 64 channels produced by conv1.
        self.bn1 = nn.BatchNorm2d(64)

    def forward(self, x):
        """Apply ``conv1`` followed by ``bn1`` and return the result."""
        return self.bn1(self.conv1(x))

# Smoke test: push a random batch of two 3-channel 24x24 inputs through the model.
model = Net()
x = torch.randn(2, 3, 24, 24)
out = model(x)

However, note that you are overriding the batchnorm layers by using the same variable name:

self.conv1 = nn.Conv2d(in_channels=3,out_channels=64,kernel_size=(3,3),stride=(2,2),padding=(1,1))
self.bn1 = nn.BatchNorm2d(num_features=64)
[...]
self.conv2 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=(3,3),stride=(2,2),padding=(1,1))
self.bn1 = nn.BatchNorm2d(num_features=64)

Here self.bn1 is reused, which won’t raise an error, since both layers use the same number of features, but I guess you might be overriding the layer again in a part of the code that isn’t shown, which would explain the shape mismatch.

2 Likes

I have read this topic and I have the same problem.
My code is as follows:
class desNet(nn.Module):

def __init__(self):
    super(desNet, self).__init__()
    # 输入层 CBRP
    self.conv0 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn0 = nn.BatchNorm2d(num_features=64)
    self.relu0 = nn.ReLU(inplace=True)
    self.pool0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    block_transition = {'b1': [64, 96, 128, 160, 192, 224],
                        't1': [256, 128],
                        'b2': [128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480],
                        't2': [512, 256],
                        'b3': [256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992],
                        't3': [1024, 512],
                        'b4': [512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992]
                        }

    self.desblock_net = nn.Sequential()
    for i, name in enumerate(block_transition):
        if list(name)[0] == 'b':
            nums = block_transition.get(name)
            for ik, n in enumerate(nums):

                self.desblock_net.add_module("{0}_{1}_BN".format(name, ik), nn.BatchNorm2d(num_features=n))
                self.desblock_net.add_module("{0}_{1}_Conv".format(name, ik), nn.Conv2d(in_channels=n, out_channels=128, kernel_size=1, stride=1, bias=False))
                self.desblock_net.add_module("{0}_{1}_Relu".format(name, ik), nn.ReLU(inplace=True))
                self.desblock_net.add_module("{0}_{1}_bn".format(name, ik), nn.BatchNorm2d(num_features=128))
                self.desblock_net.add_module("{0}_{1}_conv".format(name, ik), nn.Conv2d(in_channels=128, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False))
                self.desblock_net.add_module("{0}_{1}_relu".format(name, ik), nn.ReLU(inplace=True))

        elif list(name)[0] == 't':

            nums = block_transition[name]
            self.desblock_net.add_module("{0}_{1}_bn".format(name, i), nn.BatchNorm2d(num_features=nums[0]))
            self.desblock_net.add_module("{0}_{1}_relu".format(name, i), nn.ReLU(inplace=True))
            self.desblock_net.add_module("{0}_{1}_conv".format(name, i), nn.Conv2d(in_channels=nums[0], out_channels=nums[1], kernel_size=1, stride=1, bias=False))
            self.desblock_net.add_module("{0}_{1}_avgpool".format(name, i), nn.AvgPool2d(kernel_size=2, stride=2, padding=0))

分类层
self.bn = nn.BatchNorm2d(num_features=1024)
self.fc = nn.Linear(in_features=1024, out_features=1000)

def forward(self, input):
out = self.conv0(input)
out = self.bn0(out)
out = self.relu0(out)
out = self.pool0(out)
print(out.size())
out = self.desblock_net(input)
out = self.bn(out)
out = self.fc(out)
return out
if __name__ == '__main__':
    # Smoke test: build the network, print its layout, and run one random batch.
    mydesnet = desNet()
    print(mydesnet)
    x = torch.rand(10, 3, 224, 224)
    out = mydesnet(x)
    print(out.size())

Hi, I have read this topic and I have the same problem.
My code is as follows.
Each of my inputs is a tensor of size (512, 20).

class MyModel(nn.Module):
  """Parallel Conv1d branches whose outputs are concatenated, batch-normalized and projected to one value.

  One branch is created per layer index ``i`` in ``range(num_layers)``, using
  kernel size ``i + 1``; every branch maps ``input_dim`` channels to
  ``hidden_dim`` channels.
  """

  def __init__(self, input_dim: int = 512, hidden_dim: int = 50, num_layers: int = 4, dropout: float = 0.5):
    """Store the hyper-parameters and build the head (``fc``, ``bn``) and the conv branches."""
    super(MyModel, self).__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.dropout = dropout
    # Both head layers are sized for num_layers * hidden_dim features.
    self.fc = nn.Linear(in_features=self.num_layers * self.hidden_dim, out_features=1)
    self.bn = nn.BatchNorm1d(num_features=self.num_layers * self.hidden_dim)

    # One Conv1d per branch; kernel sizes run from 1 to num_layers.
    convs_list = [nn.Conv1d(in_channels=self.input_dim, out_channels=self.hidden_dim, kernel_size=i+1) for i in range(self.num_layers)]
    self.convs = nn.ModuleList(modules=convs_list)

  def reset_parameters(self):
        """Re-initialize the parameters of every conv branch."""
        for conv in self.convs:
            conv.reset_parameters()
    
  def forward(self, x):
    """Run every conv branch on ``x``, concatenate the results, then apply ``bn`` and ``fc``."""
    out = []
    for i in range(self.num_layers):
      x_i = self.convs[i](x)
      x_i = F.leaky_relu(x_i, negative_slope=0.02)
      # Dropout is only active while the module is in training mode.
      x_i = F.dropout(x_i, self.dropout, self.training)
      # NOTE(review): this reduces dim 0; for a batched (N, C, L) Conv1d output
      # that is the batch dimension, not the length dimension -- confirm intent.
      x_i = x_i.mean(dim=0)
      out.append(x_i)
    # NOTE(review): the branches use different kernel sizes, so their lengths
    # along the last dimension differ; after concatenating along dim 1 the
    # width need not equal the num_layers * hidden_dim that self.bn expects
    # -- TODO confirm against the reported running_mean error.
    out = torch.cat(out, dim=1)
    out = self.bn(out)
    out = self.fc(out)
    return out

I don’t quite understand your code, since you are averaging the batch dimension in:

x_i = x_i.mean(dim=0)

and are creating a new tensor in:

out = torch.cat(out, dim=1)

which would have shape of [50, *] where * is created by the temporal dimension in the input.
The error you are seeing is then raised in self.bn(out) so could you explain your use case a bit more, please?

I am trying to build a model in PyTorch that I have already implemented in Keras.

The figure below shows what I want to build.