RuntimeError: Given groups=1, weight of size 256 1024 1 1, expected input[32, 512, 15, 15] to have 1024 channels, but got 512 channels instead

This is my code:
import math

import torch.nn as nn


class ResNet(nn.Module):
    def __init__(self, block, layers, used_layers):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=0,  # 3
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)

        self.feature_size = 128 * block.expansion
        self.used_layers = used_layers
        layer3 = True if 3 in used_layers else False
        layer4 = True if 4 in used_layers else False

        if layer3:
            self.layer3 = self._make_layer(block, 256, layers[2],
                                           stride=1, dilation=2)  # 15x15, 7x7
            self.feature_size = (256 + 128) * block.expansion
        else:
            self.layer3 = lambda x: x  # identity

        if layer4:
            self.layer4 = self._make_layer(block, 512, layers[3],
                                           stride=1, dilation=4)  # 7x7, 3x3
            self.feature_size = 512 * block.expansion
        else:
            self.layer4 = lambda x: x  # identity

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        self.smooth1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.latlayer1 = nn.Conv2d(2048, 512, kernel_size=1, stride=1, padding=0)
        #nn.init.xavier_uniform(self.latlayer1.weight)
        #nn.init.constant(self.latlayer1.bias, 0.1)
        self.latlayer2 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0)

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
        downsample = None
        dd = dilation
        if stride != 1 or self.inplanes != planes * block.expansion:
            if stride == 1 and dilation == 1:
                downsample = nn.Sequential(
                    nn.Conv2d(self.inplanes, planes * block.expansion,
                              kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(planes * block.expansion),
                )
            else:
                if dilation > 1:
                    dd = dilation // 2
                    padding = dd
                else:
                    dd = 1
                    padding = 0
                downsample = nn.Sequential(
                    nn.Conv2d(self.inplanes, planes * block.expansion,
                              kernel_size=3, stride=stride, bias=False,
                              padding=padding, dilation=dd),
                    nn.BatchNorm2d(planes * block.expansion),
                )

        layers = []
        layers.append(block(self.inplanes, planes, stride,
                            downsample, dilation=dilation))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, dilation=dilation))

        return nn.Sequential(*layers)

    def _upsample_add(self, x, y):
        '''Upsample and add two feature maps.
        Args:
          x: (Variable) top feature map to be upsampled.
          y: (Variable) lateral feature map.
        Returns:
          (Variable) added feature map.
        Note: in PyTorch, when the input size is odd, the feature map upsampled
        with `F.upsample(..., scale_factor=2, mode='nearest')`
        may not match the lateral feature map size.
        e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16]
        So we choose bilinear upsampling, which supports arbitrary output sizes.
        '''
        _, _, H, W = y.size()
        return nn.functional.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x_ = self.relu(x)
        x = self.maxpool(x_)
        # Bottom-up
        p1 = self.layer1(x)
        p2 = self.layer2(p1)
        p3 = self.layer3(p2)
        p4 = self.layer4(p3)
        # Top-down
        p5 = self.latlayer1(p4)
        p6 = self._upsample_add(p5, self.latlayer2(p3))
        p6 = self.smooth1(p6)
        p7 = self._upsample_add(p6, p2)
        p7 = self.smooth2(p7)
        out = [x_, p1, p7, p6, p4]
        out = [out[i] for i in self.used_layers]
        if len(out) == 1:
            return out[0]
        else:
            return out

Thanks for your reply!

Could you check which layer raises this exception, and then either use the layer at the appropriate place in your model or change the number of channels accordingly?
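
For example (a rough sketch, not from the original post: the model variable and input shape below are placeholders), you could register forward hooks that print every submodule's output shape and run one dummy batch; the last shape printed before the exception tells you which activation has 512 channels where 1024 are expected:

import torch
import torch.nn as nn

def add_shape_hooks(model):
    # Print the output shape of every Conv2d and Sequential submodule.
    handles = []
    for name, module in model.named_modules():
        if isinstance(module, (nn.Conv2d, nn.Sequential)):
            def hook(mod, inp, out, name=name):
                print(f"{name}: {tuple(out.shape)}")
            handles.append(module.register_forward_hook(hook))
    return handles

# handles = add_shape_hooks(model)
# model(torch.randn(2, 3, 255, 255))   # crashes, but the printed shapes show where
# for h in handles:
#     h.remove()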

I also have this problem. I am trying to reproduce code I found in a blog post, but I am getting the error RuntimeError: given groups=1, weight of size [512, 1024, 1, 1], expected input[4, 512, 188, 188] to have 1024 channels, but got 512 channels instead…
Here is the network:

from abc import ABC

import torch
import torch.nn as nn
import torchvision


class ConvRelu(nn.Module):
    def __init__(self, in_depth, out_depth):
        super(ConvRelu, self).__init__()
        self.conv = torch.nn.Conv2d(in_depth, out_depth, kernel_size=3, stride=1, padding=1)
        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.activation(x)
        return x
class DecoderBlock(nn.Module, ABC):
    def __init__(self, in_depth, middle_depth, out_depth):
        super(DecoderBlock, self).__init__()
        self.conv_relu = ConvRelu(in_depth, middle_depth)
        self.conv_transpose = nn.ConvTranspose2d(middle_depth, out_depth, kernel_size=4, stride=2, padding=1)
        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv_relu(x)
        x = self.conv_transpose(x)
        x = self.activation(x)
        return x
class UNetResNet(nn.Module):
    def __init__(self, num_classes):
        super(UNetResNet, self).__init__()
        self.encoder = torchvision.models.resnet101(pretrained=True)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Sequential(self.encoder.conv1, self.encoder.bn1, self.encoder.relu, self.pool)
        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv4 = self.encoder.layer4

        self.pool = nn.MaxPool2d(2, 2)
        self.center = DecoderBlock(2048, 512, 256)

        self.dec5 = DecoderBlock(2048 + 256, 512, 256)
        self.dec4 = DecoderBlock(1024 + 256, 512, 256)
        self.dec3 = DecoderBlock(512 + 256, 256, 64)
        self.dec2 = DecoderBlock(256 + 64, 128, 128)
        self.dec1 = DecoderBlock(128, 128, 32)
        self.dec0 = ConvRelu(32, 32)
        self.final = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        pool = self.pool(conv5)
        center = self.center(pool)

        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        return self.final(dec0)

and the training loop

for epoch_idx in range(2):
    loss_batches = []
    for batch_idx, data in enumerate(tqdm(train_dataloader, desc = "training")):
        imgs = torch.autograd.Variable(data['sat_img'].cuda())
        #imgs = imgs.view(1, -1)
        #imgs.unsqueeze(0)
        #imgs.squeeze()
        masks = torch.autograd.Variable(data['map_img'].cuda())
        masks.unsqueeze_(0)
        #print(imgs.size())


        y = unet_resnet(imgs)
        loss = cross_entropy_loss(y, masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_batches.append(loss.data.cpu().numpy())
    print('epoch: ' + str(epoch_idx) + ' training loss: ' + str(np.sum(loss_batches)))

model_file = './unet-' + str(epoch_idx)
unet_resnet = unet_resnet.cpu()
torch.save(unet_resnet.state_dict(), model_file)
unet_resnet = unet_resnet.cuda()
print('model saved')

According to the error, something is going wrong in the conv4 layer.
The input is a PIL image with 3 channels and my batch size is 4.
How can I fix this?

You are assigning self.encoder.layer3 to self.conv4, and in the next line you are replacing it with self.encoder.layer4, so you might want to assign the latter to self.conv5.
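
That is, the encoder assignments would look roughly like this (the channel counts refer to torchvision's resnet101; they are not stated in the post):

self.conv2 = self.encoder.layer1  # 256-channel output
self.conv3 = self.encoder.layer2  # 512-channel output
self.conv4 = self.encoder.layer3  # 1024-channel output
self.conv5 = self.encoder.layer4  # 2048-channel output, matching self.dec5's 2048 + 256 input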


Thanks very much @ptrblck. That worked, but I got another error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-5-2439e6bd56c4> in <module>()
     34 
     35 
---> 36         y = unet_resnet(imgs)
     37         loss = cross_entropy_loss(y, masks)
     38 

1 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

/content/drive/My Drive/Colab Notebooks/newExtract/network.py in forward(self, x)
     67         dec5 = self.dec5(torch.cat([center, conv5], 1))
     68         dec4 = self.dec4(torch.cat([dec5, conv4], 1))
---> 69         dec3 = self.dec3(torch.cat([dec4, conv3], 1))
     70         dec2 = self.dec2(torch.cat([dec3, conv2], 1))
     71         dec1 = self.dec1(dec2)

RuntimeError: Sizes of tensors must match except in dimension 2. Got 63 and 64 (The offending index is 0)

Please, how can I make them both 64?

You could either change the conv/pooling setup so that both outputs have the same spatial size, pad the smaller output activation, or use an interpolation technique.
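
As a rough sketch of the interpolation option (the helper below is my own, not code from the thread): resize the decoder output to the skip connection's spatial size just before concatenating.

import torch
import torch.nn.functional as F

def cat_matched(decoded, skip):
    # Match decoded's H and W to the skip feature map, then concatenate channels.
    if decoded.shape[-2:] != skip.shape[-2:]:
        decoded = F.interpolate(decoded, size=skip.shape[-2:],
                                mode='bilinear', align_corners=False)
    return torch.cat([decoded, skip], dim=1)

# e.g. in UNetResNet.forward:
# dec3 = self.dec3(cat_matched(dec4, conv3))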


I figured out the input images were too big (500 x 500), so I cropped them to 256 x 256 and the error disappeared. Thanks for your support.
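
For reference, a crop like that can be done with a standard torchvision transform; this is only an illustrative sketch, since the exact preprocessing used above is not shown in the thread:

from torchvision import transforms

preprocess = transforms.Compose([
    transforms.CenterCrop(256),  # crop the 500 x 500 inputs down to 256 x 256
    transforms.ToTensor(),
])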