RuntimeError: expected stride to be a single integer value or a list of 3 values to match the convolution dimensions, but got stride=[1, 1]

Traceback (most recent call last):
  File "crnn_main.py", line 217, in <module>
    cost = trainBatch(crnn, criterion, optimizer)
  File "crnn_main.py", line 200, in trainBatch
    preds = crnn(image.unsqueeze_(0))
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jethodh/Documents/crnn.pytorch-master/models/crnn.py", line 70, in forward
    conv = self.cnn(input)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/container.py", line 91, in forward
    input = module(input)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: expected stride to be a single integer value or a list of 3 values to match the convolution dimensions, but got stride=[1, 1]

Which PyTorch version are you using?
Could you post the model definition and check that your input data has a batch dimension, i.e. is shaped [batch_size, channels, height, width]?
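
A quick way to verify this (just a sketch with made-up sizes, not taken from your script) is to print the number of dimensions and add the batch dimension with unsqueeze(0) if it's missing:

import torch

image = torch.randn(1, 32, 100)    # [channels, height, width] -- example values only
print(image.dim())                 # 3 -> the batch dimension is missing
if image.dim() == 3:
    image = image.unsqueeze(0)     # -> [1, channels, height, width]
print(image.shape)                 # torch.Size([1, 1, 32, 100])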

Sorry for the late reply.
I am using PyTorch 0.4.
The code for my model is:

class BidirectionalLSTM(nn.Module):

    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()

        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output


class CRNN(nn.Module):

    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16

        self.cnn = cnn
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))

    def forward(self, input):
        # conv features
        conv = self.cnn(input.unsqueeze(0))
        b, c, h, w = conv.size()
        print(b,c,h,w)
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]

        # rnn features
        output = self.rnn(conv)

        return output

[batch_size, channels, height, width] = [64, 512, 1, 26]

I"m not sure what input share you are using, but based on your code comments I just tried this and it works:

x = torch.randn(512, 32, 128)
output = model(x)

Could you post your input shape which is causing the error?
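
For example, you could print it right before the failing call (the variable name follows your traceback; adjust as needed):

print(image.shape)                   # should be 4-D: [batch_size, channels, height, width]
preds = crnn(image.unsqueeze_(0))    # the call from your traceback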

I made some changes in my scripts (removed a couple of unsqueeze's) and it ran until this error:

[0/20][500/1910] Loss: 8.526760
Start val
(64, 64)
/home/jethodh/.local/lib/python3.6/site-packages/skimage/transform/_warps.py:105: UserWarning: The default mode, 'constant', will be changed to 'reflect' in skimage 0.15.
  warn("The default mode, 'constant', will be changed to 'reflect' in "
/home/jethodh/.local/lib/python3.6/site-packages/skimage/transform/_warps.py:110: UserWarning: Anti-aliasing will be enabled by default in skimage 0.15 to avoid aliasing artifacts when down-sampling images.
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
(64, 64)
Traceback (most recent call last):
  File "crnn_main.py", line 227, in <module>
    val(crnn, test_dataset, criterion)
  File "crnn_main.py", line 169, in val
    preds = crnn(image)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jethodh/Documents/crnn.pytorch-master/models/crnn.py", line 70, in forward
    conv = self.cnn(input.unsqueeze_(2))
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/container.py", line 91, in forward
    input = module(input)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jethodh/.local/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward
    self.padding, self.dilation, self.groups)
RuntimeError: expected stride to be a single integer value or a list of 1 values to match the convolution dimensions, but got stride=[1, 1]

Could you provide a code snippet reproducing this error?
I'm still unsure how to trigger it using your model definition.

@ptrblck I have added the code as requested.

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[
        :opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))


def trainBatch(net, criterion, optimizer):
    data = train_iter.next()
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)
    
    preds = crnn(image.unsqueeze(2))
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost


for epoch in range(opt.niter):
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()

        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1

        if i % opt.displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()

        if i % opt.valInterval == 0:
            val(crnn, test_dataset, criterion)

Thanks for the code.
Unfortunately, it uses some unknown functions and classes, so I cannot run it on my machine.
Could you create a dummy code like this:

data = torch.randn(YOUR_SIZE)
model = CRNN(YOUR_ARGS)
output = model(data) # error here?

This would make it possible to debug your model architecture.
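
For instance, something along these lines would do, using the CRNN definition you posted (all sizes here are placeholders, not taken from your script):

import torch

model = CRNN(imgH=32, nc=1, nclass=37, nh=256)   # placeholder arguments
data = torch.randn(64, 1, 32, 100)               # [batch_size, channels, height, width]
output = model(data)                             # error here?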

Also, it seems the error is thrown during validation.
If so, could you check that you are passing the same input shapes as during training?

Hi,

I'm currently facing the same issue when customizing an nn.Conv2d kernel:

Traceback (most recent call last):
  File "/home/Documents/code/EMVD/temp.py", line 21, in forward
    x = self.conv1(x)
  File "/home/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/anaconda3/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 399, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/home/anaconda3/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 395, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: expected stride to be a single integer value or a list of 1 values to match the convolution dimensions, but got stride=[1, 1]
torch.Size([1, 4, 128, 128])

My code is below:

import traceback

import numpy as np
import torch
import torch.nn as nn


class MyModel(nn.Module):
    def __init__(self, kernel_stack):
        super(MyModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=4, out_channels=4, kernel_size=4, bias=True)
        # Add other layers here

        # Initialize conv1 with custom kernel
        kernel_stack = torch.Tensor(kernel_stack)

        self.conv1.weight = nn.Parameter(kernel_stack)

    def forward(self, x):
        try:
            x = self.conv1(x)
        except:
            traceback.print_exc()

        return x


if __name__ == '__main__':

    # Define your custom kernel
    kernel = [[0.5, 0.5, 0.5, 0.5],
              [-0.5, 0.5, 0.5, -0.5],
              [0.7, 0.1, -0.1, -0.7],
              [-0.1, 0.7, -0.7, 0.1]]
    kernel = np.array(kernel)
    kernel_stack = np.stack([kernel, kernel, kernel, kernel], axis=1)


    model = MyModel(kernel_stack)
    model.eval()

    x = torch.randn(1, 4, 128, 128)

    with torch.no_grad():
        output = model(x)
    print(output.shape)

Your kernel_stack has only 3 dimensions, which would be a kernel for nn.Conv1d.
Since you are using nn.Conv2d, the checks raise this error.
nn.Conv2d kernels have the shape [out_channels, in_channels, height, width], so you would need to add the missing dimension (e.g. via nn.Parameter(kernel_stack.expand(4, -1, -1, -1))).
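
For reference, a minimal sketch of that fix (adapted from the code above; the stacking axis is only illustrative), building the weight as [out_channels, in_channels, height, width] before assigning it:

import numpy as np
import torch
import torch.nn as nn

kernel = np.array([[0.5, 0.5, 0.5, 0.5],
                   [-0.5, 0.5, 0.5, -0.5],
                   [0.7, 0.1, -0.1, -0.7],
                   [-0.1, 0.7, -0.7, 0.1]])

# one 4x4 kernel per input channel -> [in_channels, height, width]
kernel_stack = torch.as_tensor(np.stack([kernel] * 4, axis=0), dtype=torch.float32)
# add the missing out_channels dimension -> [4, 4, 4, 4]
weight = kernel_stack.expand(4, -1, -1, -1).clone()

conv1 = nn.Conv2d(in_channels=4, out_channels=4, kernel_size=4, bias=True)
conv1.weight = nn.Parameter(weight)

x = torch.randn(1, 4, 128, 128)
with torch.no_grad():
    out = conv1(x)
print(out.shape)   # torch.Size([1, 4, 125, 125])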

Yup, it works. Thanks bro