TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not tuple

I can’t start training an encoder model. I always get this error and I cannot figure out where I am going wrong. The code is below.

class EncoderCell(nn.Module):
    def __init__(self):
        super(EncoderCell, self).__init__()

        self.conv = nn.Conv2d(
            3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.rnn1 = ConvGRUCell(
            64,
            256,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn2 = ConvGRUCell(
            256,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)
        self.rnn3 = ConvGRUCell(
            512,
            512,
            kernel_size=3,
            stride=2,
            padding=1,
            hidden_kernel_size=1,
            bias=False)

    def forward(self, input, hidden1, hidden2, hidden3):

        x = self.conv(input)
        hidden1 = self.rnn1(x, hidden1)
        x = hidden1[0]

        hidden2 = self.rnn2(x, hidden2)
        x = hidden2[0]

        hidden3 = self.rnn3(x, hidden3)
        x = hidden3[0]

        return x, hidden1, hidden2, hidden3


class ConvGRUCell(ConvRNNCellBase):
    def __init__(self,
                 input_channels,
                 hidden_channels,
                 kernel_size=3,
                 stride=1,
                 padding=0,
                 dilation=1,
                 hidden_kernel_size=1,
                 bias=True):
        super(ConvGRUCell, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels

        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)

        self.hidden_kernel_size = _pair(hidden_kernel_size)

        hidden_padding = _pair(hidden_kernel_size // 2)

        #gate_channels = 4 * self.hidden_channels
        gate_channels = 3 * self.hidden_channels

        self.conv_ih = nn.Conv2d(
            in_channels=self.input_channels,
            out_channels=gate_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=bias)

        self.conv_hh = nn.Conv2d(
            in_channels=self.hidden_channels,
            out_channels=gate_channels,
            kernel_size=hidden_kernel_size,
            stride=1,
            padding=hidden_padding,
            dilation=1,
            bias=bias)

        self.reset_parameters()

@ptrblck only you can help me with this problem. I really don’t know what I’m doing wrong.

I reckon the full error message and the code of the training loop would be helpful.

This is the error:

Traceback (most recent call last):
  File "train.py", line 164, in <module>
    res, encoder_h_1, encoder_h_2, encoder_h_3)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\user\Desktop\ImageCompzByGRU-master\network.py", line 43, in forward
    hidden1 = self.rnn1(x, hidden1)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\user\Desktop\ImageCompzByGRU-master\modules\conv_rnn.py", line 75, in forward
    gate_h = self.conv_hh(hidden)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py", line 345, in forward
    return self.conv2d_forward(input, self.weight)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py", line 342, in conv2d_forward
    self.padding, self.dilation, self.groups)
TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not tuple

Here is the training part:

import dataset

train_transform = transforms.Compose([
    transforms.RandomCrop((32, 32)),
    transforms.ToTensor(),
])

train_set = dataset.ImageFolder(root=args.train, transform=train_transform)

train_loader = data.DataLoader(
    dataset=train_set, batch_size=args.batch_size, shuffle=True, num_workers=0)

print('total images: {}; total batches: {}'.format(
    len(train_set), len(train_loader)))

if args.cuda:
    print ('Using GPU')

## load networks on GPU
import network

encoder = network.EncoderCell().cuda()
binarizer = network.Binarizer().cuda()
decoder = network.DecoderCell().cuda()

solver = optim.Adam(
    [
        {
            'params': encoder.parameters()
        },
        {
            'params': binarizer.parameters()
        },
        {
            'params': decoder.parameters()
        },
    ],
    lr=args.lr)


def resume(epoch=None):
    if epoch is None:
        s = 'iter'
        epoch = 0
    else:
        s = 'epoch'

    encoder.load_state_dict(
        torch.load('checkpoint/encoder_{}_{:08d}.pth'.format(s, epoch)))
    binarizer.load_state_dict(
        torch.load('checkpoint/binarizer_{}_{:08d}.pth'.format(s, epoch)))
    decoder.load_state_dict(
        torch.load('checkpoint/decoder_{}_{:08d}.pth'.format(s, epoch)))


def save(index, epoch=True):
    if not os.path.exists('checkpoint'):
        os.mkdir('checkpoint')

    if epoch:
        s = 'epoch'
    else:
        s = 'iter'

    index = 1
    torch.save(encoder.state_dict(), 'checkpoint/encoder_{}_{:08d}.pth'.format(
        s, index))

    torch.save(binarizer.state_dict(),
               'checkpoint/binarizer_{}_{:08d}.pth'.format(s, index))

    torch.save(decoder.state_dict(), 'checkpoint/decoder_{}_{:08d}.pth'.format(
        s, index))


# resume()

scheduler = LS.MultiStepLR(solver, milestones=[3, 10, 20, 50, 100], gamma=0.5)

last_epoch = 0
if args.checkpoint:
    resume(args.checkpoint)
    last_epoch = args.checkpoint
    scheduler.last_epoch = last_epoch - 1

for epoch in range(last_epoch + 1, args.max_epochs + 1):

    scheduler.step()

    for batch, data in enumerate(train_loader):
        batch_t0 = time.time()

        ## init lstm state
        encoder_h_1 = (Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()),
                       Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()))
        encoder_h_2 = (Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()))
        encoder_h_3 = (Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()))

        decoder_h_1 = (Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 2, 2).cuda()))
        decoder_h_2 = (Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()),
                       Variable(torch.zeros(data.size(0), 512, 4, 4).cuda()))
        decoder_h_3 = (Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()),
                       Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()))
        decoder_h_4 = (Variable(torch.zeros(data.size(0), 128, 16, 16).cuda()),
                       Variable(torch.zeros(data.size(0), 128, 16, 16).cuda()))

        patches = Variable(data.cuda())

        solver.zero_grad()

        losses = []

        res = patches - 0.5

        bp_t0 = time.time()

        for _ in range(args.iterations):
            encoded, encoder_h_1, encoder_h_2, encoder_h_3 = encoder(
                res, encoder_h_1, encoder_h_2, encoder_h_3)

            codes = binarizer(encoded)

            output, decoder_h_1, decoder_h_2, decoder_h_3, decoder_h_4 = decoder(
                codes, decoder_h_1, decoder_h_2, decoder_h_3, decoder_h_4)

            res = res - output
            losses.append(res.abs().mean())

        bp_t1 = time.time()

        loss = sum(losses) / args.iterations
        loss.backward()

        solver.step()

        batch_t1 = time.time()

        print(
            '[TRAIN] Epoch[{}]({}/{}); Loss: {:.6f}; Backpropagation: {:.4f} sec; Batch: {:.4f} sec'.
            format(epoch, batch + 1,
                   len(train_loader), loss.data[0], bp_t1 - bp_t0, batch_t1 -
                   batch_t0))
        print(('{:.4f} ' * args.iterations +
               '\n').format(* [l.data[0] for l in losses]))

        index = (epoch - 1) * len(train_loader) + batch

        ## save checkpoint every 500 training steps
        if index % 500 == 0:
            save(0, False)

    save(epoch)

Thank you for the information.

Could you share the forward of ConvGRUCell? I think that method fails, based on the stack trace.

import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.autograd import Variable
from torch.nn.modules.utils import _pair


class ConvRNNCellBase(nn.Module):
    def __repr__(self):
        s = (
            '{name}({input_channels}, {hidden_channels}, kernel_size={kernel_size}'
            ', stride={stride}')
        if self.padding != (0, ) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1, ) * len(self.dilation):
            s += ', dilation={dilation}'
        s += ', hidden_kernel_size={hidden_kernel_size}'
        s += ')'
        return s.format(name=self.__class__.__name__, **self.__dict__)


class ConvGRUCell(ConvRNNCellBase):
    def __init__(self,
                 input_channels,
                 hidden_channels,
                 kernel_size=3,
                 stride=1,
                 padding=0,
                 dilation=1,
                 hidden_kernel_size=1,
                 bias=True):
        super(ConvGRUCell, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels

        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)

        self.hidden_kernel_size = _pair(hidden_kernel_size)

        hidden_padding = _pair(hidden_kernel_size // 2)

        #gate_channels = 4 * self.hidden_channels
        gate_channels = 3 * self.hidden_channels

        self.conv_ih = nn.Conv2d(
            in_channels=self.input_channels,
            out_channels=gate_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=bias)

        self.conv_hh = nn.Conv2d(
            in_channels=self.hidden_channels,
            out_channels=gate_channels,
            kernel_size=hidden_kernel_size,
            stride=1,
            padding=hidden_padding,
            dilation=1,
            bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        self.conv_ih.reset_parameters()
        self.conv_hh.reset_parameters()

    def forward(self, input, hidden):
        x = input
        gate_x = self.conv_ih(input) 
        gate_h = self.conv_hh(hidden)
        
        i_r, i_i, i_n = gate_x.chunk(3, 1)
        h_r, h_i, h_n = gate_h.chunk(3, 1)
        
        resetgate = F.sigmoid(i_r + h_r)
        inputgate = F.sigmoid(i_i + h_i)
        newgate = F.tanh(i_n + (resetgate * h_n))
        
        hy = newgate + inputgate * (hidden - newgate)
        
        return hy

The error is because your encoder hidden states are tuples. For example:

encoder_h_1 = (Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()),
               Variable(torch.zeros(data.size(0), 256, 8, 8).cuda()))

You cannot pass a tuple into your conv2d layers. I am not sure exactly what you want, but I would either just delete one of the two variables or add another conv2d (a cell conv2d) for the second variable.
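For illustration, here is a minimal sketch (not code from the repo; the shapes follow the rnn1 config and the hidden init above) of why that call fails:

import torch
import torch.nn as nn

# conv_hh of rnn1: 256 hidden channels, 3 * 256 gate channels, 1x1 kernel
conv_hh = nn.Conv2d(256, 3 * 256, kernel_size=1)

# hidden state as initialized in the training loop: a tuple of two tensors
hidden1 = (torch.zeros(4, 256, 8, 8), torch.zeros(4, 256, 8, 8))

# conv_hh(hidden1)          # TypeError: conv2d(): argument 'input' ... must be Tensor, not tuple
out = conv_hh(hidden1[0])    # works: a single 4D tensor
print(out.shape)             # torch.Size([4, 768, 8, 8])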

Can you share your edit to my network with me? Thanks for your help.

It depends on what you are trying to do. Are you trying to make an LSTM with CNNs? If so, then I would recommend the following: add a new conv layer to your ConvGRUCell:

self.conv_ch = nn.Conv2d(
    in_channels=self.hidden_channels,
    out_channels=gate_channels,
    kernel_size=hidden_kernel_size,
    stride=1,
    padding=hidden_padding,
    dilation=1,
    bias=bias)

Then in the forward function you can do this:

x = input
gate_x = self.conv_ih(input)
gate_h = self.conv_hh(hidden[0])
gate_c = self.conv_ch(hidden[1])

i_r, i_i, i_n = gate_x.chunk(3, 1)
h_r, h_i, h_n = gate_h.chunk(3, 1)
c_r, c_i, c_n = gate_c.chunk(3, 1)

resetgate = F.sigmoid(i_r + h_r + c_r)
inputgate = F.sigmoid(i_i + h_i + c_i)
newgate = F.tanh(i_n + (resetgate * (h_n + c_n)))

hy = newgate + inputgate * (hidden[0] - newgate)

return hy

Or variants of that. You could also just remove one of the variables, like this:

encoder_h_1 = Variable(torch.zeros(data.size(0), 256, 8, 8).cuda())

That would be less complicated. Either way, it should fix the problem.
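If you go with the simpler option, the hidden-state init in the training loop would look roughly like this (just a sketch; the sizes are the ones already used in the loop, and the decoder states would need the same treatment):

encoder_h_1 = Variable(torch.zeros(data.size(0), 256, 8, 8).cuda())
encoder_h_2 = Variable(torch.zeros(data.size(0), 512, 4, 4).cuda())
encoder_h_3 = Variable(torch.zeros(data.size(0), 512, 2, 2).cuda())

# Note: EncoderCell.forward currently does x = hidden1[0]; with a single tensor per
# hidden state it would use the returned tensor directly instead of indexing into it.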

I’ll try it and let you know right away. Thank you for your time.

I have another error:

RuntimeError: Expected 4-dimensional input for 4-dimensional weight 1536 256 3 3, but got 3-dimensional input of size [256, 8, 8] instead

You need a batch size dimension. Which line is this error on?
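For reference, the conv layer expects a 4D input shaped [batch, channels, height, width]. A minimal sketch of the difference (the channel sizes follow the weight in the error message):

import torch
import torch.nn as nn

conv = nn.Conv2d(256, 1536, kernel_size=3, padding=1)   # 1536 = 3 * 512 gate channels

ok = torch.zeros(4, 256, 8, 8)       # 4D: [batch, channels, H, W]
print(conv(ok).shape)                # torch.Size([4, 1536, 8, 8])

bad = torch.zeros(256, 8, 8)         # 3D, like the input in the error message
# conv(bad)                          # RuntimeError: Expected 4-dimensional input ...
print(conv(bad.unsqueeze(0)).shape)  # unsqueeze(0) adds the missing batch dimension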

  File "train.py", line 164, in <module>
    res, encoder_h_1, encoder_h_2, encoder_h_3)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\user\Desktop\master\network.py", line 46, in forward
    hidden2 = self.rnn2(x, hidden2)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\user\Desktop\master\modules\conv_rnn.py", line 83, in forward
    gate_x = self.conv_ih(input)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py", line 345, in forward
    return self.conv2d_forward(input, self.weight)
  File "C:\Users\user\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\conv.py", line 342, in conv2d_forward
    self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight 1536 256 3 3, but got 3-dimensional input of size [256, 8, 8] instead

Can you print out the shapes of the x and hidden2 that this line refers to:

File "C:\Users\user\Desktop\master\network.py", line 46, in forward
    hidden2 = self.rnn2(x, hidden2)
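Something like this, placed right above that line in EncoderCell.forward, would show them (just a debugging sketch):

# debugging sketch inside EncoderCell.forward, right before the failing call
print('x:', x.shape)
print('hidden2:', hidden2.shape if torch.is_tensor(hidden2) else [h.shape for h in hidden2])
hidden2 = self.rnn2(x, hidden2)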