Expected input batch_size (1) to match target batch_size (2)

import numpy as np
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.LSTM(
            input_size=6,      # 6 features per time step
            hidden_size=6,
            num_layers=2,
            batch_first=True,  # Input shape: [batch, seq_len, features]
        )

    def forward(self, x):
        out, (h_n, h_c) = self.rnn(x, None)
        print(out)
        return out[:, -1, :]    # Return output at last time-step

X = torch.FloatTensor(X)
y = torch.LongTensor(Y)

rnn = RNN()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

for j in range(500):
    for i, item in enumerate(X):
        item = item.unsqueeze(0)    # Add a batch dimension: [seq_len, 6] -> [1, seq_len, 6]
        print(item.shape)
        output = rnn(item)
        target = y[i]
        target = target.squeeze_()
        print(output.shape, target.shape)
        loss = loss_func(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if j % 5 == 0:
        print('Loss: ', np.average(loss.detach()))

I am using somebody else's code, and I am getting the error mentioned in my subject.

The shapes of output and target are the following:

output.shape = torch.Size([1, 6])
target.shape = torch.Size([2])

output = tensor([[ 0.0463, -0.0402, -0.0437,  0.0302, -0.0994,  0.0320]],
       grad_fn=<SliceBackward>)

target = tensor([0, 0])

This error comes from your loss function nn.CrossEntropyLoss(), which requires the batch size of the input tensor (here: 1) to match the batch size of the target tensor (here: 2).

Since you add a batch dimension to the input tensor with .unsqueeze(0), the input batch_size will always be 1.
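
For reference, here is a minimal sketch of the shapes nn.CrossEntropyLoss expects: an input of shape [batch_size, num_classes] holding raw scores (logits), and a target of shape [batch_size] holding class indices. The tensor names below are just for illustration:

import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss()

logits = torch.randn(1, 6)         # Input: [batch_size=1, num_classes=6] raw scores
target = torch.tensor([3])         # Target: [batch_size=1] class indices -> works
loss = loss_func(logits, target)

bad_target = torch.tensor([0, 0])  # [batch_size=2] does not match the input batch of 1
# loss_func(logits, bad_target)    # raises: Expected input batch_size (1) to match target batch_size (2)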

What are your targets for one sample?


Thanks for your reply.

input = tensor([[[0., 0., 0., 0., 0., 1.]]])
output = tensor([[0, 0]])

That is the input and target for one sample.

Not sure what you are trying to do, but your output tensor has size 2 while the RNN model's output has size 6. You may want to add a fully connected linear layer to match the desired output size.

Modifying your code slightly:

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(
            input_size=6,
            hidden_size=6,
            num_layers=2,
            batch_first=True,
        )
        
        self.fc = nn.Linear(6, 2)    # Map the 6-dim LSTM output to the 2 desired classes

    def forward(self, x):
        out, (h_n, h_c) = self.rnn(x, None)
        #print(out)
        return self.fc(out[:, -1, :])    # Return output at last time-step

Here is an example notebook of using the RNN model with the 1 sample example you provided: https://colab.research.google.com/drive/1N-TLRFvvbmSqoyFrhj9_ZcvKEdeVaS3a
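
For completeness, here is a quick sanity check of the modified model on the single sample you posted. Note that target = torch.tensor([0]) is an assumption on my part: nn.CrossEntropyLoss expects one class index per sample, not the pair [0, 0] you showed.

import torch
import torch.nn as nn

rnn = RNN()                                      # The modified model defined above
x = torch.tensor([[[0., 0., 0., 0., 0., 1.]]])   # [1, 1, 6]: batch=1, seq_len=1, features=6

output = rnn(x)                                  # [1, 2] after the fc layer
target = torch.tensor([0])                       # [1]: one class index (assumed, see note above)

loss = nn.CrossEntropyLoss()(output, target)     # Batch sizes now match (1 == 1)
print(output.shape, target.shape, loss.item())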

My input size is 6, but my output size is 2.

My output vector has size 2, not 1.