THIndexTensor_(size)(target, 0) == batch_size'

I was trying to build a language model but got the error `THIndexTensor_(size)(target, 0) == batch_size`. Here is the code:

    import numpy as np
    import torch
    from torch.autograd import Variable
    import torch.nn as nn

    # Placeholder corpus. Iterating a str yields single characters, so the
    # vocabulary below is the set of distinct characters in `data`.
    data = '...'
    words = list(set(data))
    # Index -> entry table first, then invert it for the entry -> index table.
    ind2word = dict(enumerate(words))
    word2ind = {token: idx for idx, token in ind2word.items()}

    class RNN(nn.Module):
        """Hand-rolled recurrent cell that is applied one time step per call.

        The current input row is concatenated with the previous hidden state.
        Two linear layers derive the next hidden state and a preliminary
        output; those are concatenated again and projected to log-probabilities.
        """

        def __init__(self, input_size, hidden_size, output_size):
            """Create the linear layers.

            Args:
                input_size: Vocabulary size. The layers accept
                    ``input_size - 1`` features per step, matching the script
                    in this post, which slices the last column off every
                    one-hot row before feeding it in.
                hidden_size: Width of the hidden state.
                output_size: Number of scores produced per step.
            """
            super(RNN, self).__init__()
            self.hidden_size = hidden_size
            self.in2h = nn.Linear(input_size-1+hidden_size, hidden_size)
            self.in2o = nn.Linear(input_size-1+hidden_size, output_size)
            self.o2o = nn.Linear(hidden_size+output_size, output_size)
            self.softmax = nn.LogSoftmax()

        def forward(self, inputs, hidden):
            """Run a single time step.

            Args:
                inputs: 2-D float tensor with ``input_size - 1`` columns.
                hidden: Previous hidden state with ``hidden_size`` columns;
                    it is cast to float because ``init_hidden`` returns
                    integer zeros.

            Returns:
                Tuple ``(output, hidden)``: log-probabilities over the output
                classes and the next hidden state.
            """
            # Join input and hidden state along the feature axis (dim 1).
            input_combined = torch.cat((inputs, hidden.float()), 1)
            hidden = self.in2h(input_combined)
            output = self.in2o(input_combined)
            # Mix the new hidden state with the preliminary output.
            output_combined = torch.cat((hidden, output), 1)
            output = self.o2o(output_combined)
            output = self.softmax(output)
            return output, hidden

        def init_hidden(self):
            """Return an all-zero initial hidden state of shape (1, hidden_size)."""
            return Variable(torch.from_numpy(np.zeros((1, self.hidden_size))).type(torch.LongTensor))

    def form_onehot(sent):
        """Encode a sequence as a matrix of one-hot rows.

        Args:
            sent: Sequence of vocabulary entries; each must be a key of the
                module-level ``word2ind`` mapping.

        Returns:
            ``torch.LongTensor`` of shape ``(len(sent), len(words))`` whose
            i-th row has a single 1 at the index of ``sent[i]``.

        Raises:
            KeyError: If an entry of ``sent`` is not in ``word2ind``.
        """
        # Size the matrix by the argument, not the global `data`, so the
        # function is also correct for sequences other than the full corpus.
        one_hot = np.zeros((len(sent), len(words)), dtype=np.int64)
        for i, word in enumerate(sent):
            one_hot[i, word2ind[word]] = 1
        return torch.LongTensor(one_hot)

    def random_choice(vec):
        """Sample one vocabulary index, using ``vec`` as the probabilities.

        ``vec`` must hold one probability per entry of the module-level
        ``words`` list.
        """
        candidate_indices = range(len(words))
        sampled = np.random.choice(candidate_indices, p=vec)
        return sampled

    def train(rnn, learning_rate, optimizer, criterion, input_tensor, target_tensor):
        """Feed ``input_tensor`` through ``rnn`` row by row and score the last step.

        Args:
            rnn: Callable model providing ``init_hidden()``; called as
                ``rnn(row, hidden)`` and returning ``(output, hidden)``.
            learning_rate: Accepted but not used in this version.
            optimizer: Accepted but not used in this version (no backward
                pass or parameter update happens here).
            criterion: Loss callable invoked as ``criterion(output, target)``.
            input_tensor: 2-D tensor; row ``i`` is the input for step ``i``.
            target_tensor: Indexable targets; ``target_tensor[i]`` is passed
                to ``criterion`` at step ``i``.

        Returns:
            Tuple of the last step's output and the last step's loss value
            divided by the number of rows in ``input_tensor``.
        """
        hidden = rnn.init_hidden()
        # NOTE(review): the step count comes from size(1) (columns) while the
        # body indexes rows; confirm which axis is meant to be the sequence.
        for i in range(input_tensor.size(1)):
            output, hidden = rnn(input_tensor[i, :].unsqueeze(0), hidden)
            # This is the call that fails in the traceback in this post: the
            # script passes a whole one-hot row as target, while the criterion
            # expects one class index per batch element.
            loss = criterion(output, target_tensor[i])
        # Only the loss of the final step is reported.
        return output, loss.data[0] / input_tensor.size()[0]

    # Encode the whole corpus as one-hot rows, one row per element of `data`.
    onehot_data = form_onehot(data)
    # Input and output sizes are both the vocabulary size; hidden size is 10.
    rnn = RNN(len(words), 10, len(words))
    learning_rate = 0.1
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
    # Inputs drop the last one-hot column, matching `input_size-1` in RNN.
    input_tensor = Variable(onehot_data[:, :-1].type(torch.FloatTensor))
    # Drops the first column; not passed to train() below.
    target_tensor = Variable(onehot_data[:, 1:])
    # Rows shifted by one step. Each element is still a full one-hot row, not
    # a single class index.
    int_target_tensor = Variable(onehot_data[1:, :].type(torch.LongTensor))
    output, loss = train(rnn, learning_rate, optimizer, criterion, input_tensor, int_target_tensor)

And here is the error details:

RuntimeError                              Traceback (most recent call last)
<ipython-input-449-8abc91b616c7> in <module>()
----> 1 output, loss = train(rnn, learning_rate, optimizer, criterion, input_tensor, int_target_tensor)

<ipython-input-445-72363097fc21> in train(rnn, learning_rate, optimizer, criterion, input_tensor, target_tensor)
     52         output, hidden = rnn(input_tensor[i, :].unsqueeze(0), hidden)
     53         print(output.size(), target_tensor[i].size())
---> 54         loss = criterion(output, target_tensor[i])
     55         print('aaaaaaaaaaa')
     56         loss.backward()

D:\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    205     def __call__(self, *input, **kwargs):
--> 206         result = self.forward(*input, **kwargs)
    207         for hook in self._forward_hooks.values():
    208             hook_result = hook(self, input, result)

D:\Anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    319         _assert_no_grad(target)
    320         return F.cross_entropy(input, target,
--> 321                                self.weight, self.size_average)

D:\Anaconda3\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average)
    535                 for each minibatch.
    536     """
--> 537     return nll_loss(log_softmax(input), target, weight, size_average)

D:\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average)
    503     else:
    504         raise ValueError('Expected 2 or 4 dimensions (got {})'.format(dim))
--> 505     return f(input, target)

D:\Anaconda3\lib\site-packages\torch\nn\_functions\thnn\auto.py in forward(self, input, target)
     39         output = input.new(1)
     40         getattr(self._backend, update_output.name)(self._backend.library_state, input, target,
---> 41                                                    output, *self.additional_args)
     42         return output

RuntimeError: Assertion `THIndexTensor_(size)(target, 0) == batch_size' failed.  at d:\downloads\pytorch-master-1\torch\lib\thnn\generic/ClassNLLCriterion.c:50


I think the problem is that you are missing the batch dimension on your target_tensor.
The error says that the size of the 0th dimension is not equal to the batch size.
Try changing this: loss = criterion(output, target_tensor[i].unsqueeze(0)).

Thank you for your reply. But I don’t think it works, it raised an error:

RuntimeError: multi-target not supported at d:\downloads\pytorch-master-1\torch\lib\thnn\generic/ClassNLLCriterion.c:20

I think it is because I unsqueezed the target, and torch regards it as a multi-target.
And after using unsqueeze, I printed the output.size() and target.size(), and got torch.Size([1, 1139]), torch.Size([1, 1139]), respectively.

Your output should have one more dimension than the target corresponding to a score for each label and the target should just contain the index of the correct label.

Yeah, I mean before using unsqueeze, I got torch.Size([1, 1139]), torch.Size([1139]), which is right I think. But it raised THIndexTensor_(size)(target, 0) == batch_size. And I didn’t try to use batch here.

PyTorch always uses batches (even if it means having a first dimension of size 1).
If you have a single element with 1139 possible labels, then output should be 1x1139 and target should be a LongTensor of size 1 (containing the index of the correct label).


Thank you so much, man!! It just worked. But there is another error, can you help me with this? :light_smile:

RuntimeError                              Traceback (most recent call last)
<ipython-input-75-95f5d8615326> in <module>()
----> 1 output, loss = train(rnn, learning_rate, optimizer, criterion, input_tensor, target)

<ipython-input-71-ebb92fd662bb> in train(rnn, learning_rate, optimizer, criterion, input_tensor, target_tensor)
     51         print(output.size(), target_tensor[i])
     52         loss = criterion(output, target_tensor[i])
---> 53         loss.backward()
     54         optimizer.step()
     55     return output, loss.data[0] / input_tensor.size()[0]

D:\Anaconda3\lib\site-packages\torch\autograd\variable.py in backward(self, gradient, retain_variables)
    142                 raise TypeError("gradient has to be a Tensor, Variable or None")
    143             gradient = Variable(gradient, volatile=True)
--> 144         self._execution_engine.run_backward((self,), (gradient,), retain_variables)
    146     def register_hook(self, hook):

D:\Anaconda3\lib\site-packages\torch\autograd\function.py in apply(self, *args)
     89     def apply(self, *args):
---> 90         return self._forward_cls.backward(self, *args)

D:\Anaconda3\lib\site-packages\torch\nn\_functions\linear.py in backward(ctx, grad_output)
     19     @staticmethod
     20     def backward(ctx, grad_output):
---> 21         input, weight, bias = ctx.saved_variables
     23         grad_input = grad_weight = grad_bias = None

RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.

The problem here is in the way you use Variable.
Basically, as soon as you start using a Variable, it will create a history of all the computations you do with it to be able to get gradients.
So for elements that do not need gradients, you want to create it as late as possible. Keep in mind that creating a Variable is completely free so you can do it (and should do it) in your inner loop of training.

In your case, you should not wrap your whole dataset in a single Variable and then slice it in your training loop. Instead, keep the data as a plain tensor, input_tensor = onehot_data[:, :-1].type(torch.FloatTensor), and wrap each sample inside your training loop: net_input = Variable(input_tensor[i, :].unsqueeze(0)). Do the same for the target.

The error that you saw is because of memory optimization, when you backpropagate through the graph, all intermediary buffers are freed. If you try and call backward again on the same graph (or a subset of it in your case) then it cannot run the backward because some of these data have been freed. In your case, the problem is that when you call loss.backward(), it backpropagates all the way to the full dataset tensor, and at the next step, the same part of the graph that goes from the full dataset to your sample is reused but the buffers have been freed already. Changing the moment where you package into Variable as proposed above will solve this problem.

1 Like

Oh, I see, thanks very much!!!

If I may post on this thread, I’m having a similar issue as the original one so I thought I’d post it here rather than creating a new thread.

My training code looks as follows:

# Per-step loss history; `train_accu` is declared but never filled here.
train_loss = []
train_accu = []
i = 0  # global step counter across epochs
for epoch in range(30):
    # NOTE(review): this training loop iterates `test_loader`; confirm that
    # the training loader was not intended.
    for data, target in test_loader:
        data, target = Variable(data), Variable(target)
        # Clear gradients from the previous step; without this, backward()
        # keeps accumulating into the same .grad buffers.
        optimizer.zero_grad()
        # NOTE(review): assumes every batch holds exactly `batch_size` samples
        # of 64x64 single-channel data, otherwise these views fail.
        output = model(data.view(batch_size, 1, 64, 64))
        # Negative log likelihood; expects log-probabilities from the model.
        loss = F.nll_loss(output, target.view(batch_size))
        loss.backward()    # compute gradients
        train_loss.append(loss.data[0])  # record the scalar loss value
        optimizer.step()   # update parameters from the gradients
        # max(1) returns (values, indices); the indices are the predictions.
        prediction = output.data.max(1)[1]
        accuracy = (prediction.eq(target.data).sum() / batch_size) * 100
        if i % 10 == 0:
            print('Epoch:', str(epoch), 'Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(i, loss.data[0], accuracy))
        i += 1


torch.Size([3, 1, 64, 64])
torch.Size([12, 12])
RuntimeError                              Traceback (most recent call last)
<ipython-input-334-ce8b8adb782b> in <module>()
     11         output = model(data.view(batch_size,1,64,64))
     12         print(output.shape)
---> 13         loss = F.nll_loss(output, target.view(batch_size)) # Negative log likelihood (goes with softmax).
     14         loss.backward()    # calc gradients
     15         train_loss.append(loss.data[0]) # Calculating the loss

~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce)
   1047         weight = Variable(weight)
   1048     if dim == 2:
-> 1049         return torch._C._nn.nll_loss(input, target, weight, size_average, ignore_index, reduce)
   1050     elif dim == 4:
   1051         return torch._C._nn.nll_loss2d(input, target, weight, size_average, ignore_index, reduce)

RuntimeError: Assertion `THIndexTensor_(size)(target, 0) == batch_size' failed.  at /opt/conda/conda-bld/pytorch-cpu_1515613813020/work/torch/lib/THNN/generic/ClassNLLCriterion.c:79

The last layer of my CNN outputs 12 numbers:

class Net(nn.Module):
    """Two conv + max-pool stages followed by two fully connected layers.

    Produces log-probabilities over 12 classes for single-channel images.
    """

    def __init__(self):
        # nn.Module must be initialised before sub-modules can be assigned;
        # without this call the assignments below raise AttributeError.
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2)   # 1 -> 32 channels, 5x5 kernel, size-preserving padding
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)  # 32 -> 64 channels, 5x5 kernel, size-preserving padding
        self.fc1 = nn.Linear(64*8*8, 1024)            # flattened features -> 1024
        self.fc2 = nn.Linear(1024, 12)                # 1024 -> 12 class scores

    def forward(self, x):
        """Return log-probabilities with one row per 64*8*8 flattened features."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # halves height and width
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # halves height and width again
        # NOTE: 64*8*8 matches 32x32 inputs. For the 64x64 inputs used in this
        # post the feature map is 64x16x16, so this view folds the extra
        # features into the batch dimension (3 samples become 12 rows). That
        # is the batch-size mismatch discussed in the replies; the remedy
        # given there is 64*16*16 here and in fc1.
        x = x.view(-1, 64*8*8)
        x = F.relu(self.fc1(x))
        # x = F.dropout(x, training=self.training)  # dropout regularisation (disabled)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)  # normalise over the class dimension
        return output
model = Net()
model = model.double()

But for some reason in my mind (given the above thread) my target should have size (3,12) if it is going to match the batch size issue.

Does anyone have any ideas as to how to fix this problem?

Your .view has the wrong dimensions.
Based on your input size, it should be x = x.view(-1, 64*16*16)
or alternatively x = x.view(x.size(0), -1).
Since you are pooling twice with kernel_size=2 and stride=2, your height and width will be reduced to 64/2/2 = 16.

Therefore, you also have to change the in_features of fc1 to 64*16*16.

Thank you for replying :slight_smile: Ah yes, you’re quite right! Now I’m getting this error however:

RuntimeError: size mismatch, m1: [3 x 16384], m2: [4096 x 1024] at /opt/conda/conda-bld/pytorch-cpu_1515613813020/work/torch/lib/TH/generic/THTensorMath.c:1416

Sorry, I was too fast posting. I’ve added the note, that you also have to change the in_features of fc1 to 64*16*16. :wink:

That did the job! Thanks so much! Admittedly I should have noticed that last point myself too :wink:

I’m trying to concatenate two layers as below.

class Net1(nn.Module):
    """Four 3x3 conv layers with one 2x2 max-pool and a skip concatenation.

    The classifier layer is sized for 10 channels of 112x112 features, i.e.
    inputs of shape (batch, 3, 224, 224) before the concatenation was added.
    """

    def __init__(self, num_classes=2):
        """Build the layers; ``num_classes`` sets the width of the output layer."""
        super(Net1, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        self.pool = nn.MaxPool2d(kernel_size=2)

        self.conv3 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()

        self.fc = nn.Linear(in_features=112 * 112 * 10, out_features=num_classes)

    def forward(self, input):
        """Return class scores, one row per 112*112*10 flattened features."""
        output1 = self.conv1(input)
        output2 = self.relu1(output1)

        output3 = self.conv2(output2)
        output4 = self.relu2(output3)

        output5 = self.pool(output4)

        output6 = self.conv3(output5)
        output7 = self.relu3(output6)

        output8 = self.conv4(output7)
        output9 = self.relu4(output8)
        # Skip connection: stack the conv4 and conv3 activations along the
        # channel axis, giving 20 channels.
        output10 = torch.cat((output9, output7), 1)

        # NOTE: the view below still assumes 10 channels, so the 20-channel
        # tensor is split into twice as many rows as there are samples. This
        # is the "input batch_size (8) vs target batch_size (4)" error
        # reported in this post. Before the concatenation was added, the view
        # was applied to output9 directly.
        output11 = output10.view(-1, 112 * 112 * 10)

        output12 = self.fc(output11)

        return output12

net1 = Net1()

But I got this error after adding layer

ValueError: Expected input batch_size (8) to match target batch_size (4).

Your model works with an input size of [batch_size, 3, 224, 224].
Could you post the Dataset or generally how you load and process the data?
Based on the error message it seems there is a mismatch between your data and target.

PS: I’ve formatted your post. You can add code snippets using three backticks. :wink:

Thank you
Here is my data loading part

data_dir_train = 'cross_vali/train'
data_dir_val = 'cross_vali/val'
# NOTE(review): the active transform list was lost from this post; ToTensor()
# is assumed as the minimum needed to feed image tensors to the model.
# Confirm against the original code (e.g. a resize to 224x224).
transform = transforms.Compose(
    [transforms.ToTensor(),
     # transforms.Normalize((76.02, 34.22, 37.86), (52.76, 6.61, 28.19)),
     ])

# ImageFolder derives each sample's label from its sub-directory.
trainset = torchvision.datasets.ImageFolder(root=data_dir_train,
                                            transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.ImageFolder(root=data_dir_val,
                                           transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('MCIc', 'MCIs')

Thanks for the code. Could you print the shape of output10 before the operation in forward?

Hi everyone,
I got a similar issue :

Expected input batch_size (896) to match target batch_size

My code is the following:

import torch
import torch.nn as nn
import torchvision.models as models

class EncoderCNN(nn.Module):
    """Image encoder: frozen pretrained ResNet-50 trunk plus a linear embedding."""

    def __init__(self, embed_size):
        """Build the encoder.

        Args:
            embed_size: Size of the feature vector produced per image.
        """
        super(EncoderCNN, self).__init__()
        resnet = models.resnet50(pretrained=True)
        # Freeze the pretrained weights; only `self.embed` is trained.
        for param in resnet.parameters():
            param.requires_grad_(False)
        # Drop the final classification layer and keep the rest of the network.
        modules = list(resnet.children())[:-1]
        self.resnet = nn.Sequential(*modules)
        self.embed = nn.Linear(resnet.fc.in_features, embed_size)

    def forward(self, images):
        """Return one ``embed_size`` feature vector per image in the batch."""
        features = self.resnet(images)
        # Flatten everything after the batch dimension.
        features = features.view(features.size(0), -1)
        features = self.embed(features)
        return features

class DecoderRNN(nn.Module):
    """LSTM caption decoder that is primed with an image feature vector."""

    def __init__(self, embed_size, hidden_size, vocab_size, num_layers=1):
        """Build the embedding, LSTM and output projection.

        Args:
            embed_size: Size of word embeddings and of the image features.
            hidden_size: LSTM hidden state size.
            vocab_size: Number of tokens in the vocabulary.
            num_layers: Number of stacked LSTM layers.
        """
        super(DecoderRNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embed_size = embed_size
        self.drop_prob = 0.2
        self.vocabulary_size = vocab_size
        # batch_first=True: tensors are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(self.embed_size, self.hidden_size, self.num_layers, batch_first=True)
        # Defined but not applied in forward() or sample().
        self.dropout = nn.Dropout(self.drop_prob)
        self.embed = nn.Embedding(self.vocabulary_size, self.embed_size)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, features, captions):
        """Score every vocabulary token at each step of the sequence.

        Args:
            features: Image features of shape (batch, embed_size).
            captions: Token indices of shape (batch, seq_len).

        Returns:
            Scores of shape (batch, seq_len + 1, vocab_size).
        """
        # Embed the caption tokens.
        embeddings = self.embed(captions)
        # Prepend the image features as the first step of the sequence.
        embeddings = torch.cat((features.unsqueeze(1), embeddings), 1)
        # NOTE(review): the output has one more step than `captions`. If the
        # loss flattens outputs and captions, their batch sizes will differ,
        # which is a likely cause of the "Expected input batch_size" error
        # reported in this post. A common remedy is to embed captions[:, :-1];
        # confirm against the training loop.
        hiddens, _ = self.lstm(embeddings)
        # Project each hidden state to vocabulary scores.
        outputs = self.linear(hiddens)
        return outputs

    def sample(self, inputs, states=None, max_len=20):
        """Greedily decode ``max_len`` tokens from pre-processed image features.

        Args:
            inputs: Image features of shape (batch, embed_size).
            states: Optional initial LSTM state.
            max_len: Number of tokens to generate.

        Returns:
            Tensor of predicted token indices with shape (batch, max_len).
        """
        sampled_ids = []
        inputs = inputs.unsqueeze(1)
        for i in range(max_len):
            # One LSTM step, carrying the state forward to the next iteration.
            hiddens, states = self.lstm(inputs, states)
            outputs = self.linear(hiddens.squeeze(1))
            # Greedy choice: highest-scoring token per batch element.
            _, predicted = outputs.max(1)
            # Record the prediction so there is something to stack below.
            sampled_ids.append(predicted)
            # The embedding of the predicted token is the next step's input.
            inputs = self.embed(predicted)
            print("NEW_INPUT", inputs.shape)
            inputs = inputs.unsqueeze(1)
        sampled_ids = torch.stack(sampled_ids, 1)                # sampled_ids: (batch_size, max_seq_length)
        return sampled_ids

Thanks in advance,