IndexError: Target 2 is out of bounds

vyaslkv · February 13, 2020, 3:34pm

After epoch 1 it is throwing this error

Model: resnet50 Frozen Layers: False
Epoch 1/50

train Loss: 0.2108 Acc: 0.9226 TPR: 0.9270 FPR: 0.0819

IndexError Traceback (most recent call last)
in
17 optimizer = optim.Adam(parameters)
18
---> 19 net = train_model(net, criterion, optimizer, num_epochs=num_epochs, name=name)
20
21 net = net.to(torch.device('cpu'))

in train_model(model, criterion, optimizer, num_epochs, dataset_sizes, name)
41
42 # criterion is the loss
—> 43 loss = criterion(outputs, labels)
44
45 # then back propogate the loss

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
–> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
914 def forward(self, input, target):
915 return F.cross_entropy(input, target, weight=self.weight,
–> 916 ignore_index=self.ignore_index, reduction=self.reduction)
917
918

~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2019 if size_average is not None or reduce is not None:
2020 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2021 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2022
2023

~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1836 .format(input.size(0), target.size(0)))
1837 if dim == 2:
-> 1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
1839 elif dim == 4:
1840 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

IndexError: Target 2 is out of bounds.

ptrblck · February 14, 2020, 5:53am

How many classes are you currently using and what is the shape of your output?
Note that class indices start at 0 so your target should contain indices in the range [0, nb_classes-1].

vyaslkv · February 14, 2020, 11:55am

I am using two classes

vyaslkv · February 14, 2020, 11:57am

class Net(nn.Module):
    """Pretrained torchvision backbone followed by a small fully connected head.

    The head expects 1000 input features from the backbone and reduces them
    1000 -> 256 -> 32 -> 2, so ``forward`` returns two raw scores (logits) per
    sample.

    Args:
        name: Backbone to load; must be one of ``SUPPORTED_BACKBONES``.
        freeze_layers: When true, ``requires_grad`` is switched off for every
            backbone parameter so only the head is trainable.

    Raises:
        ValueError: If ``name`` is not a supported backbone.
    """

    # Each entry is also the name of the constructor looked up on ``models``.
    SUPPORTED_BACKBONES = (
        'vgg16',
        'resnet18',
        'resnet50',
        'alexnet',
        'squeezenet1_0',
        'densenet161',
    )

    def __init__(self, name, freeze_layers=True):
        super().__init__()
        model = self.feature_extractor(name)

        if freeze_layers:
            for params in model.parameters():
                params.requires_grad = False

        self.basemodel = model
        # these are the fully connected layers
        self.feedforward1 = nn.Linear(1000, 256)
        self.dropout1 = nn.Dropout(0.2)
        self.feedforward2 = nn.Linear(256, 32)
        self.dropout2 = nn.Dropout(0.2)
        self.feedforward3 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two-class logits for the input batch ``x``."""
        x = self.basemodel(x)
        x = F.relu(self.dropout1(self.feedforward1(x)))
        x = F.relu(self.dropout2(self.feedforward2(x)))
        # No activation on the last layer: the raw scores are returned.
        x = self.feedforward3(x)
        return x

    def feature_extractor(self, name):
        """Build the pretrained backbone called ``name``.

        The constructor is looked up on ``models`` by name, so the requested
        architecture and the loaded architecture cannot drift apart (the
        previous if/elif chain loaded resnet18 for 'resnet50' and could never
        reach its alexnet branch).

        Raises:
            ValueError: If ``name`` is not in ``SUPPORTED_BACKBONES``.
        """
        if name not in self.SUPPORTED_BACKBONES:
            raise ValueError('Should be one of vgg16,resnet50,resnet18, alexnet, squeezenet1_0, densenet161')
        return getattr(models, name)(pretrained=True)



# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when ``denominator`` is zero.

    Guards the per-epoch rate computations against a phase that contains no
    positive (or no negative) samples, which would raise ZeroDivisionError
    for plain Python numbers.
    """
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def train_model(model, criterion, optimizer, num_epochs=25, dataset_sizes=dataset_sizes, name=''):
    """Train ``model`` and return it with the best validation-loss weights loaded.

    Each epoch runs a 'train' phase and a 'valid' phase over the module-level
    ``dataloaders``, moving every batch to the module-level ``device``.
    Loss, accuracy, true-positive rate and false-positive rate are printed
    per phase; the confusion counts come from the module-level
    ``confusion_matrix`` helper.

    Args:
        model: Network to optimise; its weights are updated in place.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of epochs to run.
        dataset_sizes: Mapping from phase name ('train' / 'valid') to the
            number of samples in that phase, used to average loss and accuracy.
        name: Path the periodic checkpoint is written to (every 5 epochs).

    Returns:
        ``model`` after loading the weights of the epoch with the lowest
        validation loss.
    """
    start = datetime.now()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = np.inf
    best_tpr = 0.0
    best_fpr = np.inf
    save_every = 5

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-'*10)

        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            running_tp = 0
            running_fp = 0
            running_fn = 0
            running_tn = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # https://discuss.pytorch.org/t/why-do-we-need-to-set-the-gradients-manually-to-zero-in-pytorch/4903/6
                optimizer.zero_grad()
                # Forward pass; gradients are only tracked in the train phase.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)

                    # criterion is the loss
                    loss = criterion(outputs, labels)

                    # Back-propagate and update the weights only when training.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so the epoch value
                # can be divided by the dataset size below.
                running_loss += loss.item()*inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()
                tp, fp, fn, tn = confusion_matrix(preds, labels)
                running_tp += tp
                running_fp += fp
                running_fn += fn
                running_tn += tn

            epoch_loss = running_loss/dataset_sizes[phase]
            epoch_acc = running_corrects/dataset_sizes[phase]
            # A phase without positives (or negatives) reports NaN instead of
            # aborting the whole run with a ZeroDivisionError.
            epoch_tpr = _safe_ratio(running_tp, running_tp+running_fn)
            epoch_fpr = _safe_ratio(running_fp, running_fp+running_tn)

            print('{} Loss: {:.4f} Acc: {:.4f} TPR: {:.4f} FPR: {:.4f}'.format(phase, epoch_loss, epoch_acc,
                                                                  epoch_tpr,
                                                                  epoch_fpr))

            # Model selection is driven by the validation loss only.
            if phase == 'valid' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_acc = epoch_acc
                best_tpr = epoch_tpr
                best_fpr = epoch_fpr
                best_model_wts = copy.deepcopy(model.state_dict())

        if (epoch+1)%save_every == 0:
            # The checkpoint is written from the CPU copy of the model, which
            # is moved back to the training device afterwards.
            model = model.to(torch.device('cpu'))
            state = {'optimizer': optimizer,
                     'optimizer_state_dict': optimizer.state_dict(),
                     'criterion': criterion,
                     'criterion_state_dict': criterion.state_dict(),
                     'model_state_dict': model.state_dict(),
                     'model': model,
                     'epoch': epoch+1}
            torch.save(state, name)
            model = model.to(device)
            print('Model Checkpointed')

        print()

    time_elapsed = datetime.now() - start

    print('Training complete in '+str(time_elapsed))
    print('Best Valid Loss: {:.4f} Acc: {:.4f} TPR: {:.4f} FPR: {:.4f}'.format(best_loss, best_acc,
                                                                              best_tpr, best_fpr))

    model.load_state_dict(best_model_wts)
    return model


num_epochs = 50
# Wider sweep that was used previously:
#for m in ['vgg16', 'resnet50', 'resnet18', 'alexnet', 'squeezenet1_0', 'densenet161'][4:]:
for m in ['resnet50']:
    for freeze in [False]:  # [True, False]:
        print('-'*100)
        print()
        print('Model: '+m+' Frozen Layers: '+str(freeze))
        # File names carry the configured epoch count instead of a literal 50,
        # so they stay correct if num_epochs is changed.
        run_tag = m+str(freeze)
        name = './'+run_tag+str(num_epochs)+'.pth'
        net = Net(m, freeze_layers=freeze)
        # The batch size should be larger than the number of GPUs used.
        net = nn.DataParallel(net)
        net.to(device)

        criterion = nn.CrossEntropyLoss()
        # Only parameters that still require gradients are handed to the
        # optimizer; Adam runs with its default settings.
        parameters = filter(lambda p: p.requires_grad, net.parameters())
        optimizer = optim.Adam(parameters)

        net = train_model(net, criterion, optimizer, num_epochs=num_epochs, name=name)

        # Save the final (best-weights) model from the CPU.
        net = net.to(torch.device('cpu'))
        state = {'optimizer': optimizer,
                 'optimizer_state_dict': optimizer.state_dict(),
                 'criterion': criterion,
                 'criterion_state_dict': criterion.state_dict(),
                 'model_state_dict': net.state_dict(),
                 'model': net,
                 'epoch': num_epochs}
        torch.save(state, './'+run_tag+'13feb_organic_non_organic'+str(num_epochs)+'.pth')
        print('Best Model Checkpointed')

        print()
        print('-'*100)

ptrblck · February 14, 2020, 4:01pm

Could you add a print statement to your code and check the min and max values of labels please?

Jie_Gao · February 26, 2020, 9:13pm

This answer solved my question! Thanks!!

ronitganguly · February 27, 2020, 7:39pm

Hi, I am using 7 classes and it says that Target 6 is out of bounds. Batch size I am using is 70 and the labels I printed are these:

torch.Size([70])
tensor([4, 5, 0, 5, 5, 0, 0, 0, 1, 4, 0, 1, 5, 4, 4, 0, 4, 2, 6, 0, 1, 4, 0, 2,
        4, 6, 4, 3, 6, 2, 4, 0, 3, 4, 0, 0, 4, 0, 5, 5, 5, 3, 2, 3, 6, 4, 0, 4,
        5, 5, 2, 4, 5, 3, 6, 1, 2, 0, 1, 6, 3, 4, 1, 4, 4, 2, 6, 6, 2, 2])
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-73-4e4c221a4846> in <module>()
      8     loss_fn = loss_fn,
      9     train_loader = train_loader,
---> 10     validation_loader=validation_loader
     11 )
     12 

2 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1836                          .format(input.size(0), target.size(0)))
   1837     if dim == 2:
-> 1838         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   1839     elif dim == 4:
   1840         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

IndexError: Target 6 is out of bounds.

The first line is the size of the labels or Targets, second line is the targets themselves.

Please help me. I am in dire need of help.

ptrblck · February 28, 2020, 6:15am

Could you check the shape of your input tensor please?
If you are dealing with 7 classes, your model should output a tensor with the shape [batch_size, 7].

Vijaya_kumar · April 16, 2020, 6:46pm

I am in a similar situation, but for me it says Target 10 is out of bounds. Number of classes = 10, min = 1, max = 10,
and my net is as follows:

def __init__(self):
    """Create two conv/ReLU/max-pool stages, a dropout layer and two linear layers."""
    super(ConvNet, self).__init__()

    def conv_stage(in_channels, out_channels):
        # 5x5 convolution (stride 1, padding 2), ReLU, then 2x2 max-pooling with stride 2.
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

    self.layer1 = conv_stage(1, 32)
    self.layer2 = conv_stage(32, 64)
    self.drop_out = nn.Dropout()
    # Number of features the first linear layer expects after flattening.
    flat_features = 8 * 8 * 64
    self.fc1 = nn.Linear(flat_features, 1000)
    # The last layer produces 10 scores per sample.
    self.fc2 = nn.Linear(1000, 10)

def forward(self, x):
    """Apply both conv stages, flatten per sample, then dropout and the two linear layers."""
    features = self.layer2(self.layer1(x))
    # Keep the first (batch) dimension and collapse everything else.
    batch_size = features.size(0)
    flat = features.reshape(batch_size, -1)
    hidden = self.fc1(self.drop_out(flat))
    return self.fc2(hidden)

How can I rectify this problem? Can you kindly help?

Vijaya_kumar · April 16, 2020, 6:47pm

Hi Jie, can you please explain how you fixed the issue? I still don't understand what changes need to be made.

ptrblck · April 16, 2020, 6:48pm

The targets should be in the range [0, 9] for your use case, as they are used to index the output tensor, so you might just subtract 1 from the target tensor.

Vijaya_kumar · April 16, 2020, 6:52pm

This is how I train my model. Where should I subtract 1 from the target tensor?

def train(self, X_train, Y_train, X_test, Y_test):
    """Fit a fresh ``ConvNet`` on the training data with Adam and cross-entropy loss.

    Inputs are reshaped to ``(-1, 1, 32, 32)`` and labels are cast to long.
    Both loaders use the ``DataLoader`` defaults. Per-step loss and accuracy
    are collected in local lists and progress is printed every 2000 steps.

    NOTE: the labels are passed to ``nn.CrossEntropyLoss`` unchanged, so they
    must already be zero-based class indices in ``[0, num_classes - 1]``.

    Args:
        X_train: Training inputs; must support ``reshape(-1, 1, 32, 32)``.
        Y_train: Training labels, one class index per sample.
        X_test: Test inputs, same layout as ``X_train``.
        Y_test: Test labels, same layout as ``Y_train``.

    Reads ``self.learning_rate`` and ``self.epoch``. Nothing is returned by
    the code shown here.
    """
    tensor_train_x = torch.Tensor(X_train.reshape(-1, 1, 32, 32))  # transform to torch tensor
    tensor_train_y = torch.Tensor(Y_train).long()
    # Use torch.utils.data throughout; the train loader previously went
    # through a bare ``utils`` name while the test loader used ``torch.utils``.
    trainset = torch.utils.data.TensorDataset(tensor_train_x, tensor_train_y)
    trainloader = torch.utils.data.DataLoader(trainset)

    tensor_test_x = torch.Tensor(X_test.reshape(-1, 1, 32, 32))  # transform to torch tensor
    tensor_test_y = torch.Tensor(Y_test).long()
    # The test loader is built but not consumed in the code shown here.
    testset = torch.utils.data.TensorDataset(tensor_test_x, tensor_test_y)
    testloader = torch.utils.data.DataLoader(testset)

    model = ConvNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)

    # Train the model
    total_step = len(trainloader)
    loss_list = []
    acc_list = []
    for epoch in range(self.epoch):
        for i, (images, labels) in enumerate(trainloader):
            # Run the forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss_list.append(loss.item())

            # Backprop and perform Adam optimisation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Track the accuracy; detach so the bookkeeping stays out of autograd.
            total = labels.size(0)
            _, predicted = torch.max(outputs.detach(), 1)
            correct = (predicted == labels).sum().item()
            acc_list.append(correct / total)

            if (i + 1) % 2000 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                      .format(epoch + 1, self.epoch, i + 1, total_step, loss.item(),
                              (correct / total) * 100))

ptrblck · April 16, 2020, 7:09pm

This should work:

# Shift the 1-based labels down by one so they become 0-based class indices.
tensor_train_y = tensor_train_y - 1
tensor_test_y = tensor_test_y - 1

mathematics · September 12, 2020, 2:05pm

Suddenly, my model gave "IndexError: Target 4 is out of bounds".

As you suggested, I have n_bclasses = [1,2,3,4],
and setting the output size of the linear layer to n_bclasses - 1 gave the error mentioned above,

but setting it to n_bclasses + 1, i.e. 5, works. Is there any reason for that?

Thanks

Update: The problem has been solved, but, strangely, I had to go to the dataframe and subtract one from the labels so that they become [0,1,2,3], and then just pass nb_classes to the Linear layer.

ptrblck · September 12, 2020, 8:56pm

That’s the expected behavior since the target should contain class indices in the range [0, nb_classes-1] (the start index should be 0 not 1).

Hizella · March 23, 2021, 10:27am

Hello,

Can anyone please explain what is meant by “number of classes”? What does this refer to?
I’m getting the same error message but the “target” that is out of bounds varies from run to run.

Many thanks!
hizella

ptrblck · March 23, 2021, 8:45pm

“Number of classes” is defined by the number of different target classes in your multi-class classification use case.
E.g. you could use a neural network in order to classify an input image into 3 different classes: cat, dog, bird. In this case the number of classes would be 3 and the target tensor should thus contain values in: [0, 1, 2], where each integer represents a specific class.

Hizella · March 24, 2021, 6:06pm

Hi ptrblck,

Thanks! That clarifies it a bit more.

However, I don’t understand why this error is being raised. In my case, I think it happens when I call the backward() function on my loss. Some background on my task: technically speaking, I’m working on neural machine translation; linguistically speaking, I’m doing reconstruction at the sound level. Let me know if that is relevant. Can you help me troubleshoot this error? Or should I open a new thread?

for epoch in range(num_epochs):
    print(f"[Epoch {epoch} / {num_epochs}]")

    # Produce the progress sample in eval mode.
    model.eval()
    reconstructed_word = reconstruct_word(model, word1)

    with open('out.txt', 'a') as f:
        print(f"Reconstruct example words {word1, word2, word3, word4, word5,} \n as {reconstructed_word}", file=f)

    # Switch back to training mode for the optimisation pass. Previously
    # model.eval() was called right after model.train(), so every batch was
    # trained with the model in eval mode.
    model.train()

    for batch_idx, element in enumerate(train_iterator):
        # Get input and targets for this batch.
        inp_data = element.italian.to(device)
        targets = element.latin.to(device)

        # Forward propagation
        output = model(inp_data, targets)
        print("Output shape:", output.shape)
        print("Target max/min:", targets.max(), targets.min())

        # Drop the first entry along dim 0 and flatten so the loss sees
        # (N, output.shape[2]) scores and (N,) targets.
        # Presumably index 0 is a start-of-sequence token - TODO confirm.
        output = output[1:].reshape(-1, output.shape[2])
        targets = targets[1:].reshape(-1)
        print("Output shape 2:", output.shape)
        print("Target max/min 2:", targets.max(), targets.min())

        # Gradients are cleared once per batch, right before the backward pass.
        optimizer.zero_grad()
        loss = criterion(output, targets)  # error gets thrown either here or down at loss.backward()
        print("Loss:", loss)

        loss.backward()

        # Clip the gradient norm to 1 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()

Best,
hizella

ptrblck · March 25, 2021, 12:54am

I assume you are using nn.CrossEntropyLoss or nn.NLLLoss as the criterion.
If so, check the output shape as well as the min. and max. values on targets.
As described before: for a multi-class classification the output should have the shape [batch_size, nb_classes], while the target is expected to have the shape [batch_size] and contain class indices in the range [0, nb_classes-1].

Hizella · March 25, 2021, 10:33am

Thank you very much! I will try to implement your suggestion and let you know!

IndexError: Target 2 is out of bounds

Model: resnet50 Frozen Layers: False Epoch 1/50

train Loss: 0.2108 Acc: 0.9226 TPR: 0.9270 FPR: 0.0819

Model: resnet50 Frozen Layers: False
Epoch 1/50