IndexError: index 3 is out of bounds for dimension 1 with size 3

Hello,
I get the following error message: IndexError: index 3 is out of bounds for dimension 1 with size 3. It is raised inside def box_iou(boxes1, boxes2):, on this line:
box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]))
How can I solve this error?
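From what I can tell, the message means that the tensor reaching box_area has only 3 columns along dimension 1, so boxes[:, 3] is out of range. A minimal example (plain PyTorch, nothing from my code) that reproduces the same message:

import torch
t = torch.zeros(5, 3)  # only 3 columns at dim 1
t[:, 3]                # IndexError: index 3 is out of bounds for dimension 1 with size 3

I just don't see where the 4th column gets lost.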

Here is some of the code:

import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import torch
import torch.nn as nn
from xml.etree import ElementTree
from d2l import torch as d2l
  
def my_collate_pad(batch): 
    # Convert every sample in the batch to a tensor, then pad all of them
    # to the length of the longest sequence in the batch
    y_data_pad = [torch.tensor(sample) for sample in batch]
    # Note for lists: omit the [] around the sequence argument!
    return nn.utils.rnn.pad_sequence(y_data_pad, batch_first=True, padding_value=0)

def main():
    print("load data ...")
    class_names = ['person','person-like']
    class_names_label = {'person': 2, 'person-like': 1} 
    size = (300,300)
    batch_size=59   
   
    (train_images, b_tr, b_label_tr),(test_images,b_te,b_label_te),(val_images, b_v,b_label_v) = load_data(class_names_label, size)   
    
    train_images=torch.as_tensor(train_images)
    train_images=torch.permute(train_images,(0,3,1,2))
    #train_img_loader=torch.utils.data.DataLoader(train_images, batch_size=batch_size, shuffle=False)
    #train_box_loader=torch.utils.data.DataLoader(b_tr, batch_size=batch_size, shuffle=False, collate_fn=my_collate_pad)
    #train_label_loader=torch.utils.data.DataLoader(b_label_tr, batch_size=batch_size, shuffle=False, collate_fn=my_collate_pad)


   
    # Pick a random training image and generate anchor boxes for it
    i = np.random.randint(0, train_images.shape[0])
    sizes = [0.75, 0.5, 0.25]
    ratios = [0.5, 1, 2]
    img = train_images[i]
    anchors = multibox_prior(img, sizes, ratios)

    a = anchors[None, ...]
    # Normalize the ground-truth boxes of this image to [0, 1]
    box = b_tr[i]
    b2 = []
    for x in range(len(box)):
        b = [box[x][0] / 300, box[x][1] / 300, box[x][2] / 300, box[x][3] / 300]
        b2.append(b)
    b2 = torch.tensor(b2)[None, ...]

    labela = multibox_target(a, b2)  # returns: box_offset, bbox_mask, class_labels

def multibox_target(anchors, labels):
    """Label anchor boxes using ground-truth bounding boxes."""
    batch_size, anchors = labels.shape[0], anchors.squeeze(0)
    batch_offset, batch_mask, batch_class_labels = [], [], []
    device, num_anchors = anchors.device, anchors.shape[0]
    for i in range(batch_size):
        label = labels[i, :, :]
        anchors_bbox_map = assign_anchor_to_bbox(
            label[:, 1:], anchors, device)
        bbox_mask = ((anchors_bbox_map >= 0).float().unsqueeze(-1)).repeat(
            1, 4)
        # Initialize class labels and assigned bounding box coordinates with
        # zeros
        class_labels = torch.zeros(num_anchors, dtype=torch.long,
                                   device=device)
        assigned_bb = torch.zeros((num_anchors, 4), dtype=torch.float32,
                                  device=device)
        # Label classes of anchor boxes using their assigned ground-truth
        # bounding boxes. If an anchor box is not assigned any, we label its
        # class as background (the value remains zero)
        indices_true = torch.nonzero(anchors_bbox_map >= 0)
        bb_idx = anchors_bbox_map[indices_true]
        class_labels[indices_true] = label[bb_idx, 0].long() + 1
        assigned_bb[indices_true] = label[bb_idx, 1:]
        # Offset transformation
        offset = offset_boxes(anchors, assigned_bb) * bbox_mask
        batch_offset.append(offset.reshape(-1))
        batch_mask.append(bbox_mask.reshape(-1))
        batch_class_labels.append(class_labels)
    bbox_offset = torch.stack(batch_offset)
    bbox_mask = torch.stack(batch_mask)
    class_labels = torch.stack(batch_class_labels)
    return (bbox_offset, bbox_mask, class_labels)

def offset_boxes(anchors, assigned_bb, eps=1e-6):
    """Transform for anchor box offsets."""
    c_anc = d2l.box_corner_to_center(anchors)
    c_assigned_bb = d2l.box_corner_to_center(assigned_bb)
    offset_xy = 10 * (c_assigned_bb[:, :2] - c_anc[:, :2]) / c_anc[:, 2:]
    offset_wh = 5 * torch.log(eps + c_assigned_bb[:, 2:] / c_anc[:, 2:])
    offset = torch.cat([offset_xy, offset_wh], dim=1)
    return offset

def assign_anchor_to_bbox(ground_truth, anchors, device, iou_threshold=0.5):
    """Assign closest ground-truth bounding boxes to anchor boxes."""
    num_anchors, num_gt_boxes = anchors.shape[0], ground_truth.shape[0]
    # Element x_ij in the i-th row and j-th column is the IoU of the anchor
    # box i and the ground-truth bounding box j
    jaccard = box_iou(anchors, ground_truth)
    # Initialize the tensor to hold the assigned ground-truth bounding box for
    # each anchor
    anchors_bbox_map = torch.full((num_anchors,), -1, dtype=torch.long,
                                  device=device)
    max_ious, indices = torch.max(jaccard, dim=1)
    anc_i = torch.nonzero(max_ious >= iou_threshold).reshape(-1)
    box_j = indices[max_ious >= iou_threshold]
    anchors_bbox_map[anc_i] = box_j
    col_discard = torch.full((num_anchors,), -1)
    row_discard = torch.full((num_gt_boxes,), -1)
    for _ in range(num_gt_boxes):
        max_idx = torch.argmax(jaccard)  # Find the largest IoU
        box_idx = (max_idx % num_gt_boxes).long()
        anc_idx = torch.div(max_idx, num_gt_boxes, rounding_mode='floor')
        anchors_bbox_map[anc_idx] = box_idx
        jaccard[:, box_idx] = col_discard
        jaccard[anc_idx, :] = row_discard
    return anchors_bbox_map

def box_iou(boxes1, boxes2):
    """Compute pairwise IoU across two lists of anchor or bounding boxes."""
    box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) *
                              (boxes[:, 3] - boxes[:, 1]))
    # Shape of `boxes1`, `boxes2`, `areas1`, `areas2`: (no. of boxes1, 4),
    # (no. of boxes2, 4), (no. of boxes1,), (no. of boxes2,)
    areas1 = box_area(boxes1)
    print(areas1)
    areas2 = box_area(boxes2)
    # Shape of `inter_upperlefts`, `inter_lowerrights`, `inters`: (no. of
    # boxes1, no. of boxes2, 2)
    inter_upperlefts = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    inter_lowerrights = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    inters = (inter_lowerrights - inter_upperlefts).clamp(min=0)
    # Shape of `inter_areas` and `union_areas`: (no. of boxes1, no. of boxes2)
    print()
    print("areas2.shape", areas2.shape)
    print("inters",inters.shape)
    print()
    inter_areas = inters[:, :, 0] * inters[:, :, 1]
    union_areas = areas1[:, None] + areas2 - inter_areas
    return inter_areas / union_areas

def multibox_prior(data, sizes, ratios):
    """Generate anchor boxes with different shapes centered on each pixel."""
    in_height, in_width = data.shape[-2:]
    device, num_sizes, num_ratios = data.device, len(sizes), len(ratios)
    boxes_per_pixel = (num_sizes + num_ratios - 1)
    size_tensor = torch.tensor(sizes, device=device)
    ratio_tensor = torch.tensor(ratios, device=device)
    # Offsets are required to move the anchor to the center of a pixel. Since
    # a pixel has height=1 and width=1, we choose to offset our centers by 0.5
    offset_h, offset_w = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps in y axis
    steps_w = 1.0 / in_width  # Scaled steps in x axis

    # Generate all center points for the anchor boxes
    center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h
    center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w
    shift_y, shift_x = torch.meshgrid(center_h, center_w)
    shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)

    # Generate `boxes_per_pixel` number of heights and widths that are later
    # used to create anchor box corner coordinates (xmin, ymin, xmax, ymax)
    w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),
                   sizes[0] * torch.sqrt(ratio_tensor[1:])))\
                   * in_height / in_width  # Handle rectangular inputs
    h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),
                   sizes[0] / torch.sqrt(ratio_tensor[1:])))
    # Divide by 2 to get half height and half width
    anchor_manipulations = torch.stack((-w, -h, w, h)).T.repeat(
                                        in_height * in_width, 1) / 2

    # Each center point will have `boxes_per_pixel` number of anchor boxes, so
    # generate a grid of all anchor box centers with `boxes_per_pixel` repeats
    out_grid = torch.stack([shift_x, shift_y, shift_x, shift_y],
                dim=1).repeat_interleave(boxes_per_pixel, dim=0)
    output = out_grid + anchor_manipulations
    return output.unsqueeze(0)

def load_data(class_names_label, size):
    """Load images, bounding boxes, and box labels for the three dataset splits."""
    datasets = ['Train/Train', 'Test/Test', 'Val/Val']
    output = []
    for dataset in datasets:
        imags = []
        boxes_all=[]
        label_boxes=[]
        directoryA = "PedestrianDetection/" + dataset +"/Annotations/"
        directoryIMG = "PedestrianDetection/" + dataset +"/JPEGImages/"
        file = os.listdir(directoryA)
        img = os.listdir(directoryIMG)
        file.sort()
        img.sort()
        i = 0
        for xml in file:
            xmlf = os.path.join(directoryA,xml)
            root = ElementTree.parse(xmlf)
            vb = root.findall('object')

            s=root.findall('size')
            w=float(s[0].find('width').text)
            h=float(s[0].find('height').text)
            # boxes and per-box labels of this image
            boxes_img = []
            label_boxes_img = []
            for obj in vb:
                # class label of this box
                label_box = obj.find('name').text
                label_boxes_img.append(class_names_label[label_box])
                # box corners
                xmin = float(obj.find('bndbox/xmin').text)
                ymin = float(obj.find('bndbox/ymin').text)
                xmax = float(obj.find('bndbox/xmax').text)
                ymax = float(obj.find('bndbox/ymax').text)
                # rescale the box to the target image size
                if h > 0 and w > 0:
                    xmin = xmin * (size[1] / w)
                    ymin = ymin * (size[0] / h)
                    xmax = xmax * (size[1] / w)
                    ymax = ymax * (size[0] / h)
                boxes_img.append([xmin, ymin, xmax, ymax])
            boxes_all.append(boxes_img)   
            label_boxes.append(label_boxes_img)
            #image
            img_path = directoryIMG + img[i]
            curr_img = cv2.imread(img_path)
            curr_img = cv2.resize(curr_img, size)
            imags.append(curr_img)
            i +=1
        imags = np.array(imags, dtype='float32')
        imags = imags / 255    
 
        output.append((imags, boxes_all, label_boxes))
    return output 
def plot_img_withRectangle(i, boxes, images, box_labels, img_size):
    """Plot image i with its ground-truth boxes and per-box class labels."""
    plt.figure(1)
    plt.title(box_labels[i])
    images = torch.permute(images, (0, 2, 3, 1))  # change back to: 59,300,300,3
    images = images.numpy()                       # change back to a numpy array
    img_cv=cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB)
    plt.imshow(img_cv)
    line_width=1.5 
    xmin_n_img=[]
    ymin_n_img=[]
    xmax_n_img=[]
    ymax_n_img=[]
    width_n_img=[]
    height_n_img=[]
    ytop_img=[]
    for zz in range(len(boxes[i])):
        xmin_n = boxes[i][zz][0]    # [image][single box][coordinate]
        ymin_n = boxes[i][zz][1]
        xmax_n = boxes[i][zz][2]
        ymax_n = boxes[i][zz][3]
        width_n = xmax_n - xmin_n
        height_n = ymax_n - ymin_n
        ytop = ymin_n
        # keep the rectangle inside the image borders
        if (height_n + ytop) > (img_size[0] - line_width):
            height_n = height_n - line_width
        if (width_n + xmin_n) > (img_size[1] - line_width):
            width_n = width_n - line_width
        xmin_n_img.append(xmin_n)
        ymin_n_img.append(ymin_n)
        xmax_n_img.append(xmax_n)
        ymax_n_img.append(ymax_n)
        width_n_img.append(width_n)
        height_n_img.append(height_n)
        ytop_img.append(ytop)
        # draw the class label text
        if box_labels[i][zz] == 2:    # person
            plt.text(xmin_n, ytop, 'person', color='yellow')
        elif box_labels[i][zz] == 1:  # person-like
            plt.text(xmin_n, ytop, 'person-like', color='blue')
    for z in range(len(boxes[i])):
        if box_labels[i][z] == 2:    # person
            plt.gca().add_patch(Rectangle(
                (xmin_n_img[z], ytop_img[z]), width_n_img[z], height_n_img[z],
                edgecolor='yellow',
                facecolor='none',
                lw=line_width
            ))
        elif box_labels[i][z] == 1:  # person-like
            plt.gca().add_patch(Rectangle(
                (xmin_n_img[z], ytop_img[z]), width_n_img[z], height_n_img[z],
                edgecolor='blue',
                facecolor='none',
                lw=line_width
            ))

if __name__=="__main__":
    main()

Thanks for your help! :)

Check boxes1.shape and boxes2.shape before this line; it looks like one of them doesn't have a 4th column at dim 1.
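Given that, the likely culprit is b2 in main(): multibox_target uses label[:, 1:] as the box coordinates and label[:, 0] as the class, so every ground-truth row needs 5 values, but b2 only carries the 4 corners. Here is a minimal, untested sketch of a fix following the d2l convention of a 0-based class index in column 0 (b_label_tr and the /300 normalization come from your post; the -1 assumes your labels 1/2 should map to classes 0/1):

# hedged sketch: prepend the class index so label[:, 1:] yields 4 box coordinates
b2 = []
for x in range(len(box)):
    b = [b_label_tr[i][x] - 1,             # 0-based class (multibox_target adds +1 for background)
         box[x][0] / 300, box[x][1] / 300,
         box[x][2] / 300, box[x][3] / 300]
    b2.append(b)
b2 = torch.tensor(b2)[None, ...]           # shape: (1, num_gt_boxes, 5)
labela = multibox_target(anchors, b2)

Also note that multibox_prior already returns anchors with a leading batch dimension (output.unsqueeze(0)), so the extra anchors[None, ...] in main() is probably not needed either.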