Hello,
I get the following error message: IndexError: index 3 is out of bounds for dimension 1 with size 3
, it’s in this function def box_iou(boxes1, boxes2):
because of this line:
box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]))
So how can I solve this error?
Here is some of the code:
from operator import concat
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import torch
import torch.nn as nn
from xml.etree import ElementTree
from d2l import torch as d2l
def my_collate_pad(batch):
data=batch
data_idx=0
y_data_pad=[]
for bild_nr in data:
y_data=torch.tensor( data[data_idx] )
y_data_pad.append(y_data)
data_idx+=1
y_alle_data_gepadded=nn.utils.rnn.pad_sequence(y_data_pad, batch_first=True, padding_value=0) #Hinweis für Listen: [] bei sequence weglassen!
return y_alle_data_gepadded
def main():
print("load data ...")
class_names = ['person','person-like']
class_names_label = {'person': 2, 'person-like': 1}
size = (300,300)
batch_size=59
(train_images, b_tr, b_label_tr),(test_images,b_te,b_label_te),(val_images, b_v,b_label_v) = load_data(class_names_label, size)
train_images=torch.as_tensor(train_images)
train_images=torch.permute(train_images,(0,3,1,2))
#train_img_loader=torch.utils.data.DataLoader(train_images, batch_size=batch_size, shuffle=False)
#train_box_loader=torch.utils.data.DataLoader(b_tr, batch_size=batch_size, shuffle=False, collate_fn=my_collate_pad)
#train_label_loader=torch.utils.data.DataLoader(b_label_tr, batch_size=batch_size, shuffle=False, collate_fn=my_collate_pad)
i=np.random.randint(0,train_images.shape[0]-1)
sizes=[0.75, 0.5, 0.25]
ratios=[0.5, 1, 2]
img=train_images[i] #img
anchors=multibox_prior(img, sizes, ratios)
a=anchors[None,...]
box=b_tr[i]
b2=[]
for x in range(len(box)):
b=[box[x][0]/300,box[x][1]/300,box[x][2]/300,box[x][3]/300 ]
b2.append(b)
b2=torch.tensor(b2)[None,...]
labela=multibox_target(a,b2) #return: box_offset, bbox_mask, class_labels
def multibox_target(anchors, labels):
"""Label anchor boxes using ground-truth bounding boxes."""
batch_size, anchors = labels.shape[0], anchors.squeeze(0)
batch_offset, batch_mask, batch_class_labels = [], [], []
device, num_anchors = anchors.device, anchors.shape[0]
for i in range(batch_size):
label = labels[i, :, :]
anchors_bbox_map = assign_anchor_to_bbox(
label[:, 1:], anchors, device)
bbox_mask = ((anchors_bbox_map >= 0).float().unsqueeze(-1)).repeat(
1, 4)
# Initialize class labels and assigned bounding box coordinates with
# zeros
class_labels = torch.zeros(num_anchors, dtype=torch.long,
device=device)
assigned_bb = torch.zeros((num_anchors, 4), dtype=torch.float32,
device=device)
# Label classes of anchor boxes using their assigned ground-truth
# bounding boxes. If an anchor box is not assigned any, we label its
# class as background (the value remains zero)
indices_true = torch.nonzero(anchors_bbox_map >= 0)
bb_idx = anchors_bbox_map[indices_true]
class_labels[indices_true] = label[bb_idx, 0].long() + 1
assigned_bb[indices_true] = label[bb_idx, 1:]
# Offset transformation
offset = offset_boxes(anchors, assigned_bb) * bbox_mask
batch_offset.append(offset.reshape(-1))
batch_mask.append(bbox_mask.reshape(-1))
batch_class_labels.append(class_labels)
bbox_offset = torch.stack(batch_offset)
bbox_mask = torch.stack(batch_mask)
class_labels = torch.stack(batch_class_labels)
return (bbox_offset, bbox_mask, class_labels)
def offset_boxes(anchors, assigned_bb, eps=1e-6):
"""Transform for anchor box offsets."""
c_anc = d2l.box_corner_to_center(anchors)
c_assigned_bb = d2l.box_corner_to_center(assigned_bb)
offset_xy = 10 * (c_assigned_bb[:, :2] - c_anc[:, :2]) / c_anc[:, 2:]
offset_wh = 5 * torch.log(eps + c_assigned_bb[:, 2:] / c_anc[:, 2:])
offset = torch.cat([offset_xy, offset_wh], axis=1)
return offset
def assign_anchor_to_bbox(ground_truth, anchors, device, iou_threshold=0.5):
"""Assign closest ground-truth bounding boxes to anchor boxes."""
num_anchors, num_gt_boxes = anchors.shape[0], ground_truth.shape[0]
# Element x_ij in the i-th row and j-th column is the IoU of the anchor
# box i and the ground-truth bounding box j
jaccard = box_iou(anchors, ground_truth)
# Initialize the tensor to hold the assigned ground-truth bounding box for
# each anchor
anchors_bbox_map = torch.full((num_anchors,), -1, dtype=torch.long,
device=device)
max_ious, indices = torch.max(jaccard, dim=1)
anc_i = torch.nonzero(max_ious >= 0.5).reshape(-1)
box_j = indices[max_ious >= 0.5]
anchors_bbox_map[anc_i] = box_j
col_discard = torch.full((num_anchors,), -1)
row_discard = torch.full((num_gt_boxes,), -1)
for _ in range(num_gt_boxes):
max_idx = torch.argmax(jaccard) # Find the largest IoU
box_idx = (max_idx % num_gt_boxes).long()
anc_idx = (max_idx / num_gt_boxes).long()
anchors_bbox_map[anc_idx] = box_idx
jaccard[:, box_idx] = col_discard
jaccard[anc_idx, :] = row_discard
return anchors_bbox_map
def box_iou(boxes1, boxes2):
"""Compute pairwise IoU across two lists of anchor or bounding boxes."""
box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) *
(boxes[:, 3] - boxes[:, 1]))
# Shape of `boxes1`, `boxes2`, `areas1`, `areas2`: (no. of boxes1, 4),
# (no. of boxes2, 4), (no. of boxes1,), (no. of boxes2,)
areas1 = box_area(boxes1)
print(areas1)
areas2 = box_area(boxes2)
# Shape of `inter_upperlefts`, `inter_lowerrights`, `inters`: (no. of
# boxes1, no. of boxes2, 2)
inter_upperlefts = torch.max(boxes1[:, None, :2], boxes2[:, :2])
inter_lowerrights = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
inters = (inter_lowerrights - inter_upperlefts).clamp(min=0)
# Shape of `inter_areas` and `union_areas`: (no. of boxes1, no. of boxes2)
print()
print("areas2.shape", areas2.shape)
print("inters",inters.shape)
print()
inter_areas = inters[:, :, 0] * inters[:, :, 1]
union_areas = areas1[:, None] + areas2 - inter_areas
return inter_areas / union_areas
def multibox_prior(data, sizes, ratios):
"""Generate anchor boxes with different shapes centered on each pixel."""
in_height, in_width = data.shape[-2:]
device, num_sizes, num_ratios = data.device, len(sizes), len(ratios)
boxes_per_pixel = (num_sizes + num_ratios - 1)
size_tensor = torch.tensor(sizes, device=device)
ratio_tensor = torch.tensor(ratios, device=device)
# Offsets are required to move the anchor to the center of a pixel. Since
# a pixel has height=1 and width=1, we choose to offset our centers by 0.5
offset_h, offset_w = 0.5, 0.5
steps_h = 1.0 / in_height # Scaled steps in y axis
steps_w = 1.0 / in_width # Scaled steps in x axis
# Generate all center points for the anchor boxes
center_h = (torch.arange(in_height, device=device) + offset_h) * steps_h
center_w = (torch.arange(in_width, device=device) + offset_w) * steps_w
shift_y, shift_x = torch.meshgrid(center_h, center_w)
shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)
# Generate `boxes_per_pixel` number of heights and widths that are later
# used to create anchor box corner coordinates (xmin, xmax, ymin, ymax)
w = torch.cat((size_tensor * torch.sqrt(ratio_tensor[0]),
sizes[0] * torch.sqrt(ratio_tensor[1:])))\
* in_height / in_width # Handle rectangular inputs
h = torch.cat((size_tensor / torch.sqrt(ratio_tensor[0]),
sizes[0] / torch.sqrt(ratio_tensor[1:])))
# Divide by 2 to get half height and half width
anchor_manipulations = torch.stack((-w, -h, w, h)).T.repeat(
in_height * in_width, 1) / 2
# Each center point will have `boxes_per_pixel` number of anchor boxes, so
# generate a grid of all anchor box centers with `boxes_per_pixel` repeats
out_grid = torch.stack([shift_x, shift_y, shift_x, shift_y],
dim=1).repeat_interleave(boxes_per_pixel, dim=0)
output = out_grid + anchor_manipulations
return output.unsqueeze(0)
def load_data(class_names_label, size):
datasets = ['Train/Train', 'Test/Test', 'Val/Val']
output=[]
for dataset in datasets:
imags = []
boxes_all=[]
label_boxes=[]
directoryA = "PedestrianDetection/" + dataset +"/Annotations/"
directoryIMG = "PedestrianDetection/" + dataset +"/JPEGImages/"
file = os.listdir(directoryA)
img = os.listdir(directoryIMG)
file.sort()
img.sort()
i = 0
for xml in file:
xmlf = os.path.join(directoryA,xml)
root = ElementTree.parse(xmlf)
vb = root.findall('object')
s=root.findall('size')
w=float(s[0].find('width').text)
h=float(s[0].find('height').text)
#boxes
boxes_img=[]
idx=0
label_boxes_img=[]
for anz in vb:
#label from box in image
label_box=vb[idx].find('name').text
label_boxes_img.append(class_names_label[label_box])
#box
single_box=[]
xmin=float(vb[idx].find('bndbox/xmin').text)
ymin=float(vb[idx].find('bndbox/ymin').text)
xmax=float(vb[idx].find('bndbox/xmax').text)
ymax=float(vb[idx].find('bndbox/ymax').text)
#normalize box
if h>0 and w>0:
x_min=xmin*(size[1]/w)
y_min=ymin*(size[0]/h)
x_max=xmax*(size[1]/w)
y_max=ymax*(size[0]/h)
single_box=[(x_min),(y_min),(x_max), (y_max)]
boxes_img.append(single_box)
idx+=1
boxes_all.append(boxes_img)
label_boxes.append(label_boxes_img)
#image
img_path = directoryIMG + img[i]
curr_img = cv2.imread(img_path)
curr_img = cv2.resize(curr_img, size)
imags.append(curr_img)
i +=1
imags = np.array(imags, dtype='float32')
imags = imags / 255
output.append((imags, boxes_all, label_boxes))
return output
def plot_img_withRectangle(i,boxes,images,box_labels,img_size):
plt.figure(1)
plt.title(box_labels[i])
images=torch.permute(images,(0,2,3,1)) #change back to: 59,300,300,3
images=images.numpy() #change back to numpy array
img_cv=cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB)
plt.imshow(img_cv)
line_width=1.5
xmin_n_img=[]
ymin_n_img=[]
xmax_n_img=[]
ymax_n_img=[]
width_n_img=[]
height_n_img=[]
ytop_img=[]
zz=0
for anz in range(len(boxes[i])):
xmin_n=(boxes[i][zz][0]) #[image][single box][idx]
ymin_n=(boxes[i][zz][1])
xmax_n=(boxes[i][zz][2])
ymax_n=(boxes[i][zz][3])
width_n=(xmax_n-xmin_n)
height_n=(ymax_n-ymin_n)
ytop=ymin_n
if (height_n+ytop) > (img_size[0]-line_width):
height_n=height_n-line_width
if (width_n+xmin_n) > (img_size[1]-line_width):
width_n=width_n-line_width
xmin_n_img.append(xmin_n)
ymin_n_img.append(ymin_n)
xmax_n_img.append(xmax_n)
ymax_n_img.append(ymax_n)
width_n_img.append(width_n)
height_n_img.append(height_n)
ytop_img.append(ytop)
#box_label
if box_labels[i][zz]==2: #person
plt.text(xmin_n,ytop, 'person', color='yellow')
elif box_labels[i][zz]==1: #person-like
plt.text(xmin_n,ytop, 'person-like', color='blue')
zz+=1
z=0
for anz in range(len(boxes[i])):
if box_labels[i][z]==2:
plt.gca().add_patch(Rectangle(
(xmin_n_img[z],ytop_img[z]),width_n_img[z],height_n_img[z],
edgecolor='yellow',
facecolor='none',
lw=line_width
))
elif box_labels[i][z]==1:
plt.gca().add_patch(Rectangle(
(xmin_n_img[z],ytop_img[z]),width_n_img[z],height_n_img[z],
edgecolor='blue',
facecolor='none',
lw=line_width
))
z+=1
if __name__=="__main__":
main()
Thanks for your help