Hey,
I want to predict multiple objects in images and also their bounding boxes.
So at first I load the data from xml file and because of the different numbers of values for labels and boxes in each image I use an own collate-function.
The problem is that I get these error message: ValueError: too many values to unpack (expected 3).
And here the full messages:
File "...", line 169, in <module> main() File "...", line 46, in main for batch, data in enumerate(train_data): File "...PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\utils\data\dataloader.py", line 521, in __next__ data = self._next_data() File "...PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data data = self._dataset_fetcher.fetch(index) # may raise StopIteration File "...PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch return self.collate_fn(data) File "...", line 12, in my_collate img,b,labels = zip(*batch)
I am just a beginner in deep learning, so it would be great if someone could help.
Here is some of my written code:
# libraries
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import torch
from xml.etree import ElementTree
import config
def my_collate(batch):
img,b,labels = zip(*batch)
img = torch.LongTensor([config.ws.transform(i,max_len=config.max_len) for i in img])
labels = torch.LongTensor(labels)
b = torch.LongTensor(b)
return img, b,labels
def main():
print("load data ...")
class_names = ['person','person-like']
class_names_label = {class_name:i for i, class_name in enumerate(class_names)}
size = (256,256)
batch_size=32
(train_images, b_tr, b_label_tr),(test_images,b_te,b_label_te),(val_images, b_v,b_label_v) = load_data(class_names_label, size)
train_dataset=(train_images, b_tr, b_label_tr)
test_dataset=(test_images, b_te,b_label_te)
val_dataset=(val_images, b_v, b_label_v)
train_data=torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True, collate_fn=my_collate)
test_data=torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=True,collate_fn=my_collate)
val_data=torch.utils.data.DataLoader(val_dataset, batch_size, shuffle=True,collate_fn=my_collate)
#show example images
i=np.random.randint(0,train_images.shape[0]-1)
#print("i: ", i, " , ", b_tr[i])
#print(b_label_tr[i])
plot_img_withRectangle(i=i, boxes=b_tr,images=train_images,box_labels=b_label_tr,img_size=size)
#plt.show()
plt.close()
#train: train_data
lr=0.2
num_epochs=2
for epoch in range(num_epochs):
print()
for batch, data in enumerate(train_data):
i=0 #irrelevant value
#functions
def load_data(class_names_label, size):
datasets = ['Train/Train', 'Test/Test', 'Val/Val']
output=[]
for dataset in datasets:
imags = []
boxes_all=[]
label_boxes=[]
directoryA = "PedestrianDetection/" + dataset +"/Annotations/"
directoryIMG = "PedestrianDetection/" + dataset +"/JPEGImages/"
file = os.listdir(directoryA)
img = os.listdir(directoryIMG)
file.sort()
img.sort()
i = 0
for xml in file:
xmlf = os.path.join(directoryA,xml)
root = ElementTree.parse(xmlf)
vb = root.findall('object')
s=root.findall('size')
w=float(s[0].find('width').text)
h=float(s[0].find('height').text)
#boxes
boxes_img=[]
idx=0
label_boxes_img=[]
for anz in vb:
#label from box in image
label_box=vb[idx].find('name').text
label_boxes_img.append(class_names_label[label_box])
#box
single_box=[]
xmin=float(vb[idx].find('bndbox/xmin').text)
ymin=float(vb[idx].find('bndbox/ymin').text)
xmax=float(vb[idx].find('bndbox/xmax').text)
ymax=float(vb[idx].find('bndbox/ymax').text)
#normalize box
if h>0 and w>0:
x_min=xmin*(size[1]/w)
y_min=ymin*(size[0]/h)
x_max=xmax*(size[1]/w)
y_max=ymax*(size[0]/h)
single_box=[(x_min), (y_min), (x_max), (y_max)]
boxes_img.append(single_box)
idx+=1
boxes_all.append(boxes_img)
label_boxes.append(label_boxes_img)
#image
img_path = directoryIMG + img[i]
curr_img = cv2.imread(img_path)
curr_img = cv2.resize(curr_img, size)
imags.append(curr_img)
i +=1
imags = np.array(imags, dtype='float32')
imags = imags / 255
output.append((imags, boxes_all, label_boxes))
return output
def plot_img_withRectangle(i,boxes,images,box_labels,img_size):
plt.figure(1)
plt.title(box_labels[i])
img_cv=cv2.cvtColor(images[i], cv2.COLOR_BGR2RGB)
plt.imshow(img_cv)
line_width=1.5
xmin_n_img=[]
ymin_n_img=[]
xmax_n_img=[]
ymax_n_img=[]
width_n_img=[]
height_n_img=[]
ytop_img=[]
zz=0
for anz in range(len(boxes[i])):
xmin_n=(boxes[i][zz][0]) #[image][single box][idx]
ymin_n=(boxes[i][zz][1])
xmax_n=(boxes[i][zz][2])
ymax_n=(boxes[i][zz][3])
width_n=(xmax_n-xmin_n)
height_n=(ymax_n-ymin_n)
ytop=ymin_n
if (height_n+ytop) > (img_size[0]-line_width):
height_n=height_n-line_width
if (width_n+xmin_n) > (img_size[1]-line_width):
width_n=width_n-line_width
xmin_n_img.append(xmin_n)
ymin_n_img.append(ymin_n)
xmax_n_img.append(xmax_n)
ymax_n_img.append(ymax_n)
width_n_img.append(width_n)
height_n_img.append(height_n)
ytop_img.append(ytop)
#box_label
if box_labels[i][zz]==0: #person
plt.text(xmin_n,ytop, 'person', color='yellow')
elif box_labels[i][zz]==1: #person-like
plt.text(xmin_n,ytop, 'person-like', color='blue')
zz+=1
z=0
for anz in range(len(boxes[i])):
if box_labels[i][z]==0:
plt.gca().add_patch(Rectangle(
(xmin_n_img[z],ytop_img[z]),width_n_img[z],height_n_img[z],
edgecolor='yellow',
facecolor='none',
lw=line_width
))
elif box_labels[i][z]==1:
plt.gca().add_patch(Rectangle(
(xmin_n_img[z],ytop_img[z]),width_n_img[z],height_n_img[z],
edgecolor='blue',
facecolor='none',
lw=line_width
))
z+=1
if __name__=="__main__":
main()