I obtained the images by crawling Google, and for each image I have the bounding-box coordinates (x_min, y_min, x_max, y_max), which I drew myself, saved in a txt file.
I'm currently trying to create a custom Dataset, but I'm getting an error.
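Each label file holds four whitespace-separated integers per box, one box per line, for example (hypothetical values; the regex in transform_label below only assumes four whitespace-separated integers per box):

34 52 210 180
120 15 280 160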
My code:
import os
import re

import torch
from PIL import Image
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, img_dir, label_dir, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.img_file_list, self.txt_file_list = self.load_file_lists()

    def __len__(self):
        return len(self.img_file_list)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.img_file_list[idx])
        label_name = os.path.join(self.label_dir, self.txt_file_list[idx])
        # Load image and convert to RGB
        image = Image.open(img_name).convert("RGB")
        # Load label
        with open(label_name, 'r') as file:
            label_content = file.read()
        # Convert labels to normalized (xc, yc, w, h) for the 300x300 image
        transformed_label = self.transform_label(label_content, image.size, (300, 300))
        print("trans : ", transformed_label.shape, transformed_label)
        # Apply transformations if specified
        if self.transform:
            image = self.transform(image)
            print(image.shape)
        return image, transformed_label

    def load_file_lists(self):
        img_file_list = []
        txt_file_list = []
        # Sort by the first number in each filename so images pair with their labels
        for filename in sorted(os.listdir(self.img_dir), key=lambda x: int(re.search(r'\d+', x).group())):
            img_file_path = os.path.join(self.img_dir, filename)
            txt_file_path = os.path.join(self.label_dir, filename.replace('.jpg', '.txt'))
            if os.path.isfile(img_file_path) and os.path.isfile(txt_file_path):
                img_file_list.append(os.path.basename(img_file_path))
                txt_file_list.append(os.path.basename(txt_file_path))
        return img_file_list, txt_file_list

    def transform_label(self, label_content, original_img_size, transformed_img_size):
        values = [list(map(int, match.groups()))
                  for match in re.finditer(r'(\d+)\s+(\d+)\s+(\d+)\s+(\d+)', label_content)]
        transformed_labels = []
        for xmin, ymin, xmax, ymax in values:
            # Scale the corner coordinates to the resized image
            xmin = int(xmin * transformed_img_size[0] / original_img_size[0])
            ymin = int(ymin * transformed_img_size[1] / original_img_size[1])
            xmax = int(xmax * transformed_img_size[0] / original_img_size[0])
            ymax = int(ymax * transformed_img_size[1] / original_img_size[1])
            # (xmin, ymin, xmax, ymax) -> (xc, yc, w, h)
            xc = (xmin + xmax) / 2
            yc = (ymin + ymax) / 2
            w = xmax - xmin
            h = ymax - ymin
            # Normalize coordinates to [0, 1]
            xc /= transformed_img_size[0]
            yc /= transformed_img_size[1]
            w /= transformed_img_size[0]
            h /= transformed_img_size[1]
            transformed_labels.append([xc, yc, w, h])
        transformed_labels = torch.tensor(transformed_labels)
        if len(transformed_labels) > 1:
            # Flatten multiple boxes into a single row
            transformed_labels = transformed_labels.view(1, -1)
            print(len(transformed_labels))
        return transformed_labels
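As a sanity check, transform_label does what I expect for a single box. For a hypothetical 600x400 image with one box (60, 40, 300, 200), scaling to (300, 300) gives corners (30, 30, 150, 150), i.e. normalized (xc, yc, w, h) = (0.3, 0.3, 0.4, 0.4):

ds = CustomDataset("images/", "labels/")  # placeholder paths
print(ds.transform_label("60 40 300 200", (600, 400), (300, 300)))
# tensor([[0.3000, 0.3000, 0.4000, 0.4000]])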
An image may have one or multiple bounding boxes, and all images are resized to (300, 300).
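For context, I build the dataset and loader roughly like this (paths, transform, and batch size are placeholders), and the error is raised while iterating the loader:

import torchvision.transforms as T
from torch.utils.data import DataLoader

transform = T.Compose([T.Resize((300, 300)), T.ToTensor()])
dataset = CustomDataset("images/", "labels/", transform=transform)
loader = DataLoader(dataset, batch_size=4, shuffle=True)
for images, labels in loader:  # fails here during collation
    pass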
My guess is that the error occurs while batching the coordinates when a single image has multiple bounding boxes.
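I can reproduce what I think is the same failure in isolation. The default collate_fn stacks the per-sample label tensors with torch.stack, which requires equal shapes, but after my view(1, -1) an image with one box yields a (1, 4) tensor while an image with two boxes yields (1, 8):

import torch
torch.stack([torch.zeros(1, 4), torch.zeros(1, 8)])
# RuntimeError: stack expects each tensor to be equal size,
# but got [1, 4] at entry 0 and [1, 8] at entry 1

If that is the cause, would a custom collate_fn that keeps the labels as a plain list (my own sketch below) be the right way to handle a variable number of boxes per image?

def collate_keep_boxes(batch):
    images, labels = zip(*batch)
    # Images all have the same shape, so they can be stacked;
    # labels stay a list of per-image tensors with varying box counts.
    return torch.stack(images), list(labels)

loader = DataLoader(dataset, batch_size=4, collate_fn=collate_keep_boxes)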