Dear Community,
I am currently working on some data augmentation for a yolov4 framework. I have found the albumentation
library terrible and decided to go the opencv
route and to them myself.
While I was able to add a couple of augmentations in opencv
with ease, I am currently stuck with the the flipping aspect of the augmentation. I tried adjusting the x midpoint, using equations I found in other forums, changing the left, right, bottom, top corner points, among others, all without success. I would be happy to hear from you and find a solution together.
Please let me know and find the code for copy/paste purpose and the test image attached:
import cv2 as cv
import numpy as np
import random
bounding_box_data = [
[0, 0.456492, 0.769865, 0.059234, 0.217604],
[0, 0.519500, 0.748167, 0.033625, 0.143083],
[0, 0.791195, 0.789125, 0.085766, 0.206292],
[0, 0.903539, 0.702250, 0.023203, 0.074917],
[0, 0.410375, 0.745729, 0.018781, 0.077542],
[0, 0.650742, 0.741333, 0.025078, 0.028708],
[33, 0.228477, 0.214917, 0.146422, 0.196375],
[33, 0.065187, 0.227896, 0.073000, 0.099833],
[33, 0.897586, 0.053021, 0.032078, 0.014250],
[33, 0.159336, 0.919021, 0.184859, 0.138500],
[33, 0.807406, 0.013177, 0.064594, 0.026354],
[0, 0.832289, 0.695281, 0.017266, 0.033729],
[0, 0.919578, 0.701260, 0.022750, 0.071854],
[0, 0.950719, 0.747521, 0.038719, 0.154250],
[0, 0.815211, 0.703896, 0.014047, 0.055583],
[0, 0.006359, 0.786427, 0.012719, 0.134812],
[0, 0.390805, 0.750187, 0.023297, 0.074542],
[0, 0.831250, 0.723958, 0.031250, 0.039583]
]
def draw_bounding_box(image_path, bounding_boxes, test = False):
"""
Input: image and bounding boxes (as list).
Output: Image with drawn bounding boxes.
"""
image = cv.imread(image_path)
image = np.ascontiguousarray(image, dtype = np.uint8)
image = cv.flip(image, 1)
cmap = [
[147, 69, 52],
[29, 178, 255],
[200, 149, 255],
[151, 157, 255],
[255, 115, 100],
[134, 219, 61],
[199, 55, 255],
[49, 210, 207],
[187, 212, 0],
[52, 147, 26],
[236, 24, 0],
[168, 153, 44],
[56, 56, 255],
[255, 194, 0],
[255, 56, 132],
[133, 0, 82],
[255, 56, 203],
[31, 112, 255],
[23, 204, 146]
]
class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
# Calculate the number of classes
nclasses = len(class_names)
# Create a list of colors by mapping the class index to the cmap list
colors = [cmap[i % len(cmap)] for i in range(nclasses)]
# Extract transform_vals
for i in range(len(bounding_boxes)):
if test == True:
height, width = image.shape[:2]
class_pred = int(bounding_boxes[i][0])
certainty = bounding_boxes[i][1]
bounding_box = bounding_boxes[i][1:]
# Extract x midpoint, y midpoint, w width and h height
x = bounding_box[0]
y = bounding_box[1]
w = bounding_box[2]
h = bounding_box[3]
l = int((x - w / 2) * width)
r = int((x + w / 2) * width)
t = int((y - h / 2) * height)
b = int((y + h / 2) * height)
if l < 0:
l = 0
if r > width - 1:
r = width - 1
if t < 0:
t = 0
if b > height - 1:
b = height - 1
image = cv.rectangle(image, (l, t), (int(r), int(b)), colors[class_pred], 3)
(txt_width, txt_height), _ = cv.getTextSize(class_names[class_pred], cv.FONT_HERSHEY_TRIPLEX, 0.6, 2)
if t < 20:
image = cv.rectangle(image, (l-2, t + 15), (l + txt_width, t), colors[class_pred], -1)
image = cv.putText(image, class_names[class_pred], (l, t+12),
cv.FONT_HERSHEY_TRIPLEX, 0.5, [255, 255, 255], 1)
else:
image = cv.rectangle(image, (l-2, t - 15), (l + txt_width, t), colors[class_pred], -1)
image = cv.putText(image, class_names[class_pred], (l, t-3),
cv.FONT_HERSHEY_TRIPLEX, 0.5, [255, 255, 255], 1)
return image
res_img = draw_bounding_box(image_path='data/coco/images/train2017_000000125693.jpg', bounding_boxes = bounding_box_data, test = True)
# Check if the image was loaded successfully
if res_img is not None:
# Display the image in a window
cv.imshow('Image', res_img)
# Wait for a key press and then close the window
cv.waitKey(0)
cv.destroyAllWindows()
else:
print('Failed to load the image.')