If you don't specify pix_fmts in filter_desc, each img has shape (3, h, w), and it turns out to be in
yuv444p format.
Run the following code to display a yuv444p image using OpenCV.
def yuv444_to_bgr(frames):
    """Convert planar YUV444 frames to channel-last BGR for OpenCV display.

    Args:
        frames: Tensor whose third-from-last dimension holds the Y, U and V
            planes (in that order) on a 0-255 scale, i.e. (..., 3, H, W).
            Any leading dimensions are preserved. The input is not modified.

    Returns:
        A uint8 NumPy array of shape (..., H, W, 3) with channels ordered
        B, G, R.
    """
    frames = frames.cpu().to(torch.float)

    # Scale out of place: `.cpu().to(torch.float)` hands back the very same
    # tensor when the input is already a float32 CPU tensor, so an in-place
    # `y /= 255` on this view would silently modify the caller's data.
    y = frames[..., 0, :, :] / 255
    u = frames[..., 1, :, :] / 255 - 0.5
    v = frames[..., 2, :, :] / 255 - 0.5

    # NOTE(review): these coefficients look like the analog BT.601 YUV ones,
    # while u/v here are scaled to +/-0.5 -- confirm the colors against a
    # reference conversion before relying on them.
    r = y + 1.14 * v
    g = y + -0.396 * u - 0.581 * v
    b = y + 2.029 * u

    # OpenCV expects channel-last BGR, so stack in B, G, R order on a new
    # trailing axis.
    bgr = torch.stack([b, g, r], -1)
    bgr = (bgr * 255).clamp(0, 255).to(torch.uint8)
    return bgr.numpy()
def show_ffmpeg_frame(vid_path):
    """Decode `vid_path` with StreamReader and display each frame via OpenCV.

    The filter graph only rescales the video; no output pixel format is
    requested. Every decoded frame is passed through `yuv444_to_bgr` before
    being shown, and `cv.waitKey(0)` is called after each frame.
    """
    width, height = 640, 360
    mode = "bilinear"
    threads = 8

    # no output format specified !!!!!!
    scale_filter = f"scale={width}:{height}:sws_flags={mode}"
    decoder_opts = {"threads": f"{threads}"}

    reader = StreamReader(vid_path)
    reader.add_video_stream(
        -1,
        filter_desc=scale_filter,
        decoder_option=decoder_opts,
    )

    for (batch,) in reader.stream():
        for frame in batch:
            cv.imshow("a", yuv444_to_bgr(frame))
            cv.waitKey(0)
If you specify the bgr24 format in filter_desc, the img will be in bgr24 format, but its shape is still
(3, h, w), whereas a shape of (h, w, 3) is what we expect for display.
In the following code we just call cv.imshow to display the BGR image, so you need to transpose (or
permute) it first.
def show_ffmpeg_frame(vid_path):
    """Decode `vid_path` as bgr24 with StreamReader and display each frame.

    The filter graph rescales the video and requests the bgr24 pixel format.
    Each frame is converted to NumPy and its axes are reordered with
    `transpose(1, 2, 0)` before being handed to `cv.imshow`; `cv.waitKey(0)`
    is called after each frame.
    """
    width, height = 640, 360
    mode = "bilinear"
    threads = 8

    bgr_filter = f"scale={width}:{height}:sws_flags={mode},format=pix_fmts=bgr24"
    decoder_opts = {"threads": f"{threads}"}

    reader = StreamReader(vid_path)
    reader.add_video_stream(
        -1,
        filter_desc=bgr_filter,
        decoder_option=decoder_opts,
    )

    for (batch,) in reader.stream():
        for frame in batch:
            channel_first = frame.numpy()
            # Move the leading axis to the end so the array is channel-last.
            channel_last = channel_first.transpose(1, 2, 0)
            cv.imshow("a", channel_last)
            cv.waitKey(0)
And when I specified yuv420p as the output format, the frames still ended up as yuv444p; the filter_desc setting didn't take effect.
def show_ffmpeg_frame(vid_path):
    """Decode `vid_path` requesting yuv420p and display each frame via OpenCV.

    yuv420p is requested both in the filter graph (`format=pix_fmts=yuv420p`)
    and through the `pixel_format` decoder option. Every decoded frame is
    still passed through `yuv444_to_bgr` before being shown, and
    `cv.waitKey(0)` is called after each frame.
    """
    width, height = 640, 360
    mode = "bilinear"
    threads = 8

    yuv420_filter = f"scale={width}:{height}:sws_flags={mode},format=pix_fmts=yuv420p"
    decoder_opts = {
        "threads": f"{threads}",
        "pixel_format": "yuv420p",
    }

    reader = StreamReader(vid_path)
    reader.add_video_stream(
        -1,
        filter_desc=yuv420_filter,
        decoder_option=decoder_opts,
    )

    for (batch,) in reader.stream():
        for frame in batch:
            cv.imshow("a", yuv444_to_bgr(frame))
            cv.waitKey(0)
However, ffmpeg itself supports the yuv420p format, and PyAV can output yuv420p frames using av.filter.Graph.