Skip to content

Commit b97f705

Browse files
author
Daniel Flores
committed
update language, add metadata prints
1 parent 025af84 commit b97f705

File tree

1 file changed

+46
-41
lines changed

1 file changed

+46
-41
lines changed

examples/decoding/custom_frame_mappings.py

Lines changed: 46 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,27 @@
55
# LICENSE file in the root directory of this source tree.
66

77
"""
8-
========================================================================
9-
Decoding with custom_frame_mappings: Performance and accuracy comparison
10-
========================================================================
8+
====================================
9+
Decoding with custom frame mappings
10+
====================================
1111
1212
In this example, we will describe the ``custom_frame_mappings`` parameter of the
1313
:class:`~torchcodec.decoders.VideoDecoder` class.
14-
1514
This parameter allows you to provide pre-computed frame mapping information to
1615
speed up :class:`~torchcodec.decoders.VideoDecoder` instantiation, while
1716
maintaining the frame seeking accuracy of ``seek_mode="exact"``.
1817
1918
This makes it ideal for workflows where:
20-
1. accuracy is critical, so ``seek_mode="approximate"`` cannot be used
21-
2. the videos can be preprocessed once and then decoded many times.
19+
20+
1. Frame accuracy is critical, so :doc:`approximate mode <approximate_mode>` cannot be used
21+
2. Videos can be preprocessed once and then decoded many times
2222
"""
2323

2424
# %%
25-
# First, let's set up our test videos: we'll download a short video and
26-
# use ffmpeg to create a longer version by repeating it multiple times.
25+
# First, some boilerplate: we'll download a short video from the web, and
26+
# use ffmpeg to create a longer version by repeating it multiple times. We'll end up
27+
# with two videos: a short one of approximately 3 minutes and a long one of about 13 minutes.
28+
# You can skip this part and jump straight to :ref:`frame_mappings_creation`.
2729

2830
import tempfile
2931
from pathlib import Path
@@ -62,8 +64,8 @@
6264
# -------------------------------------------
6365
#
6466
# The key to using custom frame mappings is preprocessing your videos to extract
65-
# frame timing information, and whether or not a frame is a keyframe information.
66-
# We use ffprobe to generate JSON files containing this metadata.
67+
# frame timing information and keyframe indicators. We use ffprobe to generate
68+
# JSON files containing this metadata.
6769

6870
from pathlib import Path
6971
import subprocess
@@ -75,13 +77,13 @@
7577
long_json_path = Path(temp_dir) / "long_custom_frame_mappings.json"
7678
short_json_path = Path(temp_dir) / "short_custom_frame_mappings.json"
7779

78-
ffprobe_cmd = ["ffprobe", "-i", f"{long_video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pts,duration,key_frame", "-of", "json"]
80+
ffprobe_cmd = ["ffprobe", "-i", f"{long_video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pkt_pts,pkt_duration,key_frame", "-of", "json"]
7981
ffprobe_result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
8082
with open(long_json_path, "w") as f:
8183
f.write(ffprobe_result.stdout)
8284
print(f"Wrote {len(ffprobe_result.stdout)} characters to {long_json_path}")
8385

84-
ffprobe_cmd = ["ffprobe", "-i", f"{short_video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pts,duration,key_frame", "-of", "json"]
86+
ffprobe_cmd = ["ffprobe", "-i", f"{short_video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pkt_pts,pkt_duration,key_frame", "-of", "json"]
8587
ffprobe_result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
8688
with open(short_json_path, "w") as f:
8789
f.write(ffprobe_result.stdout)
@@ -90,12 +92,16 @@
9092
# %%
9193
# .. _perf_creation:
9294
#
93-
# Performance: ``VideoDecoder`` creation with custom frame mappings
94-
# -----------------------------------------------------------------
95+
# Performance: ``VideoDecoder`` creation
96+
# --------------------------------------
9597
#
96-
# Let's define a benchmarking function to measure performance. Note that when using
97-
# file-like objects for custom_frame_mappings, we need to seek back to the beginning
98-
# between iterations since the JSON data is consumed during VideoDecoder creation.
98+
# In terms of performance, custom frame mappings ultimately affect the
99+
# **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
100+
# longer the video, the higher the performance gain.
101+
# Let's define a benchmarking function to measure performance.
102+
# Note that when using file-like objects for ``custom_frame_mappings``, we need to
103+
# seek back to the beginning between iterations since the JSON data is consumed
104+
# during VideoDecoder creation.
99105

100106
import torch
101107

@@ -141,9 +147,10 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
141147
# Performance: Frame decoding with custom frame mappings
142148
# ------------------------------------------------------
143149
#
144-
# The performance benefits extend to frame decoding operations as well, since
145-
# each decoding workflow typically involves creating a VideoDecoder instance.
146-
# Let's compare frame decoding performance between the two approaches.
150+
# Although the ``custom_frame_mappings`` parameter only affects the performance of
151+
# the :class:`~torchcodec.decoders.VideoDecoder` creation, decoding workflows
152+
# typically involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance.
153+
# As a result, the performance benefits of ``custom_frame_mappings`` also show up in end-to-end decoding workflows.
147154

148155

149156
def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None):
@@ -165,25 +172,22 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
165172
bench(decode_frames, video_path=video_path, seek_mode="exact")
166173

167174
# %%
168-
# Accuracy: High accuracy frame seeking with custom frame mappings
169-
# ----------------------------------------------------------------
175+
# Accuracy: Metadata and frame retrieval
176+
# --------------------------------------
170177
#
171-
# The main advantage of using custom frame mappings over approximate mode is that
172-
# frame seeking accuracy is as high as exact mode.
173-
174-
video_path = long_video_path
175-
json_path = long_json_path
176-
with open(json_path, "r") as f:
177-
custom_frame_mappings_decoder = VideoDecoder(
178-
source=video_path,
179-
custom_frame_mappings=f,
180-
stream_index=0
181-
)
182-
183-
exact_decoder = VideoDecoder(video_path, seek_mode="exact", stream_index=0)
184-
approx_decoder = VideoDecoder(video_path, seek_mode="approximate", stream_index=0)
185-
186-
print("Comparing frames between exact seek mode decoder and custom_frame_mappings decoder:")
178+
# We've seen that using custom frame mappings can significantly speed up
179+
# the :class:`~torchcodec.decoders.VideoDecoder` creation. The advantage is that
180+
# seeking is still as accurate as with ``seek_mode="exact"``.
181+
182+
print("Metadata of short video with custom_frame_mappings:")
183+
with open(short_json_path, "r") as f:
184+
print(VideoDecoder(short_video_path, custom_frame_mappings=f).metadata)
185+
print("Metadata of short video with seek_mode='exact':")
186+
print(VideoDecoder(short_video_path, seek_mode="exact").metadata)
187+
188+
with open(short_json_path, "r") as f:
189+
custom_frame_mappings_decoder = VideoDecoder(short_video_path, custom_frame_mappings=f)
190+
exact_decoder = VideoDecoder(short_video_path, seek_mode="exact")
187191
for i in range(len(exact_decoder)):
188192
torch.testing.assert_close(
189193
exact_decoder.get_frame_at(i).data,
@@ -203,10 +207,11 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
203207
# as a JSON, it eliminates the need for the expensive scan while preserving all the
204208
# accuracy benefits.
205209
#
206-
# Which approach should I use?
207-
# ----------------------------
210+
# Which mode should I use?
211+
# ------------------------
208212
#
209-
# - For fastest decoding, "approximate" mode is strongly recommended.
213+
# - For fastest decoding when speed is more important than exact seeking accuracy,
214+
# "approximate" mode is recommended.
210215
#
211216
# - For exact frame seeking, custom frame mappings will benefit workflows where the
212217
# same videos are decoded repeatedly, and some preprocessing work can be done.

0 commit comments

Comments
 (0)