5
5
# LICENSE file in the root directory of this source tree.
6
6
7
7
"""
8
- ========================================================================
9
- Decoding with custom_frame_mappings: Performance and accuracy comparison
10
- ========================================================================
8
+ ====================================
9
+ Decoding with custom frame mappings
10
+ ====================================
11
11
12
12
In this example, we will describe the ``custom_frame_mappings`` parameter of the
13
13
:class:`~torchcodec.decoders.VideoDecoder` class.
14
-
15
14
This parameter allows you to provide pre-computed frame mapping information to
16
15
speed up :class:`~torchcodec.decoders.VideoDecoder` instantiation, while
17
16
maintaining the frame seeking accuracy of ``seek_mode="exact"``.
18
17
19
18
This makes it ideal for workflows where:
20
- 1. accuracy is critical, so ``seek_mode="approximate"`` cannot be used
21
- 2. the videos can be preprocessed once and then decoded many times.
19
+
20
+ 1. Frame accuracy is critical, so :doc:`approximate mode <approximate_mode>` cannot be used
21
+ 2. Videos can be preprocessed once and then decoded many times
22
22
"""
23
23
24
24
# %%
25
- # First, let's set up our test videos: we'll download a short video and
26
- # use ffmpeg to create a longer version by repeating it multiple times.
25
+ # First, some boilerplate: we'll download a short video from the web, and
26
+ # use ffmpeg to create a longer version by repeating it multiple times. We'll end up
27
+ # with two videos: a short one of approximately 3 minutes and a long one of about 13 minutes.
28
+ # You can skip that part and jump straight to :ref:`frame_mappings_creation`.
27
29
28
30
import tempfile
29
31
from pathlib import Path
62
64
# -------------------------------------------
63
65
#
64
66
# The key to using custom frame mappings is preprocessing your videos to extract
65
- # frame timing information, and whether or not a frame is a keyframe information.
66
- # We use ffprobe to generate JSON files containing this metadata.
67
+ # frame timing information and keyframe indicators. We use ffprobe to generate
68
+ # JSON files containing this metadata.
67
69
68
70
from pathlib import Path
69
71
import subprocess
75
77
long_json_path = Path (temp_dir ) / "long_custom_frame_mappings.json"
76
78
short_json_path = Path (temp_dir ) / "short_custom_frame_mappings.json"
77
79
78
- ffprobe_cmd = ["ffprobe" , "-i" , f"{ long_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pts,duration ,key_frame" , "-of" , "json" ]
80
+ ffprobe_cmd = ["ffprobe" , "-i" , f"{ long_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration ,key_frame" , "-of" , "json" ]
79
81
ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
80
82
with open (long_json_path , "w" ) as f :
81
83
f .write (ffprobe_result .stdout )
82
84
print (f"Wrote { len (ffprobe_result .stdout )} characters to { long_json_path } " )
83
85
84
- ffprobe_cmd = ["ffprobe" , "-i" , f"{ short_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pts,duration ,key_frame" , "-of" , "json" ]
86
+ ffprobe_cmd = ["ffprobe" , "-i" , f"{ short_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration ,key_frame" , "-of" , "json" ]
85
87
ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
86
88
with open (short_json_path , "w" ) as f :
87
89
f .write (ffprobe_result .stdout )
90
92
# %%
91
93
# .. _perf_creation:
92
94
#
93
- # Performance: ``VideoDecoder`` creation with custom frame mappings
94
- # -----------------------------------------------------------------
95
+ # Performance: ``VideoDecoder`` creation
96
+ # --------------------------------------
95
97
#
96
- # Let's define a benchmarking function to measure performance. Note that when using
97
- # file-like objects for custom_frame_mappings, we need to seek back to the beginning
98
- # between iterations since the JSON data is consumed during VideoDecoder creation.
98
+ # In terms of performance, custom frame mappings ultimately affect the
99
+ # **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
100
+ # longer the video, the higher the performance gain.
101
+ # Let's define a benchmarking function to measure performance.
102
+ # Note that when using file-like objects for ``custom_frame_mappings``, we need to
103
+ # seek back to the beginning between iterations since the JSON data is consumed
104
+ # during VideoDecoder creation.
99
105
100
106
import torch
101
107
@@ -141,9 +147,10 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
141
147
# Performance: Frame decoding with custom frame mappings
142
148
# ------------------------------------------------------
143
149
#
144
- # The performance benefits extend to frame decoding operations as well, since
145
- # each decoding workflow typically involves creating a VideoDecoder instance.
146
- # Let's compare frame decoding performance between the two approaches.
150
+ # Although the ``custom_frame_mappings`` parameter only affects the performance of
151
+ # the :class:`~torchcodec.decoders.VideoDecoder` creation, decoding workflows
152
+ # typically involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance.
153
+ # As a result, the benefits of ``custom_frame_mappings`` also show up in end-to-end decoding benchmarks.
147
154
148
155
149
156
def decode_frames (video_path , seek_mode = "exact" , custom_frame_mappings = None ):
@@ -165,25 +172,22 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
165
172
bench (decode_frames , video_path = video_path , seek_mode = "exact" )
166
173
167
174
# %%
168
- # Accuracy: High accuracy frame seeking with custom frame mappings
169
- # ----------------------------------------------------------------
175
+ # Accuracy: Metadata and frame retrieval
176
+ # --------------------------------------
170
177
#
171
- # The main advantage of using custom frame mappings over approximate mode is that
172
- # frame seeking accuracy is as high as exact mode.
173
-
174
- video_path = long_video_path
175
- json_path = long_json_path
176
- with open (json_path , "r" ) as f :
177
- custom_frame_mappings_decoder = VideoDecoder (
178
- source = video_path ,
179
- custom_frame_mappings = f ,
180
- stream_index = 0
181
- )
182
-
183
- exact_decoder = VideoDecoder (video_path , seek_mode = "exact" , stream_index = 0 )
184
- approx_decoder = VideoDecoder (video_path , seek_mode = "approximate" , stream_index = 0 )
185
-
186
- print ("Comparing frames between exact seek mode decoder and custom_frame_mappings decoder:" )
178
+ # We've seen that using custom frame mappings can significantly speed up
179
+ # the :class:`~torchcodec.decoders.VideoDecoder` creation. Just as importantly,
180
+ # seeking is still as accurate as with ``seek_mode="exact"``.
181
+
182
+ print ("Metadata of short video with custom_frame_mappings:" )
183
+ with open (short_json_path , "r" ) as f :
184
+ print (VideoDecoder (short_video_path , custom_frame_mappings = f ).metadata )
185
+ print ("Metadata of short video with seek_mode='exact':" )
186
+ print (VideoDecoder (short_video_path , seek_mode = "exact" ).metadata )
187
+
188
+ with open (short_json_path , "r" ) as f :
189
+ custom_frame_mappings_decoder = VideoDecoder (short_video_path , custom_frame_mappings = f )
190
+ exact_decoder = VideoDecoder (short_video_path , seek_mode = "exact" )
187
191
for i in range (len (exact_decoder )):
188
192
torch .testing .assert_close (
189
193
exact_decoder .get_frame_at (i ).data ,
@@ -203,10 +207,11 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
203
207
# as a JSON, it eliminates the need for the expensive scan while preserving all the
204
208
# accuracy benefits.
205
209
#
206
- # Which approach should I use?
207
- # ----------------------------
210
+ # Which mode should I use?
211
+ # ------------------------
208
212
#
209
- # - For fastest decoding, "approximate" mode is strongly recommended.
213
+ # - For fastest decoding when speed is more important than exact seeking accuracy,
214
+ # "approximate" mode is recommended.
210
215
#
211
216
# - For exact frame seeking, custom frame mappings will benefit workflows where the
212
217
# same videos are decoded repeatedly, and some preprocessing work can be done.
0 commit comments