Skip to content

Commit 613a35b

Browse files
authored
Make mp4 parsing faster & lower memory overhead (#7860)
1 parent c23e81e commit 613a35b

File tree

12 files changed

+177
-118
lines changed

12 files changed

+177
-118
lines changed

.github/workflows/reusable_bench.yml

+13-1
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,30 @@ jobs:
7575
workload_identity_provider: ${{ secrets.GOOGLE_WORKLOAD_IDENTITY_PROVIDER }}
7676
service_account: ${{ secrets.GOOGLE_SERVICE_ACCOUNT }}
7777

78+
- uses: prefix-dev/[email protected]
79+
with:
80+
pixi-version: v0.25.0
81+
# Only has the deps for round-trips. Not all examples.
82+
environments: wheel-test-min
83+
84+
- name: Download test assets
85+
run: pixi run -e wheel-test-min python ./tests/assets/download_test_assets.py
86+
7887
- name: Add SHORT_SHA env property with commit short sha
7988
run: echo "SHORT_SHA=`echo ${{github.sha}} | cut -c1-7`" >> $GITHUB_ENV
8089

8190
- name: Run benchmark
8291
# Use bash shell so we get pipefail behavior with tee
92+
# Running under `pixi` so we get `nasm`
8393
run: |
84-
cargo bench \
94+
pixi run -e wheel-test-min \
95+
cargo bench \
8596
--all-features \
8697
-p re_entity_db \
8798
-p re_log_encoding \
8899
-p re_query \
89100
-p re_tuid \
101+
-p re_video \
90102
-- --output-format=bencher | tee /tmp/${{ env.SHORT_SHA }}
91103
92104
- name: "Set up Cloud SDK"

Cargo.lock

+2-2
Original file line numberDiff line numberDiff line change
@@ -5555,8 +5555,7 @@ dependencies = [
55555555
[[package]]
55565556
name = "re_mp4"
55575557
version = "0.1.0"
5558-
source = "registry+https://github.com/rust-lang/crates.io-index"
5559-
checksum = "3d1e30657b1ae7f0dd3428a59dc8140732b74a22cc07763606c9ec4054138731"
5558+
source = "git+https://github.com/rerun-io/re_mp4?rev=7d38361ee5b05f5a2b83a8029057c8a24d2e9023#7d38361ee5b05f5a2b83a8029057c8a24d2e9023"
55605559
dependencies = [
55615560
"byteorder",
55625561
"bytes",
@@ -6182,6 +6181,7 @@ name = "re_video"
61826181
version = "0.20.0-alpha.1+dev"
61836182
dependencies = [
61846183
"cfg_aliases 0.2.1",
6184+
"criterion",
61856185
"crossbeam",
61866186
"econtext",
61876187
"indicatif",

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -565,3 +565,6 @@ missing_errors_doc = "allow"
565565
re_arrow2 = { git = "https://github.com/rerun-io/re_arrow2", rev = "e4717d6debc6d4474ec10db8f629f823f57bad07" }
566566

567567
# dav1d = { path = "/home/cmc/dev/rerun-io/rav1d", package = "re_rav1d", version = "0.1.1" }
568+
569+
# Commit on `main` branch of `re_mp4`
570+
re_mp4 = { git = "https://github.com/rerun-io/re_mp4", rev = "7d38361ee5b05f5a2b83a8029057c8a24d2e9023" }

crates/store/re_video/Cargo.toml

+6-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ dav1d = { workspace = true, optional = true, default-features = false, features
6262

6363
[dev-dependencies]
6464
indicatif.workspace = true
65-
65+
criterion.workspace = true
6666

6767
# For build.rs:
6868
[build-dependencies]
@@ -71,3 +71,8 @@ cfg_aliases.workspace = true
7171

7272
[[example]]
7373
name = "frames"
74+
75+
76+
[[bench]]
77+
name = "video_load_bench"
78+
harness = false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#![allow(clippy::unwrap_used)] // acceptable in benchmarks
2+
3+
use std::path::Path;
4+
5+
use criterion::{criterion_group, criterion_main, Criterion};
6+
7+
fn video_load(c: &mut Criterion) {
8+
let video_path = Path::new(env!("CARGO_MANIFEST_DIR"))
9+
.ancestors()
10+
.nth(3)
11+
.unwrap()
12+
.join("tests/assets/video/Big_Buck_Bunny_1080_10s_av1.mp4");
13+
let video = std::fs::read(video_path).unwrap();
14+
c.bench_function("video_load", |b| {
15+
b.iter_batched(
16+
|| {},
17+
|()| re_video::VideoData::load_from_bytes(&video, "video/mp4"),
18+
criterion::BatchSize::LargeInput,
19+
);
20+
});
21+
}
22+
23+
criterion_group!(benches, video_load);
24+
criterion_main!(benches);

crates/store/re_video/examples/frames.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ fn main() {
2424

2525
println!("Decoding {video_path}");
2626

27-
let video = std::fs::read(video_path).expect("failed to read video");
28-
let video = re_video::VideoData::load_mp4(&video).expect("failed to load video");
27+
let video_blob = std::fs::read(video_path).expect("failed to read video");
28+
let video = re_video::VideoData::load_mp4(&video_blob).expect("failed to load video");
2929

3030
println!(
3131
"{} {}x{}",
@@ -37,11 +37,12 @@ fn main() {
3737
let mut decoder = re_video::decode::new_decoder(video_path.to_string(), &video)
3838
.expect("Failed to create decoder");
3939

40-
write_video_frames(&video, decoder.as_mut(), &output_dir);
40+
write_video_frames(&video, &video_blob, decoder.as_mut(), &output_dir);
4141
}
4242

4343
fn write_video_frames(
4444
video: &re_video::VideoData,
45+
video_blob: &[u8],
4546
decoder: &mut dyn re_video::decode::SyncDecoder,
4647
output_dir: &PathBuf,
4748
) {
@@ -61,7 +62,7 @@ fn write_video_frames(
6162
let start = Instant::now();
6263
for sample in &video.samples {
6364
let should_stop = std::sync::atomic::AtomicBool::new(false);
64-
let chunk = video.get(sample).unwrap();
65+
let chunk = sample.get(video_blob).unwrap();
6566
decoder.submit_chunk(&should_stop, chunk, &on_output);
6667
}
6768

crates/store/re_video/src/demux/mod.rs

+22-24
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,6 @@ pub struct VideoData {
6060
/// and should be presented in composition-timestamp order.
6161
pub samples: Vec<Sample>,
6262

63-
/// This array stores all data used by samples.
64-
pub data: Vec<u8>,
65-
6663
/// All the tracks in the mp4; not just the video track.
6764
///
6865
/// Can be nice to show in a UI.
@@ -245,25 +242,6 @@ impl VideoData {
245242
.sorted()
246243
})
247244
}
248-
249-
/// Returns `None` if the sample is invalid/out-of-range.
250-
pub fn get(&self, sample: &Sample) -> Option<Chunk> {
251-
let byte_offset = sample.byte_offset as usize;
252-
let byte_length = sample.byte_length as usize;
253-
254-
if self.data.len() < byte_offset + byte_length {
255-
None
256-
} else {
257-
let data = &self.data[byte_offset..byte_offset + byte_length];
258-
259-
Some(Chunk {
260-
data: data.to_vec(),
261-
composition_timestamp: sample.composition_timestamp,
262-
duration: sample.duration,
263-
is_sync: sample.is_sync,
264-
})
265-
}
266-
}
267245
}
268246

269247
/// A Group of Pictures (GOP) always starts with an I-frame, followed by delta-frames.
@@ -311,13 +289,34 @@ pub struct Sample {
311289
/// Duration of the sample, in time units.
312290
pub duration: Time,
313291

314-
/// Offset into [`VideoData::data`]
292+
/// Offset into the video data.
315293
pub byte_offset: u32,
316294

317295
/// Length of sample starting at [`Sample::byte_offset`].
318296
pub byte_length: u32,
319297
}
320298

299+
impl Sample {
300+
/// Read the sample from the video data.
301+
///
302+
/// Note that `data` _must_ be a reference to the original MP4 file
303+
/// from which the [`VideoData`] was loaded.
304+
///
305+
/// Returns `None` if the sample is out of bounds, which can only happen
306+
/// if `data` is not the original video data.
307+
pub fn get(&self, data: &[u8]) -> Option<Chunk> {
308+
let data = data
309+
.get(self.byte_offset as usize..(self.byte_offset + self.byte_length) as usize)?
310+
.to_vec();
311+
Some(Chunk {
312+
data,
313+
composition_timestamp: self.composition_timestamp,
314+
duration: self.duration,
315+
is_sync: self.is_sync,
316+
})
317+
}
318+
}
319+
321320
/// Configuration of a video.
322321
#[derive(Debug, Clone)]
323322
pub struct Config {
@@ -385,7 +384,6 @@ impl std::fmt::Debug for VideoData {
385384
"samples",
386385
&self.samples.iter().enumerate().collect::<Vec<_>>(),
387386
)
388-
.field("data", &self.data.len())
389387
.finish()
390388
}
391389
}

crates/store/re_video/src/demux/mp4.rs

-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ impl VideoData {
4141
let mut samples = Vec::<Sample>::new();
4242
let mut gops = Vec::<GroupOfPictures>::new();
4343
let mut gop_sample_start_index = 0;
44-
let data = track.data.clone();
4544

4645
for sample in &track.samples {
4746
if sample.is_sync && !samples.is_empty() {
@@ -86,7 +85,6 @@ impl VideoData {
8685
duration,
8786
gops,
8887
samples,
89-
data,
9088
mp4_tracks,
9189
})
9290
}

crates/viewer/re_data_ui/src/blob.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ pub fn blob_preview_and_save_ui(
129129
ui_layout,
130130
&video_result,
131131
video_timestamp,
132+
blob,
132133
);
133134
}
134135

@@ -175,6 +176,7 @@ fn show_video_blob_info(
175176
ui_layout: UiLayout,
176177
video_result: &Result<re_renderer::video::Video, VideoLoadError>,
177178
video_timestamp: Option<VideoTimestamp>,
179+
blob: &re_types::datatypes::Blob,
178180
) {
179181
#[allow(clippy::match_same_arms)]
180182
match video_result {
@@ -262,7 +264,12 @@ fn show_video_blob_info(
262264
ui.id().with("video_player").value(),
263265
);
264266

265-
match video.frame_at(render_ctx, decode_stream_id, timestamp_in_seconds) {
267+
match video.frame_at(
268+
render_ctx,
269+
decode_stream_id,
270+
timestamp_in_seconds,
271+
blob.as_slice(),
272+
) {
266273
Ok(VideoFrameTexture {
267274
texture,
268275
time_range,

crates/viewer/re_renderer/src/video/decoder/mod.rs

+10-8
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ impl VideoDecoder {
189189
&mut self,
190190
render_ctx: &RenderContext,
191191
presentation_timestamp_s: f64,
192+
video_data: &[u8],
192193
) -> Result<VideoFrameTexture, DecodingError> {
193194
if presentation_timestamp_s < 0.0 {
194195
return Err(DecodingError::NegativeTimestamp);
@@ -197,7 +198,7 @@ impl VideoDecoder {
197198
let presentation_timestamp = presentation_timestamp.min(self.data.duration); // Don't seek past the end of the video.
198199

199200
let error_on_last_frame_at = self.last_error.is_some();
200-
let result = self.frame_at_internal(render_ctx, presentation_timestamp);
201+
let result = self.frame_at_internal(render_ctx, presentation_timestamp, video_data);
201202

202203
match result {
203204
Ok(()) => {
@@ -248,6 +249,7 @@ impl VideoDecoder {
248249
&mut self,
249250
render_ctx: &RenderContext,
250251
presentation_timestamp: Time,
252+
video_data: &[u8],
251253
) -> Result<(), DecodingError> {
252254
re_tracing::profile_function!();
253255

@@ -322,21 +324,21 @@ impl VideoDecoder {
322324
if requested_gop_idx != self.current_gop_idx {
323325
if self.current_gop_idx.saturating_add(1) == requested_gop_idx {
324326
// forward seek to next GOP - queue up the one _after_ requested
325-
self.enqueue_gop(requested_gop_idx + 1)?;
327+
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
326328
} else {
327329
// forward seek by N>1 OR backward seek across GOPs - reset
328330
self.reset()?;
329-
self.enqueue_gop(requested_gop_idx)?;
330-
self.enqueue_gop(requested_gop_idx + 1)?;
331+
self.enqueue_gop(requested_gop_idx, video_data)?;
332+
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
331333
}
332334
} else if requested_sample_idx != self.current_sample_idx {
333335
// special case: handle seeking backwards within a single GOP
334336
// this is super inefficient, but it's the only way to handle it
335337
// while maintaining a buffer of only 2 GOPs
336338
if requested_sample_idx < self.current_sample_idx {
337339
self.reset()?;
338-
self.enqueue_gop(requested_gop_idx)?;
339-
self.enqueue_gop(requested_gop_idx + 1)?;
340+
self.enqueue_gop(requested_gop_idx, video_data)?;
341+
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
340342
}
341343
}
342344

@@ -384,15 +386,15 @@ impl VideoDecoder {
384386
/// Enqueue all samples in the given GOP.
385387
///
386388
/// Does nothing if the index is out of bounds.
387-
fn enqueue_gop(&mut self, gop_idx: usize) -> Result<(), DecodingError> {
389+
fn enqueue_gop(&mut self, gop_idx: usize, video_data: &[u8]) -> Result<(), DecodingError> {
388390
let Some(gop) = self.data.gops.get(gop_idx) else {
389391
return Ok(());
390392
};
391393

392394
let samples = &self.data.samples[gop.range()];
393395

394396
for (i, sample) in samples.iter().enumerate() {
395-
let chunk = self.data.get(sample).ok_or(DecodingError::BadData)?;
397+
let chunk = sample.get(video_data).ok_or(DecodingError::BadData)?;
396398
let is_keyframe = i == 0;
397399
self.chunk_decoder.decode(chunk, is_keyframe)?;
398400
}

crates/viewer/re_renderer/src/video/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ impl Video {
203203
render_context: &RenderContext,
204204
decoder_stream_id: VideoDecodingStreamId,
205205
presentation_timestamp_s: f64,
206+
video_data: &[u8],
206207
) -> FrameDecodingResult {
207208
re_tracing::profile_function!();
208209

@@ -233,7 +234,7 @@ impl Video {
233234
decoder_entry.frame_index = render_context.active_frame_idx();
234235
decoder_entry
235236
.decoder
236-
.frame_at(render_context, presentation_timestamp_s)
237+
.frame_at(render_context, presentation_timestamp_s, video_data)
237238
}
238239

239240
/// Removes all decoders that have been unused in the last frame.

0 commit comments

Comments
 (0)