Skip to content

Commit e170212

Browse files
authored
Add preset to VideoEncoder API (#1042)
1 parent af6a008 commit e170212

File tree

6 files changed

+87
-26
lines changed

6 files changed

+87
-26
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,10 @@ void VideoEncoder::initializeEncoder(
745745
std::to_string(videoStreamOptions.crf.value()).c_str(),
746746
0);
747747
}
748+
if (videoStreamOptions.preset.has_value()) {
749+
av_dict_set(
750+
&options, "preset", videoStreamOptions.preset.value().c_str(), 0);
751+
}
748752
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
749753
av_dict_free(&options);
750754

src/torchcodec/_core/StreamOptions.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,11 @@ struct VideoStreamOptions {
4545
std::string_view deviceVariant = "ffmpeg";
4646

4747
// Encoding options
48-
// TODO-VideoEncoder: Consider adding other optional fields here
49-
// (bit rate, gop size, max b frames, preset)
50-
std::optional<double> crf;
51-
5248
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5349
// If not specified, uses codec's default format.
5450
std::optional<std::string> pixelFormat;
51+
std::optional<double> crf;
52+
std::optional<std::string> preset;
5553
};
5654

5755
struct AudioStreamOptions {

src/torchcodec/_core/custom_ops.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -603,11 +603,13 @@ void encode_video_to_file(
603603
const at::Tensor& frames,
604604
int64_t frame_rate,
605605
std::string_view file_name,
606-
std::optional<std::string> pixel_format = std::nullopt,
607-
std::optional<double> crf = std::nullopt) {
606+
std::optional<std::string_view> pixel_format = std::nullopt,
607+
std::optional<double> crf = std::nullopt,
608+
std::optional<std::string_view> preset = std::nullopt) {
608609
VideoStreamOptions videoStreamOptions;
609610
videoStreamOptions.pixelFormat = pixel_format;
610611
videoStreamOptions.crf = crf;
612+
videoStreamOptions.preset = preset;
611613
VideoEncoder(
612614
frames,
613615
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -620,12 +622,14 @@ at::Tensor encode_video_to_tensor(
620622
const at::Tensor& frames,
621623
int64_t frame_rate,
622624
std::string_view format,
623-
std::optional<std::string> pixel_format = std::nullopt,
624-
std::optional<double> crf = std::nullopt) {
625+
std::optional<std::string_view> pixel_format = std::nullopt,
626+
std::optional<double> crf = std::nullopt,
627+
std::optional<std::string_view> preset = std::nullopt) {
625628
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
626629
VideoStreamOptions videoStreamOptions;
627630
videoStreamOptions.pixelFormat = pixel_format;
628631
videoStreamOptions.crf = crf;
632+
videoStreamOptions.preset = preset;
629633
return VideoEncoder(
630634
frames,
631635
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -640,8 +644,9 @@ void _encode_video_to_file_like(
640644
int64_t frame_rate,
641645
std::string_view format,
642646
int64_t file_like_context,
643-
std::optional<std::string> pixel_format = std::nullopt,
644-
std::optional<double> crf = std::nullopt) {
647+
std::optional<std::string_view> pixel_format = std::nullopt,
648+
std::optional<double> crf = std::nullopt,
649+
std::optional<std::string_view> preset = std::nullopt) {
645650
auto fileLikeContext =
646651
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
647652
TORCH_CHECK(
@@ -651,6 +656,7 @@ void _encode_video_to_file_like(
651656
VideoStreamOptions videoStreamOptions;
652657
videoStreamOptions.pixelFormat = pixel_format;
653658
videoStreamOptions.crf = crf;
659+
videoStreamOptions.preset = preset;
654660

655661
VideoEncoder encoder(
656662
frames,

src/torchcodec/_core/ops.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ def encode_video_to_file_like(
215215
file_like: Union[io.RawIOBase, io.BufferedIOBase],
216216
crf: Optional[Union[int, float]] = None,
217217
pixel_format: Optional[str] = None,
218+
preset: Optional[str] = None,
218219
) -> None:
219220
"""Encode video frames to a file-like object.
220221
@@ -225,6 +226,7 @@ def encode_video_to_file_like(
225226
file_like: File-like object that supports write() and seek() methods
226227
crf: Optional constant rate factor for encoding quality
227228
pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
229+
preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
228230
"""
229231
assert _pybind_ops is not None
230232

@@ -235,6 +237,7 @@ def encode_video_to_file_like(
235237
_pybind_ops.create_file_like_context(file_like, True), # True means for writing
236238
pixel_format,
237239
crf,
240+
preset,
238241
)
239242

240243

@@ -322,8 +325,9 @@ def encode_video_to_file_abstract(
322325
frames: torch.Tensor,
323326
frame_rate: int,
324327
filename: str,
325-
crf: Optional[Union[int, float]] = None,
326328
pixel_format: Optional[str] = None,
329+
crf: Optional[Union[int, float]] = None,
330+
preset: Optional[str] = None,
327331
) -> None:
328332
return
329333

@@ -333,8 +337,9 @@ def encode_video_to_tensor_abstract(
333337
frames: torch.Tensor,
334338
frame_rate: int,
335339
format: str,
336-
crf: Optional[Union[int, float]] = None,
337340
pixel_format: Optional[str] = None,
341+
crf: Optional[Union[int, float]] = None,
342+
preset: Optional[str] = None,
338343
) -> torch.Tensor:
339344
return torch.empty([], dtype=torch.long)
340345

@@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract(
345350
frame_rate: int,
346351
format: str,
347352
file_like_context: int,
348-
crf: Optional[Union[int, float]] = None,
349353
pixel_format: Optional[str] = None,
354+
crf: Optional[Union[int, float]] = None,
355+
preset: Optional[str] = None,
350356
) -> None:
351357
return
352358

src/torchcodec/encoders/_video_encoder.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def to_file(
3838
*,
3939
pixel_format: Optional[str] = None,
4040
crf: Optional[Union[int, float]] = None,
41+
preset: Optional[Union[str, int]] = None,
4142
) -> None:
4243
"""Encode frames into a file.
4344
@@ -50,13 +51,19 @@ def to_file(
5051
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
5152
mean better quality. Valid range depends on the encoder (commonly 0-51).
5253
Defaults to None (which will use encoder's default).
54+
preset (str or int, optional): Encoder option that controls the tradeoff between
55+
encoding speed and compression. Valid values depend on the encoder (commonly
56+
a string: "fast", "medium", "slow"). Defaults to None
57+
(which will use encoder's default).
5358
"""
59+
preset = str(preset) if isinstance(preset, int) else preset
5460
_core.encode_video_to_file(
5561
frames=self._frames,
5662
frame_rate=self._frame_rate,
5763
filename=str(dest),
5864
pixel_format=pixel_format,
5965
crf=crf,
66+
preset=preset,
6067
)
6168

6269
def to_tensor(
@@ -65,6 +72,7 @@ def to_tensor(
6572
*,
6673
pixel_format: Optional[str] = None,
6774
crf: Optional[Union[int, float]] = None,
75+
preset: Optional[Union[str, int]] = None,
6876
) -> Tensor:
6977
"""Encode frames into raw bytes, as a 1D uint8 Tensor.
7078
@@ -76,16 +84,22 @@ def to_tensor(
7684
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
7785
mean better quality. Valid range depends on the encoder (commonly 0-51).
7886
Defaults to None (which will use encoder's default).
87+
preset (str or int, optional): Encoder option that controls the tradeoff between
88+
encoding speed and compression. Valid values depend on the encoder (commonly
89+
a string: "fast", "medium", "slow"). Defaults to None
90+
(which will use encoder's default).
7991
8092
Returns:
8193
Tensor: The raw encoded bytes as a 1D uint8 Tensor.
8294
"""
95+
preset_value = str(preset) if isinstance(preset, int) else preset
8396
return _core.encode_video_to_tensor(
8497
frames=self._frames,
8598
frame_rate=self._frame_rate,
8699
format=format,
87100
pixel_format=pixel_format,
88101
crf=crf,
102+
preset=preset_value,
89103
)
90104

91105
def to_file_like(
@@ -95,6 +109,7 @@ def to_file_like(
95109
*,
96110
pixel_format: Optional[str] = None,
97111
crf: Optional[Union[int, float]] = None,
112+
preset: Optional[Union[str, int]] = None,
98113
) -> None:
99114
"""Encode frames into a file-like object.
100115
@@ -111,12 +126,18 @@ def to_file_like(
111126
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
112127
mean better quality. Valid range depends on the encoder (commonly 0-51).
113128
Defaults to None (which will use encoder's default).
129+
preset (str or int, optional): Encoder option that controls the tradeoff between
130+
encoding speed and compression. Valid values depend on the encoder (commonly
131+
a string: "fast", "medium", "slow"). Defaults to None
132+
(which will use encoder's default).
114133
"""
134+
preset = str(preset) if isinstance(preset, int) else preset
115135
_core.encode_video_to_file_like(
116136
frames=self._frames,
117137
frame_rate=self._frame_rate,
118138
format=format,
119139
file_like=file_like,
120140
pixel_format=pixel_format,
121141
crf=crf,
142+
preset=preset,
122143
)

test/test_encoders.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,12 @@ def test_bad_input_parameterized(self, tmp_path, method):
617617
)
618618
getattr(encoder, method)(**valid_params, crf=-10)
619619

620+
with pytest.raises(
621+
RuntimeError,
622+
match=r"avcodec_open2 failed: Invalid argument",
623+
):
624+
encoder.to_tensor(format="mp4", preset="fake_preset")
625+
620626
@pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
621627
@pytest.mark.parametrize("crf", [23, 23.5, -0.9])
622628
def test_crf_valid_values(self, method, crf, tmp_path):
@@ -829,13 +835,26 @@ def test_against_to_file(self, tmp_path, format, method):
829835
pytest.param("webm", marks=pytest.mark.slow),
830836
),
831837
)
832-
@pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p"))
833-
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
838+
@pytest.mark.parametrize(
839+
"encode_params",
840+
[
841+
{"pixel_format": "yuv444p", "crf": 0, "preset": None},
842+
{"pixel_format": "yuv420p", "crf": 30, "preset": None},
843+
{"pixel_format": "yuv420p", "crf": None, "preset": "ultrafast"},
844+
{"pixel_format": "yuv420p", "crf": None, "preset": None},
845+
],
846+
)
847+
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, encode_params):
834848
ffmpeg_version = get_ffmpeg_major_version()
835849
if format == "webm" and (
836850
ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
837851
):
838852
pytest.skip("Codec for webm is not available in this FFmpeg installation.")
853+
854+
pixel_format = encode_params["pixel_format"]
855+
crf = encode_params["crf"]
856+
preset = encode_params["preset"]
857+
839858
if format in ("avi", "flv") and pixel_format == "yuv444p":
840859
pytest.skip(f"Default codec for {format} does not support {pixel_format}")
841860

@@ -848,8 +867,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
848867

849868
ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
850869
frame_rate = 30
851-
crf = 0
852-
# Some codecs (ex. MPEG4) do not support CRF.
870+
# Some codecs (e.g., MPEG4) do not support CRF or preset.
853871
# Flags not supported by the selected codec will be ignored.
854872
ffmpeg_cmd = [
855873
"ffmpeg",
@@ -864,18 +882,26 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
864882
str(frame_rate),
865883
"-i",
866884
temp_raw_path,
867-
"-pix_fmt",
868-
pixel_format, # Output format
869-
"-crf",
870-
str(crf),
871-
ffmpeg_encoded_path,
872885
]
886+
if pixel_format is not None: # Output format
887+
ffmpeg_cmd.extend(["-pix_fmt", pixel_format])
888+
if preset is not None:
889+
ffmpeg_cmd.extend(["-preset", preset])
890+
if crf is not None:
891+
ffmpeg_cmd.extend(["-crf", str(crf)])
892+
# Output path must be last
893+
ffmpeg_cmd.append(ffmpeg_encoded_path)
873894
subprocess.run(ffmpeg_cmd, check=True)
874895

875896
# Encode with our video encoder
876897
encoder_output_path = str(tmp_path / f"encoder_output.{format}")
877898
encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
878-
encoder.to_file(dest=encoder_output_path, pixel_format=pixel_format, crf=crf)
899+
encoder.to_file(
900+
dest=encoder_output_path,
901+
pixel_format=pixel_format,
902+
crf=crf,
903+
preset=preset,
904+
)
879905

880906
ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
881907
encoder_frames = self.decode(encoder_output_path).data

0 commit comments

Comments
 (0)