Skip to content

Commit c69739f

Browse files
authored
Add crf to VideoEncoder API (#1031)
1 parent b35005d commit c69739f

File tree

7 files changed

+329
-301
lines changed

7 files changed

+329
-301
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "torch/types.h"
66

77
extern "C" {
8+
#include <libavutil/opt.h>
89
#include <libavutil/pixdesc.h>
910
}
1011

@@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat(
568569
}
569570
TORCH_CHECK(false, errorMsg.str());
570571
}
572+
573+
// Checks that `value` lies within the [min, max] range that `avCodec`
// declares for its private option `optionName`, and fails via TORCH_CHECK
// with a descriptive message when it does not.
//
// The check is skipped (the function simply returns) when:
//   - the codec has no priv_class,
//   - the option cannot be found on the codec's class, or
//   - the option's type is not one of INT, INT64, FLOAT or DOUBLE.
void validateDoubleOption(
574+
const AVCodec& avCodec,
575+
const char* optionName,
576+
double value) {
577+
// Without a priv_class there is nothing to look the option up on.
if (!avCodec.priv_class) {
578+
return;
579+
}
580+
const AVOption* option = av_opt_find2(
581+
// The obj arg must be a non-const void*, but we have a
582+
// const AVClass* const*: static_cast it to const void* first, then
// const_cast the const away. AV_OPT_SEARCH_FAKE_OBJ below is what allows
// passing a pointer to the class pointer as the object.
583+
const_cast<void*>(static_cast<const void*>(&avCodec.priv_class)),
584+
optionName,
585+
nullptr,
586+
0,
587+
AV_OPT_SEARCH_FAKE_OBJ,
588+
nullptr);
589+
// If the option was not found, let FFmpeg handle it later
590+
if (!option) {
591+
return;
592+
}
593+
// Only numeric option types are range-checked against option->min and
// option->max; any other option type is left unvalidated here.
if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
594+
option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
595+
TORCH_CHECK(
596+
value >= option->min && value <= option->max,
597+
optionName,
598+
"=",
599+
value,
600+
" is out of valid range [",
601+
option->min,
602+
", ",
603+
option->max,
604+
"] for this codec. For more details, run 'ffmpeg -h encoder=",
605+
avCodec.name,
606+
"'");
607+
}
608+
}
571609
} // namespace
572610

573611
VideoEncoder::~VideoEncoder() {
@@ -700,6 +738,7 @@ void VideoEncoder::initializeEncoder(
700738
// Apply videoStreamOptions
701739
AVDictionary* options = nullptr;
702740
if (videoStreamOptions.crf.has_value()) {
741+
validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
703742
av_dict_set(
704743
&options,
705744
"crf",

src/torchcodec/_core/StreamOptions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct VideoStreamOptions {
4747
// Encoding options
4848
// TODO-VideoEncoder: Consider adding other optional fields here
4949
// (bit rate, gop size, max b frames, preset)
50-
std::optional<int> crf;
50+
std::optional<double> crf;
5151

5252
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5353
// If not specified, uses codec's default format.

src/torchcodec/_core/custom_ops.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -604,7 +604,7 @@ void encode_video_to_file(
604604
int64_t frame_rate,
605605
std::string_view file_name,
606606
std::optional<std::string> pixel_format = std::nullopt,
607-
std::optional<int64_t> crf = std::nullopt) {
607+
std::optional<double> crf = std::nullopt) {
608608
VideoStreamOptions videoStreamOptions;
609609
videoStreamOptions.pixelFormat = pixel_format;
610610
videoStreamOptions.crf = crf;
@@ -621,7 +621,7 @@ at::Tensor encode_video_to_tensor(
621621
int64_t frame_rate,
622622
std::string_view format,
623623
std::optional<std::string> pixel_format = std::nullopt,
624-
std::optional<int64_t> crf = std::nullopt) {
624+
std::optional<double> crf = std::nullopt) {
625625
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
626626
VideoStreamOptions videoStreamOptions;
627627
videoStreamOptions.pixelFormat = pixel_format;
@@ -641,7 +641,7 @@ void _encode_video_to_file_like(
641641
std::string_view format,
642642
int64_t file_like_context,
643643
std::optional<std::string> pixel_format = std::nullopt,
644-
std::optional<int64_t> crf = std::nullopt) {
644+
std::optional<double> crf = std::nullopt) {
645645
auto fileLikeContext =
646646
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
647647
TORCH_CHECK(

src/torchcodec/_core/ops.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def encode_video_to_file_like(
213213
frame_rate: int,
214214
format: str,
215215
file_like: Union[io.RawIOBase, io.BufferedIOBase],
216-
crf: Optional[int] = None,
216+
crf: Optional[Union[int, float]] = None,
217217
pixel_format: Optional[str] = None,
218218
) -> None:
219219
"""Encode video frames to a file-like object.
@@ -322,7 +322,7 @@ def encode_video_to_file_abstract(
322322
frames: torch.Tensor,
323323
frame_rate: int,
324324
filename: str,
325-
crf: Optional[int] = None,
325+
crf: Optional[Union[int, float]] = None,
326326
pixel_format: Optional[str] = None,
327327
) -> None:
328328
return
@@ -333,7 +333,7 @@ def encode_video_to_tensor_abstract(
333333
frames: torch.Tensor,
334334
frame_rate: int,
335335
format: str,
336-
crf: Optional[int] = None,
336+
crf: Optional[Union[int, float]] = None,
337337
pixel_format: Optional[str] = None,
338338
) -> torch.Tensor:
339339
return torch.empty([], dtype=torch.long)
@@ -345,7 +345,7 @@ def _encode_video_to_file_like_abstract(
345345
frame_rate: int,
346346
format: str,
347347
file_like_context: int,
348-
crf: Optional[int] = None,
348+
crf: Optional[Union[int, float]] = None,
349349
pixel_format: Optional[str] = None,
350350
) -> None:
351351
return

src/torchcodec/encoders/_video_encoder.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def to_file(
3737
dest: Union[str, Path],
3838
*,
3939
pixel_format: Optional[str] = None,
40+
crf: Optional[Union[int, float]] = None,
4041
) -> None:
4142
"""Encode frames into a file.
4243
@@ -46,27 +47,35 @@ def to_file(
4647
container format.
4748
pixel_format (str, optional): The pixel format for encoding (e.g.,
4849
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
50+
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
51+
mean better quality. Valid range depends on the encoder (commonly 0-51).
52+
Defaults to None (which will use encoder's default).
4953
"""
5054
_core.encode_video_to_file(
5155
frames=self._frames,
5256
frame_rate=self._frame_rate,
5357
filename=str(dest),
5458
pixel_format=pixel_format,
59+
crf=crf,
5560
)
5661

5762
def to_tensor(
5863
self,
5964
format: str,
6065
*,
6166
pixel_format: Optional[str] = None,
67+
crf: Optional[Union[int, float]] = None,
6268
) -> Tensor:
6369
"""Encode frames into raw bytes, as a 1D uint8 Tensor.
6470
6571
Args:
6672
format (str): The container format of the encoded frames, e.g. "mp4", "mov",
67-
"mkv", "avi", "webm", "flv", or "gif"
73+
"mkv", "avi", "webm", "flv", etc.
6874
pixel_format (str, optional): The pixel format to encode frames into (e.g.,
6975
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
76+
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
77+
mean better quality. Valid range depends on the encoder (commonly 0-51).
78+
Defaults to None (which will use encoder's default).
7079
7180
Returns:
7281
Tensor: The raw encoded bytes as 1D uint8 Tensor.
@@ -76,6 +85,7 @@ def to_tensor(
7685
frame_rate=self._frame_rate,
7786
format=format,
7887
pixel_format=pixel_format,
88+
crf=crf,
7989
)
8090

8191
def to_file_like(
@@ -84,6 +94,7 @@ def to_file_like(
8494
format: str,
8595
*,
8696
pixel_format: Optional[str] = None,
97+
crf: Optional[Union[int, float]] = None,
8798
) -> None:
8899
"""Encode frames into a file-like object.
89100
@@ -94,14 +105,18 @@ def to_file_like(
94105
``write(data: bytes) -> int`` and ``seek(offset: int, whence:
95106
int = 0) -> int``.
96107
format (str): The container format of the encoded frames, e.g. "mp4", "mov",
97-
"mkv", "avi", "webm", "flv", or "gif".
108+
"mkv", "avi", "webm", "flv", etc.
98109
pixel_format (str, optional): The pixel format for encoding (e.g.,
99110
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
111+
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
112+
mean better quality. Valid range depends on the encoder (commonly 0-51).
113+
Defaults to None (which will use encoder's default).
100114
"""
101115
_core.encode_video_to_file_like(
102116
frames=self._frames,
103117
frame_rate=self._frame_rate,
104118
format=format,
105119
file_like=file_like,
106120
pixel_format=pixel_format,
121+
crf=crf,
107122
)

0 commit comments

Comments
 (0)