# Patch summary (commentary only; this text precedes the first "diff --git"
# header and is not part of any file change).
#
# Adds a "pypdf" benchmark under pyperformance/data-files/benchmarks/bm_pypdf/:
#   - data/libreoffice-writer-password.pdf
#       Binary fixture read by run_benchmark.py. Only a "Binary files ... differ"
#       marker is present below, so the PDF bytes are NOT carried by this patch
#       and have to be supplied separately.
#   - pyproject.toml
#       Benchmark metadata: name "pypdf", depends on pyperf.
#   - requirements.txt
#       Pins pypdf==4.2.0.
#   - run_benchmark.py
#       Times page_ops(): optionally decrypts the document, adds its first page
#       to a writer, applies transformations / self-merges / scaling, then
#       compresses the content streams and extracts text.
diff --git a/pyperformance/data-files/benchmarks/bm_pypdf/data/libreoffice-writer-password.pdf b/pyperformance/data-files/benchmarks/bm_pypdf/data/libreoffice-writer-password.pdf
new file mode 100755
index 00000000..de3e0c16
Binary files /dev/null and b/pyperformance/data-files/benchmarks/bm_pypdf/data/libreoffice-writer-password.pdf differ
diff --git a/pyperformance/data-files/benchmarks/bm_pypdf/pyproject.toml b/pyperformance/data-files/benchmarks/bm_pypdf/pyproject.toml
new file mode 100644
index 00000000..a68daee4
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_pypdf/pyproject.toml
@@ -0,0 +1,9 @@
+[project]
+name = "pyperformance_bm_pypdf"
+requires-python = ">=3.8"
+dependencies = ["pyperf"]
+urls = {repository = "https://github.com/python/pyperformance"}
+dynamic = ["version"]
+
+[tool.pyperformance]
+name = "pypdf"
diff --git a/pyperformance/data-files/benchmarks/bm_pypdf/requirements.txt b/pyperformance/data-files/benchmarks/bm_pypdf/requirements.txt
new file mode 100644
index 00000000..728d2160
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_pypdf/requirements.txt
@@ -0,0 +1 @@
+pypdf==4.2.0
diff --git a/pyperformance/data-files/benchmarks/bm_pypdf/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_pypdf/run_benchmark.py
new file mode 100644
index 00000000..d0a46319
--- /dev/null
+++ b/pyperformance/data-files/benchmarks/bm_pypdf/run_benchmark.py
@@ -0,0 +1,89 @@
+"""
+A simple pypdf benchmark.
+
+Adapted from pypdf's own benchmarks:
+
+    https://github.com/py-pdf/pypdf/blob/main/tests/bench.py
+"""
+
+import io
+from pathlib import Path
+
+
+from pypdf import PdfReader, PdfWriter, Transformation
+import pyperf
+
+
+DATA_DIR = Path(__file__).parent / "data"
+
+
+def page_ops(stream, password):
+    reader = PdfReader(stream)
+    writer = PdfWriter()
+
+    if password:
+        reader.decrypt(password)
+
+    page = reader.pages[0]
+    writer.add_page(page)
+
+    op = Transformation().rotate(90).scale(1.2)
+    page.add_transformation(op)
+    page.merge_page(page)
+
+    op = Transformation().scale(1).translate(tx=1, ty=1)
+    page.add_transformation(op)
+    page.merge_page(page)
+
+    op = Transformation().rotate(90).scale(1).translate(tx=1, ty=1)
+    page.add_transformation(op)
+    page.merge_page(page)
+
+    page.add_transformation((1, 0, 0, 0, 0, 0))
+    page.scale(2, 2)
+    page.scale_by(0.5)
+    page.scale_to(100, 100)
+
+    page = writer.pages[0]
+    page.compress_content_streams()
+    page.extract_text()
+
+
+def bench_page_ops(loops):
+    """
+    Apply various page operations.
+
+    Rotation, scaling, translation, content stream compression, text extraction
+    """
+    content = (DATA_DIR / "libreoffice-writer-password.pdf").read_bytes()
+    stream = io.BytesIO(content)
+
+    t0 = pyperf.perf_counter()
+    for _ in range(loops):
+        page_ops(stream, "openpassword")
+    return pyperf.perf_counter() - t0
+
+
+BENCHMARKS = ("page_ops",)
+
+
+def add_cmdline_args(cmd, args):
+    if args.benchmark:
+        cmd.append(args.benchmark)
+
+
+if __name__ == "__main__":
+    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
+    runner.metadata["description"] = "pypdf benchmark"
+    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)
+
+    args = runner.parse_args()
+    if args.benchmark:
+        benchmarks = (args.benchmark,)
+    else:
+        benchmarks = BENCHMARKS
+
+    for bench in benchmarks:
+        name = f"pypdf_{bench}"
+        func = globals()[f"bench_{bench}"]
+        runner.bench_time_func(name, func)