From 75fb60bdf70d6689dfd8528e8573898301052f54 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 29 Apr 2025 12:03:56 +0000 Subject: [PATCH] Add utility to fetch and prepare ZIM illustration --- CHANGELOG.md | 4 ++ src/zimscraperlib/constants.py | 3 ++ src/zimscraperlib/image/illustration.py | 52 +++++++++++++++++++ src/zimscraperlib/zim/metadata.py | 6 ++- tests/image/test_illustration.py | 69 +++++++++++++++++++++++++ 5 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 src/zimscraperlib/image/illustration.py create mode 100644 tests/image/test_illustration.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e4df3d..1d182f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - JS rewriting abusively rewrite import function (#255) +### Added + +- Add utility to fetch and prepare ZIM illustration (#254) + ## [5.1.1] - 2025-02-17 ### Changed diff --git a/src/zimscraperlib/constants.py b/src/zimscraperlib/constants.py index eee0673..485342b 100644 --- a/src/zimscraperlib/constants.py +++ b/src/zimscraperlib/constants.py @@ -29,3 +29,6 @@ # default timeout to get responses from upstream when doing web requests ; this is not # the total time it gets to download the whole resource DEFAULT_WEB_REQUESTS_TIMEOUT = 10 + +DEFAULT_ZIM_ILLLUSTRATION_SIZE = 48 +DEFAULT_ZIM_ILLLUSTRATION_SCALE = 1 diff --git a/src/zimscraperlib/image/illustration.py b/src/zimscraperlib/image/illustration.py new file mode 100644 index 0000000..ccc0726 --- /dev/null +++ b/src/zimscraperlib/image/illustration.py @@ -0,0 +1,52 @@ +import io +import pathlib + +from zimscraperlib.constants import DEFAULT_ZIM_ILLLUSTRATION_SIZE +from zimscraperlib.image.conversion import convert_image, convert_svg2png +from zimscraperlib.image.optimization import optimize_png +from zimscraperlib.image.probing import format_for +from zimscraperlib.image.transformation import resize_image +from 
zimscraperlib.inputs import handle_user_provided_file + + +def get_zim_illustration( + illustration_location: pathlib.Path | str, + width: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE, + height: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE, + resize_method: str = "contain", +) -> io.BytesIO: + """Get ZIM-ready illustration from any image path or URL + + illustration_location will be downloaded if needed. Image is automatically + converted to PNG, resized and optimized as needed. + + Arguments: + illustration_location: path or URL to an image + width: target illustration width + height: target illustration height + resize_method: method to resize the image ; in general only 'contain' or + 'cover' make sense, but 'crop', 'width', 'height' and 'thumbnail' can be used + """ + + illustration_path = handle_user_provided_file(illustration_location) + + if not illustration_path: + # given handle_user_provided_file logic, this is not supposed to happen besides + # when empty string is passed, hence the simple error message + raise ValueError("Illustration is missing") + + illustration = io.BytesIO() + illustration_format = format_for(illustration_path, from_suffix=False) + if illustration_format == "SVG": + convert_svg2png(illustration_path, illustration, width, height) + else: + if illustration_format != "PNG": + convert_image(illustration_path, illustration, fmt="PNG") + else: + illustration = io.BytesIO(illustration_path.read_bytes()) + resize_image(illustration, width, height, method=resize_method) + + optimized_illustration = io.BytesIO() + optimize_png(illustration, optimized_illustration) + + return optimized_illustration diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py index 7d2b75d..24bed69 100644 --- a/src/zimscraperlib/zim/metadata.py +++ b/src/zimscraperlib/zim/metadata.py @@ -10,6 +10,8 @@ import regex from zimscraperlib.constants import ( + DEFAULT_ZIM_ILLLUSTRATION_SCALE, + DEFAULT_ZIM_ILLLUSTRATION_SIZE, ILLUSTRATIONS_METADATA_RE, 
MAXIMUM_DESCRIPTION_METADATA_LENGTH, MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH, @@ -423,8 +425,8 @@ def __init__( @mandatory class DefaultIllustrationMetadata(IllustrationBasedMetadata): meta_name = "Illustration_48x48@1" - illustration_size: int = 48 - illustration_scale: int = 1 + illustration_size: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE + illustration_scale: int = DEFAULT_ZIM_ILLLUSTRATION_SCALE @mandatory diff --git a/tests/image/test_illustration.py b/tests/image/test_illustration.py new file mode 100644 index 0000000..b9ae0a7 --- /dev/null +++ b/tests/image/test_illustration.py @@ -0,0 +1,69 @@ +from pathlib import Path + +import pytest +from PIL.Image import open as pilopen + +from zimscraperlib.image.illustration import get_zim_illustration + +COMMONS_IMAGE_PATH = (Path(__file__) / "../../files/commons.png").resolve() +COMMONS_48_IMAGE_PATH = (Path(__file__) / "../../files/commons48.png").resolve() +NINJA_IMAGE_PATH = (Path(__file__) / "../../files/ninja.webp").resolve() + + +@pytest.mark.parametrize( + "user_illustration, expected_max_filesize", + [ + pytest.param(COMMONS_IMAGE_PATH, 5000, id="big_commons"), + pytest.param(COMMONS_48_IMAGE_PATH, 4000, id="small_commons"), + pytest.param(NINJA_IMAGE_PATH, 5000, id="ninja"), + pytest.param( + "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Commons-logo.svg/250px-Commons-logo.svg.png", + 4000, + id="png_url", + ), + pytest.param( + "https://upload.wikimedia.org/wikipedia/commons/4/4a/Commons-logo.svg", + 4000, + id="svg_url", + ), + ], +) +def test_get_zim_illustration( + user_illustration: str | Path, + expected_max_filesize: int, +): + image = get_zim_illustration(user_illustration) + assert len(image.getvalue()) < expected_max_filesize + with pilopen(image) as image_details: + assert image_details.format == "PNG" + assert image_details.size == (48, 48) + + +def test_get_missing_user_zim_illustration(): + with pytest.raises(Exception, match="missing.png could not be found"): + 
get_zim_illustration("./missing.png") + + +def test_get_missing_default_zim_illustration(): + with pytest.raises(Exception, match="Illustration is missing"): + get_zim_illustration("") + + +def test_get_zim_illustration_custom_size(): + image = get_zim_illustration(NINJA_IMAGE_PATH, 96, 120) + assert len(image.getvalue()) < 21000 + with pilopen(image) as image_details: + assert image_details.format == "PNG" + assert image_details.size == (96, 120) + + +def test_get_zim_illustration_method(): + image_cover = get_zim_illustration(NINJA_IMAGE_PATH, resize_method="cover") + image_contain = get_zim_illustration(NINJA_IMAGE_PATH, resize_method="contain") + # cover image is always bigger than contain image since more pixels are + # "used/non-transparent" + assert len(image_cover.getvalue()) > len(image_contain.getvalue()) + for image in [image_cover, image_contain]: + with pilopen(image) as image_details: + assert image_details.format == "PNG" + assert image_details.size == (48, 48)