diff --git a/meta/bindings/python/.doctrees/environment.pickle b/meta/bindings/python/.doctrees/environment.pickle new file mode 100644 index 000000000..0439daa39 Binary files /dev/null and b/meta/bindings/python/.doctrees/environment.pickle differ diff --git a/meta/bindings/python/.doctrees/examples.doctree b/meta/bindings/python/.doctrees/examples.doctree new file mode 100644 index 000000000..88e7cc6b1 Binary files /dev/null and b/meta/bindings/python/.doctrees/examples.doctree differ diff --git a/meta/bindings/python/.doctrees/framework_examples.doctree b/meta/bindings/python/.doctrees/framework_examples.doctree new file mode 100644 index 000000000..3de17f591 Binary files /dev/null and b/meta/bindings/python/.doctrees/framework_examples.doctree differ diff --git a/meta/bindings/python/.doctrees/index.doctree b/meta/bindings/python/.doctrees/index.doctree new file mode 100644 index 000000000..4f2ae7a01 Binary files /dev/null and b/meta/bindings/python/.doctrees/index.doctree differ diff --git a/meta/bindings/python/.doctrees/readme.doctree b/meta/bindings/python/.doctrees/readme.doctree new file mode 100644 index 000000000..8f0733c72 Binary files /dev/null and b/meta/bindings/python/.doctrees/readme.doctree differ diff --git a/meta/bindings/python/README.md b/meta/bindings/python/README.md new file mode 100644 index 000000000..5c2d0d978 --- /dev/null +++ b/meta/bindings/python/README.md @@ -0,0 +1,303 @@ +# Paper Muncher Python Bindings + +## Usage examples + +### Functional Usage + +Paper Muncher includes both synchronous and asynchronous functional APIs. + +```python +from paper_muncher.synchronous import render + + +html = """ +
This is a simple example of using Paper Muncher in a synchronous context.
+""" + + +def main(): + pdf_bytes = render(html, mode="print") + with open("output.pdf", "wb") as f: + f.write(pdf_bytes) +``` + +**N.B.** The synchronous API is based on a per-OS integration for IO timeouts. + +1. For POSIX systems, it relies on selectors. +2. For Windows with Python 3.12+, it puts the file in non-blocking mode. +3. For Windows with Python < 3.12, it falls back to a potentially blocking read without timeout. + +```python +from paper_muncher.asynchronous import render + + +html = """ +This is a simple example of using Paper Muncher in an asynchronous context.
+""" + + +async def main(): + pdf_bytes = await render(html, mode="print") + with open("output_async.pdf", "wb") as f: + f.write(pdf_bytes) +``` + +In addition to that it also includes a context based approach to automatically +handle synchronous and asynchronous code execution. + +```python +from paper_muncher import render + + +html = """ +This is a simple example of using Paper Muncher in an auto context.
+""" + +async def main_async(): + pdf_bytes = await render(html, mode="print") + with open("output_async.pdf", "wb") as f: + f.write(pdf_bytes) + + print("PDF generated and saved as output_async.pdf") + + +def main_sync(): + pdf_bytes = render(html, mode="print") + with open("output_sync.pdf", "wb") as f: + f.write(pdf_bytes) + + print("PDF generated and saved as output_sync.pdf") +``` + +### Context Manager Usage + +Paper Muncher includes both synchronous and asynchronous context manager APIs. + +```python +from paper_muncher.synchronous import rendered + + +html = """ +This is a simple example of using Paper Muncher in a synchronous context.
+""" + + +def main(): + with rendered(html, mode="print") as (pdf_io_stream, std_err): + pdf = pdf_io_stream.read() + + with open("output_sync.pdf", "wb") as f: + f.write(pdf) +``` + +**N.B.** The synchronous API is based on a per-OS integration for IO timeouts. + +1. For POSIX systems, it relies on selectors. +2. For Windows with Python 3.12+, it puts the file in non-blocking mode. +3. For Windows with Python < 3.12, it falls back to a potentially blocking read without timeout. + +```python +from paper_muncher.asynchronous import rendered + + +html = """ +This is a simple example of using Paper Muncher in an asynchronous context.
+""" + + +async def main(): + async with rendered(html, mode="print") as (pdf_stream_reader, std_err): + pdf = await pdf_stream_reader.read() + + with open("output_async.pdf", "wb") as f: + f.write(pdf) +``` + +In addition to that it also includes a context based approach to automatically +handle synchronous and asynchronous code execution. + +```python +from paper_muncher import rendered + + +html = """ +This is a simple example of using Paper Muncher in an auto context.
+""" + +def main_sync(): + with rendered(html, mode="print") as (pdf_io_stream, std_err): + pdf = pdf_io_stream.read() + + with open("output_sync.pdf", "wb") as f: + f.write(pdf) + + print("PDF generated and saved as output_sync.pdf") + + +async def main_async(): + async with rendered(html, mode="print") as (pdf_stream_reader, std_err): + pdf = await pdf_stream_reader.read() + + with open("output_async.pdf", "wb") as f: + f.write(pdf) + + print("PDF generated and saved as output_async.pdf") +``` + +Paper Muncher comes with pre-made integrations for some +of the most popular frameworks as well! + +* Flask +* Quart +* FastAPI +* Django + +Your favorite framework is not in the list? +No worries! Some general implementations are also +present! + +* agnostic WSGI integration +* agnostic ASGI integration + +### Flask + +```python +from paper_muncher.frameworks.flask import register_paper_muncher +from flask import Flask, Response + +app = Flask(__name__) +register_paper_muncher(app) + + +@app.route("/") +def index(): + html_content = "This is a simple example of using Paper Muncher in an asynchronous context.
+""" + + +async def main(): + async with rendered(html, mode="print") as (pdf_stream_reader, std_err): + pdf = await pdf_stream_reader.read() + + with open("output_async.pdf", "wb") as f: + f.write(pdf) + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) diff --git a/meta/bindings/python/examples/async_example.py b/meta/bindings/python/examples/async_example.py new file mode 100644 index 000000000..9c41a2900 --- /dev/null +++ b/meta/bindings/python/examples/async_example.py @@ -0,0 +1,20 @@ +from paper_muncher.asynchronous import render + + +html = """ +This is a simple example of using Paper Muncher in an asynchronous context.
+""" + + +async def main(): + pdf_bytes = await render(html, mode="print") + with open("output_async.pdf", "wb") as f: + f.write(pdf_bytes) + + print("PDF generated and saved as output_async.pdf") + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) diff --git a/meta/bindings/python/examples/auto_mode_cm_example.py b/meta/bindings/python/examples/auto_mode_cm_example.py new file mode 100644 index 000000000..db8cd2f00 --- /dev/null +++ b/meta/bindings/python/examples/auto_mode_cm_example.py @@ -0,0 +1,32 @@ +from paper_muncher import rendered + + +html = """ +This is a simple example of using Paper Muncher in an auto context.
+""" + +def main_sync(): + with rendered(html, mode="print") as (pdf_io_stream, std_err): + pdf = pdf_io_stream.read() + + with open("output_sync.pdf", "wb") as f: + f.write(pdf) + + print("PDF generated and saved as output_sync.pdf") + + +async def main_async(): + async with rendered(html, mode="print") as (pdf_stream_reader, std_err): + pdf = await pdf_stream_reader.read() + + with open("output_async.pdf", "wb") as f: + f.write(pdf) + + print("PDF generated and saved as output_async.pdf") + + +if __name__ == "__main__": + main_sync() + import asyncio + asyncio.run(main_async()) diff --git a/meta/bindings/python/examples/auto_mode_example.py b/meta/bindings/python/examples/auto_mode_example.py new file mode 100644 index 000000000..3cc9d225c --- /dev/null +++ b/meta/bindings/python/examples/auto_mode_example.py @@ -0,0 +1,28 @@ +from paper_muncher import render + + +html = """ +This is a simple example of using Paper Muncher in an auto context.
+""" + +async def main_async(): + pdf_bytes = await render(html, mode="print") + with open("output_async.pdf", "wb") as f: + f.write(pdf_bytes) + + print("PDF generated and saved as output_async.pdf") + + +def main_sync(): + pdf_bytes = render(html, mode="print") + with open("output_sync.pdf", "wb") as f: + f.write(pdf_bytes) + + print("PDF generated and saved as output_sync.pdf") + + +if __name__ == "__main__": + main_sync() + import asyncio + asyncio.run(main_async()) diff --git a/meta/bindings/python/examples/django_asgi_example.py b/meta/bindings/python/examples/django_asgi_example.py new file mode 100644 index 000000000..57bb36594 --- /dev/null +++ b/meta/bindings/python/examples/django_asgi_example.py @@ -0,0 +1,46 @@ +import os +import asyncio + +from django.conf import settings +from django.core.asgi import get_asgi_application +from django.http import HttpResponse +from django.urls import path +from django.core.management import execute_from_command_line + +from asgiref.sync import async_to_sync +from hypercorn.config import Config +from hypercorn.asyncio import serve + +from paper_muncher.frameworks.django_asgi import register_paper_muncher # Your patch + + +BASE_DIR = os.path.dirname(__file__) +settings.configure( + DEBUG=True, + ROOT_URLCONF=__name__, + SECRET_KEY="dummy", + ALLOWED_HOSTS=["*"], + MIDDLEWARE=[], +) + + +def index(request): + html = "This is a simple example of using Paper Muncher in a synchronous context.
+""" + + +def main(): + with rendered(html, mode="print") as (pdf_io_stream, std_err): + pdf = pdf_io_stream.read() + + with open("output_sync.pdf", "wb") as f: + f.write(pdf) + + +if __name__ == "__main__": + main() diff --git a/meta/bindings/python/examples/sync_example.py b/meta/bindings/python/examples/sync_example.py new file mode 100644 index 000000000..0a49e4fb0 --- /dev/null +++ b/meta/bindings/python/examples/sync_example.py @@ -0,0 +1,18 @@ +from paper_muncher.synchronous import render + + +html = """ +This is a simple example of using Paper Muncher in a synchronous context.
+""" + + +def main(): + pdf_bytes = render(html, mode="print") + with open("output.pdf", "wb") as f: + f.write(pdf_bytes) + print("PDF generated and saved as output.pdf") + + +if __name__ == "__main__": + main() diff --git a/meta/bindings/python/paper_muncher/__init__.py b/meta/bindings/python/paper_muncher/__init__.py new file mode 100644 index 000000000..073a7355e --- /dev/null +++ b/meta/bindings/python/paper_muncher/__init__.py @@ -0,0 +1,4 @@ +from .autochronous import render, rendered +from .asynchronous import render as async_render, rendered as async_rendered +from .synchronous import render as sync_render, rendered as sync_rendered +from .binary import can_use_paper_muncher diff --git a/meta/bindings/python/paper_muncher/asynchronous/__init__.py b/meta/bindings/python/paper_muncher/asynchronous/__init__.py new file mode 100644 index 000000000..b6c28117f --- /dev/null +++ b/meta/bindings/python/paper_muncher/asynchronous/__init__.py @@ -0,0 +1,9 @@ +"""The :mod:`paper_muncher.asynchronous` module +provides the core functionality for rendering documents +using the Paper Muncher engine. +It includes the main rendering functions and utilities +for managing the rendering process. +""" + +from .interface import rendered, render +from ..binary import can_use_paper_muncher diff --git a/meta/bindings/python/paper_muncher/asynchronous/asyncify.py b/meta/bindings/python/paper_muncher/asynchronous/asyncify.py new file mode 100644 index 000000000..6cfdf31f9 --- /dev/null +++ b/meta/bindings/python/paper_muncher/asynchronous/asyncify.py @@ -0,0 +1,14 @@ +from ..typing import AsyncRunner, Runner + +def asyncify_runner(runner: Runner) -> AsyncRunner: + """Convert a synchronous runner function to an asynchronous one. + + :param Runner runner: A synchronous function that takes a path as input + and returns bytes. + :return: An asynchronous version of the input runner function. 
+    :rtype: AsyncRunner
+    """
+    async def async_runner(path: str) -> bytes:
+        generator = runner(path)
+        return generator
+    return async_runner
diff --git a/meta/bindings/python/paper_muncher/asynchronous/interface.py b/meta/bindings/python/paper_muncher/asynchronous/interface.py
new file mode 100644
index 000000000..10f34a5d1
--- /dev/null
+++ b/meta/bindings/python/paper_muncher/asynchronous/interface.py
@@ -0,0 +1,307 @@
+"""
+The :mod:`.paper_muncher.asynchronous.interface` module provides
+utilities for interacting with Paper Muncher, a subprocess used to render
+HTML content into PDF or image format.
+"""
+
+
+import logging
+from asyncio import wait_for, TimeoutError as AsyncTimeoutError
+from asyncio.subprocess import PIPE as APIPE
+from datetime import datetime, timezone
+from contextlib import asynccontextmanager
+from collections.abc import Generator
+from email.utils import format_datetime
+from io import BytesIO
+from inspect import isawaitable, iscoroutinefunction
+from itertools import count
+from typing import BinaryIO, Optional, Union
+
+from .asyncify import asyncify_runner
+from .request import (
+    consume_paper_muncher_request,
+    read_paper_muncher_request,
+)
+from .io_with_timeout import (
+    read_all_with_timeout,
+    write_with_timeout,
+)
+from .popen import Popen
+from ..binary import get_paper_muncher_binary, can_use_paper_muncher
+
+from ..typing import AsyncRunner, Runner
+
+_logger = logging.getLogger(__name__)
+
+AUTHORIZED_MODE = {'print', 'render'}
+DEFAULT_READ_TIMEOUT = 60  # seconds
+DEFAULT_READLINE_TIMEOUT = 60 * 15  # seconds (15 minutes is for the put request)
+DEFAULT_WRITE_TIMEOUT = 30  # seconds
+DEFAULT_CHUNK_SIZE = 4096  # bytes
+DEFAULT_WAIT_TIMEOUT = 5  # seconds
+NOT_RENDERABLE_OPTIONS = {
+    'read_timeout',
+    'readline_timeout',
+    'write_timeout',
+    'chunk_size',
+    'wait_timeout',
+}
+SERVER_SOFTWARE = b'Paper Muncher (Fully Asynchronous Engine)'
+
+
+@asynccontextmanager
+async def rendered(
+    content: str,
+    mode: str = "print",
+    runner: Optional[
+        Union[
+            AsyncRunner,
+            Runner,
+        ]
+    ] = None,
+    **options,
+) -> Generator[tuple[BinaryIO], None, None]:
+    """Async context manager to render HTML content using Paper Muncher.
+
+    :param content: The HTML content to render, as a string.
+    :param mode: The rendering mode, either 'print' or 'render'.
+    :param runner: Optional callable (sync or ``async def``) that takes an
+        asset path and returns an iterable of response byte chunks.
+    :param options: Additional options to pass to Paper Muncher.
+    :return: A generator yielding the stdout and stderr streams of the
+        Paper Muncher process.
+    :raises RuntimeError: If Paper Muncher is not available or crashes,
+        or if it requests an asset while no runner was provided.
+    :raises ValueError: If an invalid mode is specified.
+    """
+
+    if not can_use_paper_muncher():
+        raise RuntimeError(
+            "Paper Muncher is not available in the current session. "
+            "Ensure it is installed and available in the system PATH."
+        )
+
+    if mode not in AUTHORIZED_MODE:
+        raise ValueError(
+            f"Invalid mode '{mode}', must be one of {AUTHORIZED_MODE}"
+        )
+
+    readline_timeout = options.get(
+        'readline_timeout',
+        DEFAULT_READLINE_TIMEOUT,
+    )
+    write_timeout = options.get('write_timeout', DEFAULT_WRITE_TIMEOUT)
+    wait_timeout = options.get('wait_timeout', DEFAULT_WAIT_TIMEOUT)
+
+    extra_args = []
+    for option, value in options.items():
+        if option in NOT_RENDERABLE_OPTIONS:
+            continue
+        extra_args.extend([
+            f'--{option}', str(value),
+        ])
+
+    if not (binary := get_paper_muncher_binary()):
+        raise RuntimeError(
+            "Paper Muncher binary not found or not usable. "
+            "Ensure it is installed and available in the system PATH."
+        )
+
+    # ``isawaitable`` is always False for a plain callable, so coroutine
+    # functions used to get wrapped as well and their coroutine was then
+    # iterated instead of awaited. Only wrap genuinely synchronous runners.
+    if runner is not None and not iscoroutinefunction(runner):
+        runner = asyncify_runner(runner)
+
+    # Encode once: the same bytes feed both Content-Length and the body.
+    html_body = content.encode()
+
+    async with Popen(
+        [binary, mode, "pipe:", '-o', "pipe:"] + extra_args,
+        stdin=APIPE,
+        stdout=APIPE,
+        stderr=APIPE,
+    ) as process:
+        # Phase 1: send HTML content headers and body
+        try:
+            await consume_paper_muncher_request(
+                process.stdout,
+                timeout=readline_timeout,
+            )
+        except EOFError as early_eof:
+            raise RuntimeError(
+                "Paper Muncher terminated prematurely (phase 1)"
+            ) from early_eof
+
+        if process.returncode is not None:
+            raise RuntimeError(
+                "Paper Muncher crashed before receiving content")
+
+        now = datetime.now(timezone.utc)
+        response_headers = (
+            b"HTTP/1.1 200 OK\r\n"
+            b"Content-Length: %(length)d\r\n"
+            b"Content-Type: text/html\r\n"
+            b"Date: %(date)s\r\n"
+            b"Server: %(server)s\r\n"
+            b"\r\n"
+        ) % {
+            b'length': len(html_body),
+            b'date': format_datetime(now, usegmt=True).encode(),
+            b'server': SERVER_SOFTWARE,
+        }
+
+        await write_with_timeout(
+            process.stdin,
+            response_headers,
+            timeout=write_timeout,
+        )
+        await write_with_timeout(
+            process.stdin,
+            html_body,
+            timeout=write_timeout,
+        )
+
+        if process.returncode is not None:
+            raise RuntimeError(
+                "Paper Muncher crashed while sending HTML content")
+
+        # Phase 2: serve asset requests until the rendered content is ready
+        for request_no in count(start=1):
+            try:
+                path = await read_paper_muncher_request(
+                    process.stdout,
+                    timeout=readline_timeout,
+                )
+            except (EOFError, TimeoutError):
+                process.kill()
+                await process.wait()
+                raise
+
+            if path is None:
+                break
+
+            if runner is None:
+                raise RuntimeError(
+                    "Paper Muncher requested asset"
+                    f" {request_no}: {path} but no runner was provided"
+                )
+
+            for chunk in await runner(path):
+                await write_with_timeout(
+                    process.stdin,
+                    chunk,
+                    timeout=write_timeout
+                )
+
+            if process.returncode is not None:
+                raise RuntimeError(
+                    "Paper Muncher crashed while serving asset"
+                    f" {request_no}: {path}"
+                )
+
+        # Phase 3: send final OK and close the process
+        now = datetime.now(timezone.utc)
+        final_response = (
+            b"HTTP/1.1 200 OK\r\n"
+            b"Date: %(date)s\r\n"
+            b"Server: %(server)s\r\n"
+            b"\r\n"
+        ) % {
+            b'date': format_datetime(now, usegmt=True).encode(),
+            b'server': SERVER_SOFTWARE,
+        }
+
+        await write_with_timeout(
+            process.stdin,
+            final_response,
+            timeout=write_timeout,
+        )
+        try:
+            process.stdin.write_eof()
+        except (NotImplementedError, AttributeError):
+            process.stdin.close()
+        await process.stdin.wait_closed()
+
+        if process.returncode is not None:
+            raise RuntimeError(
+                "Paper Muncher crashed before returning the rendered content"
+            )
+
+        try:
+            yield process.stdout, process.stderr
+        finally:
+            try:
+                await wait_for(
+                    process.wait(),
+                    timeout=wait_timeout,
+                )
+            except AsyncTimeoutError:
+                process.kill()
+                await process.wait()
+                _logger.warning(
+                    "Paper Muncher did not terminate in time, "
+                    "forcefully killed it"
+                )
+
+            if process.returncode != 0:
+                _logger.warning(
+                    "Paper Muncher exited with code %d",
+                    process.returncode,
+                )
+
+
+async def render(
+    content: str,
+    mode: str = "print",
+    runner: Optional[
+        Union[
+            AsyncRunner,
+            Runner,
+        ]
+    ] = None,
+    **options,
+) -> bytes:
+    """Render HTML content using Paper Muncher and return the rendered output.
+
+    :param content: The HTML content to render, as a string.
+    :param mode: The rendering mode, either 'print' or 'render'.
+    :param runner: Optional sync or async runner to handle asset requests.
+    :param options: Additional options to pass to Paper Muncher.
+    :return: The rendered content as bytes.
+    :raises RuntimeError: If Paper Muncher is not available or crashes.
+    :raises ValueError: If an invalid mode is specified.
+ """ + + async with rendered( + content, + mode=mode, + runner=runner, + **options, + ) as (content_stream, error_stream): + read_timeout = options.get('read_timeout', DEFAULT_READ_TIMEOUT) + chunk_size = options.get('chunk_size', DEFAULT_CHUNK_SIZE) + rendered_content = await read_all_with_timeout( + content_stream, + chunk_size=chunk_size, + timeout=read_timeout, + ) + stderr_output = await read_all_with_timeout( + error_stream, + chunk_size=chunk_size, + timeout=read_timeout, + ) + + if stderr_output: + _logger.warning( + "Paper Muncher error output: %s", + stderr_output.decode('utf-8', errors='replace'), + ) + + if mode == "print": + if not rendered_content.startswith(b'%PDF-'): + raise RuntimeError( + "Paper Muncher did not return valid PDF content" + ) + + return rendered_content diff --git a/meta/bindings/python/paper_muncher/asynchronous/io_with_timeout.py b/meta/bindings/python/paper_muncher/asynchronous/io_with_timeout.py new file mode 100644 index 000000000..7aac030d1 --- /dev/null +++ b/meta/bindings/python/paper_muncher/asynchronous/io_with_timeout.py @@ -0,0 +1,118 @@ +from asyncio import wait_for, TimeoutError as AsyncTimeoutError + +# typing imports +from asyncio import StreamReader, StreamWriter + + +async def readline_with_timeout( + reader: StreamReader, + timeout: int, +) -> bytes: + """Read a full line ending with '\\n' from an asyncio StreamReader within a + timeout. + + :param asyncio.StreamReader reader: StreamReader to read from + (must be in binary mode). + :param int timeout: Max seconds to wait for line data. + :return: A line of bytes ending in '\\n'. + :rtype: bytes + :raises TimeoutError: If timeout is reached before a line is read. + :raises EOFError: If EOF is reached before a line is read. 
+ """ + line_buffer = bytearray() + + while True: + try: + next_byte = await wait_for(reader.read(1), timeout=timeout) + except AsyncTimeoutError as ate: + raise TimeoutError("Timeout reached while reading line") from ate + + if not next_byte: + raise EOFError("EOF reached while reading line") + + line_buffer += next_byte + if next_byte == b'\n': + break + + return bytes(line_buffer) + + +async def read_all_with_timeout( + reader: StreamReader, + timeout: int, + chunk_size: int, +) -> bytes: + """Read all data from an asyncio StreamReader until EOF, with a timeout per + chunk. + + :param asyncio.StreamReader reader: StreamReader to read from. + :param int timeout: Timeout in seconds for the entire read operation. + :param int chunk_size: Number of bytes to read per chunk. + :return: All bytes read until EOF. + :rtype: bytes + :raises TimeoutError: If no data is read within the timeout period. + """ + data = bytearray() + while True: + try: + chunk = await wait_for(reader.read(chunk_size), timeout=timeout) + except AsyncTimeoutError as ate: + raise TimeoutError("Timeout reached while reading data") from ate + + if not chunk: + break + data.extend(chunk) + + return bytes(data) + + +async def write_with_timeout( + writer: StreamWriter, + data: bytes, + timeout: int, +) -> None: + """Write data to an asyncio StreamWriter. + + :param asyncio.StreamWriter writer: StreamWriter to write to. + :param bytes data: Data to write. + :param int timeout: Timeout in seconds for the drain operation. + :return: None + :rtype: None + :raises TimeoutError: If the drain operation exceeds the timeout. 
+ """ + writer.write(data) # always non-blocking + try: + await wait_for(writer.drain(), timeout) + except AsyncTimeoutError as ate: + writer.close() + raise TimeoutError("Timeout reached while writing data") from ate + + +if __name__ == "__main__": + import asyncio + + async def main(): + proc = await asyncio.create_subprocess_exec( + "cat", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + ) + + try: + await write_with_timeout(proc.stdin, b"Hello, World!\n", timeout=5) + proc.stdin.close() + + output = await readline_with_timeout(proc.stdout, timeout=5) + print(f"Output: {output.decode().strip()}") + + await proc.wait() + finally: + if proc.returncode is None: + proc.terminate() + try: + await wait_for(proc.wait(), timeout=5) + except AsyncTimeoutError: + proc.kill() + await proc.wait() + + asyncio.run(main()) diff --git a/meta/bindings/python/paper_muncher/asynchronous/popen.py b/meta/bindings/python/paper_muncher/asynchronous/popen.py new file mode 100644 index 000000000..4d15fc531 --- /dev/null +++ b/meta/bindings/python/paper_muncher/asynchronous/popen.py @@ -0,0 +1,43 @@ +from asyncio import wait_for, TimeoutError as AsyncTimeoutError +from asyncio.subprocess import create_subprocess_exec +from contextlib import asynccontextmanager + + +@asynccontextmanager +async def Popen(*args, **kwargs): + """Async context manager for asyncio subprocess that sets non-blocking I/O + for stdin, stdout, and stderr. + This is necessary for Windows to avoid deadlocks when reading + from subprocess streams. + + :param args: Positional arguments for subprocess.Popen. + :param kwargs: Keyword arguments for subprocess.Popen. + :return: A context manager that yields the subprocess.Popen object. 
+ """ + if isinstance(args[0], list): + args = args[0] + proc = await create_subprocess_exec(*args, **kwargs) + try: + yield proc + finally: + if proc.returncode is None: + proc.terminate() + try: + await wait_for(proc.wait(), timeout=5) + except AsyncTimeoutError: + proc.kill() + await proc.wait() + + +if __name__ == "__main__": + import asyncio + + async def main(): + async with Popen("cat", stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE) as proc: + proc.stdin.write(b"Hello, World!\n") + await proc.stdin.drain() + proc.stdin.close() + output = await proc.stdout.read() + print(f"Output: {output.decode().strip()}") + + asyncio.run(main()) diff --git a/meta/bindings/python/paper_muncher/asynchronous/request.py b/meta/bindings/python/paper_muncher/asynchronous/request.py new file mode 100644 index 000000000..a44243733 --- /dev/null +++ b/meta/bindings/python/paper_muncher/asynchronous/request.py @@ -0,0 +1,92 @@ +"""The :mod:`paper_muncher.synchronous.request` +module provides utilities for consuming and reading +Paper Muncher requests. +It includes functions to read the request line, +and to consume the request headers. +It also handles timeouts for reading lines from the request. +""" + + +import logging +import time + +from asyncio import StreamReader +from typing import Optional + +from .io_with_timeout import readline_with_timeout + +_logger = logging.getLogger(__name__) + + +def remaining_time(deadline: float) -> float: + remaining = deadline - time.monotonic() + if remaining <= 0: + raise TimeoutError("Timeout exceeded") + return remaining + + +async def consume_paper_muncher_request( + stdout: StreamReader, + timeout: int +) -> None: + """Read and discard all header lines from a Paper Muncher request. + + :param BinaryIO stdout: File-like stdout stream from Paper Muncher. + :param int timeout: Timeout in seconds for each line read. 
+ :return: None + :rtype: None + """ + deadline = time.monotonic() + timeout + while line := await readline_with_timeout( + stdout, + timeout=remaining_time(deadline) + ): + _logger.debug("Paper Muncher request line: %s", line.rstrip()) + if line == b"\r\n": + return + if not line: + raise EOFError("EOF reached while reading request headers") + + +async def read_paper_muncher_request( + stdout: StreamReader, + timeout: int, +) -> Optional[str]: + """Read the HTTP-like request line from Paper Muncher and return the path. + + :param BinaryIO stdout: File-like stdout stream from Paper Muncher. + :param int timeout: Timeout in seconds for each line read. + :return: The requested asset path, or ``None`` if the method is PUT. + :rtype: str or None + :raises EOFError: If no request line is found. + :raises ValueError: If the request format is invalid or the method is + unsupported. + """ + deadline = time.monotonic() + timeout + first_line_bytes = await readline_with_timeout( + stdout, + timeout=remaining_time(deadline) + ) + + if not first_line_bytes: + raise EOFError("EOF reached while reading first line from subprocess") + + first_line = first_line_bytes.decode('utf-8').rstrip('\r\n') + + _logger.debug("First Paper Muncher request line: %s", first_line) + + parts = first_line.split(' ') + if len(parts) != 3: + raise ValueError( + f"Invalid HTTP request line from Paper Muncher: {first_line}") + + method, path, _ = parts + if method == 'PUT': + path = None + elif method != 'GET': + raise ValueError( + f"Unexpected HTTP method: {method} in line: {first_line}") + + await consume_paper_muncher_request(stdout, timeout=remaining_time(deadline)) + + return path diff --git a/meta/bindings/python/paper_muncher/autochronous/__init__.py b/meta/bindings/python/paper_muncher/autochronous/__init__.py new file mode 100644 index 000000000..240665e0e --- /dev/null +++ b/meta/bindings/python/paper_muncher/autochronous/__init__.py @@ -0,0 +1,8 @@ +"""The :mod:`paper_muncher.autochronous` 
class RenderedProxy:
    """Context manager usable with both ``with`` and ``async with``.

    The constructor only records its arguments. The synchronous or
    asynchronous ``rendered`` context manager is created lazily, when the
    matching ``__enter__`` / ``__aenter__`` is invoked, and every call is
    delegated to it.
    """

    def __init__(self, *args, **kwargs):
        self.args, self.kwargs = args, kwargs

    def __enter__(self):
        # Built here (not in __init__) so nothing is started until the
        # caller has picked the synchronous protocol.
        manager = sync_rendered(*self.args, **self.kwargs)
        self._sync_cm = manager
        return manager.__enter__()

    def __exit__(self, exc_type, exc_val, exc_tb):
        return self._sync_cm.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self):
        manager = async_rendered(*self.args, **self.kwargs)
        self._async_cm = manager
        return await manager.__aenter__()

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        return await self._async_cm.__aexit__(exc_type, exc_val, exc_tb)
"""The :mod:`paper_muncher.binary` module
provides utilities to locate and validate the Paper Muncher binary.
"""


import logging
import os
import subprocess
from shutil import which

from typing import Optional


_logger = logging.getLogger(__name__)

FALLBACK_BINARY = '/opt/paper-muncher/bin/paper-muncher'


def find_in_path(name):
    """Search the directories of ``PATH`` for an executable called *name*.

    ``os.defpath`` is used when ``PATH`` is not set in the environment.

    :param str name: Executable name to look for.
    :return: Path to the executable, or ``None`` when nothing matches.
    :rtype: str or None
    """
    path = os.environ.get('PATH', os.defpath).split(os.pathsep)
    return which(name, path=os.pathsep.join(path))


def get_paper_muncher_binary() -> Optional[str]:
    """Find and validate the Paper Muncher binary.

    The binary is looked up in ``PATH`` first; when it is not found there,
    :data:`FALLBACK_BINARY` is tried. The candidate is validated by running
    it with ``--version``.

    :return: Path to the Paper Muncher binary if found and usable,
        None otherwise.
    :rtype: str or None
    """
    try:
        binary = find_in_path('paper-muncher')
    except OSError:
        _logger.debug("Cannot locate in path paper-muncher", exc_info=True)
        binary = None

    if not binary:
        # shutil.which() signals "not found" by returning None, it does not
        # raise, so the fallback has to be selected explicitly.
        binary = FALLBACK_BINARY

    try:
        subprocess.run(
            [binary, '--version'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable file;
        # CalledProcessError covers a non-zero exit status.
        _logger.debug("Cannot use paper-muncher", exc_info=True)
        return None

    return binary


def can_use_paper_muncher() -> bool:
    """Check if Paper Muncher binary is available and usable.

    :return: True if :func:`get_paper_muncher_binary` finds a usable
        binary, False otherwise.
    :rtype: bool
    """
    return bool(get_paper_muncher_binary())
_current_scope: ContextVar[dict] = ContextVar("current_scope")


def register_paper_muncher(asgi_application):
    """Attach an async ``run_paper_muncher`` helper to a generic ASGI app.

    The helper reads the ASGI scope from the module-level context variable
    at call time, builds an asset runner for ``asgi_application`` and
    awaits the asynchronous ``render``.

    :param asgi_application: The ASGI application object to patch.
    :return: The context variable the helper reads the scope from.
        Nothing in this function sets it; presumably the caller's own
        middleware is expected to do so per request (TODO confirm).
    """
    async def run_paper_muncher(content, mode="print", **options):
        asset_runner = asgi_runner_factory(
            asgi_application,
            _current_scope.get(),
        )
        output = await render(
            content,
            mode=mode,
            runner=asset_runner,
            **options,
        )
        return output

    setattr(asgi_application, "run_paper_muncher", run_paper_muncher)
    return _current_scope
_current_environ: ContextVar[dict] = ContextVar("current_environ")


def register_paper_muncher(django_wsgi_app):
    """Wrap a Django WSGI app so views can call ``run_paper_muncher``.

    A middleware stores the WSGI environ of the request being handled in
    a context variable while the wrapped app is called, and restores the
    previous value afterwards. ``run_paper_muncher`` reads that environ,
    builds a WSGI asset runner for ``django_wsgi_app`` and calls the
    synchronous ``render``. The helper is attached to both the original
    app and the middleware.

    :param django_wsgi_app: The WSGI application to wrap.
    :return: The middleware, to be served instead of ``django_wsgi_app``.
    """
    def run_paper_muncher(content, mode="print", **options):
        asset_runner = wsgi_runner_factory(
            django_wsgi_app,
            _current_environ.get(),
        )
        return render(content, mode=mode, runner=asset_runner, **options)

    class PaperMuncherEnvironMiddleware:
        def __init__(self, app):
            self.app = app

        def __call__(self, environ, start_response):
            reset_token = _current_environ.set(environ)
            try:
                response = self.app(environ, start_response)
            finally:
                # Restore the previous value even if the app raised.
                _current_environ.reset(reset_token)
            return response

    wrapped = PaperMuncherEnvironMiddleware(django_wsgi_app)
    for target in (django_wsgi_app, wrapped):
        target.run_paper_muncher = run_paper_muncher

    return wrapped
def register_paper_muncher(flask_application):
    """Attach a ``run_paper_muncher`` helper to a Flask application.

    When called, the helper builds a WSGI asset runner from
    ``flask_application`` and the ``environ`` of Flask's current
    ``request``, then delegates to the synchronous ``render``.

    :param flask_application: The Flask application object to patch.
    """
    def run_paper_muncher(content, mode="print", **options):
        asset_runner = wsgi_runner_factory(
            flask_application,
            request.environ,
        )
        output = render(content, mode=mode, runner=asset_runner, **options)
        return output

    setattr(flask_application, "run_paper_muncher", run_paper_muncher)
def register_paper_muncher(quart_application):
    """Attach an async ``run_paper_muncher`` helper to a Quart application.

    When awaited, the helper builds an ASGI asset runner from
    ``quart_application`` and the ``scope`` of Quart's current
    ``request``, then awaits the asynchronous ``render``.

    :param quart_application: The Quart application object to patch.
    """
    async def run_paper_muncher(content, mode="print", **options):
        asset_runner = asgi_runner_factory(
            quart_application,
            request.scope,
        )
        output = await render(
            content,
            mode=mode,
            runner=asset_runner,
            **options,
        )
        return output

    setattr(quart_application, "run_paper_muncher", run_paper_muncher)
async def generate_http_response(
    request_path: str,
    application,
    scope: dict,
) -> AsyncGenerator[bytes, None]:
    """Simulate an internal HTTP GET request to an ASGI app and yield
    the full HTTP response (headers + body) as bytes.

    The ``Host`` header and the client address of the current request are
    forwarded to the simulated request (the latter as ``X-Forwarded-For``).

    :param request_path: Path to query within the ASGI app.
    :param application: The ASGI application to query.
    :param scope: The ASGI scope from the current request.
    :yield: The response status line and headers, then the response body.
    """
    # ASGI carries headers as an iterable of (name, value) byte pairs, not
    # as a mapping, so build a lookup table before reading the host.
    incoming_headers = dict(scope.get("headers") or ())
    host = incoming_headers.get(b"host", b"localhost").decode("latin1")

    headers = {
        "host": host,
        "user-agent": SERVER_SOFTWARE,
    }

    # "client" may be absent or None in an ASGI scope.
    client = scope.get("client") or ("127.0.0.1", 0)
    client_addr = client[0]
    if client_addr:
        headers["x-forwarded-for"] = client_addr

    # httpx needs an absolute base URL, scheme included.
    base_url = f"{scope.get('scheme') or 'http'}://{host}"

    async with httpx.AsyncClient(
        # Explicit transport: the ``app=`` shortcut is gone in recent httpx.
        transport=httpx.ASGITransport(app=application),
        base_url=base_url,
    ) as http_client:
        response = await http_client.get(request_path, headers=headers)

    content_type = response.headers.get(
        'Content-Type',
        'application/octet-stream',
    )
    now = datetime.now(timezone.utc)
    response_header = (
        f"HTTP/1.1 {response.status_code} {response.reason_phrase}\r\n"
        f"Date: {format_datetime(now, usegmt=True)}\r\n"
        f"Server: {SERVER_SOFTWARE}\r\n"
        f"Content-Length: {len(response.content)}\r\n"
        f"Content-Type: {content_type}\r\n"
        "\r\n"
    ).encode()

    yield response_header
    yield response.content
def generate_environ(
    path: str,
    current_environ: WSGIEnvironment,
) -> WSGIEnvironment:
    """Generate a WSGI environment for the given path.

    This is used to simulate an HTTP GET request to a WSGI application.
    The host, cookies and remote address of ``current_environ`` are carried
    over when they are present.

    :param str path: The HTTP request path, optionally with a query string.
    :param WSGIEnvironment current_environ: Environ of the request being
        served, used as the source of the forwarded values.
    :return: The WSGI environment dictionary. (See PEP 3333)
    :rtype: WSGIEnvironment
    """
    url, _, query_string = path.partition('?')

    headers = {'User-Agent': SERVER_SOFTWARE}
    host = current_environ.get('HTTP_HOST')
    if host:
        headers['Host'] = host
    cookie = current_environ.get('HTTP_COOKIE')
    if cookie:
        # Must be sent as the "Cookie" header so that it ends up under the
        # HTTP_COOKIE environ key.
        headers['Cookie'] = cookie

    # REMOTE_ADDR is a CGI variable, not an HTTP header: set it directly.
    overrides = {}
    remote_addr = current_environ.get('REMOTE_ADDR')
    if remote_addr:
        overrides['REMOTE_ADDR'] = remote_addr

    return create_environ(
        method='GET',
        path=url,
        query_string=query_string,
        headers=headers,
        environ_overrides=overrides,
    )


def _build_response_head(
    http_status: str,
    content_length,
    content_type: str,
) -> bytes:
    """Encode the status line and headers of a simulated HTTP response."""
    now = datetime.now(timezone.utc)
    return (
        f"HTTP/1.1 {http_status}\r\n"
        f"Date: {format_datetime(now, usegmt=True)}\r\n"
        f"Server: {SERVER_SOFTWARE}\r\n"
        f"Content-Length: {content_length}\r\n"
        f"Content-Type: {content_type}\r\n"
        "\r\n"
    ).encode()


def generate_http_response(
    request_path: str,
    application: WSGIApplication,
    environ: WSGIEnvironment,
) -> Generator[bytes, None, None]:
    """Simulate an internal HTTP GET request to a WSGI app and yield
    the HTTP response headers and body as bytes.

    Its main use is to call a WSGI application on behalf of an external
    program, such as a subprocess requesting resources.

    Note: This function doesn't preserve the thread-local data.

    usage example:

    .. code-block:: python

        from paper_muncher.runners.wsgi import generate_http_response

        for chunk in generate_http_response(
            '/my/request/path', application, environ
        ):
            print(chunk.decode())

    :param str request_path: Path to query within the wsgi app.
    :param WSGIApplication application: The WSGI application to query.
    :param WSGIEnvironment environ: The current WSGI environment.
    :yields: Chunks of the full HTTP response to the simulated request.
    :rtype: Generator[bytes, None, None]
    """
    response_iterable, http_status, http_response_headers = run_wsgi_app(
        application, generate_environ(
            path=request_path,
            current_environ=environ
        )
    )

    try:
        content_type = http_response_headers.get(
            'Content-Type',
            'application/octet-stream',
        )
        sendfile_path = http_response_headers.get('X-Sendfile')

        if sendfile_path:
            with open(sendfile_path, 'rb') as file:
                # Size the file that was actually opened.
                file_size = os.fstat(file.fileno()).st_size
                yield _build_response_head(
                    http_status, file_size, content_type)
                # Fixed-size reads: binary files may not contain newlines.
                yield from iter(lambda: file.read(64 * 1024), b'')
            return

        content_length = http_response_headers.get('Content-Length')
        if content_length is None:
            # No declared length (e.g. streamed response): buffer the body
            # so that a correct Content-Length can be announced.
            body = b''.join(response_iterable)
            yield _build_response_head(http_status, len(body), content_type)
            yield body
        else:
            yield _build_response_head(
                http_status, content_length, content_type)
            yield from response_iterable
    finally:
        # PEP 3333: the iterable's close() must be called when done.
        close = getattr(response_iterable, 'close', None)
        if close is not None:
            close()
+ (See PEP 3333) This environment only needs to provide the + necessary keys to build a new environment for each request. + (Host, http_cookie, remote_addr) + :return: A function that takes a request path and yields the HTTP response. + :rtype: Callable[[str], Generator[bytes, None, None]] + """ + _logger.debug( + "Creating WSGI runner for application %r with environ %r", + application, + {k: environ[k] for k in ( + 'HTTP_HOST', + 'REMOTE_ADDR', + ) if k in environ} + ) + + def runner(request_path: str) -> Generator[bytes, None, None]: + return generate_http_response( + request_path, + application, + environ, + ) + return runner diff --git a/meta/bindings/python/paper_muncher/synchronous/__init__.py b/meta/bindings/python/paper_muncher/synchronous/__init__.py new file mode 100644 index 000000000..9821b3a97 --- /dev/null +++ b/meta/bindings/python/paper_muncher/synchronous/__init__.py @@ -0,0 +1,9 @@ +"""The :mod:`paper_muncher.synchronous` module +provides the core functionality for rendering documents +using the Paper Muncher engine. +It includes the main rendering functions and utilities +for managing the rendering process. +""" + +from .interface import rendered, render +from ..binary import can_use_paper_muncher diff --git a/meta/bindings/python/paper_muncher/synchronous/interface.py b/meta/bindings/python/paper_muncher/synchronous/interface.py new file mode 100644 index 000000000..2920c17a0 --- /dev/null +++ b/meta/bindings/python/paper_muncher/synchronous/interface.py @@ -0,0 +1,274 @@ +""" +The :mod:`.paper_muncher.synchronous.interface` module provides +utilities for interacting with Paper Muncher, a subprocess used to render +HTML content into or Image format. 
@contextmanager
def rendered(
    content: str,
    mode: str = "print",
    runner: Optional[Runner] = None,
    **options,
) -> Generator[tuple[BinaryIO, BinaryIO], None, None]:
    """Context manager to render HTML content using Paper Muncher.

    The exchange with the subprocess runs in three phases: the HTML
    document is sent in answer to the first request, asset requests are
    served through ``runner`` until a ``PUT`` request arrives, then a final
    ``200 OK`` is sent and stdin is closed. The output streams are yielded
    only after that; on exit the process is waited for and killed if it
    does not terminate within ``wait_timeout``.

    :param content: The HTML content to render, as a string (it is encoded
        before being sent).
    :param mode: The rendering mode, either 'print' or 'render'.
    :param runner: Optional runner function to handle asset requests. It
        is required as soon as Paper Muncher requests an asset.
    :param options: Additional options to pass to Paper Muncher as
        ``--option value`` arguments. ``read_timeout``,
        ``readline_timeout``, ``write_timeout``, ``chunk_size`` and
        ``wait_timeout`` are consumed locally and not forwarded.
    :return: A generator yielding the stdout and stderr streams of the
        Paper Muncher process.
    :raises RuntimeError: If Paper Muncher is not available, crashes, or
        requests an asset while no runner was provided.
    :raises ValueError: If an invalid mode is specified.
    """

    # Single lookup: each one runs the binary with --version.
    binary = get_paper_muncher_binary()
    if not binary:
        raise RuntimeError(
            "Paper Muncher is not available in the current session. "
            "Ensure it is installed and available in the system PATH."
        )

    if mode not in AUTHORIZED_MODE:
        raise ValueError(
            f"Invalid mode '{mode}', must be one of {AUTHORIZED_MODE}"
        )

    readline_timeout = options.get(
        'readline_timeout',
        DEFAULT_READLINE_TIMEOUT,
    )
    write_timeout = options.get('write_timeout', DEFAULT_WRITE_TIMEOUT)
    wait_timeout = options.get('wait_timeout', DEFAULT_WAIT_TIMEOUT)

    extra_args = []
    for option, value in options.items():
        if option in NOT_RENDERABLE_OPTIONS:
            continue
        extra_args.extend([
            f'--{option}', str(value),
        ])

    # Encode once: the same bytes give the Content-Length and the body.
    payload = content.encode()

    with Popen(
        [binary, mode, "pipe:", '-o', "pipe:"] + extra_args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    ) as process:
        # Phase 1: send HTML content headers and body
        try:
            consume_paper_muncher_request(
                process.stdout,
                timeout=readline_timeout,
            )
        except EOFError as early_eof:
            raise RuntimeError(
                "Paper Muncher terminated prematurely (phase 1)"
            ) from early_eof

        if process.poll() is not None:
            raise RuntimeError(
                "Paper Muncher crashed before receiving content")

        now = datetime.now(timezone.utc)
        response_headers = (
            b"HTTP/1.1 200 OK\r\n"
            b"Content-Length: %(length)d\r\n"
            b"Content-Type: text/html\r\n"
            b"Date: %(date)s\r\n"
            b"Server: %(server)s\r\n"
            b"\r\n"
        ) % {
            b'length': len(payload),
            b'date': format_datetime(now, usegmt=True).encode(),
            b'server': SERVER_SOFTWARE,
        }

        write_with_timeout(
            process.stdin,
            response_headers,
            timeout=write_timeout,
        )
        write_with_timeout(
            process.stdin,
            payload,
            timeout=write_timeout,
        )
        process.stdin.flush()

        if process.poll() is not None:
            raise RuntimeError(
                "Paper Muncher crashed while sending HTML content")

        # Phase 2: serve asset requests until the rendered content is ready
        for request_no in count(start=1):
            try:
                path = read_paper_muncher_request(
                    process.stdout,
                    timeout=readline_timeout,
                )
            except (EOFError, TimeoutError):
                process.kill()
                process.wait()
                raise

            # A None path marks the PUT request: the output is ready.
            if path is None:
                break

            if runner is None:
                # Nothing can answer the request: stop the process rather
                # than fail on calling None.
                process.kill()
                process.wait()
                raise RuntimeError(
                    f"Paper Muncher requested asset {request_no}: {path}"
                    " but no runner was provided to serve it"
                )

            for chunk in runner(path):
                write_with_timeout(
                    process.stdin,
                    chunk,
                    timeout=write_timeout
                )
            process.stdin.flush()

            if process.poll() is not None:
                raise RuntimeError(
                    "Paper Muncher crashed while serving asset"
                    f" {request_no}: {path}"
                )

        # Phase 3: send final OK and close the process
        now = datetime.now(timezone.utc)
        final_response = (
            b"HTTP/1.1 200 OK\r\n"
            b"Date: %(date)s\r\n"
            b"Server: %(server)s\r\n"
            b"\r\n"
        ) % {
            b'date': format_datetime(now, usegmt=True).encode(),
            b'server': SERVER_SOFTWARE,
        }

        write_with_timeout(
            process.stdin,
            final_response,
            timeout=write_timeout,
        )
        process.stdin.flush()
        process.stdin.close()

        if process.poll() is not None:
            raise RuntimeError(
                "Paper Muncher crashed before returning the rendered content"
            )

        try:
            yield process.stdout, process.stderr
        finally:
            try:
                process.wait(timeout=wait_timeout)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
                _logger.warning(
                    "Paper Muncher did not terminate in time, "
                    "forcefully killed it"
                )

            if process.returncode != 0:
                _logger.warning(
                    "Paper Muncher exited with code %d",
                    process.returncode,
                )
+ :raises RuntimeError: If Paper Muncher is not available or crashes. + :raises ValueError: If an invalid mode is specified. + """ + + with rendered( + content, + mode=mode, + runner=runner, + **options, + ) as (content_stream, error_stream): + read_timeout = options.get('read_timeout', DEFAULT_READ_TIMEOUT) + chunk_size = options.get('chunk_size', DEFAULT_CHUNK_SIZE) + rendered_content = read_all_with_timeout( + content_stream, + chunk_size=chunk_size, + timeout=read_timeout, + ) + stderr_output = read_all_with_timeout( + error_stream, + chunk_size=chunk_size, + timeout=read_timeout, + ) + + if stderr_output: + _logger.warning( + "Paper Muncher error output: %s", + stderr_output.decode('utf-8', errors='replace'), + ) + + if mode == "print": + if not rendered_content.startswith(b'%PDF-'): + raise RuntimeError( + "Paper Muncher did not return valid PDF content" + ) + + return rendered_content diff --git a/meta/bindings/python/paper_muncher/synchronous/io_with_timeout/__init__.py b/meta/bindings/python/paper_muncher/synchronous/io_with_timeout/__init__.py new file mode 100644 index 000000000..4e78a2e0e --- /dev/null +++ b/meta/bindings/python/paper_muncher/synchronous/io_with_timeout/__init__.py @@ -0,0 +1,37 @@ +"""The :mod:`synchronous.io_with_timeout` +module provides cross-platform utilities for I/O operations with timeouts. +It includes functions for reading and writing data with a specified timeout, +and handles platform-specific differences in I/O behavior when possible. 
+""" + +import os +import logging +import sys + +_logger = logging.getLogger(__name__) + + +if os.name == 'posix': + _logger.info("Using POSIX communications module") + from .posix.communications import ( + read_all_with_timeout, + readline_with_timeout, + write_with_timeout, + ) +elif os.name == 'nt' and sys.version_info >= (3, 12): + _logger.info("Using NT communications module") + from .nt.communications import ( + read_all_with_timeout, + readline_with_timeout, + write_with_timeout, + ) +else: + _logger.warning( + "Using basic communications module without proper" + " anti stalled process handling" + ) + from .fallback.communications import ( + read_all_with_timeout, + readline_with_timeout, + write_with_timeout, + ) diff --git a/meta/bindings/python/paper_muncher/synchronous/io_with_timeout/common.py b/meta/bindings/python/paper_muncher/synchronous/io_with_timeout/common.py new file mode 100644 index 000000000..1c8d9bd60 --- /dev/null +++ b/meta/bindings/python/paper_muncher/synchronous/io_with_timeout/common.py @@ -0,0 +1,29 @@ +"""The :mod:`synchronous.io_with_timeout.common` +module provides utilities for managing timeouts in I/O operations. +It includes a function to calculate the remaining time until a deadline, +and raises a `TimeoutError` if the deadline has already passed. +""" + +import inspect +import time + + +def remaining_time(deadline: float) -> float: + """Calculate the remaining time until a deadline. + + :param float deadline: The deadline timestamp. + :return: Remaining time in seconds. + :rtype: float + :raises TimeoutError: If the deadline has already passed. 
_logger = logging.getLogger(__name__)


def read_all_with_timeout(
    file_object: BinaryIO,
    timeout: int,
    chunk_size: int = -1,
) -> bytes:
    """Read all data from a file-like object until EOF.

    :param BinaryIO file_object: File-like object to read from.
    :param int timeout: UNUSED timeout parameter.
        (Fallback implementation does not support timeouts)
    :param int chunk_size: UNUSED chunk size. It is accepted so that the
        function can be called with a ``chunk_size`` keyword like the
        platform-specific variants; the data is read in a single call.
    :return: All bytes read from the file-like object.
    :rtype: bytes
    """
    _logger.warning(
        "Using fallback read_all_with_timeout. "
        "This may lead to stalled processes."
    )
    return file_object.read()
def read_all_with_timeout(
    file_object: BinaryIO,
    timeout: int,
    chunk_size: int,
) -> bytes:
    """Read all data from a file-like object until EOF, within a deadline.

    The descriptor is switched to non-blocking mode and polled until EOF
    is seen or the overall deadline expires.

    :param BinaryIO file_object: File-like object to read from.
    :param int timeout: Timeout in seconds for the entire read operation.
    :param int chunk_size: Maximum number of bytes to read per chunk.
    :return: All bytes read until EOF.
    :rtype: bytes
    :raises TimeoutError: If EOF is not reached before the deadline
        (raised by ``remaining_time``).
    """
    fd = file_object.fileno()
    data = bytearray()
    deadline = time.monotonic() + timeout

    if os.get_blocking(fd):
        os.set_blocking(fd, False)

    # remaining_time() raises TimeoutError once the deadline has passed,
    # which is what terminates this loop when the peer never sends EOF.
    while remaining_time(deadline):
        try:
            chunk = os.read(fd, chunk_size)
        except BlockingIOError:
            # os.read() never returns None: on a non-blocking descriptor
            # with no data available it raises BlockingIOError instead.
            time.sleep(0.01)
            continue

        if not chunk:
            # Empty bytes means EOF.
            break
        data.extend(chunk)

    _logger.debug(
        "Elapsed time reading: %.3f seconds",
        time.monotonic() - (deadline - timeout)
    )
    return bytes(data)
def readline_with_timeout(
    file_object: BinaryIO,
    timeout: int,
) -> bytes:
    """Read a full line ending with '\\n' from a file-like object within a
    timeout.

    Bytes are read directly from the underlying file descriptor, one at a
    time, after waiting for readability with a selector.

    :param BinaryIO file_object: File-like object to read from
        (must be in binary mode).
    :param int timeout: Max seconds to wait for the complete line.
    :return: A line of bytes ending in '\\n'.
    :rtype: bytes
    :raises TimeoutError: If timeout is reached before a line is read.
    :raises EOFError: If EOF is reached before a line is read.
    """
    fd = file_object.fileno()
    deadline = time.monotonic() + timeout
    line_buffer = bytearray()

    with selectors.DefaultSelector() as selector:
        selector.register(fd, selectors.EVENT_READ)

        while True:
            # select() returns an empty list when the wait expires; that
            # must surface as a TimeoutError rather than silently
            # returning a partial (or empty) line to the caller.
            # remaining_time() itself raises once the deadline has passed.
            if not selector.select(timeout=remaining_time(deadline)):
                raise TimeoutError(
                    "Timeout exceeded while reading line from subprocess"
                )

            # NOTE(review): presumably one byte per read so nothing past
            # the newline is consumed from the descriptor - confirm.
            next_byte = os.read(fd, 1)
            if not next_byte:
                raise EOFError("EOF reached while reading line")

            line_buffer += next_byte
            if next_byte == b'\n':
                break

    _logger.debug(
        "Elapsed time reading line: %.3f seconds",
        time.monotonic() - (deadline - timeout)
    )
    return bytes(line_buffer)
from contextlib import contextmanager


@contextmanager
def nonblocking_popen(*args, **kwargs):
    """Context manager for subprocess.Popen that sets non-blocking I/O
    for stdin, stdout, and stderr.

    The process is always created with ``bufsize=0``; a ``bufsize``
    keyword supplied by the caller is overridden. Streams that were not
    requested as pipes are ``None`` on the Popen object and are skipped.

    :param args: Positional arguments for subprocess.Popen.
    :param kwargs: Keyword arguments for subprocess.Popen.
    :return: A context manager that yields the subprocess.Popen object.
    """
    # Assign instead of passing bufsize=0 next to **kwargs, which raised
    # TypeError whenever the caller also supplied bufsize.
    kwargs["bufsize"] = 0
    with subprocess.Popen(*args, **kwargs) as proc:
        for stream in (proc.stdout, proc.stderr, proc.stdin):
            if stream is not None:
                # os.set_blocking() needs an integer file descriptor,
                # not the file object itself.
                os.set_blocking(stream.fileno(), False)
        yield proc


# Only Windows with Python 3.12+ uses the non-blocking wrapper; every
# other platform keeps the plain subprocess.Popen class.
if os.name == 'nt' and sys.version_info >= (3, 12):
    Popen = nonblocking_popen
else:
    Popen = subprocess.Popen
+ :param int timeout: Timeout in seconds for each line read. + :return: The requested asset path, or ``None`` if the method is PUT. + :rtype: str or None + :raises EOFError: If no request line is found. + :raises ValueError: If the request format is invalid or the method is + unsupported. + """ + deadline = time.monotonic() + timeout + first_line_bytes = readline_with_timeout( + stdout, + timeout=remaining_time(deadline) + ) + + if not first_line_bytes: + raise EOFError("EOF reached while reading first line from subprocess") + + first_line = first_line_bytes.decode('utf-8').rstrip('\r\n') + + _logger.debug("First Paper Muncher request line: %s", first_line) + + parts = first_line.split(' ') + if len(parts) != 3: + raise ValueError( + f"Invalid HTTP request line from Paper Muncher: {first_line}") + + method, path, _ = parts + if method == 'PUT': + path = None + elif method != 'GET': + raise ValueError( + f"Unexpected HTTP method: {method} in line: {first_line}") + + consume_paper_muncher_request(stdout, timeout=remaining_time(deadline)) + + return path diff --git a/meta/bindings/python/paper_muncher/typing.py b/meta/bindings/python/paper_muncher/typing.py new file mode 100644 index 000000000..07d2ceedc --- /dev/null +++ b/meta/bindings/python/paper_muncher/typing.py @@ -0,0 +1,15 @@ +"""The :mod:`paper_muncher.typing` module provides +type definitions used in Paper Muncher. 
from typing import Callable, Protocol, runtime_checkable


@runtime_checkable
class Runner(Protocol):
    """Structural type for a synchronous callable that takes a ``path``
    string and returns ``bytes``.

    Declared as a protocol so that plain functions and any object with a
    matching ``__call__`` satisfy the annotation without subclassing.
    """

    def __call__(self, path: str) -> bytes:
        ...


@runtime_checkable
class AsyncRunner(Protocol):
    """Structural type for an asynchronous callable that takes a ``path``
    string and returns ``bytes`` when awaited.

    Runtime ``isinstance`` checks only verify that ``__call__`` exists;
    they cannot tell a coroutine function from a regular one.
    """

    async def __call__(self, path: str) -> bytes:
        ...
None: - raise ValueError("stderr is None") - - stdin = proc.stdin - if stdin is None: - raise ValueError("stdin is None") - - while True: - request = _readRequest(stdout) - if request is None: - raise ValueError("request is None") - - if request.preamble is None: - raise ValueError("request.preamble is None") - - preamble = request.preamble.split(" ") - if preamble[0] == b"GET": - _sendResponse(stdin, *loader.handleRequest(preamble[1], dict(request))) - elif preamble[0] == b"POST": - payload = request.get_payload() - if not isinstance(payload, bytes): - raise ValueError("payload is not bytes") - proc.terminate() - return payload - else: - raise ValueError("Invalid request") - - -def find() -> Path: - return Path(__file__).parent / "bin" - - -def print( - document: bytes | str | Path, - mime: str = "text/html", - loader: Loader = StaticDir(Path.cwd()), - bin: Path = find(), - **kwargs: str, -) -> bytes: - extraArgs = [] - for key, value in kwargs.items(): - extraArgs.append(f"--{key}") - extraArgs.append(str(value)) - - if isinstance(document, Path): - return _run( - [str(bin), "print", "-i", str(document), "-o", "out.pdf"] + extraArgs, - loader, - ) - else: - with tempfile.NamedTemporaryFile(delete=False) as f: - if isinstance(document, str): - document = document.encode() - f.write(document) - return _run( - [str(bin), "print", "-i", f.name, "-o", "out.pdf"] + extraArgs, - loader, - ) - return b"" - - -__all__ = ["Loader", "StaticDir", "print"] diff --git a/meta/bindings/python/pyproject.toml b/meta/bindings/python/pyproject.toml new file mode 100644 index 000000000..4e7b7ead4 --- /dev/null +++ b/meta/bindings/python/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "paper_muncher" +version = "0.1.0" +description = "Python bindings for Paper Muncher" +readme = "README.md" +requires-python = ">=3.10" +license = { text = "LGPL-3.0-or-later" } +authors = [ + { name = "Odoo", email = 
"info@odoo.com" } +] + +[tool.hatch.build] +exclude = [ + "/.doctrees", + "/documentation_source", + "/examples", +] + +[tool.hatch.build.targets.wheel] +packages = ["paper_muncher"] + +[tool.hatch.build.targets.sdist] +exclude = [ + "/.doctrees", + "/documentation_source", + "/examples", +] diff --git a/meta/bindings/python/sample.py b/meta/bindings/python/sample.py deleted file mode 100644 index e1a59c492..000000000 --- a/meta/bindings/python/sample.py +++ /dev/null @@ -1,12 +0,0 @@ -import papermuncher - -with open("out.pdf", "wb") as f: - document = """ -Hello, world!
- """ - f.write( - papermuncher.print( - document, - paper="a4", - ) - )