diff --git a/.github/workflows/e2e_test-on-change.yml b/.github/workflows/e2e_test-on-change.yml index f34797df..1cf61439 100644 --- a/.github/workflows/e2e_test-on-change.yml +++ b/.github/workflows/e2e_test-on-change.yml @@ -13,23 +13,25 @@ on: jobs: e2e-tests: runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.13'] steps: - - name: Checkout Code + - name: Checkout code uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Set up PDM - uses: pdm-project/setup-pdm@v4 + + - name: Install Nix + uses: cachix/install-nix-action@v31 with: - python-version: ${{ matrix.python-version }} + github_access_token: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies run: | - pdm sync -d - - name: Run e2e tests + nix develop -c pdm sync -d + + - name: Run end-to-end tests run: | - pdm run test:e2e + nix develop -c pdm run test:e2e -o log_cli_level=DEBUG |& tee test_e2e.out + + - name: Upload test_e2e.out + uses: actions/upload-artifact@v4 + with: + name: test_e2e + path: test_e2e.out diff --git a/.gitignore b/.gitignore index db8799dc..2979f2aa 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__/ # Distribution / packaging .Python build/ +result develop-eggs/ dist/ downloads/ @@ -25,6 +26,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +/result # PyInstaller # Usually these files are written by a python script from a template @@ -174,4 +176,4 @@ cython_debug/ .pypirc # Test Reports (directories) -reports-*/ \ No newline at end of file +reports-*/ diff --git a/Dockerfile.e2e-test b/Dockerfile.e2e-test new file mode 100644 index 00000000..4ebdc682 --- /dev/null +++ b/Dockerfile.e2e-test @@ -0,0 +1,20 @@ +FROM docker.nix-community.org/nixpkgs/nix-flakes AS builder + +# Don't stay in the root directory for this. +# https://github.com/NixOS/nix/issues/11217 +WORKDIR /workspace + +# Copy just enough for Nix pdm install without bringing in the entire project. +# This ensures we don't run the nix develop too often. +COPY flake.nix flake.lock pyproject.toml pdm.lock . + +# Build Nix shell and install all dev PDM dependencies. +RUN nix develop path:///workspace -c pdm sync -d + +# Copy the actual module and E2E tests, then install self. +# This is what will be edited during development, so we run it last. +COPY inference_perf ./inference_perf +COPY e2e ./e2e + +ENTRYPOINT ["nix", "develop", "path:///workspace", "-c"] +CMD ["pdm", "run", "test:e2e"] diff --git a/e2e/testdata/models/.gitignore b/e2e/testdata/models/.gitignore new file mode 100644 index 00000000..1a9073c1 --- /dev/null +++ b/e2e/testdata/models/.gitignore @@ -0,0 +1,4 @@ +* +!.gitignore +!*.tar.gz +!*.tar.zst diff --git a/e2e/testdata/models/google_gemma-3-270m.tar.gz b/e2e/testdata/models/google_gemma-3-270m.tar.gz new file mode 100644 index 00000000..60813591 Binary files /dev/null and b/e2e/testdata/models/google_gemma-3-270m.tar.gz differ diff --git a/e2e/tests/test_llm_d_inference_sim.py b/e2e/tests/test_llm_d_inference_sim.py new file mode 100644 index 00000000..57792198 --- /dev/null +++ b/e2e/tests/test_llm_d_inference_sim.py @@ -0,0 +1,127 @@ +""" +End-to-end integration testing of inference-perf using llm-d-inference-sim[1]. + +In order for these tests to run, you must have `llm-d-inference-sim` in your +PATH. The GitHub Actions runner will have this, but you may also install it +locally by following llm-d-inference-sim's README or by entering the Nix shell +of this repository (i.e. `nix develop`). 
+ +If your local environment is missing `llm-d-inference-sim`, tests here will +automatically be skipped. + +[1]: https://github.com/llm-d/llm-d-inference-sim +""" + +import pytest + +from utils.llm_d_inference_sim import LLMDInferenceSimRunner +from utils.benchmark import run_benchmark_minimal +from utils.testdata import extract_tarball + + +TEST_MODEL_NAME = "google/gemma-3-270m" +TEST_MODEL_TARBALL = "e2e/testdata/models/google_gemma-3-270m.tar.gz" + + +@pytest.mark.asyncio +@pytest.mark.skipif(not LLMDInferenceSimRunner.is_available(), reason="local environment missing llm-d-inference-sim") +@pytest.mark.parametrize( + "data", + [ + pytest.param( + { + "type": "mock", + }, + id="data_mock", + ), + pytest.param( + { + "type": "shared_prefix", + "shared_prefix": { + "num_groups": 256, + "num_prompts_per_group": 16, + "system_prompt_len": 512, + "question_len": 256, + "output_len": 256, + }, + }, + id="data_shared_prefix", + ), + ], +) +@pytest.mark.parametrize( + "load", + [ + pytest.param( + { + "type": "constant", + "stages": [{"rate": 1, "duration": 5}], + "num_workers": 2, + }, + id="load_constant_slow", + ), + pytest.param( + { + "type": "constant", + "interval": 2, + "stages": [{"rate": 1, "duration": 5}, {"rate": 2, "duration": 5}], + "num_workers": 2, + }, + id="load_constant_slow_two_stages", + ), + pytest.param( + { + "type": "constant", + "stages": [{"rate": 100, "duration": 5}], + "num_workers": 2, + }, + id="load_constant_fast", + ), + ], +) +async def test_completion_successful_run(data: dict, load: dict): + """ + Very simple inference-perf integration test that ensures a wide range of + vLLM benchmarking configurations can run successfully. + """ + model_name = TEST_MODEL_NAME + model_path = extract_tarball(TEST_MODEL_TARBALL) + + async with LLMDInferenceSimRunner(model_name, port=18000) as sim: + result = await run_benchmark_minimal( + { + "data": data, + "load": load, + "api": { + "type": "completion", + "streaming": True, + }, + "server": { + "type": "vllm", + "model_name": model_name, + "base_url": f"http://{sim.host}:{sim.port}", + "ignore_eos": True, + }, + "tokenizer": { + "pretrained_model_name_or_path": str(model_path), + }, + "report": { + "request_lifecycle": { + "summary": True, + "per_stage": True, + "per_request": True, + }, + }, + } + ) + + assert result.success, "Benchmark failed" + assert result.reports, "No reports generated from benchmark" + + requests_report = result.reports["per_request_lifecycle_metrics.json"] + assert requests_report, "Missing requests report" + assert len(requests_report) > 1 + + summary_report = result.reports["summary_lifecycle_metrics.json"] + assert summary_report, "Missing summary report" + assert summary_report["successes"]["count"] > 1 diff --git a/e2e/tests/test_mock_client.py b/e2e/tests/test_mock_client.py index 98458add..60a02f25 100644 --- a/e2e/tests/test_mock_client.py +++ b/e2e/tests/test_mock_client.py @@ -3,8 +3,9 @@ from utils.benchmark import run_benchmark_minimal -def test_simple_mock_client_benchmark(): - result = run_benchmark_minimal("e2e/configs/e2e_simple_mock_client.yaml", timeout_sec=None) +@pytest.mark.asyncio +async def test_simple_mock_client_benchmark(): + result = await run_benchmark_minimal("e2e/configs/e2e_simple_mock_client.yaml", timeout_sec=None) assert result.success, "Benchmark failed" assert result.reports, "No reports generated from benchmark" assert result.reports["per_request_lifecycle_metrics.json"], "Missing requests report" diff --git a/e2e/utils/benchmark.py b/e2e/utils/benchmark.py 
index 1badafc4..4da39d3c 100644 --- a/e2e/utils/benchmark.py +++ b/e2e/utils/benchmark.py @@ -1,10 +1,13 @@ import json import os -import shlex -import subprocess +import asyncio +import aiofiles +import aiofiles.os import tempfile import yaml +import signal import logging +import textwrap from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional, List, Union @@ -18,23 +21,36 @@ class BenchmarkResult: success: bool # True if process exit code == 0 and not timed out timed_out: bool # True if we hit timeout and killed the process - returncode: int # Raw process return code (or -9/-15 on kill) + return_code: int # Raw process return code (or -9/-15 on kill) stdout: str # Combined stdout/stderr text work_dir: Path # Working directory used for the run reports: Optional[Dict[str, Any]] # Parsed json for reports if present -def _process_yaml_config(config: Union[str, Path, Dict[str, Any]], out_dir: Path) -> Path: +async def _process_yaml_config(config: Union[str, Path, Dict[str, Any]], out_dir: Path) -> Path: out_dir.mkdir(parents=True, exist_ok=True) cfg_path = out_dir / "config_input.yaml" - if isinstance(config, (str, Path)): - src = Path(config) - if not src.exists(): - raise FileNotFoundError(f"Config file not found: {src}") - config = yaml.safe_load(src.read_text(encoding="utf-8")) - - # Overwrite output path to temporaty folder + # if config is a string pointing to an existing path, then convert it to + # Path. + if isinstance(config, str): + try: + await aiofiles.os.stat(config) + config = Path(config) + except Exception: + pass + + # if config is a Path, then open it as a file. + if isinstance(config, Path): + async with aiofiles.open(config, mode="r") as file: + config = await file.read() + + # if config is (still) a string, then directly parse it as YAML. + if isinstance(config, str): + config = yaml.safe_load(config) + assert isinstance(config, dict) + + # Overwrite output path to temporary folder config["storage"] = {"local_storage": {"path": out_dir.as_posix()}} cfg_path.write_text( @@ -52,7 +68,7 @@ def _find_report_files(path: Path) -> Optional[List[Path]]: return candidates -def run_benchmark_minimal( +async def run_benchmark_minimal( config: Union[str, Path, Dict[str, Any]], *, work_dir: Optional[Union[str, Path]] = None, @@ -70,37 +86,50 @@ def run_benchmark_minimal( - marks `timed_out=True`, returns collected stdout up to kill. 
""" wd = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="inference-perf-e2e-")) - cfg_path = _process_yaml_config(config, wd) + cfg_path = await _process_yaml_config(config, wd) env = os.environ.copy() if extra_env: env.update({k: str(v) for k, v in extra_env.items()}) - cmd = f"{shlex.quote(executable)} --config_file {shlex.quote(str(cfg_path))} --log-level DEBUG" + args = [executable, "--config_file", str(cfg_path), "--log-level", "DEBUG"] + logger.debug(f"starting inference-perf, {args=}") + + proc = await asyncio.create_subprocess_exec( + *args, + cwd=str(wd), + env=env, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + preexec_fn=os.setpgrp, # use process groups + ) + logger.debug("inference-perf started!") + stdout = "" timed_out = False + return_code = -1 try: - proc = subprocess.run( - cmd, - cwd=str(wd), - env=env, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - timeout=timeout_sec, - ) - stdout = proc.stdout + stdout_bytes, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout_sec) + stdout = stdout_bytes.decode() + logger.info(f"benchmark status {proc.returncode}, output:\n{textwrap.indent(stdout, ' | ')}") + assert proc.returncode is not None return_code = proc.returncode - except subprocess.TimeoutExpired as e: + except asyncio.exceptions.TimeoutError: timed_out = True - stdout = e.stdout return_code = -9 + finally: + try: + # kill whole process group to ensure that forked workers are also + # terminated. + pgid = os.getpgid(proc.pid) + os.killpg(pgid, signal.SIGTERM) + # wait for process to finish cleaning up. + await proc.wait() + except ProcessLookupError: + pass success = (return_code == 0) and (not timed_out) - logger.info("Benchmark output:\n%s", stdout) - # Attempt to read report.json (optional) report_path = _find_report_files(wd) reports = {report.name: json.loads(report.read_text(encoding="utf-8")) for report in report_path} if report_path else None @@ -108,8 +137,8 @@ def run_benchmark_minimal( return BenchmarkResult( success=success, timed_out=timed_out, - returncode=return_code, - stdout=stdout or "", + return_code=return_code, + stdout=stdout, work_dir=wd, reports=reports, ) diff --git a/e2e/utils/llm_d_inference_sim.py b/e2e/utils/llm_d_inference_sim.py new file mode 100644 index 00000000..0785f790 --- /dev/null +++ b/e2e/utils/llm_d_inference_sim.py @@ -0,0 +1,154 @@ +import aiohttp +import asyncio +import logging +import sys +import textwrap +import shutil +from contextlib import AsyncContextDecorator + + +logger = logging.getLogger(__name__) + + +class LLMDInferenceSimRunner(AsyncContextDecorator): + @staticmethod + def is_available(executable: str = "llm-d-inference-sim") -> bool: + """ + Returns whether llm-d-inference-sim is present in the local + environment. 
+ """ + return shutil.which(executable) is not None + + executable: str + argv: list[str] + + _host = "127.0.0.1" + _port: int + _proc: asyncio.subprocess.Process | None = None + _wait_until_ready: bool + + def __init__( + self, + model: str, + *cmd_args: str, + port: int = 8000, + max_waiting_queue_length: int = 10000, + executable: str = "llm-d-inference-sim", + wait_until_ready=True, + ) -> None: + self.executable = executable + self.argv = [ + *("--port", str(port)), + *("--model", model), + *("--max-waiting-queue-length", str(max_waiting_queue_length)), + *cmd_args, + ] + self._port = port + self._wait_until_ready = wait_until_ready + + @property + def host(self): + return self._host + + @property + def port(self): + return self._port + + async def __aenter__(self) -> "LLMDInferenceSimRunner": + """ + Starts running the llm-d-inference-sim server in the background. + Once the contextmanager exits, stop the server using a SIGTERM. + """ + if not LLMDInferenceSimRunner.is_available(self.executable): + raise FileNotFoundError(f"executable not found: {self.executable}") + + logger.debug(f"starting server: {self.argv=}") + self._proc = await asyncio.create_subprocess_exec( + self.executable, + *self.argv, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + ) + + if self._wait_until_ready: + try: + await self.wait_until_ready() + except Exception: + await self.__aexit__(*sys.exc_info()) + raise + + return self + + async def __aexit__(self, *exc): + """ + Sends a SIGTERM to the server and waits a bit for it to stop. + Returns true if process exited gracefully. + """ + terminate_task = asyncio.create_task(self._terminate()) + await self._wait() + await terminate_task + + async def wait_until_ready( + self, + polling_sec: float = 0.5, + timeout_sec: float | None = 10, + ) -> None: + """Waits until the server is ready to serve requests.""" + assert self._proc + + async def wait_http(): + async with aiohttp.ClientSession() as http: + while True: + try: + async with http.head(f"http://{self._host}:{self._port}") as resp: + await resp.read() + logger.debug(f"querying server's / endpoint returned {resp.status=}") + return True + except (asyncio.exceptions.CancelledError, asyncio.exceptions.TimeoutError): + logger.error(f"llm-d-inference-sim server did not become ready after {timeout_sec}s!") + raise + except Exception as e: + logger.debug(f"http polling error: {e}, retrying...") + await asyncio.sleep(polling_sec) + continue + + async def wait_proc(): + await self._wait() + raise ConnectionRefusedError("server process exited before port was ready") + + done, pending = await asyncio.wait( + [asyncio.create_task(x) for x in [wait_http(), wait_proc()]], + return_when=asyncio.FIRST_COMPLETED, + timeout=timeout_sec, + ) + [task.cancel() for task in pending] + if done: + # either client finished polling or process ended early, so read the + # result to raise any potential exceptions. + [task.result() for task in done] + else: + # everything timed out, so one of these will have the timeout + # exception. await it so it's thrown. 
+ [await task for task in pending] + + async def _wait(self) -> None: + proc = self._proc + assert proc + + stdout, _ = await proc.communicate() + stdout_pretty = textwrap.indent(stdout.decode(), " | ") + logger.debug(f"server exited with status {proc.returncode}, output:\n{stdout_pretty}") + + async def _terminate(self) -> None: + proc = self._proc + assert proc + + try: + proc.terminate() + await asyncio.sleep(2) + proc.kill() + except ProcessLookupError: + pass # process already exited + except Exception as e: + logger.debug(f"server failed to be terminated: {e}") + raise diff --git a/e2e/utils/testdata.py b/e2e/utils/testdata.py new file mode 100644 index 00000000..ed7ff21e --- /dev/null +++ b/e2e/utils/testdata.py @@ -0,0 +1,30 @@ +import os +import pathlib +import subprocess + +TEST_E2E_DIR = pathlib.Path(__file__).parent.parent +TEST_E2E_TESTDATA = TEST_E2E_DIR.joinpath("testdata") + + +def extract_tarball(name: str | pathlib.Path) -> pathlib.Path: + """ + Extract tarball with the given path to the directory that that tarball is + in. + + The returned path is the folder containing the content of the tarball, named + after the tarball name itself without the extension. + """ + name = pathlib.Path(name).resolve() + + dest = name + while dest.suffix: + dest = dest.with_suffix("") + + if not dest.is_dir(): + if not name.is_file(): + raise FileNotFoundError(f"Tarball {name} not found!") + + os.makedirs(dest) + subprocess.run(["tar", "-xzvf", name, "-C", dest], check=True) + + return dest diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..36530151 --- /dev/null +++ b/flake.lock @@ -0,0 +1,82 @@ +{ + "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1763759067, + "narHash": "sha256-LlLt2Jo/gMNYAwOgdRQBrsRoOz7BPRkzvNaI/fzXi2Q=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "2cccadc7357c0ba201788ae99c4dfa90728ef5e0", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1763835633, + "narHash": "sha256-HzxeGVID5MChuCPESuC0dlQL1/scDKu+MmzoVBJxulM=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "050e09e091117c3d7328c7b2b7b577492c43c134", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-lib": { + "locked": { + "lastModified": 1761765539, + "narHash": "sha256-b0yj6kfvO8ApcSE+QmA6mUfu8IYG6/uU28OFn4PaC8M=", + "owner": "nix-community", + "repo": "nixpkgs.lib", + "rev": "719359f4562934ae99f5443f20aa06c2ffff91fc", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nixpkgs.lib", + "type": "github" + } + }, + "pyproject-nix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1763716960, + "narHash": "sha256-PUlomle4klGbnZr0wOn8z61Mbt7tXh6Yp3hZ9/CQkq0=", + "owner": "pyproject-nix", + "repo": "pyproject.nix", + "rev": "d6c61dbe0be75e2f4cf0efcdc62428175be4cfb5", + "type": "github" + }, + "original": { + "owner": "pyproject-nix", + "repo": "pyproject.nix", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-parts": "flake-parts", + "nixpkgs": "nixpkgs", + "pyproject-nix": "pyproject-nix" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..94b61be8 --- /dev/null +++ b/flake.nix @@ -0,0 +1,134 @@ +{ + inputs = { + nixpkgs.url = 
"github:nixos/nixpkgs?ref=nixos-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; + + pyproject-nix.url = "github:pyproject-nix/pyproject.nix"; + pyproject-nix.inputs.nixpkgs.follows = "nixpkgs"; + }; + + outputs = + { + self, + nixpkgs, + flake-parts, + pyproject-nix, + ... + }@inputs: + flake-parts.lib.mkFlake { inherit inputs; } ( + { config, ... }: + { + systems = [ + "x86_64-linux" + ]; + flake = { + lib = { + pyproject = pyproject-nix.lib.project.loadPyproject { + projectRoot = self; + }; + }; + }; + perSystem = + { pkgs, self', ... }@systemInputs: + let + python = pkgs.python3; + in + { + devShells.default = pkgs.mkShell { + # PATH-only packages: + packages = + with pkgs; + with python.pkgs; + with self'.packages; + [ + llm-d-inference-sim + pdm + python + + # choose either python-lsp-server or pyright: + basedpyright + # python-lsp-server + # pylsp-mypy + ]; + + buildInputs = + with pkgs; + with python.pkgs; + [ + numpy + torch + ]; + + shellHook = '' + python -m venv .venv + source .venv/bin/activate + pdm sync -d + ''; + }; + + packages = rec { + default = inference-perf; + + inference-perf = + let + buildAttrs = self.lib.pyproject.renderers.buildPythonPackage { + inherit python; + }; + in + python.pkgs.buildPythonPackage (buildAttrs // { }); + + llm-d-inference-sim = pkgs.buildGoModule rec { + pname = "llm-d-inference-sim"; + version = "0.6.1"; + + src = pkgs.fetchFromGitHub { + owner = "llm-d"; + repo = "llm-d-inference-sim"; + tag = "v${version}"; + hash = "sha256-KdA7dgdy1jGjRhrqXfkg4Z9V3SXPcKp1FnTtm+e5DSA="; + }; + vendorHash = "sha256-MINH7J2ozTORFK/KgZvXBlwThYRISL1wlHebdZxvuvw="; + + nativeBuildInputs = with pkgs; [ + pkg-config + ]; + + buildInputs = with pkgs; [ + zeromq + libtokenizers + ]; + + # several tests require networking. + doCheck = false; + + meta = { + description = "A light weight vLLM simulator, for mocking out replicas"; + homepage = "https://github.com/llm-d/llm-d-inference-sim"; + license = with nixpkgs.lib.licenses; asl20; + mainProgram = "llm-d-inference-sim"; + }; + }; + + libtokenizers = pkgs.rustPlatform.buildRustPackage rec { + pname = "libtokenizers"; + version = "1.22.1"; # keep same as llm-d-inference-sim's version + + src = pkgs.fetchFromGitHub { + owner = "daulet"; + repo = "tokenizers"; + tag = "v${version}"; + hash = "sha256-unGAXpD4GHWVFcXAwd0zU/u30wzH909tDcRYRPsSKwQ="; + }; + cargoHash = "sha256-rY3YAcCbbx5CY6qu44Qz6UQhJlWVxAWdTaUSagHDn2o="; + + meta = { + description = "Go bindings for Tiktoken & HuggingFace Tokenizer"; + homepage = "https://github.com/daulet/tokenizers"; + license = with nixpkgs.lib.licenses; mit; + }; + }; + }; + }; + } + ); +} diff --git a/pdm.lock b/pdm.lock index ecae0ccf..860f6450 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,11 +5,22 @@ groups = ["default", "lint", "test", "types"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:af64fbc282da5d43a35e5075e047b7242d140df8dd989be2da412540a5232708" +content_hash = "sha256:5c16ab3d80a140f0b8b76bcda27cb2fae7d42ba9fc47a741da246396edb54b2b" [[metadata.targets]] requires_python = ">=3.12" +[[package]] +name = "aiofiles" +version = "25.1.0" +requires_python = ">=3.9" +summary = "File support for asyncio." 
+groups = ["default"]
+files = [
+    {file = "aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695"},
+    {file = "aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2"},
+]
+
 [[package]]
 name = "aiohappyeyeballs"
 version = "2.6.1"
diff --git a/pyproject.toml b/pyproject.toml
index 314cee66..c0303ac1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "boto3>=1.39.0",
     "uvloop>=0.21.0",
     "tqdm>=4.67.1",
+    "aiofiles>=25.1.0",
 ]
 requires-python = ">=3.12"
 readme = "README.md"
@@ -64,11 +65,16 @@ distribution = true
 [tool.pdm.scripts]
 format = "ruff format"
 lint = "ruff check"
-test = "pytest tests"
-"test:e2e" = "pytest e2e"
 type-check = "mypy --strict ./inference_perf ./tests"
 validate = {composite = ["format", "lint", "type-check"]}
 
+"test" = "pytest tests"
+"test:e2e" = "pytest e2e"
+"test:e2e:docker" = "pdm run docker:e2e-test:run"
+
+"docker:e2e-test:build".cmd = "docker buildx b -f Dockerfile.e2e-test {args:-t inference-perf-e2e-test} ."
+"docker:e2e-test:run".shell = "rm -f result && pdm run docker:e2e-test:build --iidfile result && docker run --rm -it $(< result)"
+
 [tool.ruff]
 # The GitHub editor is 127 chars wide
 line-length = 127
@@ -114,6 +120,9 @@ docstring-code-format = false
 docstring-code-line-length = "dynamic"
 
 [tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "session"
+log_cli = true
 log_cli_level = "INFO"
 testpaths = ["."]
 python_files = ["test_*.py"]
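
A minimal sketch of how a further end-to-end test could build on the helpers this change introduces (`LLMDInferenceSimRunner`, `run_benchmark_minimal`, `extract_tarball`). It mirrors the configuration already exercised in e2e/tests/test_llm_d_inference_sim.py; the test name and port 18001 are illustrative assumptions, not part of the patch:

import pytest

from utils.llm_d_inference_sim import LLMDInferenceSimRunner
from utils.benchmark import run_benchmark_minimal
from utils.testdata import extract_tarball


@pytest.mark.asyncio
@pytest.mark.skipif(not LLMDInferenceSimRunner.is_available(), reason="local environment missing llm-d-inference-sim")
async def test_completion_single_stage():
    # Model name and tarball are the ones bundled under e2e/testdata/models;
    # the port is arbitrary and only needs to be free locally.
    model_name = "google/gemma-3-270m"
    model_path = extract_tarball("e2e/testdata/models/google_gemma-3-270m.tar.gz")

    # The context manager starts the simulator, waits until its HTTP endpoint
    # answers, and sends SIGTERM on exit.
    async with LLMDInferenceSimRunner(model_name, port=18001) as sim:
        result = await run_benchmark_minimal(
            {
                "data": {"type": "mock"},
                "load": {"type": "constant", "stages": [{"rate": 1, "duration": 5}], "num_workers": 1},
                "api": {"type": "completion", "streaming": True},
                "server": {
                    "type": "vllm",
                    "model_name": model_name,
                    "base_url": f"http://{sim.host}:{sim.port}",
                    "ignore_eos": True,
                },
                "tokenizer": {"pretrained_model_name_or_path": str(model_path)},
                "report": {"request_lifecycle": {"summary": True, "per_stage": True, "per_request": True}},
            }
        )

    assert result.success, "Benchmark failed"
    assert result.reports, "No reports generated from benchmark"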