diff --git a/.github/workflows/e2e_test-on-change.yml b/.github/workflows/e2e_test-on-change.yml index f34797df..1cf61439 100644 --- a/.github/workflows/e2e_test-on-change.yml +++ b/.github/workflows/e2e_test-on-change.yml @@ -13,23 +13,25 @@ on: jobs: e2e-tests: runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.13'] steps: - - name: Checkout Code + - name: Checkout code uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Set up PDM - uses: pdm-project/setup-pdm@v4 + + - name: Install Nix + uses: cachix/install-nix-action@v31 with: - python-version: ${{ matrix.python-version }} + github_access_token: ${{ secrets.GITHUB_TOKEN }} + - name: Install dependencies run: | - pdm sync -d - - name: Run e2e tests + nix develop -c pdm sync -d + + - name: Run end-to-end tests run: | - pdm run test:e2e + nix develop -c pdm run test:e2e -o log_cli_level=DEBUG |& tee test_e2e.out + + - name: Upload test_e2e.out + uses: actions/upload-artifact@v4 + with: + name: test_e2e + path: test_e2e.out diff --git a/.gitignore b/.gitignore index db8799dc..2979f2aa 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ __pycache__/ # Distribution / packaging .Python build/ +result develop-eggs/ dist/ downloads/ @@ -25,6 +26,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +/result # PyInstaller # Usually these files are written by a python script from a template @@ -174,4 +176,4 @@ cython_debug/ .pypirc # Test Reports (directories) -reports-*/ \ No newline at end of file +reports-*/ diff --git a/Dockerfile.e2e-test b/Dockerfile.e2e-test new file mode 100644 index 00000000..4ebdc682 --- /dev/null +++ b/Dockerfile.e2e-test @@ -0,0 +1,20 @@ +FROM docker.nix-community.org/nixpkgs/nix-flakes AS builder + +# Don't stay in the root directory for this. +# https://github.com/NixOS/nix/issues/11217 +WORKDIR /workspace + +# Copy just enough for Nix pdm install without bringing in the entire project. +# This ensures we don't run the nix develop too often. +COPY flake.nix flake.lock pyproject.toml pdm.lock . + +# Build Nix shell and install all dev PDM dependencies. +RUN nix develop path:///workspace -c pdm sync -d + +# Copy the actual module and E2E tests, then install self. +# This is what will be edited during development, so we run it last. +COPY inference_perf ./inference_perf +COPY e2e ./e2e + +ENTRYPOINT ["nix", "develop", "path:///workspace", "-c"] +CMD ["pdm", "run", "test:e2e"] diff --git a/e2e/testdata/models/.gitignore b/e2e/testdata/models/.gitignore new file mode 100644 index 00000000..1a9073c1 --- /dev/null +++ b/e2e/testdata/models/.gitignore @@ -0,0 +1,4 @@ +* +!.gitignore +!*.tar.gz +!*.tar.zst diff --git a/e2e/testdata/models/google_gemma-3-270m.tar.gz b/e2e/testdata/models/google_gemma-3-270m.tar.gz new file mode 100644 index 00000000..60813591 Binary files /dev/null and b/e2e/testdata/models/google_gemma-3-270m.tar.gz differ diff --git a/e2e/tests/test_llm_d_inference_sim.py b/e2e/tests/test_llm_d_inference_sim.py new file mode 100644 index 00000000..57792198 --- /dev/null +++ b/e2e/tests/test_llm_d_inference_sim.py @@ -0,0 +1,127 @@ +""" +End-to-end integration testing of inference-perf using llm-d-inference-sim[1]. + +In order for these tests to run, you must have `llm-d-inference-sim` in your +PATH. The GitHub Actions runner will have this, but you may also install it +locally by following llm-d-inference-sim's README or by entering the Nix shell +of this repository (i.e. `nix develop`). 
+ +If your local environment is missing `llm-d-inference-sim`, tests here will +automatically be skipped. + +[1]: https://github.com/llm-d/llm-d-inference-sim +""" + +import pytest + +from utils.llm_d_inference_sim import LLMDInferenceSimRunner +from utils.benchmark import run_benchmark_minimal +from utils.testdata import extract_tarball + + +TEST_MODEL_NAME = "google/gemma-3-270m" +TEST_MODEL_TARBALL = "e2e/testdata/models/google_gemma-3-270m.tar.gz" + + +@pytest.mark.asyncio +@pytest.mark.skipif(not LLMDInferenceSimRunner.is_available(), reason="local environment missing llm-d-inference-sim") +@pytest.mark.parametrize( + "data", + [ + pytest.param( + { + "type": "mock", + }, + id="data_mock", + ), + pytest.param( + { + "type": "shared_prefix", + "shared_prefix": { + "num_groups": 256, + "num_prompts_per_group": 16, + "system_prompt_len": 512, + "question_len": 256, + "output_len": 256, + }, + }, + id="data_shared_prefix", + ), + ], +) +@pytest.mark.parametrize( + "load", + [ + pytest.param( + { + "type": "constant", + "stages": [{"rate": 1, "duration": 5}], + "num_workers": 2, + }, + id="load_constant_slow", + ), + pytest.param( + { + "type": "constant", + "interval": 2, + "stages": [{"rate": 1, "duration": 5}, {"rate": 2, "duration": 5}], + "num_workers": 2, + }, + id="load_constant_slow_two_stages", + ), + pytest.param( + { + "type": "constant", + "stages": [{"rate": 100, "duration": 5}], + "num_workers": 2, + }, + id="load_constant_fast", + ), + ], +) +async def test_completion_successful_run(data: dict, load: dict): + """ + Very simple inference-perf integration test that ensures a wide range of + vLLM benchmarking configurations can run successfully. + """ + model_name = TEST_MODEL_NAME + model_path = extract_tarball(TEST_MODEL_TARBALL) + + async with LLMDInferenceSimRunner(model_name, port=18000) as sim: + result = await run_benchmark_minimal( + { + "data": data, + "load": load, + "api": { + "type": "completion", + "streaming": True, + }, + "server": { + "type": "vllm", + "model_name": model_name, + "base_url": f"http://{sim.host}:{sim.port}", + "ignore_eos": True, + }, + "tokenizer": { + "pretrained_model_name_or_path": str(model_path), + }, + "report": { + "request_lifecycle": { + "summary": True, + "per_stage": True, + "per_request": True, + }, + }, + } + ) + + assert result.success, "Benchmark failed" + assert result.reports, "No reports generated from benchmark" + + requests_report = result.reports["per_request_lifecycle_metrics.json"] + assert requests_report, "Missing requests report" + assert len(requests_report) > 1 + + summary_report = result.reports["summary_lifecycle_metrics.json"] + assert summary_report, "Missing summary report" + assert summary_report["successes"]["count"] > 1 diff --git a/e2e/tests/test_mock_client.py b/e2e/tests/test_mock_client.py index 98458add..60a02f25 100644 --- a/e2e/tests/test_mock_client.py +++ b/e2e/tests/test_mock_client.py @@ -3,8 +3,9 @@ from utils.benchmark import run_benchmark_minimal -def test_simple_mock_client_benchmark(): - result = run_benchmark_minimal("e2e/configs/e2e_simple_mock_client.yaml", timeout_sec=None) +@pytest.mark.asyncio +async def test_simple_mock_client_benchmark(): + result = await run_benchmark_minimal("e2e/configs/e2e_simple_mock_client.yaml", timeout_sec=None) assert result.success, "Benchmark failed" assert result.reports, "No reports generated from benchmark" assert result.reports["per_request_lifecycle_metrics.json"], "Missing requests report" diff --git a/e2e/utils/benchmark.py b/e2e/utils/benchmark.py 
index 1badafc4..4da39d3c 100644 --- a/e2e/utils/benchmark.py +++ b/e2e/utils/benchmark.py @@ -1,10 +1,13 @@ import json import os -import shlex -import subprocess +import asyncio +import aiofiles +import aiofiles.os import tempfile import yaml +import signal import logging +import textwrap from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, Optional, List, Union @@ -18,23 +21,36 @@ class BenchmarkResult: success: bool # True if process exit code == 0 and not timed out timed_out: bool # True if we hit timeout and killed the process - returncode: int # Raw process return code (or -9/-15 on kill) + return_code: int # Raw process return code (or -9/-15 on kill) stdout: str # Combined stdout/stderr text work_dir: Path # Working directory used for the run reports: Optional[Dict[str, Any]] # Parsed json for reports if present -def _process_yaml_config(config: Union[str, Path, Dict[str, Any]], out_dir: Path) -> Path: +async def _process_yaml_config(config: Union[str, Path, Dict[str, Any]], out_dir: Path) -> Path: out_dir.mkdir(parents=True, exist_ok=True) cfg_path = out_dir / "config_input.yaml" - if isinstance(config, (str, Path)): - src = Path(config) - if not src.exists(): - raise FileNotFoundError(f"Config file not found: {src}") - config = yaml.safe_load(src.read_text(encoding="utf-8")) - - # Overwrite output path to temporaty folder + # if config is a string pointing to an existing path, then convert it to + # Path. + if isinstance(config, str): + try: + await aiofiles.os.stat(config) + config = Path(config) + except Exception: + pass + + # if config is a Path, then open it as a file. + if isinstance(config, Path): + async with aiofiles.open(config, mode="r") as file: + config = await file.read() + + # if config is (still) a string, then directly parse it as YAML. + if isinstance(config, str): + config = yaml.safe_load(config) + assert isinstance(config, dict) + + # Overwrite output path to temporary folder config["storage"] = {"local_storage": {"path": out_dir.as_posix()}} cfg_path.write_text( @@ -52,7 +68,7 @@ def _find_report_files(path: Path) -> Optional[List[Path]]: return candidates -def run_benchmark_minimal( +async def run_benchmark_minimal( config: Union[str, Path, Dict[str, Any]], *, work_dir: Optional[Union[str, Path]] = None, @@ -70,37 +86,50 @@ def run_benchmark_minimal( - marks `timed_out=True`, returns collected stdout up to kill. 
""" wd = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="inference-perf-e2e-")) - cfg_path = _process_yaml_config(config, wd) + cfg_path = await _process_yaml_config(config, wd) env = os.environ.copy() if extra_env: env.update({k: str(v) for k, v in extra_env.items()}) - cmd = f"{shlex.quote(executable)} --config_file {shlex.quote(str(cfg_path))} --log-level DEBUG" + args = [executable, "--config_file", str(cfg_path), "--log-level", "DEBUG"] + logger.debug(f"starting inference-perf, {args=}") + + proc = await asyncio.create_subprocess_exec( + *args, + cwd=str(wd), + env=env, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + preexec_fn=os.setpgrp, # use process groups + ) + logger.debug("inference-perf started!") + stdout = "" timed_out = False + return_code = -1 try: - proc = subprocess.run( - cmd, - cwd=str(wd), - env=env, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - timeout=timeout_sec, - ) - stdout = proc.stdout + stdout_bytes, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout_sec) + stdout = stdout_bytes.decode() + logger.info(f"benchmark status {proc.returncode}, output:\n{textwrap.indent(stdout, ' | ')}") + assert proc.returncode is not None return_code = proc.returncode - except subprocess.TimeoutExpired as e: + except asyncio.exceptions.TimeoutError: timed_out = True - stdout = e.stdout return_code = -9 + finally: + try: + # kill whole process group to ensure that forked workers are also + # terminated. + pgid = os.getpgid(proc.pid) + os.killpg(pgid, signal.SIGTERM) + # wait for process to finish cleaning up. + await proc.wait() + except ProcessLookupError: + pass success = (return_code == 0) and (not timed_out) - logger.info("Benchmark output:\n%s", stdout) - # Attempt to read report.json (optional) report_path = _find_report_files(wd) reports = {report.name: json.loads(report.read_text(encoding="utf-8")) for report in report_path} if report_path else None @@ -108,8 +137,8 @@ def run_benchmark_minimal( return BenchmarkResult( success=success, timed_out=timed_out, - returncode=return_code, - stdout=stdout or "", + return_code=return_code, + stdout=stdout, work_dir=wd, reports=reports, ) diff --git a/e2e/utils/llm_d_inference_sim.py b/e2e/utils/llm_d_inference_sim.py new file mode 100644 index 00000000..0785f790 --- /dev/null +++ b/e2e/utils/llm_d_inference_sim.py @@ -0,0 +1,154 @@ +import aiohttp +import asyncio +import logging +import sys +import textwrap +import shutil +from contextlib import AsyncContextDecorator + + +logger = logging.getLogger(__name__) + + +class LLMDInferenceSimRunner(AsyncContextDecorator): + @staticmethod + def is_available(executable: str = "llm-d-inference-sim") -> bool: + """ + Returns whether llm-d-inference-sim is present in the local + environment. 
+ """ + return shutil.which(executable) is not None + + executable: str + argv: list[str] + + _host = "127.0.0.1" + _port: int + _proc: asyncio.subprocess.Process | None = None + _wait_until_ready: bool + + def __init__( + self, + model: str, + *cmd_args: str, + port: int = 8000, + max_waiting_queue_length: int = 10000, + executable: str = "llm-d-inference-sim", + wait_until_ready=True, + ) -> None: + self.executable = executable + self.argv = [ + *("--port", str(port)), + *("--model", model), + *("--max-waiting-queue-length", str(max_waiting_queue_length)), + *cmd_args, + ] + self._port = port + self._wait_until_ready = wait_until_ready + + @property + def host(self): + return self._host + + @property + def port(self): + return self._port + + async def __aenter__(self) -> "LLMDInferenceSimRunner": + """ + Starts running the llm-d-inference-sim server in the background. + Once the contextmanager exits, stop the server using a SIGTERM. + """ + if not LLMDInferenceSimRunner.is_available(self.executable): + raise FileNotFoundError(f"executable not found: {self.executable}") + + logger.debug(f"starting server: {self.argv=}") + self._proc = await asyncio.create_subprocess_exec( + self.executable, + *self.argv, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + ) + + if self._wait_until_ready: + try: + await self.wait_until_ready() + except Exception: + await self.__aexit__(*sys.exc_info()) + raise + + return self + + async def __aexit__(self, *exc): + """ + Sends a SIGTERM to the server and waits a bit for it to stop. + Returns true if process exited gracefully. + """ + terminate_task = asyncio.create_task(self._terminate()) + await self._wait() + await terminate_task + + async def wait_until_ready( + self, + polling_sec: float = 0.5, + timeout_sec: float | None = 10, + ) -> None: + """Waits until the server is ready to serve requests.""" + assert self._proc + + async def wait_http(): + async with aiohttp.ClientSession() as http: + while True: + try: + async with http.head(f"http://{self._host}:{self._port}") as resp: + await resp.read() + logger.debug(f"querying server's / endpoint returned {resp.status=}") + return True + except (asyncio.exceptions.CancelledError, asyncio.exceptions.TimeoutError): + logger.error(f"llm-d-inference-sim server did not become ready after {timeout_sec}s!") + raise + except Exception as e: + logger.debug(f"http polling error: {e}, retrying...") + await asyncio.sleep(polling_sec) + continue + + async def wait_proc(): + await self._wait() + raise ConnectionRefusedError("server process exited before port was ready") + + done, pending = await asyncio.wait( + [asyncio.create_task(x) for x in [wait_http(), wait_proc()]], + return_when=asyncio.FIRST_COMPLETED, + timeout=timeout_sec, + ) + [task.cancel() for task in pending] + if done: + # either client finished polling or process ended early, so read the + # result to raise any potential exceptions. + [task.result() for task in done] + else: + # everything timed out, so one of these will have the timeout + # exception. await it so it's thrown. 
+ [await task for task in pending] + + async def _wait(self) -> None: + proc = self._proc + assert proc + + stdout, _ = await proc.communicate() + stdout_pretty = textwrap.indent(stdout.decode(), " | ") + logger.debug(f"server exited with status {proc.returncode}, output:\n{stdout_pretty}") + + async def _terminate(self) -> None: + proc = self._proc + assert proc + + try: + proc.terminate() + await asyncio.sleep(2) + proc.kill() + except ProcessLookupError: + pass # process already exited + except Exception as e: + logger.debug(f"server failed to be terminated: {e}") + raise diff --git a/e2e/utils/testdata.py b/e2e/utils/testdata.py new file mode 100644 index 00000000..ed7ff21e --- /dev/null +++ b/e2e/utils/testdata.py @@ -0,0 +1,30 @@ +import os +import pathlib +import subprocess + +TEST_E2E_DIR = pathlib.Path(__file__).parent.parent +TEST_E2E_TESTDATA = TEST_E2E_DIR.joinpath("testdata") + + +def extract_tarball(name: str | pathlib.Path) -> pathlib.Path: + """ + Extract tarball with the given path to the directory that that tarball is + in. + + The returned path is the folder containing the content of the tarball, named + after the tarball name itself without the extension. + """ + name = pathlib.Path(name).resolve() + + dest = name + while dest.suffix: + dest = dest.with_suffix("") + + if not dest.is_dir(): + if not name.is_file(): + raise FileNotFoundError(f"Tarball {name} not found!") + + os.makedirs(dest) + subprocess.run(["tar", "-xzvf", name, "-C", dest], check=True) + + return dest diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..36530151 --- /dev/null +++ b/flake.lock @@ -0,0 +1,82 @@ +{ + "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1763759067, + "narHash": "sha256-LlLt2Jo/gMNYAwOgdRQBrsRoOz7BPRkzvNaI/fzXi2Q=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "2cccadc7357c0ba201788ae99c4dfa90728ef5e0", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1763835633, + "narHash": "sha256-HzxeGVID5MChuCPESuC0dlQL1/scDKu+MmzoVBJxulM=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "050e09e091117c3d7328c7b2b7b577492c43c134", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-lib": { + "locked": { + "lastModified": 1761765539, + "narHash": "sha256-b0yj6kfvO8ApcSE+QmA6mUfu8IYG6/uU28OFn4PaC8M=", + "owner": "nix-community", + "repo": "nixpkgs.lib", + "rev": "719359f4562934ae99f5443f20aa06c2ffff91fc", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nixpkgs.lib", + "type": "github" + } + }, + "pyproject-nix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1763716960, + "narHash": "sha256-PUlomle4klGbnZr0wOn8z61Mbt7tXh6Yp3hZ9/CQkq0=", + "owner": "pyproject-nix", + "repo": "pyproject.nix", + "rev": "d6c61dbe0be75e2f4cf0efcdc62428175be4cfb5", + "type": "github" + }, + "original": { + "owner": "pyproject-nix", + "repo": "pyproject.nix", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-parts": "flake-parts", + "nixpkgs": "nixpkgs", + "pyproject-nix": "pyproject-nix" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..94b61be8 --- /dev/null +++ b/flake.nix @@ -0,0 +1,134 @@ +{ + inputs = { + nixpkgs.url = 
"github:nixos/nixpkgs?ref=nixos-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; + + pyproject-nix.url = "github:pyproject-nix/pyproject.nix"; + pyproject-nix.inputs.nixpkgs.follows = "nixpkgs"; + }; + + outputs = + { + self, + nixpkgs, + flake-parts, + pyproject-nix, + ... + }@inputs: + flake-parts.lib.mkFlake { inherit inputs; } ( + { config, ... }: + { + systems = [ + "x86_64-linux" + ]; + flake = { + lib = { + pyproject = pyproject-nix.lib.project.loadPyproject { + projectRoot = self; + }; + }; + }; + perSystem = + { pkgs, self', ... }@systemInputs: + let + python = pkgs.python3; + in + { + devShells.default = pkgs.mkShell { + # PATH-only packages: + packages = + with pkgs; + with python.pkgs; + with self'.packages; + [ + llm-d-inference-sim + pdm + python + + # choose either python-lsp-server or pyright: + basedpyright + # python-lsp-server + # pylsp-mypy + ]; + + buildInputs = + with pkgs; + with python.pkgs; + [ + numpy + torch + ]; + + shellHook = '' + python -m venv .venv + source .venv/bin/activate + pdm sync -d + ''; + }; + + packages = rec { + default = inference-perf; + + inference-perf = + let + buildAttrs = self.lib.pyproject.renderers.buildPythonPackage { + inherit python; + }; + in + python.pkgs.buildPythonPackage (buildAttrs // { }); + + llm-d-inference-sim = pkgs.buildGoModule rec { + pname = "llm-d-inference-sim"; + version = "0.6.1"; + + src = pkgs.fetchFromGitHub { + owner = "llm-d"; + repo = "llm-d-inference-sim"; + tag = "v${version}"; + hash = "sha256-KdA7dgdy1jGjRhrqXfkg4Z9V3SXPcKp1FnTtm+e5DSA="; + }; + vendorHash = "sha256-MINH7J2ozTORFK/KgZvXBlwThYRISL1wlHebdZxvuvw="; + + nativeBuildInputs = with pkgs; [ + pkg-config + ]; + + buildInputs = with pkgs; [ + zeromq + libtokenizers + ]; + + # several tests require networking. + doCheck = false; + + meta = { + description = "A light weight vLLM simulator, for mocking out replicas"; + homepage = "https://github.com/llm-d/llm-d-inference-sim"; + license = with nixpkgs.lib.licenses; asl20; + mainProgram = "llm-d-inference-sim"; + }; + }; + + libtokenizers = pkgs.rustPlatform.buildRustPackage rec { + pname = "libtokenizers"; + version = "1.22.1"; # keep same as llm-d-inference-sim's version + + src = pkgs.fetchFromGitHub { + owner = "daulet"; + repo = "tokenizers"; + tag = "v${version}"; + hash = "sha256-unGAXpD4GHWVFcXAwd0zU/u30wzH909tDcRYRPsSKwQ="; + }; + cargoHash = "sha256-rY3YAcCbbx5CY6qu44Qz6UQhJlWVxAWdTaUSagHDn2o="; + + meta = { + description = "Go bindings for Tiktoken & HuggingFace Tokenizer"; + homepage = "https://github.com/daulet/tokenizers"; + license = with nixpkgs.lib.licenses; mit; + }; + }; + }; + }; + } + ); +} diff --git a/pdm.lock b/pdm.lock index ecae0ccf..860f6450 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,11 +5,22 @@ groups = ["default", "lint", "test", "types"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:af64fbc282da5d43a35e5075e047b7242d140df8dd989be2da412540a5232708" +content_hash = "sha256:5c16ab3d80a140f0b8b76bcda27cb2fae7d42ba9fc47a741da246396edb54b2b" [[metadata.targets]] requires_python = ">=3.12" +[[package]] +name = "aiofiles" +version = "25.1.0" +requires_python = ">=3.9" +summary = "File support for asyncio." 
+groups = ["default"]
+files = [
+    {file = "aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695"},
+    {file = "aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2"},
+]
+
 [[package]]
 name = "aiohappyeyeballs"
 version = "2.6.1"
diff --git a/pyproject.toml b/pyproject.toml
index 314cee66..c0303ac1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "boto3>=1.39.0",
     "uvloop>=0.21.0",
     "tqdm>=4.67.1",
+    "aiofiles>=25.1.0",
 ]
 requires-python = ">=3.12"
 readme = "README.md"
@@ -64,11 +65,16 @@ distribution = true
 [tool.pdm.scripts]
 format = "ruff format"
 lint = "ruff check"
-test = "pytest tests"
-"test:e2e" = "pytest e2e"
 type-check = "mypy --strict ./inference_perf ./tests"
 validate = {composite = ["format", "lint", "type-check"]}
 
+"test" = "pytest tests"
+"test:e2e" = "pytest e2e"
+"test:e2e:docker" = "pdm run docker:e2e-test:run"
+
+"docker:e2e-test:build".cmd = "docker buildx b -f Dockerfile.e2e-test {args:-t inference-perf-e2e-test} ."
+"docker:e2e-test:run".shell = "rm -f result && pdm run docker:e2e-test:build --iidfile result && docker run --rm -it $(< result)"
+
 [tool.ruff]
 # The GitHub editor is 127 chars wide
 line-length = 127
@@ -114,6 +120,9 @@ docstring-code-format = false
 docstring-code-line-length = "dynamic"
 
 [tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "session"
+log_cli = true
 log_cli_level = "INFO"
 testpaths = ["."]
 python_files = ["test_*.py"]
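
A minimal sketch of how a further end-to-end test could build on the helpers this change introduces (`LLMDInferenceSimRunner`, `run_benchmark_minimal`, `extract_tarball`). It mirrors the configuration already exercised in e2e/tests/test_llm_d_inference_sim.py; the test name and port 18001 are illustrative assumptions, not part of the patch:

import pytest

from utils.llm_d_inference_sim import LLMDInferenceSimRunner
from utils.benchmark import run_benchmark_minimal
from utils.testdata import extract_tarball


@pytest.mark.asyncio
@pytest.mark.skipif(not LLMDInferenceSimRunner.is_available(), reason="local environment missing llm-d-inference-sim")
async def test_completion_single_stage():
    # Model name and tarball are the ones bundled under e2e/testdata/models;
    # the port is arbitrary and only needs to be free locally.
    model_name = "google/gemma-3-270m"
    model_path = extract_tarball("e2e/testdata/models/google_gemma-3-270m.tar.gz")

    # The context manager starts the simulator, waits until its HTTP endpoint
    # answers, and sends SIGTERM on exit.
    async with LLMDInferenceSimRunner(model_name, port=18001) as sim:
        result = await run_benchmark_minimal(
            {
                "data": {"type": "mock"},
                "load": {"type": "constant", "stages": [{"rate": 1, "duration": 5}], "num_workers": 1},
                "api": {"type": "completion", "streaming": True},
                "server": {
                    "type": "vllm",
                    "model_name": model_name,
                    "base_url": f"http://{sim.host}:{sim.port}",
                    "ignore_eos": True,
                },
                "tokenizer": {"pretrained_model_name_or_path": str(model_path)},
                "report": {"request_lifecycle": {"summary": True, "per_stage": True, "per_request": True}},
            }
        )

    assert result.success, "Benchmark failed"
    assert result.reports, "No reports generated from benchmark"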