Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
install:
pip install -r requirements.txt


install-dev:
pip install -e .[dev]


quality:
ruff check src tests
isort --check-only src tests
flake8 src tests --max-line-length 88
mypy src


style:
ruff format src tests
isort src tests
flake8 src tests --max-line-length 88
python -m ruff format src tests
python -m isort src tests
python -m flake8 src tests

# test:
# pytest tests

build:
python setup.py sdist bdist_wheel


clean:
rm -rf __pycache__
rm -rf build
Expand All @@ -31,4 +32,5 @@ clean:
rm -rf .mypy_cache
rm -rf .pytest_cache

.PHONY: install install-dev quality style test test-unit test-integration test-e2e test-smoke test-sanity test-regression build clean

.PHONY: install install-dev quality style build clean
30 changes: 29 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,32 @@ target-version = ['py38']

[tool.isort]
profile = "black"
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 88
src_paths = ["."]

[tool.mypy]
files = "src/guidellm"
python_version = '3.8'
files = ['*.py']
exclude = ["venv", "docs"]

show_error_codes = true
namespace_packages = false
check_untyped_defs = true

warn_redundant_casts = true
warn_unused_ignores = true

# Silence "type import errors" as our 3rd-party libs do not have types
# Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery
follow_imports = 'silent'

[[tool.mypy.overrides]]
module = []
ignore_missing_imports=true

[tool.ruff]
exclude = ["build", "dist", "env", ".venv"]
Expand All @@ -20,6 +43,11 @@ lint.select = ["E", "F", "W"]
max-line-length = 88

[tool.pytest.ini_options]
addopts = '-s -vvv --cache-clear'
asyncio_mode = 'auto'
cache_dir = '/tmp'
python_files = 'tests.py test_*.py'
python_functions = 'test_* *_test'
markers = [
"smoke: quick tests to check basic functionality",
"sanity: detailed tests to ensure major functions work correctly",
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def _setup_long_description() -> Tuple[str, str]:
extras_require={
'dev': [
'pytest',
'pytest-asyncio',
'sphinx',
'ruff',
'mypy',
Expand Down
4 changes: 2 additions & 2 deletions src/guidellm/backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from .base import Backend, BackendTypes, GenerativeResponse
from .base import Backend, BackendType, GenerativeResponse
from .openai import OpenAIBackend

__all__ = [
"Backend",
"BackendTypes",
"BackendType",
"GenerativeResponse",
"OpenAIBackend",
]
18 changes: 10 additions & 8 deletions src/guidellm/backend/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Iterator, List, Optional, Type, Union
from typing import Iterator, List, Optional, Type

from loguru import logger

from guidellm.core.request import TextGenerationRequest
from guidellm.core.result import TextGenerationResult

__all__ = ["Backend", "BackendTypes", "GenerativeResponse"]
__all__ = ["Backend", "BackendType", "GenerativeResponse"]


class BackendTypes(Enum):
class BackendType(str, Enum):
TEST = "test"
OPENAI_SERVER = "openai_server"

Expand All @@ -39,12 +39,12 @@ class Backend(ABC):
_registry = {}

@staticmethod
def register_backend(backend_type: BackendTypes):
def register_backend(backend_type: BackendType):
"""
A decorator to register a backend class in the backend registry.

:param backend_type: The type of backend to register.
:type backend_type: BackendTypes
:type backend_type: BackendType
"""

def inner_wrapper(wrapped_class: Type["Backend"]):
Expand All @@ -54,21 +54,23 @@ def inner_wrapper(wrapped_class: Type["Backend"]):
return inner_wrapper

@staticmethod
def create_backend(backend_type: Union[str, BackendTypes], **kwargs) -> "Backend":
def create_backend(backend_type: BackendType, **kwargs) -> "Backend":
"""
Factory method to create a backend based on the backend type.

:param backend_type: The type of backend to create.
:type backend_type: BackendTypes
:type backend_type: BackendType
:param kwargs: Additional arguments for backend initialization.
:type kwargs: dict
:return: An instance of a subclass of Backend.
:rtype: Backend
"""
logger.info(f"Creating backend of type {backend_type}")
if backend_type not in Backend._registry:

if backend_type not in Backend._registry.keys():
logger.error(f"Unsupported backend type: {backend_type}")
raise ValueError(f"Unsupported backend type: {backend_type}")

return Backend._registry[backend_type](**kwargs)

def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
Expand Down
24 changes: 17 additions & 7 deletions src/guidellm/backend/openai.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from typing import Any, Iterator, List, Optional

import openai
from loguru import logger
from openai import OpenAI
from transformers import AutoTokenizer

from guidellm.backend import Backend, BackendTypes, GenerativeResponse
from guidellm.backend import Backend, BackendType, GenerativeResponse
from guidellm.core.request import TextGenerationRequest

__all__ = ["OpenAIBackend"]


@Backend.register_backend(BackendTypes.OPENAI_SERVER)
@Backend.register_backend(BackendType.OPENAI_SERVER)
class OpenAIBackend(Backend):
"""
An OpenAI backend implementation for the generative AI result.
Expand Down Expand Up @@ -53,8 +53,7 @@ def __init__(
path_incl = path if path else ""
self.target = f"http://{host}{port_incl}{path_incl}"

openai.api_base = self.target
openai.api_key = api_key
self._openai_client = OpenAI(api_key=api_key, base_url=self.target)

if not model:
self.model = self.default_model()
Expand Down Expand Up @@ -88,7 +87,7 @@ def make_request(
if self.request_args:
request_args.update(self.request_args)

response = openai.Completion.create(
response = self._openai_client.completions.create(
engine=self.model,
prompt=request.prompt,
stream=True,
Expand Down Expand Up @@ -129,7 +128,18 @@ def available_models(self) -> List[str]:
:return: A list of available models.
:rtype: List[str]
"""
models = [model["id"] for model in openai.Engine.list()["data"]]

# FIX: You tried to access openai.Engine, but this is no longer supported
# in openai>=1.0.0 - see the README at
# https://github.com/openai/openai-python for the API.
# You can run `openai migrate` to automatically upgrade your codebase
# to use the 1.0.0 interface. Alternatively, you can pin your installation
# to the old version, e.g. `pip install openai==0.28`
# A detailed migration guide is available here:
# https://github.com/openai/openai-python/discussions/742

# in progress
models = [model["id"] for model in self._openai_client.models.list()["data"]]
logger.info(f"Available models: {models}")
return models

Expand Down
3 changes: 3 additions & 0 deletions src/guidellm/core/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,3 +599,6 @@ def add_benchmark(self, benchmark: TextGenerationBenchmark):
"""
self._benchmarks.append(benchmark)
logger.debug(f"Added result: {benchmark}")

def to_dict(self) -> dict:
raise NotImplementedError
84 changes: 42 additions & 42 deletions src/guidellm/request/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import asyncio
from abc import ABC, abstractmethod
from typing import Iterator, Optional, Union
from typing import AsyncIterator, Generator, Iterator, Optional, Union

from loguru import logger
from transformers import AutoTokenizer, PreTrainedTokenizer
Expand All @@ -25,14 +25,15 @@ class RequestGenerator(ABC):

def __init__(
self,
*_,
tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
mode: str = "async",
async_queue_size: int = 50,
):
self._async_queue_size = async_queue_size
self._mode = mode
self._queue = asyncio.Queue(maxsize=async_queue_size)
self._stop_event = asyncio.Event()
self._async_queue_size: int = async_queue_size
self._mode: str = mode
self._queue: asyncio.Queue = asyncio.Queue(maxsize=async_queue_size)
self._stop_event: asyncio.Event = asyncio.Event()

if tokenizer is not None:
self._tokenizer = (
Expand All @@ -59,24 +60,38 @@ def __repr__(self) -> str:
f"tokenizer={self._tokenizer})"
)

def __iter__(self) -> Iterator[TextGenerationRequest]:
def __iter__(self) -> Generator[TextGenerationRequest, None, None]:
"""
Provide an iterator interface to generate new requests.
"""

:return: An iterator over result requests.
:rtype: Iterator[TextGenerationRequest]
"""
if self.mode == "async":
while not self._stop_event.is_set():
try:
item = self._queue.get_nowait()
self._queue.task_done()
yield item
except asyncio.QueueEmpty:
continue
else:
while not self._stop_event.is_set():
yield self.create_item()
while not self._stop_event.is_set():
yield self.create_item()

def __aiter__(self) -> AsyncIterator["TextGenerationRequest"]:
"""
Provide an async iterator interface to generate new requests.
"""

return self

async def __anext__(self) -> "TextGenerationRequest":
"""
Asynchronously get the next item from the queue or wait if the queue is empty.
"""

asyncio.create_task(self._populate_queue())

while not self._stop_event.is_set():
try:
item = self._queue.get_nowait()
self._queue.task_done()
return item
except asyncio.QueueEmpty:
# Throttle and yield control back to the event loop
await asyncio.sleep(0)

raise StopAsyncIteration

@property
def tokenizer(self) -> Optional[PreTrainedTokenizer]:
Expand Down Expand Up @@ -118,31 +133,13 @@ def create_item(self) -> TextGenerationRequest:
"""
raise NotImplementedError()

def start(self):
"""
Start the background task that populates the queue.
"""
if self.mode == "async":
try:
loop = asyncio.get_running_loop()
logger.info("Using existing event loop")
except RuntimeError:
raise RuntimeError("No running event loop found for async mode")

loop.call_soon_threadsafe(
lambda: asyncio.create_task(self._populate_queue())
)
logger.info(
f"RequestGenerator started in async mode with queue size: "
f"{self._async_queue_size}"
)
else:
logger.info("RequestGenerator started in sync mode")

def stop(self):
"""
Stop the background task that populates the queue.
"""

# TODO: Consider moving to the __anext__

logger.info("Stopping RequestGenerator...")
self._stop_event.set()
logger.info("RequestGenerator stopped")
Expand All @@ -151,13 +148,16 @@ async def _populate_queue(self):
"""
Populate the request queue in the background.
"""

while not self._stop_event.is_set():
if self._queue.qsize() < self._async_queue_size:
item = self.create_item()
await self._queue.put(item)

logger.debug(
f"Item added to queue. Current queue size: {self._queue.qsize()}"
)
else:
await asyncio.sleep(0.1)
await asyncio.sleep(0)

logger.info("RequestGenerator stopped populating queue")
Loading