
Commit 5bbf4a2
do_bulk_inference: add worker_count parameter
This is mainly useful for fixing the tests, which rely on the mocks being called in a certain order. One of the tests supports concurrency by mocking in a better way, but this was not feasible for the other tests. This commit also updates the documentation build tools to the latest version to fix the documentation build on my local machine.
Parent: f0d4169
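For illustration, a minimal sketch of the new call shape introduced by this commit, assuming an already-constructed InferenceClient instance named inference_client and placeholder object payloads (neither is part of this commit):

    # Sketch only: `inference_client` and the payloads below are assumptions.
    objects = [{"objectId": str(i), "features": []} for i in range(100)]

    # worker_count=1 disables concurrent requests, so mocked calls happen in a
    # deterministic order; values above 4 are rejected.
    predictions = inference_client.do_bulk_inference(
        model_name="my-model",
        objects=objects,
        worker_count=1,
    )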

File tree

6 files changed: +81 -7 lines changed


docs/requirements.txt

Lines changed: 2 additions & 2 deletions
@@ -1,3 +1,3 @@
 # docs
-sphinx==2.4.1
-sphinx_rtd_theme==0.5.0
+sphinx==5.0.2
+sphinx_rtd_theme==1.0.0

sap/aibus/dar/client/exceptions.py

Lines changed: 8 additions & 0 deletions
@@ -123,6 +123,14 @@ class JobNotFound(DARException):
     pass


+class InvalidWorkerCount(DARException):
+    """
+    Invalid worker_count parameter is specified.
+
+    .. versionadded:: 0.12.0
+    """
+
+
 class ModelAlreadyExists(DARException):
     """
     Model already exists and must be deleted first.
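A hedged sketch of how the new exception surfaces to callers; the client instance, model name, and objects list are placeholders, not from this commit:

    from sap.aibus.dar.client.exceptions import InvalidWorkerCount

    try:
        inference_client.do_bulk_inference(
            model_name="my-model", objects=objects, worker_count=99
        )
    except InvalidWorkerCount as exc:
        # The message states the allowed range, e.g.
        # "worker_count too high: 99. Up to 4 allowed."
        print("rejected:", exc)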

sap/aibus/dar/client/inference_client.py

Lines changed: 23 additions & 2 deletions
@@ -7,7 +7,7 @@
 from requests import RequestException

 from sap.aibus.dar.client.base_client import BaseClientWithSession
-from sap.aibus.dar.client.exceptions import DARHTTPException
+from sap.aibus.dar.client.exceptions import DARHTTPException, InvalidWorkerCount
 from sap.aibus.dar.client.inference_constants import InferencePaths
 from sap.aibus.dar.client.util.lists import split_list

@@ -17,6 +17,8 @@
 #: How many labels to predict for a single object by default
 TOP_N = 1

+# pylint: disable=too-many-arguments
+

 class InferenceClient(BaseClientWithSession):
     """
@@ -77,6 +79,7 @@ def do_bulk_inference(
         objects: List[dict],
         top_n: int = TOP_N,
         retry: bool = True,
+        worker_count: int = 4,
     ) -> List[Union[dict, None]]:
         """
         Performs bulk inference for larger collections.
@@ -146,14 +149,32 @@ def do_bulk_inference(
         has value `None`.
         A `_sdk_error` key is added with the Exception details.

+        .. versionadded:: 0.12.0
+            The `worker_count` parameter allows to fine-tune the number of concurrent
+            request threads. Set `worker_count` to `1` to disable concurrent execution of
+            requests.
+

         :param model_name: name of the model used for inference
         :param objects: Objects to be classified
         :param top_n: How many predictions to return per object
         :param retry: whether to retry on errors. Default: True
+        :param worker_count: maximum number of concurrent requests
+        :raises: InvalidWorkerCount if worker_count param is incorrect
         :return: the aggregated ObjectPrediction dictionaries
         """

+        if worker_count is None:
+            raise InvalidWorkerCount("worker_count cannot be None!")
+
+        if worker_count > 4:
+            msg = "worker_count too high: %s. Up to 4 allowed." % worker_count
+            raise InvalidWorkerCount(msg)
+
+        if worker_count <= 0:
+            msg = "worker_count must be greater than 0!"
+            raise InvalidWorkerCount(msg)
+
         def predict_call(work_package):
             try:
                 response = self.create_inference_request(
@@ -180,7 +201,7 @@ def predict_call(work_package):

         results = []

-        with ThreadPoolExecutor(max_workers=4) as pool:
+        with ThreadPoolExecutor(max_workers=worker_count) as pool:
             results_iterator = pool.map(
                 predict_call, split_list(objects, LIMIT_OBJECTS_PER_CALL)
             )
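To summarize the pattern introduced above, here is a standalone sketch (not the SDK code itself) of how a validated worker_count bounds a ThreadPoolExecutor over chunked work; the chunk_size parameter and handle() stub are illustrative assumptions:

    from concurrent.futures import ThreadPoolExecutor
    from typing import List


    def bulk_apply(items: List[dict], chunk_size: int, worker_count: int = 4) -> list:
        # Mirrors the parameter checks added to do_bulk_inference.
        if worker_count is None:
            raise ValueError("worker_count cannot be None!")
        if worker_count > 4:
            raise ValueError("worker_count too high: %s. Up to 4 allowed." % worker_count)
        if worker_count <= 0:
            raise ValueError("worker_count must be greater than 0!")

        def handle(chunk: List[dict]) -> list:
            # Stand-in for one create_inference_request call per chunk.
            return [{"objectId": obj.get("objectId")} for obj in chunk]

        chunks = [items[i : i + chunk_size] for i in range(0, len(items), chunk_size)]
        results: list = []
        # pool.map yields results in input order, even with multiple workers.
        with ThreadPoolExecutor(max_workers=worker_count) as pool:
            for partial in pool.map(handle, chunks):
                results.extend(partial)
        return results

With worker_count=1 the pool degrades to sequential execution, which is what the updated tests rely on for a deterministic mock call order.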

system_tests/workflow/test_end_to_end.py

Lines changed: 7 additions & 0 deletions
@@ -249,12 +249,19 @@ def _assert_inference_works(self, inference_client, model_name):
         # One object has been classified
         assert len(response["predictions"]) == 1

+        # do_bulk_inference with concurrency
         big_to_be_classified = [to_be_classified[0] for _ in range(123)]
         response = inference_client.do_bulk_inference(
             model_name=model_name, objects=big_to_be_classified
         )
         assert len(response) == 123

+        # do_bulk_inference without concurrency
+        response = inference_client.do_bulk_inference(
+            model_name=model_name, objects=big_to_be_classified, worker_count=1
+        )
+        assert len(response) == 123
+
 url = os.environ["DAR_URL"]
 if url[-1] == "/":
     url = url[:-1]

tests/sap/aibus/dar/client/test_exceptions.py

Lines changed: 4 additions & 1 deletion
@@ -1,7 +1,10 @@
 import datetime
 from unittest.mock import PropertyMock

-from sap.aibus.dar.client.exceptions import DARHTTPException, ModelAlreadyExists
+from sap.aibus.dar.client.exceptions import (
+    DARHTTPException,
+    ModelAlreadyExists,
+)
 from tests.sap.aibus.dar.client.test_dar_session import create_mock_response

 # TODO: test __str__

tests/sap/aibus/dar/client/test_inference_client.py

Lines changed: 37 additions & 2 deletions
@@ -8,7 +8,7 @@
 import pytest
 from requests import RequestException, Timeout

-from sap.aibus.dar.client.exceptions import DARHTTPException
+from sap.aibus.dar.client.exceptions import DARHTTPException, InvalidWorkerCount
 from sap.aibus.dar.client.inference_client import InferenceClient
 from tests.sap.aibus.dar.client.test_data_manager_client import (
     AbstractDARClientConstruction,
@@ -180,7 +180,11 @@ def _assert_bulk_inference_works(
         retry_kwarg["retry"] = retry_flag

         response = inference_client.do_bulk_inference(
-            model_name="test-model", objects=many_objects, top_n=4, **retry_kwarg
+            model_name="test-model",
+            objects=many_objects,
+            top_n=4,
+            worker_count=1,  # Disable concurrency to make tests deterministic.
+            **retry_kwarg,
         )

         # The return value is the concatenation of all 'predictions' of the individual
@@ -348,6 +352,7 @@ def test_bulk_inference_error_no_object_ids(
             model_name="test-model",
             objects=inference_objects,
             top_n=4,
+            worker_count=1,  # disable concurrency to make tests deterministic
         )
         expected_error_response = {
             "objectId": None,
@@ -368,3 +373,33 @@
         )

         assert response == expected_response
+
+    def test_worker_count_validation(self, inference_client: InferenceClient):
+
+        many_objects = [self.objects()[0] for _ in range(75)]
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model", objects=many_objects, worker_count=5
+            )
+        assert "worker_count too high: 5. Up to 4 allowed." in str(context.value)
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model", objects=many_objects, worker_count=0
+            )
+        assert "worker_count must be greater than 0" in str(context.value)
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model", objects=many_objects, worker_count=-1
+            )
+        assert "worker_count must be greater than 0" in str(context.value)
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model",
+                objects=many_objects,
+                worker_count=None,
+            )
+        assert "worker_count cannot be None" in str(context.value)
