
Commit 31da951

do_bulk_inference: add worker_count parameter
This is mainly useful for fixing the tests, which rely on the mocks being called in a certain order. One of the tests supports concurrency through improved mocking, but this was not feasible for the other tests.
1 parent f0d4169 commit 31da951
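
For orientation, here is a minimal, hypothetical usage sketch of the new parameter. The `client` instance, the model name, and the object payload layout are illustrative assumptions; `do_bulk_inference`, `top_n`, and `worker_count` are as introduced by this commit.

# Sketch only: assumes `client` is an already constructed InferenceClient
# and that the payload layout below matches the service's inference format.
objects = [
    {
        "objectId": "obj-1",  # hypothetical identifier
        "features": [{"name": "description", "value": "cotton shirt"}],
    }
]

# New in 0.12.0: worker_count caps the number of concurrent request
# threads (1 to 4). worker_count=1 disables concurrent execution.
predictions = client.do_bulk_inference(
    model_name="my-model",
    objects=objects,
    top_n=1,
    worker_count=1,
)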

File tree: 6 files changed, +81 −7 lines changed

docs/requirements.txt

Lines changed: 2 additions & 2 deletions

@@ -1,3 +1,3 @@
 # docs
-sphinx==2.4.1
-sphinx_rtd_theme==0.5.0
+sphinx==5.0.2
+sphinx_rtd_theme==1.0.0

sap/aibus/dar/client/exceptions.py

Lines changed: 8 additions & 0 deletions

@@ -123,6 +123,14 @@ class JobNotFound(DARException):
     pass


+class InvalidWorkerCount(DARException):
+    """
+    Invalid worker_count parameter is specified.
+
+    .. versionadded:: 0.12.0
+    """
+
+
 class ModelAlreadyExists(DARException):
     """
     Model already exists and must be deleted first.
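
Because the new exception subclasses DARException, callers that already handle DARException will also catch InvalidWorkerCount. A small sketch; the import path is taken from the diff above, and DARException is assumed to live in the same module, as the class definitions here suggest:

from sap.aibus.dar.client.exceptions import DARException, InvalidWorkerCount

try:
    # Simulate what do_bulk_inference raises for a bad worker_count.
    raise InvalidWorkerCount("worker_count cannot be None!")
except DARException as exc:
    # The broad DARException handler catches the new subclass as well.
    assert isinstance(exc, InvalidWorkerCount)
    print(type(exc).__name__, "->", exc)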

sap/aibus/dar/client/inference_client.py

Lines changed: 23 additions & 2 deletions

@@ -7,7 +7,7 @@
 from requests import RequestException

 from sap.aibus.dar.client.base_client import BaseClientWithSession
-from sap.aibus.dar.client.exceptions import DARHTTPException
+from sap.aibus.dar.client.exceptions import DARHTTPException, InvalidWorkerCount
 from sap.aibus.dar.client.inference_constants import InferencePaths
 from sap.aibus.dar.client.util.lists import split_list

@@ -17,6 +17,8 @@
 #: How many labels to predict for a single object by default
 TOP_N = 1

+# pylint: disable=too-many-arguments
+

 class InferenceClient(BaseClientWithSession):
     """
@@ -77,6 +79,7 @@ def do_bulk_inference(
         objects: List[dict],
         top_n: int = TOP_N,
         retry: bool = True,
+        worker_count: int = 4,
     ) -> List[Union[dict, None]]:
         """
         Performs bulk inference for larger collections.
@@ -146,14 +149,32 @@ def do_bulk_inference(
         has value `None`.
         A `_sdk_error` key is added with the Exception details.

+        .. versionadded:: 0.12.0
+           The `worker_count` parameter allows to fine-tune the number of concurrent
+           request threads. Set `worker_count` to `1` to disable concurrent execution of
+           requests.
+
         :param model_name: name of the model used for inference
         :param objects: Objects to be classified
         :param top_n: How many predictions to return per object
         :param retry: whether to retry on errors. Default: True
+        :param worker_count: maximum number of concurrent requests
+        :raises: InvalidWorkerCount if worker_count param is incorrect
         :return: the aggregated ObjectPrediction dictionaries
         """

+        if worker_count is None:
+            raise InvalidWorkerCount("worker_count cannot be None!")
+
+        if worker_count > 4:
+            msg = "worker_count too high: %s. Up to 4 allowed." % worker_count
+            raise InvalidWorkerCount(msg)
+
+        if worker_count <= 0:
+            msg = "worker_count must be greater than 0!"
+            raise InvalidWorkerCount(msg)
+
         def predict_call(work_package):
             try:
                 response = self.create_inference_request(
@@ -180,7 +201,7 @@ def predict_call(work_package):

         results = []

-        with ThreadPoolExecutor(max_workers=4) as pool:
+        with ThreadPoolExecutor(max_workers=worker_count) as pool:
             results_iterator = pool.map(
                 predict_call, split_list(objects, LIMIT_OBJECTS_PER_CALL)
             )
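
The change threads `worker_count` straight into the existing `ThreadPoolExecutor` pipeline: validate the count, split the objects into work packages, then map the request function over the packages. Below is a self-contained sketch of that pattern; `chunk` is a local stand-in for the SDK's `split_list`, the limit constant is a placeholder value, and a plain ValueError replaces InvalidWorkerCount to keep the example runnable on its own.

from concurrent.futures import ThreadPoolExecutor
from typing import Iterable, List

LIMIT_OBJECTS_PER_CALL = 50  # placeholder; the real limit lives in the SDK

def chunk(items: List[dict], size: int) -> Iterable[List[dict]]:
    """Local stand-in for split_list: yield fixed-size work packages."""
    for start in range(0, len(items), size):
        yield items[start : start + size]

def bulk_predict(objects: List[dict], worker_count: int = 4) -> List[dict]:
    # Same validation order as the diff: None, too high, then non-positive.
    if worker_count is None:
        raise ValueError("worker_count cannot be None!")
    if worker_count > 4:
        raise ValueError("worker_count too high: %s. Up to 4 allowed." % worker_count)
    if worker_count <= 0:
        raise ValueError("worker_count must be greater than 0!")

    def predict_call(work_package: List[dict]) -> List[dict]:
        # The real predict_call performs an HTTP inference request here.
        return [{"objectId": obj.get("objectId")} for obj in work_package]

    results: List[dict] = []
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        # pool.map yields results in submission order, so the aggregated
        # output stays ordered even with worker_count > 1.
        for partial in pool.map(predict_call, chunk(objects, LIMIT_OBJECTS_PER_CALL)):
            results.extend(partial)
    return results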

system_tests/workflow/test_end_to_end.py

Lines changed: 7 additions & 0 deletions

@@ -249,12 +249,19 @@ def _assert_inference_works(self, inference_client, model_name):
         # One object has been classified
         assert len(response["predictions"]) == 1

+        # do_bulk_inference with concurrency
         big_to_be_classified = [to_be_classified[0] for _ in range(123)]
         response = inference_client.do_bulk_inference(
             model_name=model_name, objects=big_to_be_classified
         )
         assert len(response) == 123

+        # do_bulk_inference without concurrency
+        response = inference_client.do_bulk_inference(
+            model_name=model_name, objects=big_to_be_classified, worker_count=1
+        )
+        assert len(response) == 123
+
         url = os.environ["DAR_URL"]
         if url[-1] == "/":
             url = url[:-1]

tests/sap/aibus/dar/client/test_exceptions.py

Lines changed: 4 additions & 1 deletion

@@ -1,7 +1,10 @@
 import datetime
 from unittest.mock import PropertyMock

-from sap.aibus.dar.client.exceptions import DARHTTPException, ModelAlreadyExists
+from sap.aibus.dar.client.exceptions import (
+    DARHTTPException,
+    ModelAlreadyExists,
+)
 from tests.sap.aibus.dar.client.test_dar_session import create_mock_response

 # TODO: test __str__

tests/sap/aibus/dar/client/test_inference_client.py

Lines changed: 37 additions & 2 deletions

@@ -8,7 +8,7 @@
 import pytest
 from requests import RequestException, Timeout

-from sap.aibus.dar.client.exceptions import DARHTTPException
+from sap.aibus.dar.client.exceptions import DARHTTPException, InvalidWorkerCount
 from sap.aibus.dar.client.inference_client import InferenceClient
 from tests.sap.aibus.dar.client.test_data_manager_client import (
     AbstractDARClientConstruction,
@@ -180,7 +180,11 @@ def _assert_bulk_inference_works(
            retry_kwarg["retry"] = retry_flag

         response = inference_client.do_bulk_inference(
-            model_name="test-model", objects=many_objects, top_n=4, **retry_kwarg
+            model_name="test-model",
+            objects=many_objects,
+            top_n=4,
+            worker_count=1,  # Disable concurrency to make tests deterministic.
+            **retry_kwarg,
         )

         # The return value is the concatenation of all 'predictions' of the individual
@@ -348,6 +352,7 @@ def test_bulk_inference_error_no_object_ids(
             model_name="test-model",
             objects=inference_objects,
             top_n=4,
+            worker_count=1,  # disable concurrency to make tests deterministic
         )
         expected_error_response = {
             "objectId": None,
@@ -368,3 +373,33 @@ def test_bulk_inference_error_no_object_ids(
         )

         assert response == expected_response
+
+    def test_worker_count_validation(self, inference_client: InferenceClient):
+
+        many_objects = [self.objects()[0] for _ in range(75)]
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model", objects=many_objects, worker_count=5
+            )
+        assert "worker_count too high: 5. Up to 4 allowed." in str(context.value)
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model", objects=many_objects, worker_count=0
+            )
+        assert "worker_count must be greater than 0" in str(context.value)
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model", objects=many_objects, worker_count=-1
+            )
+        assert "worker_count must be greater than 0" in str(context.value)
+
+        with pytest.raises(InvalidWorkerCount) as context:
+            inference_client.do_bulk_inference(
+                model_name="test-model",
+                objects=many_objects,
+                worker_count=None,
+            )
+        assert "worker_count cannot be None" in str(context.value)
