Improve resilience and performance of do_bulk_inference

mhaas · mhaas · commit 7941f4c5898f · 2022-05-19T18:13:02.000+02:00
In case of errors, the `InferenceClient.do_bulk_inference` method will now return `None` for the affected objects instead of aborting the entire bulk inference operation (and discarding any successfully processed objects). Fixes issue #68. The fix for #68 is different from what is described in #68. Instead of using a generator-based approach, which would require the SDK consumer to implement the error handling themselves, the SDK itself now handles the errors. The downside of not using a generator is a larger memory footprint, because the results are accumulated in a list. As an alternative, we can consider using a generator to either yield the successfully processed inference results or the list containing `None`. This approach would save memory. Additionally, this commit introduces parallel processing in `InferenceClient.do_bulk_inference`. This will greatly improve performance. Due to the non-lazy implementation of `ThreadPoolExecutor.map`, this increases memory usage slightly (see cpython issue #74028: python/cpython#74028).
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ default_language_version:
     python: python3.7
 repos:
 -   repo: https://github.com/ambv/black
-    rev: 21.6b0
+    rev: 22.3.0
     hooks:
     - id: black
       language_version: python3.7
diff --git a/sap/aibus/dar/client/inference_client.py b/sap/aibus/dar/client/inference_client.py
@@ -1,9 +1,13 @@
 """
 Client API for the Inference microservice.
 """
-from typing import List
+from concurrent.futures import ThreadPoolExecutor
+from typing import List, Union
+
+from requests import RequestException
 
 from sap.aibus.dar.client.base_client import BaseClientWithSession
+from sap.aibus.dar.client.exceptions import DARHTTPException
 from sap.aibus.dar.client.inference_constants import InferencePaths
 from sap.aibus.dar.client.util.lists import split_list
 
@@ -73,7 +77,7 @@ def do_bulk_inference(
         objects: List[dict],
         top_n: int = TOP_N,
         retry: bool = True,
-    ) -> List[dict]:
+    ) -> List[Union[dict, None]]:
         """
         Performs bulk inference for larger collections.
 
@@ -88,15 +92,11 @@ def do_bulk_inference(
             This method calls the inference endpoint multiple times to process all data.
             For non-trial service instances, each call will incur a cost.
 
-            If one of the calls fails, this method will raise an Exception and the
-            progress will be lost. In this case, all calls until the Exception happened
-            will be charged.
-
             To reduce the likelihood of a failed request terminating the bulk inference
             process, this method will retry failed requests.
 
             There is a small chance that even retried requests will be charged, e.g.
-            if a problem occurs with the request on the client side outside of the
+            if a problem occurs with the request on the client side outside the
             control of the service and after the service has processed the request.
             To disable `retry` behavior simply pass `retry=False` to the method.
 
@@ -114,10 +114,30 @@ def do_bulk_inference(
         :param retry: whether to retry on errors. Default: True
         :return: the aggregated ObjectPrediction dictionaries
         """
-        result = []  # type: List[dict]
-        for work_package in split_list(objects, LIMIT_OBJECTS_PER_CALL):
-            response = self.create_inference_request(
-                model_name, work_package, top_n=top_n, retry=retry
+
+        def predict_call(work_package):
+            try:
+                response = self.create_inference_request(
+                    model_name, work_package, top_n=top_n, retry=retry
+                )
+                return response["predictions"]
+            except (DARHTTPException, RequestException) as exc:
+                self.log.warning(
+                    "Caught %s during bulk inference. "
+                    "Setting results to None for this batch!",
+                    exc,
+                    exc_info=True,
+                )
+                return [None for _ in range(len(work_package))]
+
+        results = []
+
+        with ThreadPoolExecutor(max_workers=4) as pool:
+            results_iterator = pool.map(
+                predict_call, split_list(objects, LIMIT_OBJECTS_PER_CALL)
             )
-            result.extend(response["predictions"])
-        return result
+
+            for predictions in results_iterator:
+                results.extend(predictions)
+
+        return results
diff --git a/tests/sap/aibus/dar/client/test_exceptions.py b/tests/sap/aibus/dar/client/test_exceptions.py
@@ -8,22 +8,28 @@
 
 url = "http://localhost:4321/test/"
 
+correlation_id = "412d84ae-0eb5-4421-863d-956570c2da54"
+vcap_request_id = "d9cd7dec-4d74-4a7a-a953-4ca583c8d912"
+
+
+def create_mock_response_404():
+    mock_response = create_mock_response()
+
+    mock_response.headers["X-Correlation-ID"] = correlation_id
+    mock_response.headers["X-Vcap-Request-Id"] = vcap_request_id
+    mock_response.headers["Server"] = "Gunicorn"
+    mock_response.headers["X-Cf-Routererror"] = "unknown_route"
+    mock_response.status_code = 404
+    mock_response.request.method = "GET"
+    mock_response.reason = b"\xc4\xd6\xdc Not Found"
+    return mock_response
+
 
 class TestDARHTTPException:
     url = "http://localhost:4321/test/"
 
     def test_basic(self):
-        mock_response = create_mock_response()
-
-        correlation_id = "412d84ae-0eb5-4421-863d-956570c2da54"
-        mock_response.headers["X-Correlation-ID"] = correlation_id
-        vcap_request_id = "d9cd7dec-4d74-4a7a-a953-4ca583c8d912"
-        mock_response.headers["X-Vcap-Request-Id"] = vcap_request_id
-        mock_response.headers["Server"] = "Gunicorn"
-        mock_response.headers["X-Cf-Routererror"] = "unknown_route"
-        mock_response.status_code = 404
-        mock_response.request.method = "GET"
-        mock_response.reason = b"\xc4\xd6\xdc Not Found"
+        mock_response = create_mock_response_404()
 
         exception = DARHTTPException.create_from_response(url, mock_response)
 
@@ -130,7 +136,6 @@ class TestDARHTTPExceptionReason:
     # status line: https://tools.ietf.org/html/rfc7230#section-3.1.2
 
     def test_reason_works_iso8859_1(self):
-
         mock_response = create_mock_response()
         # ÄÖÜ encoded as ISO-8859-1
         mock_response.reason = b"\xc4\xd6\xdc"
diff --git a/tests/sap/aibus/dar/client/test_inference_client.py b/tests/sap/aibus/dar/client/test_inference_client.py
@@ -7,12 +7,15 @@
 from unittest.mock import call
 
 import pytest
+from requests import RequestException, Timeout
 
+from sap.aibus.dar.client.exceptions import DARHTTPException
 from sap.aibus.dar.client.inference_client import InferenceClient
 from tests.sap.aibus.dar.client.test_data_manager_client import (
     AbstractDARClientConstruction,
     prepare_client,
 )
+from tests.sap.aibus.dar.client.test_exceptions import create_mock_response_404
 
 
 class TestInferenceClientConstruction(AbstractDARClientConstruction):
@@ -203,3 +206,38 @@ def _assert_bulk_inference_works(
             inference_client.session.post_to_endpoint.call_args_list
             == expected_calls_to_post
         )
+
+    def test_bulk_inference_error(self, inference_client: InferenceClient):
+        """
+        Tests if do_bulk_inference method will recover from errors.
+        """
+
+        response_404 = create_mock_response_404()
+        url = "http://localhost:4321/test/"
+
+        exception_404 = DARHTTPException.create_from_response(url, response_404)
+
+        exceptions = [exception_404, RequestException, Timeout]
+        # Try different exceptions
+        for exc in exceptions:
+            inference_client.session.post_to_endpoint.return_value.json.side_effect = [
+                self.inference_response(50),
+                exc,
+                self.inference_response(40),
+            ]
+
+            many_objects = [self.objects[0] for _ in range(50 + 50 + 40)]
+            assert len(many_objects) == 50 + 50 + 40
+
+            response = inference_client.do_bulk_inference(
+                model_name="test-model",
+                objects=many_objects,
+                top_n=4,
+            )
+
+            expected_response = []
+            expected_response.extend(self.inference_response(50)["predictions"])
+            expected_response.extend(None for _ in range(50))
+            expected_response.extend(self.inference_response(40)["predictions"])
+
+            assert response == expected_response