From 56d613d57823664cf5df18a204b45bcb9ddf6e20 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 8 Feb 2022 14:38:50 +0000 Subject: [PATCH 01/10] fix: make count endpoints work with empty where filter input #309 --- .../src/search_api/query_filter_factory.py | 143 +++++++++--------- 1 file changed, 74 insertions(+), 69 deletions(-) diff --git a/datagateway_api/src/search_api/query_filter_factory.py b/datagateway_api/src/search_api/query_filter_factory.py index f0dd05d2..816818d9 100644 --- a/datagateway_api/src/search_api/query_filter_factory.py +++ b/datagateway_api/src/search_api/query_filter_factory.py @@ -106,84 +106,89 @@ def get_where_filter(where_filter_input, entity_name): """ where_filters = [] - if ( - list(where_filter_input.keys())[0] == "and" - or list(where_filter_input.keys())[0] == "or" - ): - log.debug("and/or operators found: %s", list(where_filter_input.keys())[0]) - boolean_operator = list(where_filter_input.keys())[0] - conditions = list(where_filter_input.values())[0] - conditional_where_filters = [] - - for condition in conditions: - # Could be nested AND/OR - where_filter = { - "filter": {"where": condition}, - } - conditional_where_filters.extend( - SearchAPIQueryFilterFactory.get_query_filter( - where_filter, entity_name, - ), + if where_filter_input: + if ( + list(where_filter_input.keys())[0] == "and" + or list(where_filter_input.keys())[0] == "or" + ): + log.debug( + "and/or operators found: %s", list(where_filter_input.keys())[0], ) + boolean_operator = list(where_filter_input.keys())[0] + conditions = list(where_filter_input.values())[0] + conditional_where_filters = [] + + for condition in conditions: + # Could be nested AND/OR + where_filter = { + "filter": {"where": condition}, + } + conditional_where_filters.extend( + SearchAPIQueryFilterFactory.get_query_filter( + where_filter, entity_name, + ), + ) - nested = NestedWhereFilters( - conditional_where_filters[:-1], - conditional_where_filters[-1], - boolean_operator, - SearchAPIQuery(entity_name), - ) - where_filters.append(nested) - elif list(where_filter_input.keys())[0] == "text": - log.debug("Text operator found within JSON where object") - try: - entity_class = getattr(search_api_models, entity_name) - except AttributeError as e: - raise SearchAPIError( - f"No text operator fields have been defined for {entity_name}" - f", {e.args}", + nested = NestedWhereFilters( + conditional_where_filters[:-1], + conditional_where_filters[-1], + boolean_operator, + SearchAPIQuery(entity_name), ) + where_filters.append(nested) + elif list(where_filter_input.keys())[0] == "text": + log.debug("Text operator found within JSON where object") + try: + entity_class = getattr(search_api_models, entity_name) + except AttributeError as e: + raise SearchAPIError( + f"No text operator fields have been defined for {entity_name}" + f", {e.args}", + ) - or_conditional_filters = [] - field_names = entity_class._text_operator_fields - log.debug( - "Text operators found for PaNOSC %s: %s", entity_name, field_names, - ) - if not field_names: - # No text operator fields present, simply log and move on, we should - # ignore text operator queries on entities where `_text_operator_fields` - # is empty (meaning they are not present in the origina PaNOSC data - # model) - log.info( - "No text operator fields found for PaNOSC entity %s, will" - " ignore", - entity_name, + or_conditional_filters = [] + field_names = entity_class._text_operator_fields + log.debug( + "Text operators found for PaNOSC %s: %s", entity_name, field_names, ) - else: - for field_name in field_names: - or_conditional_filters.append( - {field_name: {"like": where_filter_input["text"]}}, + if not field_names: + # No text operator fields present, simply log and move on, we should + # ignore text operator queries on entities where + # `_text_operator_fields` is empty (meaning they are not present in + # the origina PaNOSC data model) + log.info( + "No text operator fields found for PaNOSC entity %s, will" + " ignore", + entity_name, ) + else: + for field_name in field_names: + or_conditional_filters.append( + {field_name: {"like": where_filter_input["text"]}}, + ) - where_filter = { - "filter": {"where": {"or": or_conditional_filters}}, - } - where_filters.extend( - SearchAPIQueryFilterFactory.get_query_filter( - where_filter, entity_name, + where_filter = { + "filter": {"where": {"or": or_conditional_filters}}, + } + where_filters.extend( + SearchAPIQueryFilterFactory.get_query_filter( + where_filter, entity_name, + ), + ) + else: + log.info( + "Basic where filter found, extracting field, value and operation", + ) + filter_data = SearchAPIQueryFilterFactory.get_condition_values( + where_filter_input, + ) + where_filters.append( + SearchAPIWhereFilter( + field=filter_data[0], + value=filter_data[1], + operation=filter_data[2], ), ) - else: - log.info("Basic where filter found, extracting field, value and operation") - filter_data = SearchAPIQueryFilterFactory.get_condition_values( - where_filter_input, - ) - where_filters.append( - SearchAPIWhereFilter( - field=filter_data[0], - value=filter_data[1], - operation=filter_data[2], - ), - ) return where_filters From 786eb1a83bfaeddac18ed83076a6cfadcf83abf3 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 8 Feb 2022 14:49:03 +0000 Subject: [PATCH 02/10] test: unskip relevant tests #309 --- .../endpoints/test_count_dataset_files.py | 9 +-------- test/search_api/endpoints/test_count_endpoint.py | 16 ++-------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/test/search_api/endpoints/test_count_dataset_files.py b/test/search_api/endpoints/test_count_dataset_files.py index 32749a1f..1f475fa6 100644 --- a/test/search_api/endpoints/test_count_dataset_files.py +++ b/test/search_api/endpoints/test_count_dataset_files.py @@ -12,8 +12,6 @@ class TestSearchAPICountDatasetFilesEndpoint: "{}", {"count": 56}, id="Basic /datasets/{pid}/files/count request", - # Skipped because empty dict for filter doesn't work on where - marks=pytest.mark.skip, ), pytest.param( "0-8401-1070-7", @@ -40,12 +38,7 @@ class TestSearchAPICountDatasetFilesEndpoint: id="Count dataset files with filter to return zero count", ), pytest.param( - "unknown pid", - "{}", - {"count": 0}, - id="Non-existent dataset pid", - # Skipped because empty dict for filter doesn't work on where - marks=pytest.mark.skip, + "unknown pid", "{}", {"count": 0}, id="Non-existent dataset pid", ), ], ) diff --git a/test/search_api/endpoints/test_count_endpoint.py b/test/search_api/endpoints/test_count_endpoint.py index 2e41d18c..a89ec03d 100644 --- a/test/search_api/endpoints/test_count_endpoint.py +++ b/test/search_api/endpoints/test_count_endpoint.py @@ -8,28 +8,16 @@ class TestSearchAPICountEndpoint: "endpoint_name, request_filter, expected_json", [ pytest.param( - "datasets", - "{}", - {"count": 479}, - id="Basic /datasets/count request", - # Skipped because empty dict for filter doesn't work on where - marks=pytest.mark.skip, + "datasets", "{}", {"count": 479}, id="Basic /datasets/count request", ), pytest.param( - "documents", - "{}", - {"count": 239}, - id="Basic /documents/count request", - # Skipped because empty dict for filter doesn't work on where - marks=pytest.mark.skip, + "documents", "{}", {"count": 239}, id="Basic /documents/count request", ), pytest.param( "instruments", "{}", {"count": 14}, id="Basic /instruments/count request", - # Skipped because empty dict for filter doesn't work on where - marks=pytest.mark.skip, ), pytest.param( "datasets", From 1357c91a5a3a4522c4ff34fdb684b6e6e9474fc6 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 8 Feb 2022 14:56:07 +0000 Subject: [PATCH 03/10] fix: raise `FilterError` when where filter input is not object #309 --- datagateway_api/src/search_api/query_filter_factory.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datagateway_api/src/search_api/query_filter_factory.py b/datagateway_api/src/search_api/query_filter_factory.py index 816818d9..ff1150ae 100644 --- a/datagateway_api/src/search_api/query_filter_factory.py +++ b/datagateway_api/src/search_api/query_filter_factory.py @@ -104,6 +104,11 @@ def get_where_filter(where_filter_input, entity_name): :return: The list of `NestedWhereFilters` and/ or `SearchAPIWhereFilter` objects created """ + if not isinstance(where_filter_input, dict): + raise FilterError( + "Bad where filter input, please ensure that it is provided as an " + "object", + ) where_filters = [] if where_filter_input: From d37cd1bd3225e27d38e76dae5e80519b29eecbc5 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 8 Feb 2022 14:57:25 +0000 Subject: [PATCH 04/10] test: test `FilterError` raised when filter input is not object #309 --- test/search_api/test_search_api_query_filter_factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/search_api/test_search_api_query_filter_factory.py b/test/search_api/test_search_api_query_filter_factory.py index 9744c1c5..9bcc9863 100644 --- a/test/search_api/test_search_api_query_filter_factory.py +++ b/test/search_api/test_search_api_query_filter_factory.py @@ -2063,6 +2063,7 @@ def test_valid_filter_input_with_all_filters( }, id="Unsupported skip filter in scope of include filter", ), + pytest.param({"filter": {"where": []}}, id="Bad where filter input"), pytest.param( {"filter": {"where": {"isPublic": {"lt": True}}}}, id="Unsupported operator in where filter with boolean value", From d043133ac5f3bba14e9b35b6b922e703347bbd60 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 8 Feb 2022 15:14:18 +0000 Subject: [PATCH 05/10] docs: update docstring of `get_where_filter` method #309 --- datagateway_api/src/search_api/query_filter_factory.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datagateway_api/src/search_api/query_filter_factory.py b/datagateway_api/src/search_api/query_filter_factory.py index ff1150ae..7ca70153 100644 --- a/datagateway_api/src/search_api/query_filter_factory.py +++ b/datagateway_api/src/search_api/query_filter_factory.py @@ -103,6 +103,7 @@ def get_where_filter(where_filter_input, entity_name): :type entity_name: :class:`str` :return: The list of `NestedWhereFilters` and/ or `SearchAPIWhereFilter` objects created + :raises FilterError: If the where filter input is not provided as an object """ if not isinstance(where_filter_input, dict): raise FilterError( From 263ad34be42fe57e1e7ebf67e0a534d505b78a2a Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Thu, 10 Feb 2022 10:13:59 +0000 Subject: [PATCH 06/10] refactor: reword error message for when entity model is not found #309 --- datagateway_api/src/search_api/query_filter_factory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datagateway_api/src/search_api/query_filter_factory.py b/datagateway_api/src/search_api/query_filter_factory.py index abd10b4d..e199bc75 100644 --- a/datagateway_api/src/search_api/query_filter_factory.py +++ b/datagateway_api/src/search_api/query_filter_factory.py @@ -147,10 +147,10 @@ def get_where_filter(where_filter_input, entity_name): log.debug("Text operator found within JSON where object") try: entity_class = getattr(search_api_models, entity_name) - except AttributeError as e: + except AttributeError: raise SearchAPIError( - f"No text operator fields have been defined for {entity_name}" - f", {e.args}", + f"No model for {entity_name} could be found, a different entity" + f"name should be used", ) or_conditional_filters = [] From df449819192653c3655f59b77dd77b25b77f470d Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 15 Feb 2022 14:18:54 +0000 Subject: [PATCH 07/10] fix: return 404 on `/files` endpoint with non-existent dataset pid #327 --- datagateway_api/src/search_api/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datagateway_api/src/search_api/helpers.py b/datagateway_api/src/search_api/helpers.py index 43513174..16adb0c4 100644 --- a/datagateway_api/src/search_api/helpers.py +++ b/datagateway_api/src/search_api/helpers.py @@ -146,6 +146,9 @@ def get_files(entity_name, pid, filters): "Entity Name: %s, Filters: %s", entity_name, filters, ) + # Check if dataset with such pid exists before proceeding + get_with_pid("Dataset", pid, []) + filters.append(SearchAPIWhereFilter("dataset.pid", pid, "eq")) return get_search(entity_name, filters) From 4d09a30bac50d8f48a09ab0a3b38253dbb5a6ae6 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 15 Feb 2022 14:19:43 +0000 Subject: [PATCH 08/10] fix: return 404 on `/files/count` endpoint with non-existent dataset pid #327 --- datagateway_api/src/search_api/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datagateway_api/src/search_api/helpers.py b/datagateway_api/src/search_api/helpers.py index 16adb0c4..27399324 100644 --- a/datagateway_api/src/search_api/helpers.py +++ b/datagateway_api/src/search_api/helpers.py @@ -174,5 +174,8 @@ def get_files_count(entity_name, filters, pid): "Entity Name: %s, Filters: %s", entity_name, filters, ) + # Check if dataset with such pid exists before proceeding + get_with_pid("Dataset", pid, []) + filters.append(SearchAPIWhereFilter("dataset.pid", pid, "eq")) return get_count(entity_name, filters) From 2130f0b439e9d2d253f9ac16af480b1c63c5b591 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 15 Feb 2022 14:20:53 +0000 Subject: [PATCH 09/10] test: unskip test for `/files` with non-existent dataset pid #327 --- test/search_api/endpoints/test_get_dataset_files.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/test/search_api/endpoints/test_get_dataset_files.py b/test/search_api/endpoints/test_get_dataset_files.py index e0d47f6a..99b75e82 100644 --- a/test/search_api/endpoints/test_get_dataset_files.py +++ b/test/search_api/endpoints/test_get_dataset_files.py @@ -141,14 +141,7 @@ def test_valid_get_dataset_files_endpoint( pytest.param( "0-8401-1070-7", '{"include": ""}', 400, id="Bad include filter", ), - pytest.param( - "my 404 test pid", - "{}", - 404, - id="Non-existent dataset pid", - # Skipped because this actually returns 200 - marks=pytest.mark.skip, - ), + pytest.param("my 404 test pid", "{}", 404, id="Non-existent dataset pid"), ], ) def test_invalid_get_dataset_files_endpoint( From 8e0cfc1833e2315d7c19ec0e12a5204e186798a7 Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov Date: Tue, 15 Feb 2022 14:22:31 +0000 Subject: [PATCH 10/10] test: add test for `/files/count` with non-existent dataset pid #327 --- .../endpoints/test_count_dataset_files.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/search_api/endpoints/test_count_dataset_files.py b/test/search_api/endpoints/test_count_dataset_files.py index 1f475fa6..b48e1c20 100644 --- a/test/search_api/endpoints/test_count_dataset_files.py +++ b/test/search_api/endpoints/test_count_dataset_files.py @@ -37,9 +37,6 @@ class TestSearchAPICountDatasetFilesEndpoint: {"count": 0}, id="Count dataset files with filter to return zero count", ), - pytest.param( - "unknown pid", "{}", {"count": 0}, id="Non-existent dataset pid", - ), ], ) def test_valid_count_dataset_files_endpoint( @@ -54,22 +51,24 @@ def test_valid_count_dataset_files_endpoint( assert test_response.json == expected_json @pytest.mark.parametrize( - "pid, request_filter", + "pid, request_filter, expected_status_code", [ - pytest.param("0-8401-1070-7", '{"bad filter"}', id="Bad filter"), + pytest.param("0-8401-1070-7", '{"bad filter"}', 400, id="Bad filter"), pytest.param( "0-8401-1070-7", '{"where": {"name": "FILE 4"}}', + 400, id="Where filter inside where query param", ), + pytest.param("my 404 test pid", "{}", 404, id="Non-existent dataset pid"), ], ) def test_invalid_count_dataset_files_endpoint( - self, flask_test_app_search_api, pid, request_filter, + self, flask_test_app_search_api, pid, request_filter, expected_status_code, ): test_response = flask_test_app_search_api.get( f"{Config.config.search_api.extension}/datasets/{pid}/files/count" f"?where={request_filter}", ) - assert test_response.status_code == 400 + assert test_response.status_code == expected_status_code