From 4770a43649cad7b5db28d32c627aa6a548b8a79b Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Wed, 20 Nov 2024 00:00:30 -0500 Subject: [PATCH] add enhanced support for limits (RFC5) (#1856) --- .github/workflows/containers.yml | 2 +- .github/workflows/docs.yml | 4 +- .github/workflows/flake8.yml | 4 +- docker/default.config.yml | 4 +- docs/source/configuration.rst | 8 +++- pygeoapi/api/__init__.py | 42 ++++++++++++++++- pygeoapi/api/environmental_data_retrieval.py | 15 ++++++- pygeoapi/api/itemtypes.py | 45 ++++++------------- pygeoapi/api/processes.py | 38 +++++----------- .../schemas/config/pygeoapi-config-0.x.yml | 33 +++++++++++++- tests/api/test_api.py | 41 ++++++++++++++++- tests/api/test_itemtypes.py | 10 ++--- tests/api/test_processes.py | 8 ++-- tests/cite/cite.config.yml | 4 +- tests/pygeoapi-test-config-admin.yml | 4 +- tests/pygeoapi-test-config-apirules.yml | 4 +- tests/pygeoapi-test-config-enclosure.yml | 4 +- tests/pygeoapi-test-config-envvars.yml | 4 +- .../pygeoapi-test-config-hidden-resources.yml | 4 +- tests/pygeoapi-test-config-ogr.yml | 4 +- ...ygeoapi-test-config-postgresql-manager.yml | 4 +- tests/pygeoapi-test-config-postgresql.yml | 4 +- tests/pygeoapi-test-config.yml | 4 +- 23 files changed, 205 insertions(+), 89 deletions(-) diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 04897a440..f04834da5 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -25,7 +25,7 @@ jobs: contents: read steps: - name: Check out the repo - uses: actions/checkout@v3 + uses: actions/checkout@master - name: Set up QEMU uses: docker/setup-qemu-action@v2.1.0 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0da62d013..9de9ab025 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,8 +23,8 @@ jobs: include: - python-version: '3.10' steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@master + - uses: actions/setup-python@v5 name: Setup Python ${{ matrix.python-version }} with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index 284b93655..b57ba36c2 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -7,8 +7,8 @@ jobs: flake8_py3: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + - uses: actions/checkout@master + - uses: actions/setup-python@v5 name: setup Python with: python-version: '3.10' diff --git a/docker/default.config.yml b/docker/default.config.yml index 29174bbf5..84ca3aeaf 100644 --- a/docker/default.config.yml +++ b/docker/default.config.yml @@ -46,7 +46,9 @@ server: cors: true pretty_print: true admin: ${PYGEOAPI_SERVER_ADMIN:-false} - limit: 10 + limits: + defaultitems: 10 + maxitems: 50 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 9dff49a17..be2ffbaf4 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -49,7 +49,13 @@ For more information related to API design rules (the ``api_rules`` property in gzip: false # default server config to gzip/compress responses to requests with gzip in the Accept-Encoding header cors: true # boolean on whether server should support CORS pretty_print: true # whether JSON responses should be pretty-printed - limit: 10 # server limit on number of items to return + + limits: # server limits on number of items to return. This property can also be defined at the resource level to override global server settings + defaultitems: 10 + maxitems: 100 + maxdistance: [25, 25] + on_exceed: throttle # throttle or error (default=throttle) + admin: false # whether to enable the Admin API # optional configuration to specify a different set of templates for HTML pages. Recommend using absolute paths. Omit this to use the default provided templates diff --git a/pygeoapi/api/__init__.py b/pygeoapi/api/__init__.py index eda22e4f4..a9e95955a 100644 --- a/pygeoapi/api/__init__.py +++ b/pygeoapi/api/__init__.py @@ -40,7 +40,7 @@ Returns content from plugins and sets responses. """ -from collections import OrderedDict +from collections import ChainMap, OrderedDict from copy import deepcopy from datetime import datetime from functools import partial @@ -1590,3 +1590,43 @@ def validate_subset(value: str) -> dict: subsets[subset_name] = list(map(get_typed_value, values)) return subsets + + +def evaluate_limit(requested: Union[None, int], server_limits: dict, + collection_limits: dict) -> int: + """ + Helper function to evaluate limit parameter + + :param requested: the limit requested by the client + :param server_limits: `dict` of server limits + :param collection_limits: `dict` of collection limits + + :returns: `int` of evaluated limit + """ + + effective_limits = ChainMap(collection_limits, server_limits) + + default = effective_limits.get('defaultitems', 10) + max_ = effective_limits.get('maxitems', 10) + on_exceed = effective_limits.get('on_exceed', 'throttle') + + LOGGER.debug(f'Requested limit: {requested}') + LOGGER.debug(f'Default limit: {default}') + LOGGER.debug(f'Maximum limit: {max_}') + LOGGER.debug(f'On exceed: {on_exceed}') + + if requested is None: + LOGGER.debug('no limit requested; returning default') + return default + + requested2 = get_typed_value(requested) + if not isinstance(requested2, int): + raise ValueError('limit value should be an integer') + + if requested2 <= 0: + raise ValueError('limit value should be strictly positive') + elif requested2 > max_ and on_exceed == 'error': + raise RuntimeError('Limit exceeded; throwing errror') + else: + LOGGER.debug('limit requested') + return min(requested2, max_) diff --git a/pygeoapi/api/environmental_data_retrieval.py b/pygeoapi/api/environmental_data_retrieval.py index 945cffb8a..016f29ef1 100644 --- a/pygeoapi/api/environmental_data_retrieval.py +++ b/pygeoapi/api/environmental_data_retrieval.py @@ -47,6 +47,7 @@ from shapely.wkt import loads as shapely_loads from pygeoapi import l10n +from pygeoapi.api import evaluate_limit from pygeoapi.plugin import load_plugin, PLUGINS from pygeoapi.provider.base import ProviderGenericError from pygeoapi.util import ( @@ -175,6 +176,16 @@ def get_collection_edr_query(api: API, request: APIRequest, HTTPStatus.BAD_REQUEST, headers, request.format, 'InvalidParameterValue', msg) + LOGGER.debug('Processing limit parameter') + try: + limit = evaluate_limit(request.params.get('limit'), + api.config['server'].get('limits', {}), + collections[dataset].get('limits', {})) + except ValueError as err: + return api.get_exception( + HTTPStatus.BAD_REQUEST, headers, request.format, + 'InvalidParameterValue', str(err)) + query_args = dict( query_type=query_type, instance=instance, @@ -186,8 +197,8 @@ def get_collection_edr_query(api: API, request: APIRequest, bbox=bbox, within=within, within_units=within_units, - limit=int(api.config['server']['limit']), - location_id=location_id, + limit=limit, + location_id=location_id ) try: diff --git a/pygeoapi/api/itemtypes.py b/pygeoapi/api/itemtypes.py index 0cd50be40..10150f28c 100644 --- a/pygeoapi/api/itemtypes.py +++ b/pygeoapi/api/itemtypes.py @@ -48,6 +48,7 @@ from pyproj.exceptions import CRSError from pygeoapi import l10n +from pygeoapi.api import evaluate_limit from pygeoapi.formatter.base import FormatterSerializationError from pygeoapi.linked_data import geojson2jsonld from pygeoapi.plugin import load_plugin, PLUGINS @@ -240,33 +241,24 @@ def get_collection_items( return api.get_exception( HTTPStatus.BAD_REQUEST, headers, request.format, 'InvalidParameterValue', msg) - except TypeError as err: - LOGGER.warning(err) - offset = 0 except ValueError: msg = 'offset value should be an integer' return api.get_exception( HTTPStatus.BAD_REQUEST, headers, request.format, 'InvalidParameterValue', msg) + except TypeError as err: + LOGGER.warning(err) + offset = 0 LOGGER.debug('Processing limit parameter') try: - limit = int(request.params.get('limit')) - # TODO: We should do more validation, against the min and max - # allowed by the server configuration - if limit <= 0: - msg = 'limit value should be strictly positive' - return api.get_exception( - HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) - except TypeError as err: - LOGGER.warning(err) - limit = int(api.config['server']['limit']) - except ValueError: - msg = 'limit value should be an integer' + limit = evaluate_limit(request.params.get('limit'), + api.config['server'].get('limits', {}), + collections[dataset].get('limits', {})) + except ValueError as err: return api.get_exception( HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) + 'InvalidParameterValue', str(err)) resulttype = request.params.get('resulttype') or 'results' @@ -693,22 +685,13 @@ def post_collection_items( LOGGER.debug('Processing limit parameter') try: - limit = int(request.params.get('limit')) - # TODO: We should do more validation, against the min and max - # allowed by the server configuration - if limit <= 0: - msg = 'limit value should be strictly positive' - return api.get_exception( - HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) - except TypeError as err: - LOGGER.warning(err) - limit = int(api.config['server']['limit']) - except ValueError: - msg = 'limit value should be an integer' + limit = evaluate_limit(request.params.get('limit'), + api.config['server'].get('limits', {}), + collections[dataset].get('limits', {})) + except ValueError as err: return api.get_exception( HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) + 'InvalidParameterValue', str(err)) resulttype = request.params.get('resulttype') or 'results' diff --git a/pygeoapi/api/processes.py b/pygeoapi/api/processes.py index 2ff57005d..32c4b6458 100644 --- a/pygeoapi/api/processes.py +++ b/pygeoapi/api/processes.py @@ -49,6 +49,7 @@ import urllib.parse from pygeoapi import l10n +from pygeoapi.api import evaluate_limit from pygeoapi.util import ( json_serial, render_j2_template, JobStatus, RequestedProcessExecutionMode, to_json, DATETIME_FORMAT) @@ -101,23 +102,14 @@ def describe_processes(api: API, request: APIRequest, else: LOGGER.debug('Processing limit parameter') try: - limit = int(request.params.get('limit')) - - if limit <= 0: - msg = 'limit value should be strictly positive' - return api.get_exception( - HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) - + limit = evaluate_limit(request.params.get('limit'), + api.config['server'].get('limits', {}), + {}) relevant_processes = list(api.manager.processes)[:limit] - except TypeError: - LOGGER.debug('returning all processes') - relevant_processes = api.manager.processes.keys() - except ValueError: - msg = 'limit value should be an integer' + except ValueError as err: return api.get_exception( HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) + 'InvalidParameterValue', str(err)) for key in relevant_processes: p = api.manager.get_processor(key) @@ -244,21 +236,13 @@ def get_jobs(api: API, request: APIRequest, **api.api_headers) LOGGER.debug('Processing limit parameter') try: - limit = int(request.params.get('limit')) - - if limit <= 0: - msg = 'limit value should be strictly positive' - return api.get_exception( - HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) - except TypeError: - limit = int(api.config['server']['limit']) - LOGGER.debug('returning all jobs') - except ValueError: - msg = 'limit value should be an integer' + limit = evaluate_limit(request.params.get('limit'), + api.config['server'].get('limits', {}), + {}) + except ValueError as err: return api.get_exception( HTTPStatus.BAD_REQUEST, headers, request.format, - 'InvalidParameterValue', msg) + 'InvalidParameterValue', str(err)) LOGGER.debug('Processing offset parameter') try: diff --git a/pygeoapi/schemas/config/pygeoapi-config-0.x.yml b/pygeoapi/schemas/config/pygeoapi-config-0.x.yml index ecc146c8c..18e01a491 100644 --- a/pygeoapi/schemas/config/pygeoapi-config-0.x.yml +++ b/pygeoapi/schemas/config/pygeoapi-config-0.x.yml @@ -60,8 +60,36 @@ properties: default: false limit: type: integer - description: server limit on number of items to return default: 10 + description: "limit of items to return. DEPRECATED: use limits instead" + limits: &x-limits + type: object + description: server level limits on number of items to return + properties: + maxitems: + type: integer + description: maximum limit of items to return for feature and record providers + minimum: 1 + default: 10 + defaultitems: + type: integer + description: default limit of items to return for feature and record providers + minimum: 1 + default: 10 + maxdistance: + type: array + description: maximum distance in x and y for all data providers + minItems: 2 + maxItems: 2 + items: + type: number + on_exceed: + type: string + description: how to handle limit exceeding + default: throttle + enum: + - error + - throttle templates: type: object description: optional configuration to specify a different set of templates for HTML pages. Recommend using absolute paths. Omit this to use the default provided templates @@ -417,6 +445,9 @@ properties: default: 'http://www.opengis.net/def/uom/ISO-8601/0/Gregorian' required: - spatial + limits: + <<: *x-limits + description: collection level limits on number of items to return providers: type: array description: required connection information diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 09dd8e6fa..47c7fc21f 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -40,7 +40,7 @@ from pygeoapi.api import ( API, APIRequest, FORMAT_TYPES, F_HTML, F_JSON, F_JSONLD, F_GZIP, - __version__, validate_bbox, validate_datetime, + __version__, validate_bbox, validate_datetime, evaluate_limit, validate_subset, landing_page, openapi_, conformance, describe_collections, get_collection_schema, ) @@ -848,3 +848,42 @@ def test_get_exception(config, api_): assert content['description'] == 'oops' d = api_.get_exception(500, {}, 'html', 'NoApplicableCode', 'oops') + + +def test_evaluate_limit(): + collection = {} + server = {} + + with pytest.raises(ValueError): + assert evaluate_limit('1.1', server, collection) == 10 + + with pytest.raises(ValueError): + assert evaluate_limit('-12', server, collection) == 10 + + assert evaluate_limit('1', server, collection) == 1 + + collection = {} + server = {'defaultitems': 2, 'maxitems': 3} + + assert evaluate_limit(None, server, collection) == 2 + assert evaluate_limit('1', server, collection) == 1 + assert evaluate_limit('4', server, collection) == 3 + + collection = {'defaultitems': 10, 'maxitems': 50} + server = {'defaultitems': 100, 'maxitems': 1000} + + assert evaluate_limit(None, server, collection) == 10 + assert evaluate_limit('40', server, collection) == 40 + assert evaluate_limit('60', server, collection) == 50 + + collection = {} + server = {'defaultitems': 2, 'maxitems': 3, 'on_exceed': 'error'} + + with pytest.raises(RuntimeError): + assert evaluate_limit('40', server, collection) == 40 + + collection = {'defaultitems': 10} + server = {'defaultitems': 2, 'maxitems': 3} + + assert evaluate_limit(None, server, collection) == 10 + assert evaluate_limit('40', server, collection) == 3 diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index ae19c28d6..cc41b39a7 100644 --- a/tests/api/test_itemtypes.py +++ b/tests/api/test_itemtypes.py @@ -179,7 +179,7 @@ def test_get_collection_items(config, api_): assert len(features['features']) == 0 # Invalid limit - req = mock_api_request({'limit': 0}) + req = mock_api_request({'limit': '0'}) rsp_headers, code, response = get_collection_items(api_, req, 'obs') features = json.loads(response) @@ -199,7 +199,7 @@ def test_get_collection_items(config, api_): assert len(features['features']) == 1 assert features['numberMatched'] == 1 - req = mock_api_request({'limit': 2}) + req = mock_api_request({'limit': '2'}) rsp_headers, code, response = get_collection_items(api_, req, 'obs') features = json.loads(response) @@ -246,8 +246,8 @@ def test_get_collection_items(config, api_): assert links[4]['rel'] == 'collection' req = mock_api_request({ - 'offset': 1, - 'limit': 1, + 'offset': '1', + 'limit': '1', 'bbox': '-180,90,180,90' }) rsp_headers, code, response = get_collection_items(api_, req, 'obs') @@ -536,7 +536,7 @@ def test_manage_collection_item_editable_options_req(config, openapi): def test_get_collection_items_json_ld(config, api_): req = mock_api_request({ 'f': 'jsonld', - 'limit': 2 + 'limit': '2' }) rsp_headers, code, response = get_collection_items(api_, req, 'obs') diff --git a/tests/api/test_processes.py b/tests/api/test_processes.py index 6062f5666..5f22e2c98 100644 --- a/tests/api/test_processes.py +++ b/tests/api/test_processes.py @@ -46,7 +46,7 @@ def test_describe_processes(config, api_): - req = mock_api_request({'limit': 1}) + req = mock_api_request({'limit': '1'}) # Test for description of single processes rsp_headers, code, response = describe_processes(api_, req) data = json.loads(response) @@ -471,7 +471,7 @@ def test_get_jobs_pagination(api_): headers, code, response = get_jobs( api_, - mock_api_request({'limit': 10, 'offset': 9}), + mock_api_request({'limit': '10', 'offset': '9'}), job_id=None) job_response_offset = json.loads(response) # check to get 1 same job id with an offset of 9 and limit of 10 @@ -486,8 +486,8 @@ def test_get_jobs_pagination(api_): # test custom limit headers, code, response = get_jobs( api_, - mock_api_request({'limit': 20}), + mock_api_request({'limit': '20'}), job_id=None) job_response = json.loads(response) # might be more than 11 due to test interaction - assert len(job_response['jobs']) > 10 + assert len(job_response['jobs']) >= 10 diff --git a/tests/cite/cite.config.yml b/tests/cite/cite.config.yml index 22d39510d..286b34671 100644 --- a/tests/cite/cite.config.yml +++ b/tests/cite/cite.config.yml @@ -8,7 +8,9 @@ server: language: en-US cors: true pretty_print: true - limit: 100 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-admin.yml b/tests/pygeoapi-test-config-admin.yml index fa9b08d84..2f4031c73 100644 --- a/tests/pygeoapi-test-config-admin.yml +++ b/tests/pygeoapi-test-config-admin.yml @@ -39,7 +39,9 @@ server: - en-US cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png attribution: '© OpenStreetMap contributors' diff --git a/tests/pygeoapi-test-config-apirules.yml b/tests/pygeoapi-test-config-apirules.yml index 6aba4c71c..4844f1bf9 100644 --- a/tests/pygeoapi-test-config-apirules.yml +++ b/tests/pygeoapi-test-config-apirules.yml @@ -43,7 +43,9 @@ server: - fr-CA cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-enclosure.yml b/tests/pygeoapi-test-config-enclosure.yml index aa65b0595..1bb005b74 100644 --- a/tests/pygeoapi-test-config-enclosure.yml +++ b/tests/pygeoapi-test-config-enclosure.yml @@ -41,7 +41,9 @@ server: - fr-CA cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-envvars.yml b/tests/pygeoapi-test-config-envvars.yml index 3784f13dd..5b0a4c47c 100644 --- a/tests/pygeoapi-test-config-envvars.yml +++ b/tests/pygeoapi-test-config-envvars.yml @@ -38,7 +38,9 @@ server: gzip: false cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-hidden-resources.yml b/tests/pygeoapi-test-config-hidden-resources.yml index 3682bc2c9..7c7c0a686 100644 --- a/tests/pygeoapi-test-config-hidden-resources.yml +++ b/tests/pygeoapi-test-config-hidden-resources.yml @@ -41,7 +41,9 @@ server: - fr-CA cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-ogr.yml b/tests/pygeoapi-test-config-ogr.yml index e4237941a..c8bf1a621 100644 --- a/tests/pygeoapi-test-config-ogr.yml +++ b/tests/pygeoapi-test-config-ogr.yml @@ -38,7 +38,9 @@ server: cors: true gzip: false pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-postgresql-manager.yml b/tests/pygeoapi-test-config-postgresql-manager.yml index e0fe947d0..f3e927ad3 100644 --- a/tests/pygeoapi-test-config-postgresql-manager.yml +++ b/tests/pygeoapi-test-config-postgresql-manager.yml @@ -41,7 +41,9 @@ server: - fr-CA cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png diff --git a/tests/pygeoapi-test-config-postgresql.yml b/tests/pygeoapi-test-config-postgresql.yml index 05a09668e..a019aece6 100644 --- a/tests/pygeoapi-test-config-postgresql.yml +++ b/tests/pygeoapi-test-config-postgresql.yml @@ -43,7 +43,9 @@ server: - fr-CA # cors: true pretty_print: true - limit: 100 + limits: + defaultitems: 100 + maxitems: 100 # templates: # path: /path/to/Jinja2/templates # static: /path/to/static/folder # css/js/img diff --git a/tests/pygeoapi-test-config.yml b/tests/pygeoapi-test-config.yml index 58b62484f..4f99b5553 100644 --- a/tests/pygeoapi-test-config.yml +++ b/tests/pygeoapi-test-config.yml @@ -41,7 +41,9 @@ server: - fr-CA cors: true pretty_print: true - limit: 10 + limits: + defaultitems: 10 + maxitems: 10 # templates: /path/to/templates map: url: https://tile.openstreetmap.org/{z}/{x}/{y}.png