Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Changed

- Removed ENV_MAX_LIMIT environment variable; maximum limits are now handled by the default global limit environment variable. [#482](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/482)
- Changed the default and maximum pagination limits for collections/items endpoints. [#482](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/482)

### Fixed

[v6.5.1] - 2025-09-30
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,11 @@ You can customize additional settings in your `.env` file:
| `ENABLE_COLLECTIONS_SEARCH` | Enable collection search extensions (sort, fields, free text search, structured filtering, and datetime filtering) on the core `/collections` endpoint. | `true` | Optional |
| `ENABLE_COLLECTIONS_SEARCH_ROUTE` | Enable the custom `/collections-search` endpoint (both GET and POST methods). When disabled, the custom endpoint will not be available, but collection search extensions will still be available on the core `/collections` endpoint if `ENABLE_COLLECTIONS_SEARCH` is true. | `false` | Optional |
| `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. This is useful for deployments where mutating the catalog via the API should be prevented. If set to `true`, the POST `/collections` route for search will be unavailable in the API. | `true` | Optional |
| `STAC_ITEM_LIMIT` | Sets the environment variable for result limiting to SFEOS for the number of returned items and STAC collections. | `10` | Optional |
| `STAC_GLOBAL_COLLECTION_MAX_LIMIT` | Configures the maximum number of STAC collections that can be returned in a single search request. | N/A | Optional |
| `STAC_DEFAULT_COLLECTION_LIMIT` | Configures the default number of STAC collections returned when no limit parameter is specified in the request. | `300` | Optional |
| `STAC_GLOBAL_ITEM_MAX_LIMIT` | Configures the maximum number of STAC items that can be returned in a single search request. | N/A | Optional |
| `STAC_DEFAULT_ITEM_LIMIT` | Configures the default number of STAC items returned when no limit parameter is specified in the request. | `10` | Optional |
| `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional |
| `ENV_MAX_LIMIT` | Configures the environment variable in SFEOS to override the default `MAX_LIMIT`, which controls the limit parameter for returned items and STAC collections. | `10,000` | Optional |
| `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional |

> [!NOTE]
Expand Down
76 changes: 44 additions & 32 deletions stac_fastapi/core/stac_fastapi/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,34 +270,31 @@ async def all_collections(
"""
base_url = str(request.base_url)

# Get the global limit from environment variable
global_limit = None
env_limit = os.getenv("STAC_ITEM_LIMIT")
if env_limit:
try:
global_limit = int(env_limit)
except ValueError:
# Handle invalid integer in environment variable
pass

# Apply global limit if it exists
if global_limit is not None:
# If a limit was provided, use the smaller of the two
if limit is not None:
limit = min(limit, global_limit)
else:
limit = global_limit
global_max_limit = (
int(os.getenv("STAC_GLOBAL_COLLECTION_MAX_LIMIT"))
if os.getenv("STAC_GLOBAL_COLLECTION_MAX_LIMIT")
else None
)
default_limit = int(os.getenv("STAC_DEFAULT_COLLECTION_LIMIT", 300))
query_limit = request.query_params.get("limit")

body_limit = None
try:
if request.method == "POST" and request.body():
body_data = await request.json()
body_limit = body_data.get("limit")
except Exception:
pass
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using request.query_params.get("limit") or body_data.get("limit"), can you just use limit from the function params?


if body_limit is not None:
limit = int(body_limit)
elif query_limit:
limit = int(query_limit)
else:
# No global limit, use provided limit or default
if limit is None:
query_limit = request.query_params.get("limit")
if query_limit:
try:
limit = int(query_limit)
except ValueError:
limit = 10
else:
limit = 10
limit = default_limit

if global_max_limit is not None:
limit = min(limit, global_max_limit)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we do this - limit = min(limit, global_max_limit) - are we ignoring the fact that someone may want a higher global limit? We should write down what the expectations are and then add a test I think.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe test these cases - let me know what you think:

Test with no global max limit set (should allow any limit).
Test with global max limit set (should cap at that limit).
Test with no limit specified (should use default).
Test with limit higher than global max (should be capped).
Test with limit lower than global max (should be respected).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was agreed that we should set a global limit for /collections and /items, as allowing too many results slows down the API. The value can always be overridden by an env var; also, I am not sure we should ever allow an unlimited number of returned items.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonhealy1 I have added the tests and left out the one that is not relevant:
Test with no limit specified for items/collections (should use default). -test_default_item_limit_without_limit_parameter_set(), test_default_collection_limit()

Test with limit higher than global max items/collections (should be capped). - test_global_collection_max_limit_set(), test_default_collection_limit()

Test with limit lower than global max (should be respected). - not sure if we need this test.

Test with no global max limit set (should allow any limit). - not relevant, unless agree to allow any limit. Also, overpopulating the test collections/items slows down the test.

Test with global max limit set for items/collection (should cap at that limit). - test_global_collection_max_limit_set(), test_global_item_max_limit_set()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can the value be overridden by an env var? Maybe I am missing something here?

Copy link
Collaborator

@jonhealy1 jonhealy1 Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Test with no global max limit set (should allow any limit). - not relevant, unless agree to allow any limit. Also, overpopulating the test collections/items slows down the test"

@YuriZmytrakov No one envisions having an api returning unlimited items. You set the default max limit for Items to 100. If a User wants to return 110 items, they are unable to (unless I am missing something, please explain). There is no reason to place these restrictions. People who use this project expect flexibility.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you test this by showing how the limit param filters down to execute_search - test what its final value is - without ingesting over 100 Items?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed with the team — yes, we should not restrict the number of items or collections that can be returned by default. This was fixed by removing the default values (items=100, collections=300). For this reason, I’m adding proper tests to verify that when no global max limit is set, any limit is allowed: the tests populate 20 items and 20 collections and then query them to ensure the requested limit is honored.


# Get token from query params only if not already provided (for GET requests)
if token is None:
Expand Down Expand Up @@ -569,7 +566,7 @@ async def item_collection(
request (Request): FastAPI Request object.
bbox (Optional[BBox]): Optional bounding box filter.
datetime (Optional[str]): Optional datetime or interval filter.
limit (Optional[int]): Optional page size. Defaults to env ``STAC_ITEM_LIMIT`` when unset.
limit (Optional[int]): Optional page size. Defaults to env `STAC_DEFAULT_ITEM_LIMIT` when unset.
sortby (Optional[str]): Optional sort specification. Accepts repeated values
like ``sortby=-properties.datetime`` or ``sortby=+id``. Bare fields (e.g. ``sortby=id``)
imply ascending order.
Expand Down Expand Up @@ -660,15 +657,12 @@ async def get_search(
q (Optional[List[str]]): Free text query to filter the results.
intersects (Optional[str]): GeoJSON geometry to search in.
kwargs: Additional parameters to be passed to the API.

Returns:
ItemCollection: Collection of `Item` objects representing the search results.

Raises:
HTTPException: If any error occurs while searching the catalog.
"""
limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10)))

base_args = {
"collections": collections,
"ids": ids,
Expand Down Expand Up @@ -743,6 +737,25 @@ async def post_search(
Raises:
HTTPException: If there is an error with the cql2_json filter.
"""
global_max_limit = (
int(os.getenv("STAC_GLOBAL_ITEM_MAX_LIMIT"))
if os.getenv("STAC_GLOBAL_ITEM_MAX_LIMIT")
else None
)
default_limit = int(os.getenv("STAC_DEFAULT_ITEM_LIMIT", 10))

requested_limit = getattr(search_request, "limit", None)

if requested_limit is None:
limit = default_limit
else:
limit = requested_limit

if global_max_limit:
limit = min(limit, global_max_limit)

search_request.limit = limit

base_url = str(request.base_url)

search = self.database.make_search()
Expand Down Expand Up @@ -819,7 +832,6 @@ async def post_search(
if hasattr(search_request, "sortby") and getattr(search_request, "sortby"):
sort = self.database.populate_sort(getattr(search_request, "sortby"))

limit = 10
if search_request.limit:
limit = search_request.limit

Expand Down
10 changes: 1 addition & 9 deletions stac_fastapi/core/stac_fastapi/core/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,7 @@

from stac_fastapi.types.stac import Item


def get_max_limit():
"""
Retrieve a MAX_LIMIT value from an environment variable.

Returns:
int: The int value parsed from the environment variable.
"""
return int(os.getenv("ENV_MAX_LIMIT", 10000))
MAX_LIMIT = 10000


def get_bool_env(name: str, default: Union[bool, str] = False) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit
from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, get_bool_env
from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings
from stac_fastapi.elasticsearch.config import (
ElasticsearchSettings as SyncElasticsearchSettings,
Expand Down Expand Up @@ -816,7 +816,7 @@ async def execute_search(
index_param = ITEM_INDICES
query = add_collections_to_body(collection_ids, query)

max_result_window = get_max_limit()
max_result_window = MAX_LIMIT

size_limit = min(limit + 1, max_result_window)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from stac_fastapi.core.base_database_logic import BaseDatabaseLogic
from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
from stac_fastapi.core.utilities import bbox2polygon, get_bool_env, get_max_limit
from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, get_bool_env
from stac_fastapi.extensions.core.transaction.request import (
PartialCollection,
PartialItem,
Expand Down Expand Up @@ -808,7 +808,7 @@ async def execute_search(

search_body["sort"] = sort if sort else DEFAULT_SORT

max_result_window = get_max_limit()
max_result_window = MAX_LIMIT

size_limit = min(limit + 1, max_result_window)

Expand Down
96 changes: 64 additions & 32 deletions stac_fastapi/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1475,70 +1475,102 @@ def create_items(date_prefix: str, start_day: int, count: int) -> dict:


@pytest.mark.asyncio
async def test_collections_limit_env_variable(app_client, txn_client, load_test_data):
limit = "5"
os.environ["STAC_ITEM_LIMIT"] = limit
item = load_test_data("test_collection.json")
async def test_global_collection_max_limit_set(app_client, txn_client, load_test_data):
"""Test with global collection max limit set, expect cap the limit"""
os.environ["STAC_GLOBAL_COLLECTION_MAX_LIMIT"] = "5"

for i in range(10):
test_collection = item.copy()
test_collection["id"] = f"test-collection-env-{i}"
test_collection["title"] = f"Test Collection Env {i}"
test_collection = load_test_data("test_collection.json")
test_collection_id = f"test-collection-global-{i}"
test_collection["id"] = test_collection_id
await create_collection(txn_client, test_collection)

resp = await app_client.get("/collections?limit=10")
assert resp.status_code == 200
resp_json = resp.json()
assert len(resp_json["collections"]) == 5

del os.environ["STAC_GLOBAL_COLLECTION_MAX_LIMIT"]


@pytest.mark.asyncio
async def test_default_collection_limit(app_client, txn_client, load_test_data):
"""Test default collection limit set, should use default when no limit provided"""
os.environ["STAC_DEFAULT_COLLECTION_LIMIT"] = "5"

for i in range(10):
test_collection = load_test_data("test_collection.json")
test_collection_id = f"test-collection-default-{i}"
test_collection["id"] = test_collection_id
await create_collection(txn_client, test_collection)

resp = await app_client.get("/collections")
assert resp.status_code == 200
resp_json = resp.json()
assert int(limit) == len(resp_json["collections"])
assert len(resp_json["collections"]) == 5

del os.environ["STAC_DEFAULT_COLLECTION_LIMIT"]


@pytest.mark.asyncio
async def test_search_collection_limit_env_variable(
app_client, txn_client, load_test_data
):
limit = "5"
os.environ["STAC_ITEM_LIMIT"] = limit
async def test_no_global_item_max_limit_set(app_client, txn_client, load_test_data):
"""Test with no global max limit set for items"""

if "STAC_GLOBAL_ITEM_MAX_LIMIT" in os.environ:
del os.environ["STAC_GLOBAL_ITEM_MAX_LIMIT"]

test_collection = load_test_data("test_collection.json")
test_collection_id = "test-collection-search-limit"
test_collection_id = "test-collection-no-global-limit"
test_collection["id"] = test_collection_id
await create_collection(txn_client, test_collection)

item = load_test_data("test_item.json")
item["collection"] = test_collection_id

for i in range(10):
for i in range(20):
test_item = item.copy()
test_item["id"] = f"test-item-search-{i}"
test_item["id"] = f"test-item-{i}"
await create_item(txn_client, test_item)

resp = await app_client.get("/search", params={"collections": [test_collection_id]})
resp = await app_client.get(f"/collections/{test_collection_id}/items?limit=20")
assert resp.status_code == 200
resp_json = resp.json()
assert len(resp_json["features"]) == 20

resp = await app_client.get(f"/search?collections={test_collection_id}&limit=20")
assert resp.status_code == 200
resp_json = resp.json()
assert int(limit) == len(resp_json["features"])
assert len(resp_json["features"]) == 20

resp = await app_client.post(
"/search", json={"collections": [test_collection_id], "limit": 20}
)
assert resp.status_code == 200
resp_json = resp.json()
assert len(resp_json["features"]) == 20

async def test_search_max_item_limit(
app_client, load_test_data, txn_client, monkeypatch
):
limit = "10"
monkeypatch.setenv("ENV_MAX_LIMIT", limit)

test_collection = load_test_data("test_collection.json")
await create_collection(txn_client, test_collection)
@pytest.mark.asyncio
async def test_no_global_collection_max_limit_set(
app_client, txn_client, load_test_data
):
"""Test with no global max limit set for collections"""

item = load_test_data("test_item.json")
if "STAC_GLOBAL_COLLECTION_MAX_LIMIT" in os.environ:
del os.environ["STAC_GLOBAL_COLLECTION_MAX_LIMIT"]

test_collections = []
for i in range(20):
test_item = item.copy()
test_item["id"] = f"test-item-collection-{i}"
await create_item(txn_client, test_item)

resp = await app_client.get("/search", params={"limit": 20})
test_collection = load_test_data("test_collection.json")
test_collection_id = f"test-collection-no-global-limit-{i}"
test_collection["id"] = test_collection_id
await create_collection(txn_client, test_collection)
test_collections.append(test_collection_id)

resp = await app_client.get("/collections?limit=20")
assert resp.status_code == 200
resp_json = resp.json()
assert int(limit) == len(resp_json["features"])
assert len(resp_json["collections"]) == 20


@pytest.mark.asyncio
Expand Down
Loading