From 952a29018677d12aa8f8d3bab132f004a8675cd8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Mon, 29 Sep 2025 20:27:48 +0800 Subject: [PATCH 1/6] update mappings --- .../sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index b2d7264d..18f6b816 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -160,7 +160,9 @@ class Geometry(Protocol): # noqa "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, "properties": { "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, + "title": {"type": "text"}, + "description": {"type": "text"}, + "bbox_shape": {"type": "geo_shape"}, "extent.temporal.interval": { "type": "date", "format": "strict_date_optional_time||epoch_millis", From f30b4e4be3364b9c8a8d33c7358f1e47949e7d98 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Mon, 29 Sep 2025 20:37:40 +0800 Subject: [PATCH 2/6] update core args --- stac_fastapi/core/stac_fastapi/core/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index ac2f228d..df07a057 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -240,14 +240,17 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage: async def all_collections( self, - datetime: Optional[str] = None, limit: Optional[int] = None, + bbox: Optional[BBox] = None, + datetime: Optional[str] = None, fields: Optional[List[str]] = None, sortby: Optional[Union[str, List[str]]] = None, filter_expr: Optional[str] = None, filter_lang: Optional[str] = None, q: Optional[Union[str, List[str]]] = None, query: Optional[str] = None, + request: Request = None, + token: Optional[str] = None, **kwargs, ) -> stac_types.Collections: """Read all collections from the database. @@ -266,7 +269,6 @@ async def all_collections( Returns: A Collections object containing all the collections in the database and links to various resources. """ - request = kwargs["request"] base_url = str(request.base_url) # Get the global limit from environment variable From d167a29c93f2fef625245447374e08acefe833af Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 13:24:07 +0800 Subject: [PATCH 3/6] bbox scratch --- CHANGELOG.md | 6 + compose.yml | 2 + recreate_collections_index.py | 121 +++++++++ .../stac_fastapi/core/base_database_logic.py | 4 + stac_fastapi/core/stac_fastapi/core/core.py | 10 +- .../core/extensions/collections_search.py | 1 + .../core/stac_fastapi/core/serializers.py | 55 ++++- .../elasticsearch/database_logic.py | 42 +++- .../stac_fastapi/opensearch/database_logic.py | 42 +++- .../tests/api/test_api_search_collections.py | 229 ++++++++++++++++++ update_collections_mapping.py | 137 +++++++++++ 11 files changed, 643 insertions(+), 6 deletions(-) create mode 100644 recreate_collections_index.py create mode 100644 update_collections_mapping.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 94dbe9a9..635fdca2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. 
Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries. +- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments. + ### Changed ### Fixed +- Issue where token was not being passed from request to POST collections search logic +- Issue where datetime param was not being passed from POST collections search logic to Elasticsearch + [v6.5.0] - 2025-09-29 ### Added diff --git a/compose.yml b/compose.yml index 77d64198..8c83ae12 100644 --- a/compose.yml +++ b/compose.yml @@ -22,6 +22,7 @@ services: - ES_VERIFY_CERTS=false - BACKEND=elasticsearch - DATABASE_REFRESH=true + - ENABLE_COLLECTIONS_SEARCH_ROUTE=true ports: - "8080:8080" volumes: @@ -56,6 +57,7 @@ services: - ES_VERIFY_CERTS=false - BACKEND=opensearch - STAC_FASTAPI_RATE_LIMIT=200/minute + - ENABLE_COLLECTIONS_SEARCH_ROUTE=true ports: - "8082:8082" volumes: diff --git a/recreate_collections_index.py b/recreate_collections_index.py new file mode 100644 index 00000000..047a1467 --- /dev/null +++ b/recreate_collections_index.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +Script to delete and recreate the collections index. + +WARNING: This will DELETE all existing collections! +Only use this in development environments. + +Usage: + python recreate_collections_index.py +""" + +import asyncio +import os +import sys + +from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX + +# Determine which backend to use +BACKEND = os.getenv("BACKEND", "elasticsearch").lower() + +if BACKEND == "opensearch": + from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, + ) + from stac_fastapi.opensearch.database_logic import ( + create_collection_index, + create_index_templates, + ) +else: + from stac_fastapi.elasticsearch.config import ( + AsyncElasticsearchSettings as AsyncSearchSettings, + ) + from stac_fastapi.elasticsearch.database_logic import ( + create_collection_index, + create_index_templates, + ) + + +async def recreate_index(): + """Delete and recreate the collections index.""" + settings = AsyncSearchSettings() + client = settings.create_client + + print(f"Using backend: {BACKEND}") + print(f"\n{'=' * 60}") + print("WARNING: This will DELETE all existing collections!") + print(f"{'=' * 60}\n") + + # Check if running in production + env = os.getenv("ENVIRONMENT", "development").lower() + if env == "production": + print("ERROR: This script should not be run in production!") + print("Use update_collections_mapping.py instead.") + sys.exit(1) + + response = input("Are you sure you want to continue? 
(yes/no): ") + if response.lower() != "yes": + print("Aborted.") + sys.exit(0) + + try: + # Delete the collections index + index_name = f"{COLLECTIONS_INDEX}-000001" + alias_name = COLLECTIONS_INDEX + + print(f"\nDeleting index {index_name}...") + exists = await client.indices.exists(index=index_name) + if exists: + await client.indices.delete(index=index_name) + print(f"✓ Deleted index {index_name}") + else: + print(f"⊘ Index {index_name} does not exist") + + # Check if alias exists and delete it + alias_exists = await client.indices.exists_alias(name=alias_name) + if alias_exists: + print(f"Deleting alias {alias_name}...") + await client.indices.delete_alias( + index="_all", name=alias_name, ignore=[404] + ) + print(f"✓ Deleted alias {alias_name}") + + # Recreate index templates + print("\nRecreating index templates...") + await create_index_templates() + print("✓ Index templates created") + + # Recreate the collections index + print("\nRecreating collections index...") + await create_collection_index() + print("✓ Collections index created") + + # Verify the mapping includes bbox_shape + print("\nVerifying mapping...") + mapping = await client.indices.get_mapping(index=index_name) + properties = mapping[index_name]["mappings"]["properties"] + + if "bbox_shape" in properties: + print( + f"✓ bbox_shape field is present in mapping: {properties['bbox_shape']}" + ) + else: + print("✗ WARNING: bbox_shape field is NOT in the mapping!") + + print("\n" + "=" * 60) + print("Collections index successfully recreated!") + print("You can now create collections with bbox_shape support.") + print("=" * 60) + + except Exception as e: + print(f"\n✗ Error: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + finally: + await client.close() + + +if __name__ == "__main__": + asyncio.run(recreate_index()) diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index c592b6d2..c0a85498 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -3,6 +3,8 @@ import abc from typing import Any, Dict, Iterable, List, Optional, Tuple +from stac_pydantic.shared import BBox + class BaseDatabaseLogic(abc.ABC): """ @@ -19,6 +21,7 @@ async def get_all_collections( limit: int, request: Any = None, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[BBox] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from the database, supporting pagination. @@ -27,6 +30,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Any, optional): The FastAPI request object. Defaults to None. sort (Optional[List[Dict[str, Any]]], optional): Optional sort parameter. Defaults to None. + bbox (Optional[BBox], optional): Bounding box to filter collections by spatial extent. Defaults to None. Returns: A tuple of (collections, next pagination token if any). 
diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index df07a057..e2bdf495 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -300,7 +300,9 @@ async def all_collections( else: limit = 10 - token = request.query_params.get("token") + # Get token from query params only if not already provided (for GET requests) + if token is None: + token = request.query_params.get("token") # Process fields parameter for filtering collection properties includes, excludes = set(), set() @@ -400,6 +402,7 @@ async def all_collections( limit=limit, request=request, sort=sort, + bbox=bbox, q=q_list, filter=parsed_filter, query=parsed_query, @@ -501,6 +504,11 @@ async def post_all_collections( # Pass all parameters from search_request to all_collections return await self.all_collections( limit=search_request.limit if hasattr(search_request, "limit") else None, + bbox=search_request.bbox if hasattr(search_request, "bbox") else None, + datetime=search_request.datetime + if hasattr(search_request, "datetime") + else None, + token=search_request.token if hasattr(search_request, "token") else None, fields=fields, sortby=sortby, filter_expr=search_request.filter diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py index 0ddbefed..62ec0034 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py @@ -18,6 +18,7 @@ class CollectionsSearchRequest(ExtendedSearch): """Extended search model for collections with free text search support.""" q: Optional[Union[str, List[str]]] = None + token: Optional[str] = None class CollectionsSearchEndpointExtension(ApiExtension): diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index 1700ac59..a8c17d2b 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -1,6 +1,7 @@ """Serializers.""" import abc +import logging from copy import deepcopy from typing import Any, List, Optional @@ -9,10 +10,12 @@ from stac_fastapi.core.datetime_utils import now_to_rfc3339_str from stac_fastapi.core.models.links import CollectionLinks -from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env from stac_fastapi.types import stac as stac_types from stac_fastapi.types.links import ItemLinks, resolve_links +logger = logging.getLogger(__name__) + @attr.s class Serializer(abc.ABC): @@ -141,6 +144,53 @@ def stac_to_db( collection.get("links", []), str(request.base_url) ) + # Convert bbox to bbox_shape for geospatial queries + if "extent" in collection and "spatial" in collection["extent"]: + spatial_extent = collection["extent"]["spatial"] + if "bbox" in spatial_extent and spatial_extent["bbox"]: + # Get the first bbox (collections can have multiple bboxes, but we use the first one) + bbox = ( + spatial_extent["bbox"][0] + if isinstance(spatial_extent["bbox"][0], list) + else spatial_extent["bbox"] + ) + collection_id = collection.get("id", "unknown") + logger.debug( + f"Converting bbox to bbox_shape for collection '{collection_id}': bbox={bbox}" + ) + + if len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For 2D polygon, we only 
need the x,y coordinates and discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox: [minx, miny, maxx, maxy] + maxx, maxy = bbox[2], bbox[3] + logger.debug( + f"Collection '{collection_id}': Processing 2D bbox" + ) + else: + # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] + # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + logger.debug( + f"Collection '{collection_id}': Processing 3D bbox, discarding altitude values at indices 2 and 5" + ) + + # Convert bbox to GeoJSON polygon + bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) + collection["bbox_shape"] = { + "type": "Polygon", + "coordinates": bbox_polygon_coords, + } + logger.info( + f"Collection '{collection_id}': Created bbox_shape from bbox [{minx}, {miny}, {maxx}, {maxy}]" + ) + else: + logger.warning( + f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" + ) + if get_bool_env("STAC_INDEX_ASSETS"): collection["assets"] = [ {"es_key": k, **v} for k, v in collection.get("assets", {}).items() @@ -168,6 +218,9 @@ def db_to_stac( # Avoid modifying the input dict in-place ... doing so breaks some tests collection = deepcopy(collection) + # Remove internal bbox_shape field (not part of STAC spec) + collection.pop("bbox_shape", None) + # Set defaults collection_id = collection.get("id") collection.setdefault("type", "Collection") diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index f4f33cb9..699e436a 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -175,6 +175,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[List[float]] = None, q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, query: Optional[Dict[str, Dict[str, Any]]] = None, @@ -187,6 +188,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent. q (Optional[List[str]]): Free text search terms. query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters. filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. 
@@ -309,12 +311,49 @@ async def get_all_collections( query_parts.append(search_dict["query"]) except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Error converting query to Elasticsearch: {e}") # If there's an error, add a query that matches nothing query_parts.append({"bool": {"must_not": {"match_all": {}}}}) raise + # Apply bbox filter if provided + if bbox: + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + bbox = None + + if bbox and len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For geospatial queries, we discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(minx, miny, maxx, maxy), + } + # Add geo_shape query to filter collections by bbox_shape field + query_parts.append( + { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + ) + # Combine all query parts with AND logic if there are multiple datetime_filter = None if datetime: @@ -381,7 +420,6 @@ async def get_all_collections( try: matched = count_task.result().get("count") except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Count task failed: {e}") return collections, next_token, matched diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 8791390b..68239414 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -159,6 +159,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[List[float]] = None, q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, query: Optional[Dict[str, Dict[str, Any]]] = None, @@ -171,6 +172,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent. q (Optional[List[str]]): Free text search terms. query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters. filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. 
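The OpenSearch hunk below mirrors the Elasticsearch change above. For a request such as `GET /collections?bbox=0.0,40.0,20.0,60.0`, the clause both backends append to `query_parts` comes out as follows (illustrative values, shown as the Python dict the code builds):

```python
# geo_shape clause produced for bbox=[0.0, 40.0, 20.0, 60.0]; "intersects" means a
# collection matches whenever its indexed bbox_shape overlaps the query polygon at all.
geo_shape_clause = {
    "geo_shape": {
        "bbox_shape": {
            "shape": {
                "type": "Polygon",
                "coordinates": [
                    [[0.0, 40.0], [20.0, 40.0], [20.0, 60.0], [0.0, 60.0], [0.0, 40.0]]
                ],
            },
            "relation": "intersects",
        }
    }
}
```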
@@ -293,12 +295,49 @@ async def get_all_collections( query_parts.append(search_dict["query"]) except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Error converting query to OpenSearch: {e}") # If there's an error, add a query that matches nothing query_parts.append({"bool": {"must_not": {"match_all": {}}}}) raise + # Apply bbox filter if provided + if bbox: + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + bbox = None + + if bbox and len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For geospatial queries, we discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(minx, miny, maxx, maxy), + } + # Add geo_shape query to filter collections by bbox_shape field + query_parts.append( + { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + ) + # Combine all query parts with AND logic if there are multiple datetime_filter = None if datetime: @@ -365,7 +404,6 @@ async def get_all_collections( try: matched = count_task.result().get("count") except Exception as e: - logger = logging.getLogger(__name__) logger.error(f"Count task failed: {e}") return collections, next_token, matched diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 8f5bed73..719b85ef 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -857,3 +857,232 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) for i, expected_id in enumerate(expected_ids): assert test_found[i]["id"] == expected_id + + +@pytest.mark.asyncio +async def test_collections_bbox_all_endpoints(app_client, txn_client, ctx): + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the bbox parameter.""" + # Create multiple collections with different spatial extents + base_collection = ctx.collection + + # Use unique prefixes to avoid conflicts between tests + test_prefix = f"bbox-{uuid.uuid4().hex[:8]}" + + # Create collections with different bboxes + # Collection 1: Europe bbox + collection_europe = base_collection.copy() + collection_europe["id"] = f"{test_prefix}-europe" + collection_europe["title"] = "Europe Collection" + collection_europe["extent"] = { + "spatial": {"bbox": [[-10.0, 35.0, 40.0, 70.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_europe) + + # Collection 2: North America bbox + collection_na = base_collection.copy() + collection_na["id"] = f"{test_prefix}-north-america" + collection_na["title"] = "North America Collection" + collection_na["extent"] = { + "spatial": {"bbox": [[-170.0, 15.0, -50.0, 75.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_na) + + # Collection 3: Asia bbox + collection_asia = base_collection.copy() + collection_asia["id"] = 
f"{test_prefix}-asia" + collection_asia["title"] = "Asia Collection" + collection_asia["extent"] = { + "spatial": {"bbox": [[60.0, -10.0, 150.0, 55.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_asia) + + # Collection 4: Global bbox (should match any query) + collection_global = base_collection.copy() + collection_global["id"] = f"{test_prefix}-global" + collection_global["title"] = "Global Collection" + collection_global["extent"] = { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_global) + + # Collection 5: 3D bbox (with altitude) - should still work for 2D queries + collection_3d = base_collection.copy() + collection_3d["id"] = f"{test_prefix}-3d-europe" + collection_3d["title"] = "3D Europe Collection" + collection_3d["extent"] = { + "spatial": {"bbox": [[-10.0, 35.0, 0.0, 40.0, 70.0, 5000.0]]}, # 3D bbox + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_3d) + + await refresh_indices(txn_client) + + # Test 1: Query for Europe region - should match Europe, Global, and 3D Europe collections + europe_bbox = [0.0, 40.0, 20.0, 60.0] # Central Europe + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, europe_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, europe_bbox)))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"bbox": europe_bbox}, + }, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find Europe, Global, and 3D Europe collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-europe" in found_ids + ), f"Europe collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" in found_ids + ), f"3D Europe collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find North America or Asia + assert ( + f"{test_prefix}-north-america" not in found_ids + ), f"North America should not match Europe bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-asia" not in found_ids + ), f"Asia should not match Europe bbox in {endpoint['method']} {endpoint['path']}" + + # Test 2: Query for North America region - should match North America and Global collections + na_bbox = [-120.0, 30.0, -80.0, 50.0] # Central North America + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, na_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, na_bbox)))], + }, + {"method": "POST", "path": "/collections-search", "body": {"bbox": na_bbox}}, + ] + + for endpoint in endpoints: + if 
endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find North America and Global collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-north-america" in found_ids + ), f"North America collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find Europe, Asia, or 3D Europe + assert ( + f"{test_prefix}-europe" not in found_ids + ), f"Europe should not match North America bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-asia" not in found_ids + ), f"Asia should not match North America bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" not in found_ids + ), f"3D Europe should not match North America bbox in {endpoint['method']} {endpoint['path']}" + + # Test 3: Query for Asia region - should match Asia and Global collections + asia_bbox = [100.0, 20.0, 130.0, 45.0] # East Asia + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, asia_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, asia_bbox)))], + }, + {"method": "POST", "path": "/collections-search", "body": {"bbox": asia_bbox}}, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find Asia and Global collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-asia" in found_ids + ), f"Asia collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find Europe, North America, or 3D Europe + assert ( + f"{test_prefix}-europe" not in found_ids + ), f"Europe should not match Asia bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-north-america" not in found_ids + ), f"North America should not match Asia bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" not in found_ids + ), f"3D Europe should not match Asia bbox in {endpoint['method']} {endpoint['path']}" diff --git a/update_collections_mapping.py b/update_collections_mapping.py new file mode 100644 index 00000000..331d5aa8 --- /dev/null +++ b/update_collections_mapping.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Script to update the collections index mapping to 
add the bbox_shape field. + +This script will: +1. Add the bbox_shape field to the existing collections index +2. Reindex all collections to populate the bbox_shape field + +Usage: + python update_collections_mapping.py +""" + +import asyncio +import os +from unittest.mock import Mock + +from stac_fastapi.core.serializers import CollectionSerializer +from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX + +# Determine which backend to use +BACKEND = os.getenv("BACKEND", "elasticsearch").lower() + +if BACKEND == "opensearch": + from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, + ) +else: + from stac_fastapi.elasticsearch.config import ( + AsyncElasticsearchSettings as AsyncSearchSettings, + ) + + +async def update_mapping(): + """Update the collections index mapping to add bbox_shape field.""" + settings = AsyncSearchSettings() + client = settings.create_client + + print(f"Connecting to {BACKEND}...") + + # Check if index exists + index_name = f"{COLLECTIONS_INDEX}-000001" + exists = await client.indices.exists(index=index_name) + + if not exists: + print(f"Index {index_name} does not exist. Creating it...") + from stac_fastapi.elasticsearch.database_logic import create_collection_index + + await create_collection_index() + print("Index created successfully!") + return + + print(f"Index {index_name} exists. Updating mapping...") + + # Add the bbox_shape field to the mapping + try: + await client.indices.put_mapping( + index=index_name, body={"properties": {"bbox_shape": {"type": "geo_shape"}}} + ) + print("✓ Mapping updated successfully!") + except Exception as e: + print(f"✗ Error updating mapping: {e}") + return + + # Now reindex all collections to populate bbox_shape + print("\nReindexing collections to populate bbox_shape field...") + + try: + # Get all collections + response = await client.search( + index=index_name, + body={ + "query": {"match_all": {}}, + "size": 1000, # Adjust if you have more collections + }, + ) + + collections = response["hits"]["hits"] + print(f"Found {len(collections)} collections to update") + + if len(collections) == 0: + print("No collections to update.") + return + + # Create a mock request for the serializer + mock_request = Mock() + mock_request.base_url = "http://localhost:8080/" + + updated_count = 0 + error_count = 0 + + for hit in collections: + collection = hit["_source"] + collection_id = collection.get("id", "unknown") + + try: + # Use the serializer to convert bbox to bbox_shape + updated_collection = CollectionSerializer.stac_to_db( + collection, mock_request + ) + + # Check if bbox_shape was created + if "bbox_shape" in updated_collection: + # Update the document + await client.update( + index=index_name, + id=hit["_id"], + body={"doc": {"bbox_shape": updated_collection["bbox_shape"]}}, + refresh=True, + ) + print(f" ✓ Updated collection '{collection_id}'") + updated_count += 1 + else: + print(f" ⊘ Collection '{collection_id}' has no bbox to convert") + except Exception as e: + print(f" ✗ Error updating collection '{collection_id}': {e}") + error_count += 1 + + print("\n" + "=" * 60) + print("Summary:") + print(f" Total collections: {len(collections)}") + print(f" Successfully updated: {updated_count}") + print(f" Errors: {error_count}") + print(f" Skipped (no bbox): {len(collections) - updated_count - error_count}") + print("=" * 60) + + except Exception as e: + print(f"✗ Error during reindexing: {e}") + import traceback + + traceback.print_exc() + finally: + await client.close() + + +if 
__name__ == "__main__": + print(f"Using backend: {BACKEND}") + asyncio.run(update_mapping()) From 6dea71543e599c654f4b259a41098b1ae61725b8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 13:30:10 +0800 Subject: [PATCH 4/6] update pagination test --- .../tests/api/test_api_search_collections.py | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 719b85ef..9bd37f0d 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -787,17 +787,35 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) for i, expected_id in enumerate(expected_ids): assert test_found[i]["id"] == expected_id - # Test second page using the token from the first page - if "token" in resp_json and resp_json["token"]: - token = resp_json["token"] - - # Make the request with token + # Test second page using the token from the next link + next_link = None + for link in resp_json.get("links", []): + if link.get("rel") == "next": + next_link = link + break + + if next_link: + # Extract token based on method if endpoint["method"] == "GET": - params = [(endpoint["param"], str(limit)), ("token", token)] - resp = await app_client.get(endpoint["path"], params=params) + # For GET, token is in the URL query params + from urllib.parse import parse_qs, urlparse + + parsed_url = urlparse(next_link["href"]) + query_params = parse_qs(parsed_url.query) + token = query_params.get("token", [None])[0] + + if token: + params = [(endpoint["param"], str(limit)), ("token", token)] + resp = await app_client.get(endpoint["path"], params=params) + else: + continue # Skip if no token found else: # POST - body = {endpoint["body_key"]: limit, "token": token} - resp = await app_client.post(endpoint["path"], json=body) + # For POST, token is in the body + body = next_link.get("body", {}) + if "token" in body: + resp = await app_client.post(endpoint["path"], json=body) + else: + continue # Skip if no token found assert ( resp.status_code == 200 @@ -805,10 +823,7 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) resp_json = resp.json() # Filter to our test collections - if endpoint["path"] == "/collections": - found_collections = resp_json - else: # For collection-search endpoints - found_collections = resp_json["collections"] + found_collections = resp_json["collections"] test_found = [ c for c in found_collections if c["id"].startswith(test_prefix) From 385e310f02ebdbb8bcbb47f63e2fcd7291d0d291 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 13:54:44 +0800 Subject: [PATCH 5/6] update tests to use all endpoints --- CHANGELOG.md | 3 +- .../core/extensions/collections_search.py | 3 + .../tests/api/test_api_search_collections.py | 570 +++++++++++------- 3 files changed, 364 insertions(+), 212 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 635fdca2..4884d658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Fixed -- Issue where token was not being passed from request to POST collections search logic +- Issue where token, query param was not being passed to POST collections search logic - Issue where datetime param was not being passed from POST collections search logic to Elasticsearch +- Collections search tests to ensure both GET /collections and POST /collections-search endpoints are tested [v6.5.0] - 2025-09-29 diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py index 62ec0034..d36197d0 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py @@ -19,6 +19,9 @@ class CollectionsSearchRequest(ExtendedSearch): q: Optional[Union[str, List[str]]] = None token: Optional[str] = None + query: Optional[ + str + ] = None # Legacy query extension (deprecated but still supported) class CollectionsSearchEndpointExtension(ApiExtension): diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 9bd37f0d..19c9c607 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -8,7 +8,7 @@ @pytest.mark.asyncio async def test_collections_sort_id_asc(app_client, txn_client, ctx): - """Verify GET /collections honors ascending sort on id.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor ascending sort on id.""" # Create multiple collections with different ids base_collection = ctx.collection @@ -25,29 +25,48 @@ async def test_collections_sort_id_asc(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Test ascending sort by id - resp = await app_client.get( - "/collections", - params=[("sortby", "+id")], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + # Define endpoints to test + endpoints = [ + {"method": "GET", "path": "/collections", "params": [("sortby", "+id")]}, + { + "method": "GET", + "path": "/collections-search", + "params": [("sortby", "+id")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"sortby": [{"field": "id", "direction": "asc"}]}, + }, ] - # Collections should be sorted alphabetically by id - sorted_ids = sorted(collection_ids) - assert len(test_collections) == len(collection_ids) - for i, expected_id in enumerate(sorted_ids): - assert test_collections[i]["id"] == expected_id + for endpoint in endpoints: + # Test ascending sort by id + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Collections should be sorted alphabetically by id + sorted_ids = sorted(collection_ids) + assert len(test_collections) == len(collection_ids) + for i, expected_id in enumerate(sorted_ids): + assert test_collections[i]["id"] == expected_id @pytest.mark.asyncio async 
def test_collections_sort_id_desc(app_client, txn_client, ctx): - """Verify GET /collections honors descending sort on id.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor descending sort on id.""" # Create multiple collections with different ids base_collection = ctx.collection @@ -64,24 +83,43 @@ async def test_collections_sort_id_desc(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Test descending sort by id - resp = await app_client.get( - "/collections", - params=[("sortby", "-id")], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + # Define endpoints to test + endpoints = [ + {"method": "GET", "path": "/collections", "params": [("sortby", "-id")]}, + { + "method": "GET", + "path": "/collections-search", + "params": [("sortby", "-id")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"sortby": [{"field": "id", "direction": "desc"}]}, + }, ] - # Collections should be sorted in reverse alphabetical order by id - sorted_ids = sorted(collection_ids, reverse=True) - assert len(test_collections) == len(collection_ids) - for i, expected_id in enumerate(sorted_ids): - assert test_collections[i]["id"] == expected_id + for endpoint in endpoints: + # Test descending sort by id + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Collections should be sorted in reverse alphabetical order by id + sorted_ids = sorted(collection_ids, reverse=True) + assert len(test_collections) == len(collection_ids) + for i, expected_id in enumerate(sorted_ids): + assert test_collections[i]["id"] == expected_id @pytest.mark.asyncio @@ -245,7 +283,7 @@ async def test_collections_free_text_all_endpoints( @pytest.mark.asyncio async def test_collections_filter_search(app_client, txn_client, ctx): - """Verify GET /collections honors the filter parameter for structured search.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the filter parameter for structured search.""" # Create multiple collections with different content base_collection = ctx.collection @@ -287,52 +325,97 @@ async def test_collections_filter_search(app_client, txn_client, ctx): # Use the ID of the first test collection for the filter test_collection_id = test_collections[0]["id"] + # Test 1: CQL2-JSON format # Create a simple filter for exact ID match using CQL2-JSON filter_expr = {"op": "=", "args": [{"property": "id"}, test_collection_id]} # Convert to JSON string for URL parameter filter_json = json.dumps(filter_expr) - # Use CQL2-JSON format with explicit filter-lang - resp = await app_client.get( - f"/collections?filter={filter_json}&filter-lang=cql2-json", - ) + # Define endpoints to test + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("filter", filter_json), ("filter-lang", "cql2-json")], + }, + { + "method": "GET", + "path": "/collections-search", + "params": 
[("filter", filter_json), ("filter-lang", "cql2-json")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"filter": filter_expr, "filter-lang": "cql2-json"}, + }, + ] - assert resp.status_code == 200 - resp_json = resp.json() + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Should find exactly one collection with the specified ID - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() - assert ( - len(found_collections) == 1 - ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)}" - assert found_collections[0]["id"] == test_collection_id + # Should find exactly one collection with the specified ID + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] - # Test 2: CQL2-text format with LIKE operator for more advanced filtering - # Use a filter that will match the test collection ID we created - filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'" + assert ( + len(found_collections) == 1 + ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)} for {endpoint['method']} {endpoint['path']}" + assert found_collections[0]["id"] == test_collection_id - resp = await app_client.get( - f"/collections?filter={filter_text}&filter-lang=cql2-text", - ) - assert resp.status_code == 200 - resp_json = resp.json() + # Test 2: CQL2-text format with LIKE operator + filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'" - # Should find the test collection we created - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("filter", filter_text), ("filter-lang", "cql2-text")], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("filter", filter_text), ("filter-lang", "cql2-text")], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"filter": filter_text, "filter-lang": "cql2-text"}, + }, ] - assert ( - len(found_collections) >= 1 - ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter" + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Should find the test collection we created + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + assert ( + len(found_collections) >= 1 + ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter for {endpoint['method']} {endpoint['path']}" @pytest.mark.asyncio async def test_collections_query_extension(app_client, txn_client, ctx): - """Verify GET /collections honors the query extension.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the query extension.""" # Create multiple collections with different content base_collection = ctx.collection # Use unique prefixes to avoid conflicts 
between tests @@ -370,75 +453,100 @@ async def test_collections_query_extension(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Use the exact ID that was created + # Test 1: Query with equal operator sentinel_id = f"{test_prefix}-sentinel" - query = {"id": {"eq": sentinel_id}} - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("query", json.dumps(query))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("query", json.dumps(query))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"query": json.dumps(query)}, + }, ] - # Should only find the sentinel collection - assert len(found_collections) == 1 - assert found_collections[0]["id"] == f"{test_prefix}-sentinel" - - # Test query extension with equal operator on ID - query = {"id": {"eq": f"{test_prefix}-sentinel"}} + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() - # Filter collections to only include the ones we created for this test - found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) - ] - found_ids = [c["id"] for c in found_collections] + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] - # Should find landsat and modis collections but not sentinel - assert len(found_collections) == 1 - assert f"{test_prefix}-sentinel" in found_ids - assert f"{test_prefix}-landsat" not in found_ids - assert f"{test_prefix}-modis" not in found_ids + # Should only find the sentinel collection + assert ( + len(found_collections) == 1 + ), f"Expected 1 collection for {endpoint['method']} {endpoint['path']}" + assert found_collections[0]["id"] == sentinel_id - # Test query extension with not-equal operator on ID + # Test 2: Query with not-equal operator query = {"id": {"neq": f"{test_prefix}-sentinel"}} - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - found_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("query", json.dumps(query))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("query", json.dumps(query))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"query": json.dumps(query)}, + }, ] - found_ids = [c["id"] for c in found_collections] - # Should find landsat and modis collections but not sentinel - assert len(found_collections) == 2 - assert f"{test_prefix}-sentinel" not in found_ids 
- assert f"{test_prefix}-landsat" in found_ids - assert f"{test_prefix}-modis" in found_ids + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + found_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + found_ids = [c["id"] for c in found_collections] + + # Should find landsat and modis collections but not sentinel + assert ( + len(found_collections) == 2 + ), f"Expected 2 collections for {endpoint['method']} {endpoint['path']}" + assert f"{test_prefix}-sentinel" not in found_ids + assert f"{test_prefix}-landsat" in found_ids + assert f"{test_prefix}-modis" in found_ids @pytest.mark.asyncio async def test_collections_datetime_filter(app_client, load_test_data, txn_client): - """Test filtering collections by datetime.""" + """Test filtering collections by datetime across all endpoints.""" # Create a test collection with a specific temporal extent base_collection = load_test_data("test_collection.json") @@ -450,66 +558,71 @@ async def test_collections_datetime_filter(app_client, load_test_data, txn_clien await create_collection(txn_client, base_collection) await refresh_indices(txn_client) - # Test 1: Datetime range that overlaps with collection's temporal extent - resp = await app_client.get( - "/collections?datetime=2020-06-01T00:00:00Z/2021-01-01T00:00:00Z" - ) - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 1 - ), f"Expected to find collection {test_collection_id} with overlapping datetime range" - - # Test 2: Datetime range that is completely before collection's temporal extent - resp = await app_client.get( - "/collections?datetime=2019-01-01T00:00:00Z/2019-12-31T23:59:59Z" - ) - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id + # Test scenarios with different datetime ranges + test_scenarios = [ + { + "name": "overlapping range", + "datetime": "2020-06-01T00:00:00Z/2021-01-01T00:00:00Z", + "expected_count": 1, + }, + { + "name": "before range", + "datetime": "2019-01-01T00:00:00Z/2019-12-31T23:59:59Z", + "expected_count": 0, + }, + { + "name": "after range", + "datetime": "2021-01-01T00:00:00Z/2021-12-31T23:59:59Z", + "expected_count": 0, + }, + { + "name": "single datetime within range", + "datetime": "2020-06-15T12:00:00Z", + "expected_count": 1, + }, + { + "name": "open-ended future range", + "datetime": "2020-06-01T00:00:00Z/..", + "expected_count": 1, + }, ] - assert ( - len(found_collections) == 0 - ), f"Expected not to find collection {test_collection_id} with non-overlapping datetime range" - # Test 3: Datetime range that is completely after collection's temporal extent - resp = await app_client.get( - "/collections?datetime=2021-01-01T00:00:00Z/2021-12-31T23:59:59Z" - ) - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 0 - ), f"Expected not to find collection 
{test_collection_id} with non-overlapping datetime range" + for scenario in test_scenarios: + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("datetime", scenario["datetime"])], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("datetime", scenario["datetime"])], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"datetime": scenario["datetime"]}, + }, + ] - # Test 4: Single datetime that falls within collection's temporal extent - resp = await app_client.get("/collections?datetime=2020-06-15T12:00:00Z") - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 1 - ), f"Expected to find collection {test_collection_id} with datetime point within range" + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Test 5: Open-ended range (from a specific date to the future) - resp = await app_client.get("/collections?datetime=2020-06-01T00:00:00Z/..") - assert resp.status_code == 200 - resp_json = resp.json() - found_collections = [ - c for c in resp_json["collections"] if c["id"] == test_collection_id - ] - assert ( - len(found_collections) == 1 - ), f"Expected to find collection {test_collection_id} with open-ended future range" + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']} with {scenario['name']}" + resp_json = resp.json() + found_collections = [ + c for c in resp_json["collections"] if c["id"] == test_collection_id + ] + assert len(found_collections) == scenario["expected_count"], ( + f"Expected {scenario['expected_count']} collection(s) for {scenario['name']} " + f"on {endpoint['method']} {endpoint['path']}, found {len(found_collections)}" + ) # Test 6: Open-ended range (from the past to a date within the collection's range) # TODO: This test is currently skipped due to an unresolved issue with open-ended past range queries. 
@@ -528,7 +641,7 @@ async def test_collections_datetime_filter(app_client, load_test_data, txn_clien @pytest.mark.asyncio async def test_collections_number_matched_returned(app_client, txn_client, ctx): - """Verify GET /collections returns correct numberMatched and numberReturned values.""" + """Verify GET /collections, GET /collections-search, and POST /collections-search return correct numberMatched and numberReturned values.""" # Create multiple collections with different ids base_collection = ctx.collection @@ -545,56 +658,91 @@ async def test_collections_number_matched_returned(app_client, txn_client, ctx): await refresh_indices(txn_client) - # Test with limit=5 - resp = await app_client.get( - "/collections", - params=[("limit", "5")], - ) - assert resp.status_code == 200 - resp_json = resp.json() - - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + # Test 1: With limit=5 + endpoints = [ + {"method": "GET", "path": "/collections", "params": [("limit", "5")]}, + {"method": "GET", "path": "/collections-search", "params": [("limit", "5")]}, + {"method": "POST", "path": "/collections-search", "body": {"limit": 5}}, ] - # Should return 5 collections - assert len(test_collections) == 5 + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Check that numberReturned matches the number of collections returned - assert resp_json["numberReturned"] == len(resp_json["collections"]) + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() - # Check that numberMatched is greater than or equal to numberReturned - # (since there might be other collections in the database) - assert resp_json["numberMatched"] >= resp_json["numberReturned"] + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should return 5 collections + assert ( + len(test_collections) == 5 + ), f"Expected 5 test collections for {endpoint['method']} {endpoint['path']}" - # Check that numberMatched includes at least all our test collections - assert resp_json["numberMatched"] >= len(collection_ids) + # Check that numberReturned matches the number of collections returned + assert resp_json["numberReturned"] == len(resp_json["collections"]) - # Now test with a query that should match only some collections + # Check that numberMatched is greater than or equal to numberReturned + assert resp_json["numberMatched"] >= resp_json["numberReturned"] + + # Check that numberMatched includes at least all our test collections + assert resp_json["numberMatched"] >= len(collection_ids) + + # Test 2: With a query that should match only one collection query = {"id": {"eq": f"{test_prefix}-1"}} - resp = await app_client.get( - "/collections", - params=[("query", json.dumps(query))], - ) - assert resp.status_code == 200 - resp_json = resp.json() - # Filter collections to only include the ones we created for this test - test_collections = [ - c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("query", json.dumps(query))], + }, + { + "method": "GET", + "path": "/collections-search", 
+ "params": [("query", json.dumps(query))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"query": json.dumps(query)}, + }, ] - # Should return only 1 collection - assert len(test_collections) == 1 - assert test_collections[0]["id"] == f"{test_prefix}-1" + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) - # Check that numberReturned matches the number of collections returned - assert resp_json["numberReturned"] == len(resp_json["collections"]) + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}" + resp_json = resp.json() + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in resp_json["collections"] if c["id"].startswith(test_prefix) + ] + + # Should return only 1 collection + assert ( + len(test_collections) == 1 + ), f"Expected 1 test collection for {endpoint['method']} {endpoint['path']}" + assert test_collections[0]["id"] == f"{test_prefix}-1" + + # Check that numberReturned matches the number of collections returned + assert resp_json["numberReturned"] == len(resp_json["collections"]) - # Check that numberMatched matches the number of collections that match the query - # (should be 1 in this case) - assert resp_json["numberMatched"] >= 1 + # Check that numberMatched matches the number of collections that match the query + assert resp_json["numberMatched"] >= 1 @pytest.mark.asyncio From 3f474f995d60ce6ee5f18089eb9318ccafd579b8 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 30 Sep 2025 15:54:05 +0800 Subject: [PATCH 6/6] changelog fix --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b00f0ac..bb673884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed +[v6.5.1] - 2025-09-30 + +### Fixed + - Issue where token, query param was not being passed to POST collections search logic [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) - Issue where datetime param was not being passed from POST collections search logic to Elasticsearch [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483) - Collections search tests to ensure both GET /collections and GET/POST /collections-search endpoints are tested [#483](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/483)