diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd11060..bb673884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries. +- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments. + ### Changed ### Fixed diff --git a/recreate_collections_index.py b/recreate_collections_index.py new file mode 100644 index 00000000..047a1467 --- /dev/null +++ b/recreate_collections_index.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +Script to delete and recreate the collections index. + +WARNING: This will DELETE all existing collections! +Only use this in development environments. + +Usage: + python recreate_collections_index.py +""" + +import asyncio +import os +import sys + +from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX + +# Determine which backend to use +BACKEND = os.getenv("BACKEND", "elasticsearch").lower() + +if BACKEND == "opensearch": + from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, + ) + from stac_fastapi.opensearch.database_logic import ( + create_collection_index, + create_index_templates, + ) +else: + from stac_fastapi.elasticsearch.config import ( + AsyncElasticsearchSettings as AsyncSearchSettings, + ) + from stac_fastapi.elasticsearch.database_logic import ( + create_collection_index, + create_index_templates, + ) + + +async def recreate_index(): + """Delete and recreate the collections index.""" + settings = AsyncSearchSettings() + client = settings.create_client + + print(f"Using backend: {BACKEND}") + print(f"\n{'=' * 60}") + print("WARNING: This will DELETE all existing collections!") + print(f"{'=' * 60}\n") + + # Check if running in production + env = os.getenv("ENVIRONMENT", "development").lower() + if env == "production": + print("ERROR: This script should not be run in production!") + print("Use update_collections_mapping.py instead.") + sys.exit(1) + + response = input("Are you sure you want to continue? 
(yes/no): ") + if response.lower() != "yes": + print("Aborted.") + sys.exit(0) + + try: + # Delete the collections index + index_name = f"{COLLECTIONS_INDEX}-000001" + alias_name = COLLECTIONS_INDEX + + print(f"\nDeleting index {index_name}...") + exists = await client.indices.exists(index=index_name) + if exists: + await client.indices.delete(index=index_name) + print(f"✓ Deleted index {index_name}") + else: + print(f"⊘ Index {index_name} does not exist") + + # Check if alias exists and delete it + alias_exists = await client.indices.exists_alias(name=alias_name) + if alias_exists: + print(f"Deleting alias {alias_name}...") + await client.indices.delete_alias( + index="_all", name=alias_name, ignore=[404] + ) + print(f"✓ Deleted alias {alias_name}") + + # Recreate index templates + print("\nRecreating index templates...") + await create_index_templates() + print("✓ Index templates created") + + # Recreate the collections index + print("\nRecreating collections index...") + await create_collection_index() + print("✓ Collections index created") + + # Verify the mapping includes bbox_shape + print("\nVerifying mapping...") + mapping = await client.indices.get_mapping(index=index_name) + properties = mapping[index_name]["mappings"]["properties"] + + if "bbox_shape" in properties: + print( + f"✓ bbox_shape field is present in mapping: {properties['bbox_shape']}" + ) + else: + print("✗ WARNING: bbox_shape field is NOT in the mapping!") + + print("\n" + "=" * 60) + print("Collections index successfully recreated!") + print("You can now create collections with bbox_shape support.") + print("=" * 60) + + except Exception as e: + print(f"\n✗ Error: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + finally: + await client.close() + + +if __name__ == "__main__": + asyncio.run(recreate_index()) diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index c592b6d2..c0a85498 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -3,6 +3,8 @@ import abc from typing import Any, Dict, Iterable, List, Optional, Tuple +from stac_pydantic.shared import BBox + class BaseDatabaseLogic(abc.ABC): """ @@ -19,6 +21,7 @@ async def get_all_collections( limit: int, request: Any = None, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[BBox] = None, ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """Retrieve a list of collections from the database, supporting pagination. @@ -27,6 +30,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Any, optional): The FastAPI request object. Defaults to None. sort (Optional[List[Dict[str, Any]]], optional): Optional sort parameter. Defaults to None. + bbox (Optional[BBox], optional): Bounding box to filter collections by spatial extent. Defaults to None. Returns: A tuple of (collections, next pagination token if any). 
diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 143b4d5a..e2bdf495 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -241,6 +241,7 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage: async def all_collections( self, limit: Optional[int] = None, + bbox: Optional[BBox] = None, datetime: Optional[str] = None, fields: Optional[List[str]] = None, sortby: Optional[Union[str, List[str]]] = None, @@ -401,6 +402,7 @@ async def all_collections( limit=limit, request=request, sort=sort, + bbox=bbox, q=q_list, filter=parsed_filter, query=parsed_query, @@ -502,6 +504,7 @@ async def post_all_collections( # Pass all parameters from search_request to all_collections return await self.all_collections( limit=search_request.limit if hasattr(search_request, "limit") else None, + bbox=search_request.bbox if hasattr(search_request, "bbox") else None, datetime=search_request.datetime if hasattr(search_request, "datetime") else None, diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py index 1700ac59..a8c17d2b 100644 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ b/stac_fastapi/core/stac_fastapi/core/serializers.py @@ -1,6 +1,7 @@ """Serializers.""" import abc +import logging from copy import deepcopy from typing import Any, List, Optional @@ -9,10 +10,12 @@ from stac_fastapi.core.datetime_utils import now_to_rfc3339_str from stac_fastapi.core.models.links import CollectionLinks -from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.core.utilities import bbox2polygon, get_bool_env from stac_fastapi.types import stac as stac_types from stac_fastapi.types.links import ItemLinks, resolve_links +logger = logging.getLogger(__name__) + @attr.s class Serializer(abc.ABC): @@ -141,6 +144,53 @@ def stac_to_db( collection.get("links", []), str(request.base_url) ) + # Convert bbox to bbox_shape for geospatial queries + if "extent" in collection and "spatial" in collection["extent"]: + spatial_extent = collection["extent"]["spatial"] + if "bbox" in spatial_extent and spatial_extent["bbox"]: + # Get the first bbox (collections can have multiple bboxes, but we use the first one) + bbox = ( + spatial_extent["bbox"][0] + if isinstance(spatial_extent["bbox"][0], list) + else spatial_extent["bbox"] + ) + collection_id = collection.get("id", "unknown") + logger.debug( + f"Converting bbox to bbox_shape for collection '{collection_id}': bbox={bbox}" + ) + + if len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For 2D polygon, we only need the x,y coordinates and discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox: [minx, miny, maxx, maxy] + maxx, maxy = bbox[2], bbox[3] + logger.debug( + f"Collection '{collection_id}': Processing 2D bbox" + ) + else: + # 3D bbox: [minx, miny, minz, maxx, maxy, maxz] + # Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + logger.debug( + f"Collection '{collection_id}': Processing 3D bbox, discarding altitude values at indices 2 and 5" + ) + + # Convert bbox to GeoJSON polygon + bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy) + collection["bbox_shape"] = { + "type": "Polygon", + "coordinates": bbox_polygon_coords, + } + logger.info( + f"Collection '{collection_id}': Created 
bbox_shape from bbox [{minx}, {miny}, {maxx}, {maxy}]" + ) + else: + logger.warning( + f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4" + ) + if get_bool_env("STAC_INDEX_ASSETS"): collection["assets"] = [ {"es_key": k, **v} for k, v in collection.get("assets", {}).items() @@ -168,6 +218,9 @@ def db_to_stac( # Avoid modifying the input dict in-place ... doing so breaks some tests collection = deepcopy(collection) + # Remove internal bbox_shape field (not part of STAC spec) + collection.pop("bbox_shape", None) + # Set defaults collection_id = collection.get("id") collection.setdefault("type", "Collection") diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 9c136411..699e436a 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -175,6 +175,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[List[float]] = None, q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, query: Optional[Dict[str, Dict[str, Any]]] = None, @@ -187,6 +188,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent. q (Optional[List[str]]): Free text search terms. query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters. filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. 
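Both the serializer above and the two query paths below rely on the same 2D/3D normalization: per the STAC spec, the first bbox in a collection's spatial extent describes the overall extent (subsequent entries refine it), so indexing only the first one is sufficient, and altitude values in a 6-element bbox are dropped. A self-contained sketch of that normalization, with the ring construction inlined in place of the `bbox2polygon` helper (assumed to produce an equivalent closed counter-clockwise ring):

from typing import Any, Dict, List


def bbox_to_geojson_polygon(bbox: List[float]) -> Dict[str, Any]:
    """Normalize a 2D (4-element) or 3D (6-element) STAC bbox to a 2D GeoJSON Polygon."""
    if len(bbox) not in (4, 6):
        raise ValueError(f"Expected 4 or 6 coordinates, got {len(bbox)}")
    minx, miny = bbox[0], bbox[1]
    if len(bbox) == 4:
        maxx, maxy = bbox[2], bbox[3]  # 2D: [minx, miny, maxx, maxy]
    else:
        maxx, maxy = bbox[3], bbox[4]  # 3D: drop minz (index 2) and maxz (index 5)
    return {
        "type": "Polygon",
        "coordinates": [
            [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy], [minx, miny]]
        ],
    }


# A 3D bbox and its 2D counterpart normalize to the same polygon:
assert bbox_to_geojson_polygon([-10.0, 35.0, 0.0, 40.0, 70.0, 5000.0]) == (
    bbox_to_geojson_polygon([-10.0, 35.0, 40.0, 70.0])
)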
@@ -314,6 +316,44 @@ async def get_all_collections( query_parts.append({"bool": {"must_not": {"match_all": {}}}}) raise + # Apply bbox filter if provided + if bbox: + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + bbox = None + + if bbox and len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For geospatial queries, we discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(minx, miny, maxx, maxy), + } + # Add geo_shape query to filter collections by bbox_shape field + query_parts.append( + { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + ) + # Combine all query parts with AND logic if there are multiple datetime_filter = None if datetime: diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index d16e8215..68239414 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -159,6 +159,7 @@ async def get_all_collections( limit: int, request: Request, sort: Optional[List[Dict[str, Any]]] = None, + bbox: Optional[List[float]] = None, q: Optional[List[str]] = None, filter: Optional[Dict[str, Any]] = None, query: Optional[Dict[str, Dict[str, Any]]] = None, @@ -171,6 +172,7 @@ async def get_all_collections( limit (int): The number of results to return. request (Request): The FastAPI request object. sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request. + bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent. q (Optional[List[str]]): Free text search terms. query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters. filter (Optional[Dict[str, Any]]): Structured query in CQL2 format. 
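For reference, the filter clause both backends append for a request such as GET /collections?bbox=0.0,40.0,20.0,60.0 — a sketch of the resulting query DSL, assuming `bbox2polygon` emits a closed counter-clockwise ring:

# The geo_shape clause appended to query_parts for bbox=[0.0, 40.0, 20.0, 60.0]
geo_shape_filter = {
    "geo_shape": {
        "bbox_shape": {
            "shape": {
                "type": "Polygon",
                "coordinates": [
                    [[0.0, 40.0], [20.0, 40.0], [20.0, 60.0], [0.0, 60.0], [0.0, 40.0]]
                ],
            },
            "relation": "intersects",
        }
    }
}

# query_parts are later combined under AND semantics, roughly:
search_body = {"query": {"bool": {"must": [geo_shape_filter]}}}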
@@ -298,6 +300,44 @@ async def get_all_collections( query_parts.append({"bool": {"must_not": {"match_all": {}}}}) raise + # Apply bbox filter if provided + if bbox: + # Parse bbox if it's a string (from GET requests) + if isinstance(bbox, str): + try: + bbox = [float(x.strip()) for x in bbox.split(",")] + except (ValueError, AttributeError) as e: + logger.error(f"Invalid bbox format: {bbox}, error: {e}") + bbox = None + + if bbox and len(bbox) >= 4: + # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz]) + # For geospatial queries, we discard altitude (z) values + minx, miny = bbox[0], bbox[1] + if len(bbox) == 4: + # 2D bbox + maxx, maxy = bbox[2], bbox[3] + else: + # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz) + maxx, maxy = bbox[3], bbox[4] + + # Convert bbox to a polygon for geo_shape query + bbox_polygon = { + "type": "Polygon", + "coordinates": bbox2polygon(minx, miny, maxx, maxy), + } + # Add geo_shape query to filter collections by bbox_shape field + query_parts.append( + { + "geo_shape": { + "bbox_shape": { + "shape": bbox_polygon, + "relation": "intersects", + } + } + } + ) + # Combine all query parts with AND logic if there are multiple datetime_filter = None if datetime: diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index b2d7264d..18f6b816 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -160,7 +160,9 @@ class Geometry(Protocol): # noqa "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, "properties": { "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, + "title": {"type": "text"}, + "description": {"type": "text"}, + "bbox_shape": {"type": "geo_shape"}, "extent.temporal.interval": { "type": "date", "format": "strict_date_optional_time||epoch_millis", diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index 029292ed..19c9c607 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -1020,3 +1020,232 @@ async def test_collections_pagination_all_endpoints(app_client, txn_client, ctx) for i, expected_id in enumerate(expected_ids): assert test_found[i]["id"] == expected_id + + +@pytest.mark.asyncio +async def test_collections_bbox_all_endpoints(app_client, txn_client, ctx): + """Verify GET /collections, GET /collections-search, and POST /collections-search honor the bbox parameter.""" + # Create multiple collections with different spatial extents + base_collection = ctx.collection + + # Use unique prefixes to avoid conflicts between tests + test_prefix = f"bbox-{uuid.uuid4().hex[:8]}" + + # Create collections with different bboxes + # Collection 1: Europe bbox + collection_europe = base_collection.copy() + collection_europe["id"] = f"{test_prefix}-europe" + collection_europe["title"] = "Europe Collection" + collection_europe["extent"] = { + "spatial": {"bbox": [[-10.0, 35.0, 40.0, 70.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_europe) + + # Collection 2: North America bbox + collection_na = base_collection.copy() + collection_na["id"] = f"{test_prefix}-north-america" + collection_na["title"] = "North America Collection" + collection_na["extent"] = { + 
"spatial": {"bbox": [[-170.0, 15.0, -50.0, 75.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_na) + + # Collection 3: Asia bbox + collection_asia = base_collection.copy() + collection_asia["id"] = f"{test_prefix}-asia" + collection_asia["title"] = "Asia Collection" + collection_asia["extent"] = { + "spatial": {"bbox": [[60.0, -10.0, 150.0, 55.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_asia) + + # Collection 4: Global bbox (should match any query) + collection_global = base_collection.copy() + collection_global["id"] = f"{test_prefix}-global" + collection_global["title"] = "Global Collection" + collection_global["extent"] = { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_global) + + # Collection 5: 3D bbox (with altitude) - should still work for 2D queries + collection_3d = base_collection.copy() + collection_3d["id"] = f"{test_prefix}-3d-europe" + collection_3d["title"] = "3D Europe Collection" + collection_3d["extent"] = { + "spatial": {"bbox": [[-10.0, 35.0, 0.0, 40.0, 70.0, 5000.0]]}, # 3D bbox + "temporal": {"interval": [[None, None]]}, + } + await create_collection(txn_client, collection_3d) + + await refresh_indices(txn_client) + + # Test 1: Query for Europe region - should match Europe, Global, and 3D Europe collections + europe_bbox = [0.0, 40.0, 20.0, 60.0] # Central Europe + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, europe_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, europe_bbox)))], + }, + { + "method": "POST", + "path": "/collections-search", + "body": {"bbox": europe_bbox}, + }, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find Europe, Global, and 3D Europe collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-europe" in found_ids + ), f"Europe collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" in found_ids + ), f"3D Europe collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find North America or Asia + assert ( + f"{test_prefix}-north-america" not in found_ids + ), f"North America should not match Europe bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-asia" not in found_ids + ), f"Asia should not match Europe bbox in {endpoint['method']} {endpoint['path']}" + + # Test 2: Query for North America region - should match North America and Global collections + na_bbox = [-120.0, 30.0, -80.0, 50.0] # Central North America + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, 
na_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, na_bbox)))], + }, + {"method": "POST", "path": "/collections-search", "body": {"bbox": na_bbox}}, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find North America and Global collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-north-america" in found_ids + ), f"North America collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find Europe, Asia, or 3D Europe + assert ( + f"{test_prefix}-europe" not in found_ids + ), f"Europe should not match North America bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-asia" not in found_ids + ), f"Asia should not match North America bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" not in found_ids + ), f"3D Europe should not match North America bbox in {endpoint['method']} {endpoint['path']}" + + # Test 3: Query for Asia region - should match Asia and Global collections + asia_bbox = [100.0, 20.0, 130.0, 45.0] # East Asia + + endpoints = [ + { + "method": "GET", + "path": "/collections", + "params": [("bbox", ",".join(map(str, asia_bbox)))], + }, + { + "method": "GET", + "path": "/collections-search", + "params": [("bbox", ",".join(map(str, asia_bbox)))], + }, + {"method": "POST", "path": "/collections-search", "body": {"bbox": asia_bbox}}, + ] + + for endpoint in endpoints: + if endpoint["method"] == "GET": + resp = await app_client.get(endpoint["path"], params=endpoint["params"]) + else: # POST + resp = await app_client.post(endpoint["path"], json=endpoint["body"]) + + assert ( + resp.status_code == 200 + ), f"Failed for {endpoint['method']} {endpoint['path']}: {resp.text}" + resp_json = resp.json() + + collections_list = resp_json["collections"] + + # Filter collections to only include the ones we created for this test + test_collections = [ + c for c in collections_list if c["id"].startswith(test_prefix) + ] + + # Should find Asia and Global collections + found_ids = {c["id"] for c in test_collections} + assert ( + f"{test_prefix}-asia" in found_ids + ), f"Asia collection not found {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-global" in found_ids + ), f"Global collection not found {endpoint['method']} {endpoint['path']}" + # Should NOT find Europe, North America, or 3D Europe + assert ( + f"{test_prefix}-europe" not in found_ids + ), f"Europe should not match Asia bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-north-america" not in found_ids + ), f"North America should not match Asia bbox in {endpoint['method']} {endpoint['path']}" + assert ( + f"{test_prefix}-3d-europe" not in found_ids + ), f"3D Europe should not match Asia bbox in {endpoint['method']} {endpoint['path']}" diff --git 
a/update_collections_mapping.py b/update_collections_mapping.py new file mode 100644 index 00000000..331d5aa8 --- /dev/null +++ b/update_collections_mapping.py @@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""
+Script to update the collections index mapping to add the bbox_shape field.
+
+This script will:
+1. Add the bbox_shape field to the existing collections index
+2. Update each existing collection document in place to populate the bbox_shape field
+
+Usage:
+    python update_collections_mapping.py
+"""
+
+import asyncio
+import os
+from unittest.mock import Mock
+
+from stac_fastapi.core.serializers import CollectionSerializer
+from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX
+
+# Determine which backend to use
+BACKEND = os.getenv("BACKEND", "elasticsearch").lower()
+
+if BACKEND == "opensearch":
+    from stac_fastapi.opensearch.config import (
+        AsyncOpensearchSettings as AsyncSearchSettings,
+    )
+else:
+    from stac_fastapi.elasticsearch.config import (
+        AsyncElasticsearchSettings as AsyncSearchSettings,
+    )
+
+
+async def update_mapping():
+    """Update the collections index mapping to add bbox_shape field."""
+    settings = AsyncSearchSettings()
+    client = settings.create_client
+
+    print(f"Connecting to {BACKEND}...")
+
+    # Check if index exists
+    index_name = f"{COLLECTIONS_INDEX}-000001"
+    exists = await client.indices.exists(index=index_name)
+
+    if not exists:
+        print(f"Index {index_name} does not exist. Creating it...")
+        # Import from the backend actually in use
+        if BACKEND == "opensearch":
+            from stac_fastapi.opensearch.database_logic import (
+                create_collection_index,
+            )
+        else:
+            from stac_fastapi.elasticsearch.database_logic import (
+                create_collection_index,
+            )
+
+        await create_collection_index()
+        print("Index created successfully!")
+        await client.close()
+        return
+
+    print(f"Index {index_name} exists. Updating mapping...")
+
+    # Add the bbox_shape field to the mapping
+    try:
+        await client.indices.put_mapping(
+            index=index_name, body={"properties": {"bbox_shape": {"type": "geo_shape"}}}
+        )
+        print("✓ Mapping updated successfully!")
+    except Exception as e:
+        print(f"✗ Error updating mapping: {e}")
+        await client.close()
+        return
+
+    # Now update all existing collections in place to populate bbox_shape
+    print("\nUpdating collections to populate bbox_shape field...")
+
+    try:
+        # Get all collections
+        response = await client.search(
+            index=index_name,
+            body={
+                "query": {"match_all": {}},
+                "size": 1000, # Adjust if you have more collections
+            },
+        )
+
+        collections = response["hits"]["hits"]
+        print(f"Found {len(collections)} collections to update")
+
+        if len(collections) == 0:
+            print("No collections to update.")
+            return
+
+        # Create a mock request for the serializer
+        mock_request = Mock()
+        mock_request.base_url = "http://localhost:8080/"
+
+        updated_count = 0
+        error_count = 0
+
+        for hit in collections:
+            collection = hit["_source"]
+            collection_id = collection.get("id", "unknown")
+
+            try:
+                # Use the serializer to convert bbox to bbox_shape
+                updated_collection = CollectionSerializer.stac_to_db(
+                    collection, mock_request
+                )
+
+                # Check if bbox_shape was created
+                if "bbox_shape" in updated_collection:
+                    # Update the document
+                    await client.update(
+                        index=index_name,
+                        id=hit["_id"],
+                        body={"doc": {"bbox_shape": updated_collection["bbox_shape"]}},
+                        refresh=True,
+                    )
+                    print(f" ✓ Updated collection '{collection_id}'")
+                    updated_count += 1
+                else:
+                    print(f" ⊘ Collection '{collection_id}' has no bbox to convert")
+            except Exception as e:
+                print(f" ✗ Error updating collection '{collection_id}': {e}")
+                error_count += 1
+
+        print("\n" + "=" * 60)
+        print("Summary:")
+        print(f" Total collections: {len(collections)}")
+        print(f" Successfully updated: {updated_count}")
+        print(f" Errors: {error_count}")
+        print(f" Skipped (no bbox): {len(collections) - updated_count - error_count}")
+        print("=" * 60)
+
+    except Exception as e:
+        print(f"✗ Error during update: {e}")
+        import traceback
+
+        traceback.print_exc()
+    finally:
+        await client.close()
+
+
+if __name__ == "__main__":
+    print(f"Using backend: {BACKEND}")
+    asyncio.run(update_mapping())