3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Added

- Spatial search support for collections via the `bbox` parameter on the `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (a GeoJSON polygon) derived from their spatial extent for efficient geospatial queries.
- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add the `bbox_shape` field to existing deployments.

### Changed

### Fixed
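A minimal sketch of the `bbox` parameter added above in use, assuming a local deployment at `http://localhost:8080` (host and port are assumptions); only collections whose spatial extent intersects the query box are returned:

```python
# Hedged usage sketch: filter collections whose spatial extent intersects
# a bounding box over Western Europe. Host and port are assumptions.
import requests

resp = requests.get(
    "http://localhost:8080/collections",
    params={"bbox": "-10.0,35.0,10.0,55.0", "limit": 10},
)
resp.raise_for_status()
for coll in resp.json()["collections"]:
    print(coll["id"], coll["extent"]["spatial"]["bbox"])
```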
121 changes: 121 additions & 0 deletions recreate_collections_index.py
@@ -0,0 +1,121 @@
#!/usr/bin/env python3
"""
Script to delete and recreate the collections index.

WARNING: This will DELETE all existing collections!
Only use this in development environments.

Usage:
    python recreate_collections_index.py
"""

import asyncio
import os
import sys

from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX

# Determine which backend to use
BACKEND = os.getenv("BACKEND", "elasticsearch").lower()

if BACKEND == "opensearch":
    from stac_fastapi.opensearch.config import (
        AsyncOpensearchSettings as AsyncSearchSettings,
    )
    from stac_fastapi.opensearch.database_logic import (
        create_collection_index,
        create_index_templates,
    )
else:
    from stac_fastapi.elasticsearch.config import (
        AsyncElasticsearchSettings as AsyncSearchSettings,
    )
    from stac_fastapi.elasticsearch.database_logic import (
        create_collection_index,
        create_index_templates,
    )


async def recreate_index():
    """Delete and recreate the collections index."""
    settings = AsyncSearchSettings()
    client = settings.create_client

    print(f"Using backend: {BACKEND}")
    print(f"\n{'=' * 60}")
    print("WARNING: This will DELETE all existing collections!")
    print(f"{'=' * 60}\n")

    # Check if running in production
    env = os.getenv("ENVIRONMENT", "development").lower()
    if env == "production":
        print("ERROR: This script should not be run in production!")
        print("Use update_collections_mapping.py instead.")
        sys.exit(1)

    response = input("Are you sure you want to continue? (yes/no): ")
    if response.lower() != "yes":
        print("Aborted.")
        sys.exit(0)

    try:
        # Delete the collections index
        index_name = f"{COLLECTIONS_INDEX}-000001"
        alias_name = COLLECTIONS_INDEX

        print(f"\nDeleting index {index_name}...")
        exists = await client.indices.exists(index=index_name)
        if exists:
            await client.indices.delete(index=index_name)
            print(f"✓ Deleted index {index_name}")
        else:
            print(f"⊘ Index {index_name} does not exist")

        # Check if alias exists and delete it
        alias_exists = await client.indices.exists_alias(name=alias_name)
        if alias_exists:
            print(f"Deleting alias {alias_name}...")
            await client.indices.delete_alias(
                index="_all", name=alias_name, ignore=[404]
            )
            print(f"✓ Deleted alias {alias_name}")

        # Recreate index templates
        print("\nRecreating index templates...")
        await create_index_templates()
        print("✓ Index templates created")

        # Recreate the collections index
        print("\nRecreating collections index...")
        await create_collection_index()
        print("✓ Collections index created")

        # Verify the mapping includes bbox_shape
        print("\nVerifying mapping...")
        mapping = await client.indices.get_mapping(index=index_name)
        properties = mapping[index_name]["mappings"]["properties"]

        if "bbox_shape" in properties:
            print(
                f"✓ bbox_shape field is present in mapping: {properties['bbox_shape']}"
            )
        else:
            print("✗ WARNING: bbox_shape field is NOT in the mapping!")

        print("\n" + "=" * 60)
        print("Collections index successfully recreated!")
        print("You can now create collections with bbox_shape support.")
        print("=" * 60)

    except Exception as e:
        print(f"\n✗ Error: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
    finally:
        await client.close()


if __name__ == "__main__":
    asyncio.run(recreate_index())
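After running the script, the new mapping can be spot-checked independently; a sketch assuming the Elasticsearch backend on `localhost:9200` and the default `collections` alias (both assumptions):

```python
# Hedged verification sketch: confirm bbox_shape is mapped as geo_shape.
# Host, port, and the "collections" alias name are assumptions.
import asyncio

from elasticsearch import AsyncElasticsearch


async def check_mapping() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    try:
        mapping = await client.indices.get_mapping(index="collections")
        for index_name, body in mapping.items():
            bbox_shape = body["mappings"]["properties"].get("bbox_shape")
            print(index_name, "->", bbox_shape)  # expect {'type': 'geo_shape'}
    finally:
        await client.close()


asyncio.run(check_mapping())
```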
4 changes: 4 additions & 0 deletions stac_fastapi/core/stac_fastapi/core/base_database_logic.py
@@ -3,6 +3,8 @@
import abc
from typing import Any, Dict, Iterable, List, Optional, Tuple

from stac_pydantic.shared import BBox


class BaseDatabaseLogic(abc.ABC):
    """
@@ -19,6 +21,7 @@ async def get_all_collections(
        limit: int,
        request: Any = None,
        sort: Optional[List[Dict[str, Any]]] = None,
        bbox: Optional[BBox] = None,
    ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
        """Retrieve a list of collections from the database, supporting pagination.

@@ -27,6 +30,7 @@ async def get_all_collections(
            limit (int): The number of results to return.
            request (Any, optional): The FastAPI request object. Defaults to None.
            sort (Optional[List[Dict[str, Any]]], optional): Optional sort parameter. Defaults to None.
            bbox (Optional[BBox], optional): Bounding box to filter collections by spatial extent. Defaults to None.

        Returns:
            A tuple of (collections, next pagination token if any).
3 changes: 3 additions & 0 deletions stac_fastapi/core/stac_fastapi/core/core.py
@@ -241,6 +241,7 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage:
    async def all_collections(
        self,
        limit: Optional[int] = None,
        bbox: Optional[BBox] = None,
        datetime: Optional[str] = None,
        fields: Optional[List[str]] = None,
        sortby: Optional[Union[str, List[str]]] = None,
@@ -401,6 +402,7 @@ async def all_collections(
            limit=limit,
            request=request,
            sort=sort,
            bbox=bbox,
            q=q_list,
            filter=parsed_filter,
            query=parsed_query,
@@ -502,6 +504,7 @@ async def post_all_collections(
        # Pass all parameters from search_request to all_collections
        return await self.all_collections(
            limit=search_request.limit if hasattr(search_request, "limit") else None,
            bbox=search_request.bbox if hasattr(search_request, "bbox") else None,
            datetime=search_request.datetime
            if hasattr(search_request, "datetime")
            else None,
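The `hasattr` guards above forward `bbox` from the POST search body when present; a sketch of the equivalent POST request, assuming the collection search route is mounted at `/collections-search` (the route path and host are assumptions):

```python
# Hedged sketch of the POST variant: bbox is sent as a JSON array rather
# than the comma-separated string used on GET. Route path is an assumption.
import requests

body = {"bbox": [-10.0, 35.0, 10.0, 55.0], "limit": 5}
resp = requests.post("http://localhost:8080/collections-search", json=body)
resp.raise_for_status()
print([c["id"] for c in resp.json()["collections"]])
```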
55 changes: 54 additions & 1 deletion stac_fastapi/core/stac_fastapi/core/serializers.py
@@ -1,6 +1,7 @@
"""Serializers."""

import abc
import logging
from copy import deepcopy
from typing import Any, List, Optional

Expand All @@ -9,10 +10,12 @@

from stac_fastapi.core.datetime_utils import now_to_rfc3339_str
from stac_fastapi.core.models.links import CollectionLinks
from stac_fastapi.core.utilities import get_bool_env
from stac_fastapi.core.utilities import bbox2polygon, get_bool_env
from stac_fastapi.types import stac as stac_types
from stac_fastapi.types.links import ItemLinks, resolve_links

logger = logging.getLogger(__name__)


@attr.s
class Serializer(abc.ABC):
Expand Down Expand Up @@ -141,6 +144,53 @@ def stac_to_db(
collection.get("links", []), str(request.base_url)
)

# Convert bbox to bbox_shape for geospatial queries
if "extent" in collection and "spatial" in collection["extent"]:
spatial_extent = collection["extent"]["spatial"]
if "bbox" in spatial_extent and spatial_extent["bbox"]:
# Get the first bbox (collections can have multiple bboxes, but we use the first one)
bbox = (
spatial_extent["bbox"][0]
if isinstance(spatial_extent["bbox"][0], list)
else spatial_extent["bbox"]
)
collection_id = collection.get("id", "unknown")
logger.debug(
f"Converting bbox to bbox_shape for collection '{collection_id}': bbox={bbox}"
)

if len(bbox) >= 4:
# Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz])
# For 2D polygon, we only need the x,y coordinates and discard altitude (z) values
minx, miny = bbox[0], bbox[1]
if len(bbox) == 4:
# 2D bbox: [minx, miny, maxx, maxy]
maxx, maxy = bbox[2], bbox[3]
logger.debug(
f"Collection '{collection_id}': Processing 2D bbox"
)
else:
# 3D bbox: [minx, miny, minz, maxx, maxy, maxz]
# Extract indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz)
maxx, maxy = bbox[3], bbox[4]
logger.debug(
f"Collection '{collection_id}': Processing 3D bbox, discarding altitude values at indices 2 and 5"
)

# Convert bbox to GeoJSON polygon
bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy)
collection["bbox_shape"] = {
"type": "Polygon",
"coordinates": bbox_polygon_coords,
}
logger.info(
f"Collection '{collection_id}': Created bbox_shape from bbox [{minx}, {miny}, {maxx}, {maxy}]"
)
else:
logger.warning(
f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4"
)

if get_bool_env("STAC_INDEX_ASSETS"):
collection["assets"] = [
{"es_key": k, **v} for k, v in collection.get("assets", {}).items()
Expand Down Expand Up @@ -168,6 +218,9 @@ def db_to_stac(
# Avoid modifying the input dict in-place ... doing so breaks some tests
collection = deepcopy(collection)

# Remove internal bbox_shape field (not part of STAC spec)
collection.pop("bbox_shape", None)

# Set defaults
collection_id = collection.get("id")
collection.setdefault("type", "Collection")
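`stac_to_db` above delegates the ring construction to `bbox2polygon` from `stac_fastapi.core.utilities`; a minimal sketch of the behavior it relies on (the actual implementation in the repo may differ in detail):

```python
from typing import List


def bbox2polygon(
    minx: float, miny: float, maxx: float, maxy: float
) -> List[List[List[float]]]:
    """Return GeoJSON polygon coordinates: one closed outer ring, wound
    counter-clockwise per RFC 7946, with the first point repeated last."""
    return [
        [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy], [minx, miny]]
    ]


# A 3D bbox [minx, miny, minz, maxx, maxy, maxz] is flattened to 2D first,
# mirroring the index-3/index-4 extraction in stac_to_db above.
bbox = [5.0, 46.0, 0.0, 10.0, 48.0, 4000.0]
print(bbox2polygon(bbox[0], bbox[1], bbox[3], bbox[4]))
```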
@@ -175,6 +175,7 @@ async def get_all_collections(
        limit: int,
        request: Request,
        sort: Optional[List[Dict[str, Any]]] = None,
        bbox: Optional[List[float]] = None,
        q: Optional[List[str]] = None,
        filter: Optional[Dict[str, Any]] = None,
        query: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -187,6 +188,7 @@
            limit (int): The number of results to return.
            request (Request): The FastAPI request object.
            sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
            bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent.
            q (Optional[List[str]]): Free text search terms.
            query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters.
            filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
@@ -314,6 +316,44 @@
                query_parts.append({"bool": {"must_not": {"match_all": {}}}})
                raise

        # Apply bbox filter if provided
        if bbox:
            # Parse bbox if it's a string (from GET requests)
            if isinstance(bbox, str):
                try:
                    bbox = [float(x.strip()) for x in bbox.split(",")]
                except (ValueError, AttributeError) as e:
                    logger.error(f"Invalid bbox format: {bbox}, error: {e}")
                    bbox = None

            if bbox and len(bbox) >= 4:
                # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz])
                # For geospatial queries, we discard altitude (z) values
                minx, miny = bbox[0], bbox[1]
                if len(bbox) == 4:
                    # 2D bbox
                    maxx, maxy = bbox[2], bbox[3]
                else:
                    # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz)
                    maxx, maxy = bbox[3], bbox[4]

                # Convert bbox to a polygon for geo_shape query
                bbox_polygon = {
                    "type": "Polygon",
                    "coordinates": bbox2polygon(minx, miny, maxx, maxy),
                }
                # Add geo_shape query to filter collections by bbox_shape field
                query_parts.append(
                    {
                        "geo_shape": {
                            "bbox_shape": {
                                "shape": bbox_polygon,
                                "relation": "intersects",
                            }
                        }
                    }
                )

        # Combine all query parts with AND logic if there are multiple
        datetime_filter = None
        if datetime:
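For reference, the fragment this branch appends to `query_parts` for `bbox=[-10, 35, 10, 55]` would look roughly like the following Elasticsearch DSL (a hand-written sketch, not captured output):

```python
# Hedged sketch of the generated geo_shape clause. "intersects" matches any
# collection whose bbox_shape overlaps the query polygon; geo_shape also
# supports "within", "contains", and "disjoint" for stricter semantics.
geo_shape_clause = {
    "geo_shape": {
        "bbox_shape": {
            "shape": {
                "type": "Polygon",
                "coordinates": [
                    [
                        [-10.0, 35.0],
                        [10.0, 35.0],
                        [10.0, 55.0],
                        [-10.0, 55.0],
                        [-10.0, 35.0],
                    ]
                ],
            },
            "relation": "intersects",
        }
    }
}
```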
40 changes: 40 additions & 0 deletions stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
@@ -159,6 +159,7 @@ async def get_all_collections(
        limit: int,
        request: Request,
        sort: Optional[List[Dict[str, Any]]] = None,
        bbox: Optional[List[float]] = None,
        q: Optional[List[str]] = None,
        filter: Optional[Dict[str, Any]] = None,
        query: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -171,6 +172,7 @@
            limit (int): The number of results to return.
            request (Request): The FastAPI request object.
            sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
            bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent.
            q (Optional[List[str]]): Free text search terms.
            query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters.
            filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
@@ -298,6 +300,44 @@
                query_parts.append({"bool": {"must_not": {"match_all": {}}}})
                raise

        # Apply bbox filter if provided
        if bbox:
            # Parse bbox if it's a string (from GET requests)
            if isinstance(bbox, str):
                try:
                    bbox = [float(x.strip()) for x in bbox.split(",")]
                except (ValueError, AttributeError) as e:
                    logger.error(f"Invalid bbox format: {bbox}, error: {e}")
                    bbox = None

            if bbox and len(bbox) >= 4:
                # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz])
                # For geospatial queries, we discard altitude (z) values
                minx, miny = bbox[0], bbox[1]
                if len(bbox) == 4:
                    # 2D bbox
                    maxx, maxy = bbox[2], bbox[3]
                else:
                    # 3D bbox - extract indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz)
                    maxx, maxy = bbox[3], bbox[4]

                # Convert bbox to a polygon for geo_shape query
                bbox_polygon = {
                    "type": "Polygon",
                    "coordinates": bbox2polygon(minx, miny, maxx, maxy),
                }
                # Add geo_shape query to filter collections by bbox_shape field
                query_parts.append(
                    {
                        "geo_shape": {
                            "bbox_shape": {
                                "shape": bbox_polygon,
                                "relation": "intersects",
                            }
                        }
                    }
                )

        # Combine all query parts with AND logic if there are multiple
        datetime_filter = None
        if datetime:
@@ -160,7 +160,9 @@ class Geometry(Protocol):  # noqa
    "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES,
    "properties": {
        "id": {"type": "keyword"},
        "extent.spatial.bbox": {"type": "long"},
        "title": {"type": "text"},
        "description": {"type": "text"},
        "bbox_shape": {"type": "geo_shape"},
        "extent.temporal.interval": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis",