Skip to content
Open
44 changes: 44 additions & 0 deletions examples/mara_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python3
# /// script
# dependencies = [
# "arraylake",
# "icechunk",
# "xarray",
# "xarray-sql",
# "pandas",
# ]
# ///

"""Demo of a (spatial) join using Xarray-SQL and DataFusion for MARA."""
import pandas as pd
import xarray as xr
import xarray_sql as xql
from arraylake import Client

# Authenticate against Arraylake, then open the `temporal` group of
# EarthMover's public ERA5 surface repo through a read-only 'main' session.
client = Client()
client.login()
repo = client.get_repo('earthmover-public/era5-surface-aws')
era5_store = repo.readonly_session('main').store
ds = xr.open_zarr(
    era5_store,
    group='temporal',
    # No chunking is requested at open time; the chunk layout used for
    # querying is supplied later, when the dataset is registered as a table.
    chunks=None,
    zarr_format=3,
    consolidated=False,
)

# Build the DataFusion query context and expose both inputs as SQL tables:
# `era5` (the Xarray dataset) and `movebank` (a pandas DataFrame).
ctx = xql.XarrayContext()

era5_chunks = {'time': 8736, 'latitude': 12, 'longitude': 12}
ctx.from_dataset('era5', ds, chunks=era5_chunks)

# Sample Movebank data, fetched from the ecoscope repository's test
# fixtures.
movebank_url = (
    'https://github.com/wildlife-dynamics/ecoscope/raw/refs/heads/master/'
    'tests/sample_data/vector/movebank_data.feather'
)
movebank_df = pd.read_feather(movebank_url)
ctx.from_pandas(movebank_df, 'movebank')

# TODO(alxmrs): When I'm not on cellular internet, explore the datasets and write the
# join in SQL.
Loading