diff --git a/datashader/__init__.py b/datashader/__init__.py index c06d5a30c..067be40a5 100644 --- a/datashader/__init__.py +++ b/datashader/__init__.py @@ -18,12 +18,36 @@ from . import datatypes # noqa (API import) # make pyct's example/data commands available if possible -from functools import partial +from functools import partial, wraps + + +def _warn_pyct_deprecated(stacklevel=2): + import warnings + + warnings.warn( + "The 'fetch_data()', 'copy_examples()', and 'examples()' functions are " + "deprecated since version 0.19 and will be removed in version 0.20. " + "For downloading sample datasets, use 'hvsampledata' instead. " + "For example: `hvsampledata.nyc_taxi_remote('pandas')`.", + category=FutureWarning, + stacklevel=stacklevel, + ) + + +def _deprecated_pyct_wrapper(func): + """Wrapper to add deprecation warning to pyct functions.""" + @wraps(func) # noqa: F821 + def wrapper(*args, **kwargs): + _warn_pyct_deprecated(stacklevel=3) + return func(*args, **kwargs) + return wrapper + + try: from pyct.cmd import copy_examples as _copy, fetch_data as _fetch, examples as _examples - copy_examples = partial(_copy,'datashader') - fetch_data = partial(_fetch,'datashader') - examples = partial(_examples,'datashader') + copy_examples = _deprecated_pyct_wrapper(partial(_copy, 'datashader')) + fetch_data = _deprecated_pyct_wrapper(partial(_fetch, 'datashader')) + examples = _deprecated_pyct_wrapper(partial(_examples, 'datashader')) except ImportError: def _missing_cmd(*args,**kw): return("install pyct to enable this command (e.g. `conda install pyct or " @@ -32,4 +56,4 @@ def _missing_cmd(*args,**kw): def err(): raise ValueError(_missing_cmd()) fetch_data = copy_examples = examples = err -del partial, _examples, _copy, _fetch +del partial, wraps, _examples, _copy, _fetch diff --git a/datashader/__main__.py b/datashader/__main__.py index 9c685d604..49050c59e 100644 --- a/datashader/__main__.py +++ b/datashader/__main__.py @@ -1,6 +1,10 @@ + + def main(args=None): try: import pyct.cmd + from . import _warn_pyct_deprecated + _warn_pyct_deprecated(stacklevel=3) except ImportError: import sys from . import _missing_cmd diff --git a/examples/getting_started/1_Introduction.ipynb b/examples/getting_started/1_Introduction.ipynb index d00c954a5..203e21089 100644 --- a/examples/getting_started/1_Introduction.ipynb +++ b/examples/getting_started/1_Introduction.ipynb @@ -24,9 +24,10 @@ "metadata": {}, "outputs": [], "source": [ - "import datashader as ds, pandas as pd, colorcet as cc\n", + "import datashader as ds, colorcet as cc\n", + "import hvsampledata as hvs\n", "\n", - "df = pd.read_csv('../data/nyc_taxi.csv', usecols=['dropoff_x', 'dropoff_y'])\n", + "df = hvs.nyc_taxi_remote(\"pandas\", engine_kwargs={\"columns\": ['dropoff_x', 'dropoff_y']})\n", "df.head()" ] }, @@ -35,9 +36,8 @@ "metadata": {}, "source": [ ":::{note}\n", - "The file `nyc_taxi.csv` used above is located at\n", - "[nyc_taxi.csv](https://github.com/holoviz/datashader/blob/main/examples/data/.data_stubs/nyc_taxi.csv) in the Datashader repository. When running this example, make sure the file is available locally and update the path accordingly.\n", - ":::\n" + "The first time this cell is run, it will download the NYC taxi dataset (about 260MB).\n", + ":::" ] }, { diff --git a/examples/taxi_preprocessing_example.py b/examples/taxi_preprocessing_example.py deleted file mode 100644 index f16488f5e..000000000 --- a/examples/taxi_preprocessing_example.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Download data needed for the examples""" - -from __future__ import annotations - -if __name__ == "__main__": - - from os import path, makedirs, remove - from download_sample_data import bar as progressbar - - import pandas as pd - import numpy as np - import sys - - try: - import requests - except ImportError: - print('Download script required requests package: conda install requests') - sys.exit(1) - - def _download_dataset(url): - r = requests.get(url, stream=True) - output_path = path.split(url)[1] - with open(output_path, 'wb') as f: - total_length = int(r.headers.get('content-length')) - for chunk in progressbar(r.iter_content(chunk_size=1024), expected_size=(total_length/1024) + 1): - if chunk: - f.write(chunk) - f.flush() - - examples_dir = path.dirname(path.realpath(__file__)) - data_dir = path.join(examples_dir, 'data') - if not path.exists(data_dir): - makedirs(data_dir) - - # Taxi data - def latlng_to_meters(df, lat_name, lng_name): - lat = df[lat_name] - lng = df[lng_name] - origin_shift = 2 * np.pi * 6378137 / 2.0 - mx = lng * origin_shift / 180.0 - my = np.log(np.tan((90 + lat) * np.pi / 360.0)) / (np.pi / 180.0) - my = my * origin_shift / 180.0 - df.loc[:, lng_name] = mx - df.loc[:, lat_name] = my - - taxi_path = path.join(data_dir, 'nyc_taxi.csv') - if not path.exists(taxi_path): - print("Downloading Taxi Data...") - url = ('https://storage.googleapis.com/tlc-trip-data/2015/' - 'yellow_tripdata_2015-01.csv') - - _download_dataset(url) - df = pd.read_csv('yellow_tripdata_2015-01.csv') - - print('Filtering Taxi Data') - df = df.loc[(df.pickup_longitude < -73.75) & - (df.pickup_longitude > -74.15) & - (df.dropoff_longitude < -73.75) & - (df.dropoff_longitude > -74.15) & - (df.pickup_latitude > 40.68) & - (df.pickup_latitude < 40.84) & - (df.dropoff_latitude > 40.68) & - (df.dropoff_latitude < 40.84)].copy() - - print('Reprojecting Taxi Data') - latlng_to_meters(df, 'pickup_latitude', 'pickup_longitude') - latlng_to_meters(df, 'dropoff_latitude', 'dropoff_longitude') - df.rename(columns={'pickup_longitude': 'pickup_x', 'dropoff_longitude': 'dropoff_x', - 'pickup_latitude': 'pickup_y', 'dropoff_latitude': 'dropoff_y'}, - inplace=True) - df.to_csv(taxi_path, index=False) - remove('yellow_tripdata_2015-01.csv') - - - print("\nAll data downloaded.") diff --git a/pixi.toml b/pixi.toml index 88a02273c..9cf6a69fc 100644 --- a/pixi.toml +++ b/pixi.toml @@ -80,6 +80,7 @@ scikit-image = "*" shapely = ">=2.0.0" spatialpandas = "*" streamz = "*" +hvsampledata = ">=0.1.5a3" # ============================================= # =================== TESTS =================== diff --git a/scripts/download_data.py b/scripts/download_data.py index 586548243..dfa47b93b 100644 --- a/scripts/download_data.py +++ b/scripts/download_data.py @@ -1,9 +1,10 @@ from contextlib import suppress - -import pyct.cmd from packaging.version import Version -pyct.cmd.fetch_data(name="data", path="examples", datasets="datasets.yml") +with suppress(ImportError): + import pyct.cmd + + pyct.cmd.fetch_data(name="data", path="examples", datasets="datasets.yml") with suppress(ImportError): @@ -21,4 +22,10 @@ gds.get_path("geoda.natregimes") gds.get_path("nybb") - gds.get_path('geoda health') + gds.get_path("geoda health") + + +with suppress(ImportError): + import hvsampledata as hvs + + path = hvs.download("nyc_taxi_remote")