diff --git a/altair/datasets/__init__.py b/altair/datasets/__init__.py index 84a3c3ba6..acf003475 100644 --- a/altair/datasets/__init__.py +++ b/altair/datasets/__init__.py @@ -77,7 +77,7 @@ from typing import TYPE_CHECKING -from altair.datasets._loader import Loader as Loader +from altair.datasets._loader import Loader if TYPE_CHECKING: import sys @@ -92,6 +92,8 @@ from altair.datasets._loader import _Load from altair.datasets._typing import Dataset, Extension +__all__ = ["Loader", "data", "load", "url"] + load: _Load[Any, Any] """ @@ -166,18 +168,17 @@ def url( return url -def __getattr__(name): - if name == "data": - from altair.datasets._data import data +if not TYPE_CHECKING: - return data - elif name == "load": - from altair.datasets._loader import load + def __getattr__(name): + if name == "data": + from altair.datasets._data import data - return load - elif name == "__all__": - # Define __all__ dynamically to avoid ruff errors - return ["Loader", "data", "load", "url"] - else: - msg = f"module {__name__!r} has no attribute {name!r}" - raise AttributeError(msg) + return data + elif name == "load": + from altair.datasets._loader import load + + return load + else: + msg = f"module {__name__!r} has no attribute {name!r}" + raise AttributeError(msg) diff --git a/altair/datasets/_cache.py b/altair/datasets/_cache.py index fa0c0708b..8ac6c9212 100644 --- a/altair/datasets/_cache.py +++ b/altair/datasets/_cache.py @@ -286,7 +286,7 @@ def schema_kwds(self, meta: Metadata, /) -> dict[str, Any]: # For pyarrow CSV reading, use the schema as intended # This will fail for non-ISO date formats, but that's the correct behavior # Users can handle this by using a different backend or converting dates manually - return {"convert_options": ConvertOptions(column_types=schema)} # pyright: ignore[reportCallIssue] + return {"convert_options": ConvertOptions(column_types=schema)} elif suffix == ".parquet": return {"schema": schema} diff --git a/altair/datasets/_loader.py b/altair/datasets/_loader.py index 984722623..c3ea91012 100644 --- a/altair/datasets/_loader.py +++ b/altair/datasets/_loader.py @@ -3,10 +3,8 @@ import typing as t from typing import Generic, final, overload -from narwhals.stable.v1.typing import IntoDataFrameT - from altair.datasets import _reader -from altair.datasets._reader import IntoFrameT +from altair.datasets._reader import IntoDataFrameT, IntoLazyFrameT if t.TYPE_CHECKING: import sys @@ -30,7 +28,7 @@ __all__ = ["Loader", "load"] -class Loader(Generic[IntoDataFrameT, IntoFrameT]): +class Loader(Generic[IntoDataFrameT, IntoLazyFrameT]): """ Load example datasets *remotely* from `vega-datasets`_, with caching. @@ -46,7 +44,7 @@ class Loader(Generic[IntoDataFrameT, IntoFrameT]): https://github.com/vega/vega-datasets """ - _reader: Reader[IntoDataFrameT, IntoFrameT] + _reader: Reader[IntoDataFrameT, IntoLazyFrameT] @overload @classmethod @@ -58,13 +56,11 @@ def from_backend( @classmethod def from_backend( cls, backend_name: Literal["pandas", "pandas[pyarrow]"], / - ) -> Loader[pd.DataFrame, pd.DataFrame]: ... + ) -> Loader[pd.DataFrame]: ... @overload @classmethod - def from_backend( - cls, backend_name: Literal["pyarrow"], / - ) -> Loader[pa.Table, pa.Table]: ... + def from_backend(cls, backend_name: Literal["pyarrow"], /) -> Loader[pa.Table]: ... @classmethod def from_backend( @@ -130,7 +126,7 @@ def from_backend( return cls.from_reader(_reader._from_backend(backend_name)) @classmethod - def from_reader(cls, reader: Reader[IntoDataFrameT, IntoFrameT], /) -> Self: + def from_reader(cls, reader: Reader[IntoDataFrameT, IntoLazyFrameT], /) -> Self: obj = cls.__new__(cls) obj._reader = reader return obj @@ -294,7 +290,7 @@ def __repr__(self) -> str: @final -class _Load(Loader[IntoDataFrameT, IntoFrameT]): +class _Load(Loader[IntoDataFrameT, IntoLazyFrameT]): @overload def __call__( # pyright: ignore[reportOverlappingOverload] self, diff --git a/altair/datasets/_reader.py b/altair/datasets/_reader.py index 07c545399..f9a268556 100644 --- a/altair/datasets/_reader.py +++ b/altair/datasets/_reader.py @@ -25,18 +25,17 @@ from importlib.util import find_spec from itertools import chain from pathlib import Path -from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, cast, overload +from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, overload from urllib.request import build_opener as _build_opener from narwhals.stable import v1 as nw -from narwhals.stable.v1.typing import IntoDataFrameT, IntoExpr from packaging.requirements import Requirement from altair.datasets import _readimpl from altair.datasets._cache import CsvCache, DatasetCache, SchemaCache, _iter_metadata from altair.datasets._constraints import is_parquet from altair.datasets._exceptions import AltairDatasetsError, module_not_found -from altair.datasets._readimpl import IntoFrameT, is_available +from altair.datasets._readimpl import IntoDataFrameT, IntoLazyFrameT, is_available if TYPE_CHECKING: import sys @@ -46,6 +45,7 @@ import pandas as pd import polars as pl import pyarrow as pa + from narwhals.stable.v1.typing import IntoExpr from altair.datasets._readimpl import BaseImpl, R, Read, Scan from altair.datasets._typing import Dataset, Extension, Metadata @@ -91,6 +91,12 @@ _Ibis, _PySpark, ) + _EagerAllowedImpl: TypeAlias = Literal[ + nw.Implementation.PANDAS, + nw.Implementation.POLARS, + nw.Implementation.PYARROW, + ] + _EagerAllowed: TypeAlias = Literal[_Pandas, _Polars, _PyArrow] _SupportProfile: TypeAlias = Mapping[ Literal["supported", "unsupported"], "Sequence[Dataset]" @@ -113,7 +119,7 @@ """ -class Reader(Generic[IntoDataFrameT, IntoFrameT]): +class Reader(Generic[IntoDataFrameT, IntoLazyFrameT]): """ Modular file reader, targeting remote & local tabular resources. @@ -124,7 +130,7 @@ class Reader(Generic[IntoDataFrameT, IntoFrameT]): _read: Sequence[Read[IntoDataFrameT]] """Eager file read functions.""" - _scan: Sequence[Scan[IntoFrameT]] + _scan: Sequence[Scan[IntoLazyFrameT]] """Lazy file read functions.""" _name: str @@ -134,7 +140,7 @@ class Reader(Generic[IntoDataFrameT, IntoFrameT]): Otherwise, has no concrete meaning. """ - _implementation: nw.Implementation + _implementation: _EagerAllowedImpl """ Corresponding `narwhals implementation`_. @@ -150,9 +156,9 @@ class Reader(Generic[IntoDataFrameT, IntoFrameT]): def __init__( self, read: Sequence[Read[IntoDataFrameT]], - scan: Sequence[Scan[IntoFrameT]], + scan: Sequence[Scan[IntoLazyFrameT]], name: str, - implementation: nw.Implementation, + implementation: _EagerAllowedImpl, ) -> None: self._read = read self._scan = scan @@ -173,7 +179,7 @@ def __repr__(self) -> str: def read_fn(self, meta: Metadata, /) -> Callable[..., IntoDataFrameT]: return self._solve(meta, self._read) - def scan_fn(self, meta: Metadata | Path | str, /) -> Callable[..., IntoFrameT]: + def scan_fn(self, meta: Metadata | Path | str, /) -> Callable[..., IntoLazyFrameT]: meta = meta if isinstance(meta, Mapping) else {"suffix": _into_suffix(meta)} return self._solve(meta, self._scan) @@ -330,13 +336,13 @@ def _merge_kwds(self, meta: Metadata, kwds: dict[str, Any], /) -> Mapping[str, A return kwds @property - def _metadata_frame(self) -> nw.LazyFrame[IntoFrameT]: + def _metadata_frame(self) -> nw.LazyFrame[IntoLazyFrameT]: fp = self._metadata_path return nw.from_native(self.scan_fn(fp)(fp)).lazy() def _scan_metadata( self, *predicates: OneOrSeq[IntoExpr], **constraints: Unpack[Metadata] - ) -> nw.LazyFrame[IntoFrameT]: + ) -> nw.LazyFrame[IntoLazyFrameT]: if predicates or constraints: return self._metadata_frame.filter(*predicates, **constraints) return self._metadata_frame @@ -373,7 +379,7 @@ def _dataset_names( ) -class _NoParquetReader(Reader[IntoDataFrameT, IntoFrameT]): +class _NoParquetReader(Reader[IntoDataFrameT]): def __repr__(self) -> str: return f"{super().__repr__()}\ncsv_cache\n {self.csv_cache!r}" @@ -384,8 +390,8 @@ def csv_cache(self) -> CsvCache: return self._csv_cache @property - def _metadata_frame(self) -> nw.LazyFrame[IntoFrameT]: - data = cast("dict[str, Any]", self.csv_cache.rotated) + def _metadata_frame(self) -> nw.LazyFrame[Any]: + data = self.csv_cache.rotated impl = self._implementation return nw.maybe_convert_dtypes(nw.from_dict(data, backend=impl)).lazy() @@ -397,31 +403,28 @@ def reader( *, name: str | None = ..., implementation: nw.Implementation = ..., -) -> Reader[IntoDataFrameT, nw.LazyFrame[IntoDataFrameT]]: ... +) -> Reader[IntoDataFrameT]: ... @overload def reader( read_fns: Sequence[Read[IntoDataFrameT]], - scan_fns: Sequence[Scan[IntoFrameT]], + scan_fns: Sequence[Scan[IntoLazyFrameT]], *, name: str | None = ..., implementation: nw.Implementation = ..., -) -> Reader[IntoDataFrameT, IntoFrameT]: ... +) -> Reader[IntoDataFrameT, IntoLazyFrameT]: ... def reader( read_fns: Sequence[Read[IntoDataFrameT]], - scan_fns: Sequence[Scan[IntoFrameT]] = (), + scan_fns: Sequence[Scan[IntoLazyFrameT]] = (), *, name: str | None = None, implementation: nw.Implementation = nw.Implementation.UNKNOWN, -) -> ( - Reader[IntoDataFrameT, IntoFrameT] - | Reader[IntoDataFrameT, nw.LazyFrame[IntoDataFrameT]] -): +) -> Reader[IntoDataFrameT, IntoLazyFrameT] | Reader[IntoDataFrameT]: name = name or Counter(el._inferred_package for el in read_fns).most_common(1)[0][0] - if implementation is nw.Implementation.UNKNOWN: + if not _is_eager_allowed(implementation): implementation = _into_implementation(Requirement(name)) if scan_fns: return Reader(read_fns, scan_fns, name, implementation) @@ -456,9 +459,9 @@ def infer_backend( @overload def _from_backend(name: _Polars, /) -> Reader[pl.DataFrame, pl.LazyFrame]: ... @overload -def _from_backend(name: _PandasAny, /) -> Reader[pd.DataFrame, pd.DataFrame]: ... +def _from_backend(name: _PandasAny, /) -> Reader[pd.DataFrame]: ... @overload -def _from_backend(name: _PyArrow, /) -> Reader[pa.Table, pa.Table]: ... +def _from_backend(name: _PyArrow, /) -> Reader[pa.Table]: ... # FIXME: The order this is defined in makes splitting the module complicated @@ -516,15 +519,28 @@ def _into_constraints( return m +def _is_eager_allowed(impl: nw.Implementation, /) -> TypeIs[_EagerAllowedImpl]: + return impl in { + nw.Implementation.PANDAS, + nw.Implementation.POLARS, + nw.Implementation.PYARROW, + } + + def _into_implementation( - backend: _NwSupport | _PandasAny | Requirement, / -) -> nw.Implementation: - primary = _import_guarded(backend) + backend: _NwSupport | _PandasAny | nw.Implementation | Requirement, / +) -> _EagerAllowedImpl: + req = ( + Requirement(str(backend)) if isinstance(backend, nw.Implementation) else backend + ) + primary = _import_guarded(req) impl = nw.Implementation.from_backend(primary) - if impl is not nw.Implementation.UNKNOWN: - return impl - msg = f"Package {primary!r} is not supported by `narwhals`." - raise ValueError(msg) + if not _is_eager_allowed(impl): + if impl is nw.Implementation.UNKNOWN: + msg = f"Package {primary!r} is not supported by `narwhals`." + raise ValueError(msg) + raise NotImplementedError(impl) + return impl def _into_suffix(obj: Path | str, /) -> Any: @@ -539,7 +555,7 @@ def _into_suffix(obj: Path | str, /) -> Any: def _steal_eager_parquet( read_fns: Sequence[Read[IntoDataFrameT]], / -) -> Sequence[Scan[nw.LazyFrame[IntoDataFrameT]]] | None: +) -> Sequence[Scan[Any]] | None: if convertable := next((rd for rd in read_fns if rd.include <= is_parquet), None): return (_readimpl.into_scan(convertable),) return None diff --git a/altair/datasets/_readimpl.py b/altair/datasets/_readimpl.py index 34b31070a..f7e593212 100644 --- a/altair/datasets/_readimpl.py +++ b/altair/datasets/_readimpl.py @@ -13,7 +13,6 @@ from narwhals.stable import v1 as nw from narwhals.stable.v1.dependencies import get_pandas, get_polars -from narwhals.stable.v1.typing import IntoDataFrameT from altair.datasets._constraints import ( is_arrow, @@ -51,16 +50,22 @@ __all__ = ["is_available", "pa_any", "pd_only", "pd_pyarrow", "pl_only", "read", "scan"] -R = TypeVar("R", bound="nwt.IntoFrame", covariant=True) -IntoFrameT = TypeVar( - "IntoFrameT", - bound="nwt.NativeFrame | nw.DataFrame[Any] | nw.LazyFrame[Any] | nwt.DataFrameLike", - default=nw.LazyFrame[Any], + +R = TypeVar( + "R", + bound="nwt.IntoDataFrame | nwt.IntoLazyFrame", + covariant=True, +) +IntoDataFrameT = TypeVar("IntoDataFrameT", bound="nwt.IntoDataFrame") +IntoLazyFrameT = TypeVar( + "IntoLazyFrameT", + bound="nwt.IntoLazyFrame", + default=Any, ) Read = TypeAliasType("Read", "BaseImpl[IntoDataFrameT]", type_params=(IntoDataFrameT,)) """An *eager* file read function.""" -Scan = TypeAliasType("Scan", "BaseImpl[IntoFrameT]", type_params=(IntoFrameT,)) +Scan = TypeAliasType("Scan", "BaseImpl[IntoLazyFrameT]", type_params=(IntoLazyFrameT,)) """A *lazy* file read function.""" @@ -206,21 +211,19 @@ def read( def scan( - fn: Callable[..., IntoFrameT], + fn: Callable[..., IntoLazyFrameT], /, include: MetaIs, exclude: MetaIs | None = None, **kwds: Any, -) -> Scan[IntoFrameT]: +) -> Scan[IntoLazyFrameT]: return BaseImpl(fn, include, exclude, kwds) -def into_scan(impl: Read[IntoDataFrameT], /) -> Scan[nw.LazyFrame[IntoDataFrameT]]: - def scan_fn( - fn: Callable[..., IntoDataFrameT], / - ) -> Callable[..., nw.LazyFrame[IntoDataFrameT]]: +def into_scan(impl: Read[IntoDataFrameT], /) -> Scan[Any]: + def scan_fn(fn: Callable[..., IntoDataFrameT], /) -> Callable[..., Any]: @wraps(_unwrap_partial(fn)) - def wrapper(*args: Any, **kwds: Any) -> nw.LazyFrame[IntoDataFrameT]: + def wrapper(*args: Any, **kwds: Any) -> nw.LazyFrame[Any]: return nw.from_native(fn(*args, **kwds)).lazy() return wrapper @@ -271,7 +274,7 @@ def _unwrap_partial(fn: Any, /) -> Any: return func -def pl_only() -> tuple[Sequence[Read[pl.DataFrame]], Sequence[Scan[pl.LazyFrame]]]: +def pl_only() -> tuple[Sequence[Read[pl.DataFrame]], Sequence[Scan[pl.LazyFrame]]]: # pyright: ignore[reportInvalidTypeForm] import polars as pl pl_read_json = read(_pl_read_json_roundtrip(get_polars()), is_json) @@ -350,7 +353,7 @@ def pa_any() -> Sequence[Read[pa.Table]]: return ( read(csv.read_csv, is_csv), _pa_read_json_impl(), - read(csv.read_csv, is_tsv, parse_options=csv.ParseOptions(delimiter="\t")), # pyright: ignore[reportCallIssue] + read(csv.read_csv, is_tsv, parse_options=csv.ParseOptions(delimiter="\t")), read(feather.read_table, is_arrow), read(parquet.read_table, is_parquet), ) diff --git a/pyproject.toml b/pyproject.toml index 3fa201fef..aa865dbdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ # If you update the minimum required jsonschema version, also update it in build.yml "jsonschema>=3.0", "packaging", - "narwhals>=1.27.1,<2.2" + "narwhals>=2.4.0", ] description = "Vega-Altair: A declarative statistical visualization library for Python." readme = "README.md" diff --git a/uv.lock b/uv.lock index 936525ea8..7219f2632 100644 --- a/uv.lock +++ b/uv.lock @@ -170,7 +170,7 @@ requires-dist = [ { name = "mistune", marker = "extra == 'dev'" }, { name = "mypy", marker = "extra == 'dev'" }, { name = "myst-parser", marker = "extra == 'doc'" }, - { name = "narwhals", specifier = ">=1.27.1,<2.2" }, + { name = "narwhals", specifier = ">=2.4.0" }, { name = "numpy", marker = "extra == 'all'" }, { name = "numpydoc", marker = "extra == 'doc'" }, { name = "packaging" }, @@ -2352,11 +2352,11 @@ wheels = [ [[package]] name = "narwhals" -version = "2.1.2" +version = "2.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/37/f0/b0550d9b84759f4d045fd43da2f811e8b23dc2001e38c3254456da7f3adb/narwhals-2.1.2.tar.gz", hash = "sha256:afb9597e76d5b38c2c4b7c37d27a2418b8cc8049a66b8a5aca9581c92ae8f8bf", size = 533772, upload-time = "2025-08-15T08:24:50.916Z" } +sdist = { url = "https://files.pythonhosted.org/packages/75/59/81d0f4cad21484083466f278e6b392addd9f4205b48d45b5c8771670ebf8/narwhals-2.17.0.tar.gz", hash = "sha256:ebd5bc95bcfa2f8e89a8ac09e2765a63055162837208e67b42d6eeb6651d5e67", size = 620306, upload-time = "2026-02-23T09:44:34.142Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/01/824fff6789ce92a53242d24b6f5f3a982df2f610c51020f934bf878d2a99/narwhals-2.1.2-py3-none-any.whl", hash = "sha256:136b2f533a4eb3245c54254f137c5d14cef5c4668cff67dc6e911a602acd3547", size = 392064, upload-time = "2025-08-15T08:24:48.788Z" }, + { url = "https://files.pythonhosted.org/packages/4b/27/20770bd6bf8fbe1e16f848ba21da9df061f38d2e6483952c29d2bb5d1d8b/narwhals-2.17.0-py3-none-any.whl", hash = "sha256:2ac5307b7c2b275a7d66eeda906b8605e3d7a760951e188dcfff86e8ebe083dd", size = 444897, upload-time = "2026-02-23T09:44:32.006Z" }, ] [[package]]