diff --git a/app_vaex.py b/app_vaex.py
new file mode 100644
index 0000000..e3200da
--- /dev/null
+++ b/app_vaex.py
@@ -0,0 +1,47 @@
+"""Vaex-backed CLI commands, exposed as a Typer sub-application."""
+
+import os
+import sys
+from pathlib import Path
+
+import typer
+import vaex as vx
+
+from utils import with_res_logger
+
+app = typer.Typer()
+
+
+def load(year):
+    """Open ``<year>.csv`` from the directory named by the ``DATA`` env var.
+
+    Returns whatever ``vx.open`` yields for that path; raises ``KeyError``
+    if ``DATA`` is not set.
+    """
+    return vx.open(Path(os.environ["DATA"]) / f"{year}.csv")
+
+
+@app.command()
+@with_res_logger
+def top_flop(year: str):
+    """Print the first and last 10 postal codes ranked by mutation count.
+
+    Rows of ``<year>.csv`` are grouped by ``code_postal``, distinct
+    ``id_mutation`` values are counted per group, and the groups are sorted
+    by that count in descending order before the head and tail are printed.
+
+    Raises ``RuntimeError`` when run on a Python outside ``>=3.7,<3.11``.
+    """
+    # Supported interpreters form the half-open range [3.7, 3.11). Comparing
+    # (major, minor) tuples rejects anything outside it, including other
+    # major versions, which separate per-field checks get wrong easily.
+    if not ((3, 7) <= sys.version_info[:2] < (3, 11)):
+        raise RuntimeError("Only python versions >=3.7,<3.11 are supported.")
+    df = (
+        load(year)
+        .groupby("code_postal")
+        .agg({"id_mutation": vx.agg.nunique("id_mutation")})
+        .sort("id_mutation", ascending=False)
+    )
+    print(df.head(10))
+    print(df.tail(10))
diff --git a/main.py b/main.py
index 30c4dbe..97b9d8c 100755
--- a/main.py
+++ b/main.py
@@ -18,6 +18,7 @@
 from app_explore import app as explore_app # noqa: E402
 from app_pandas import app as pandas_app # noqa: E402
 from app_polars import app as polars_app # noqa: E402
+from app_vaex import app as vaex_app # noqa: E402
 from utils import with_res_logger # noqa: E402
 
 app = typer.Typer()
@@ -43,6 +44,7 @@ def wait(secs: int):
 app.add_typer(dask_app, name="dask")
 app.add_typer(polars_app, name="polars")
 app.add_typer(duckdb_app, name="duckdb")
+app.add_typer(vaex_app, name="vaex")
 
 
 if __name__ == "__main__":