diff --git a/CHANGES.rst b/CHANGES.rst index 865dec22d..31a3f63b0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -44,6 +44,10 @@ Minor changes ``pandas.info()`` ) in a table. It can be sorted by each column. :pr:`1056` and :pr:`1068` by :user:`Jérôme Dockès `. +* The credit fraud dataset is now available with the + :func:`fetch_credit_fraud` function. + :pr:`1053` by :user:`Vincent Maladiere `. + * Added zero padding for column names in :class:`MinHashEncoder` to improve column ordering consistency. :pr:`1069` by :user:`Shreekant Nandiyawar `. diff --git a/doc/_static/08_example_aggjoiner.png b/doc/_static/08_example_aggjoiner.png new file mode 100644 index 000000000..e064c0829 Binary files /dev/null and b/doc/_static/08_example_aggjoiner.png differ diff --git a/doc/_static/08_example_data.png b/doc/_static/08_example_data.png new file mode 100644 index 000000000..0bea0f876 Binary files /dev/null and b/doc/_static/08_example_data.png differ diff --git a/doc/conf.py b/doc/conf.py index 6b7fb208b..b71d38be0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -512,6 +512,7 @@ def notebook_modification_function(notebook_content, notebook_filename): "Series": "pandas.Series", "pandas.Index": "pandas.Index", "read_csv": "pandas.read_csv", + "pandas.melt": "pandas.melt", "pandas.merge": "pandas.merge", # Skrub "fetch_ken_table_aliases": "skrub.datasets.fetch_ken_table_aliases", diff --git a/doc/reference/downloading_a_dataset.rst b/doc/reference/downloading_a_dataset.rst index f471ef70c..3bbb423f5 100644 --- a/doc/reference/downloading_a_dataset.rst +++ b/doc/reference/downloading_a_dataset.rst @@ -18,6 +18,7 @@ Downloading a dataset fetch_drug_directory fetch_world_bank_indicator fetch_movielens + fetch_credit_fraud fetch_ken_table_aliases fetch_ken_types fetch_ken_embeddings diff --git a/examples/08_join_aggregation.py b/examples/08_join_aggregation.py index e26148928..eac307376 100644 --- a/examples/08_join_aggregation.py +++ b/examples/08_join_aggregation.py @@ -1,302 +1,342 
@@ """ -Self-aggregation on MovieLens -============================= +AggJoiner on a credit fraud dataset +=================================== -MovieLens is a famous movie dataset used for both explicit -and implicit recommender systems. It provides a main table, -"ratings", that can be viewed as logs or transactions, comprised -of only 4 columns: ``userId``, ``movieId``, ``rating`` and ``timestamp``. -MovieLens also gives a contextual table "movies", including -``movieId``, ``title`` and ``types``, to enable content-based feature extraction. +Many problems involve tables whose entities have a one-to-many relationship. +To simplify aggregate-then-join operations for machine learning, we can include +the |AggJoiner| in our pipeline. -From the perspective of machine-learning pipelines, one challenge is to -transform the transaction log into features that can be fed to supervised learning. +In this example, we are tackling a fraudulent loan detection use case. +Because fraud is rare, this dataset is extremely imbalanced, with a prevalence of around +1.4%. -In this notebook, we only deal with the main table "ratings". -Our objective is **not to achieve state-of-the-art performance** on -the explicit regression task, but rather to illustrate how to perform -feature engineering in a simple way using |AggJoiner| and |AggTarget|. -Note that our performance is higher than the baseline of using the mean -rating per movies. +The data consists of two distinct entities: e-commerce "baskets", and "products". +Baskets can be tagged fraudulent (1) or not (0), and are essentially a list of products +of variable size. Each basket is linked to at least one product, e.g. basket 1 can have +products 1 and 2. -The benefit of using |AggJoiner| and |AggTarget| is that they readily -provide a full pipeline, from the original tables to the prediction, that can -be cross-validated or applied to new data to serve prediction. 
At the end of -this example, we showcase hyper-parameter optimization on the whole pipeline. +.. image:: ../../_static/08_example_data.png + :width: 450 px +| + +Our aim is to predict which baskets are fraudulent. + +The products dataframe can be joined on the baskets dataframe using the ``basket_ID`` +column. + +Each product has several attributes: + +- a category (marked by the column ``"item"``), +- a model (``"model"``), +- a brand (``"make"``), +- a merchant code (``"goods_code"``), +- a price per unit (``"cash_price"``), +- a quantity selected in the basket (``"Nbr_of_prod_purchas"``) .. |AggJoiner| replace:: :class:`~skrub.AggJoiner` -.. |AggTarget| replace:: - :class:`~skrub.AggTarget` +.. |Joiner| replace:: + :class:`~skrub.Joiner` + +.. |DropCols| replace:: + :class:`~skrub.DropCols` .. |TableVectorizer| replace:: :class:`~skrub.TableVectorizer` -.. |DatetimeEncoder| replace:: - :class:`~skrub.DatetimeEncoder` +.. |TableReport| replace:: + :class:`~skrub.TableReport` + +.. |MinHashEncoder| replace:: + :class:`~skrub.MinHashEncoder` .. |TargetEncoder| replace:: :class:`~sklearn.preprocessing.TargetEncoder` .. |make_pipeline| replace:: - :class:`~sklearn.pipeline.make_pipeline` + :func:`~sklearn.pipeline.make_pipeline` .. |Pipeline| replace:: :class:`~sklearn.pipeline.Pipeline` -.. |GridSearchCV| replace:: - :class:`~sklearn.model_selection.GridSearchCV` - -.. |TimeSeriesSplit| replace:: - :class:`~sklearn.model_selection.TimeSeriesSplit` - -.. |HGBR| replace:: - :class:`~sklearn.ensemble.HistGradientBoostingRegressor` -""" - -############################################################################### -# The data -# -------- -# -# We begin with loading the ratings table from MovieLens. -# Note that we use the light version (100k rows). -import pandas as pd +.. |HGBC| replace:: + :class:`~sklearn.ensemble.HistGradientBoostingClassifier` -from skrub.datasets import fetch_movielens +.. 
|OrdinalEncoder| replace:: + :class:`~sklearn.preprocessing.OrdinalEncoder` -ratings = fetch_movielens(dataset_id="ratings") -ratings = ratings.X.sort_values("timestamp").reset_index(drop=True) -ratings["timestamp"] = pd.to_datetime(ratings["timestamp"], unit="s") +.. |TunedThresholdClassifierCV| replace:: + :class:`~sklearn.model_selection.TunedThresholdClassifierCV` -X = ratings[["userId", "movieId", "timestamp"]] -y = ratings["rating"] -X.shape, y.shape -############################################################################### -X.head() +.. |CalibrationDisplay| replace:: + :class:`~sklearn.calibration.CalibrationDisplay` -############################################################################### -# Encoding the timestamp with a TableVectorizer -# --------------------------------------------- -# -# Our first step is to extract features from the timestamp, using the -# |TableVectorizer|. Natively, it uses the |DatetimeEncoder| on datetime -# columns, and doesn't interact with numerical columns. -from skrub import DatetimeEncoder, TableVectorizer - -table_vectorizer = TableVectorizer(datetime=DatetimeEncoder(add_weekday=True)) -X_date_encoded = table_vectorizer.fit_transform(X) -X_date_encoded.head() +.. |pandas.melt| replace:: + :func:`~pandas.melt` -############################################################################### -# We can now make a couple of plots and gain some insight on our dataset. 
-import seaborn as sns -from matplotlib import pyplot as plt - -sns.set_style("darkgrid") - - -def make_barplot(x, y, title): - fig, ax = plt.subplots(layout="constrained") - norm = plt.Normalize(y.min(), y.max()) - cmap = plt.get_cmap("magma") - - sns.barplot(x=x, y=y, palette=cmap(norm(y)), ax=ax) - ax.set_title(title) - ax.set_xticks(ax.get_xticks(), labels=ax.get_xticklabels(), rotation=30) - ax.set_ylabel(None) - - -# O is Monday, 6 is Sunday +""" +# %% +from skrub import TableReport +from skrub.datasets import fetch_credit_fraud -daily_volume = X_date_encoded["timestamp_weekday"].value_counts().sort_index() +bunch = fetch_credit_fraud() +products, baskets = bunch.products, bunch.baskets +TableReport(products) -make_barplot( - x=daily_volume.index, - y=daily_volume.values, - title="Daily volume of ratings", -) +# %% +TableReport(baskets) -############################################################################### -# We also display the distribution of our target ``y``. -rating_count = y.value_counts().sort_index() - -make_barplot( - x=rating_count.index, - y=rating_count.values, - title="Distribution of ratings given to movies", -) +# %% +# Naive aggregation +# ----------------- +# +# Let's explore a naive solution first. +# +# .. note:: +# +# Click :ref:`here <agg-joiner-anchor>` to skip this section and see the AggJoiner +# in action! +# +# +# The first idea that comes to mind to merge these two tables is to aggregate the +# product attributes into lists, using their basket IDs. +products_grouped = products.groupby("basket_ID").agg(list) +TableReport(products_grouped) + +# %% +# Then, we can expand all lists into columns, as if we were "flattening" the dataframe. +# We end up with a products dataframe ready to be joined on the baskets dataframe, using +# ``"basket_ID"`` as the join key. 
+import pandas as pd +products_flatten = [] +for col in products_grouped.columns: + cols = [f"{col}{idx}" for idx in range(24)] + products_flatten.append(pd.DataFrame(products_grouped[col].to_list(), columns=cols)) +products_flatten = pd.concat(products_flatten, axis=1) +products_flatten.insert(0, "basket_ID", products_grouped.index) +TableReport(products_flatten) -############################################################################### -# AggTarget: aggregate y, then join -# --------------------------------- +# %% +# Look at the "Stats" section of the |TableReport| above. Does anything strike you? +# +# Not only did we create 144 columns, but most of these columns are filled with NaN, +# which is very inefficient for learning! # -# We have just extracted datetime features from timestamps. +# This is because each basket contains a variable number of products, up to 24, and we +# created one column for each product attribute, for each position (up to 24) in +# the dataframe. # -# Let's now perform an expansion for the target ``y``, by aggregating it before -# joining it back on the main table. The biggest risk of doing target expansion -# with multiple dataframe operations yourself is to end up leaking the target. +# Moreover, if we wanted to replace text columns with encodings, we would create +# :math:`d \times 24 \times 2` columns (encoding of dimensionality :math:`d`, for +# 24 products, for the ``"item"`` and ``"make"`` columns), which would explode the +# memory usage. # -# To solve this, the |AggTarget| transformer allows you to -# aggregate the target ``y`` before joining it on the main table, without -# risk of leaking. Note that to perform aggregation then joining on the features -# ``X``, you need to use |AggJoiner| instead. +# .. _agg-joiner-anchor: # -# You can also think of it as a generalization of the |TargetEncoder|, which -# encodes categorical features based on the target. 
+# AggJoiner +# --------- +# Let's now see how the |AggJoiner| can help us solve this. We begin by splitting our +# basket dataset into a training and a testing set. +from sklearn.model_selection import train_test_split + +X, y = baskets[["ID"]], baskets["fraud_flag"] +X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.1) +X_train.shape, y_train.shape + +# %% +# Before aggregating our product dataframe, we need to vectorize our categorical +# columns. To do so, we use: # -# We only focus on aggregating the target by **users**, but later we will -# also consider aggregating by **movies**. Here, we compute the histogram of the -# target with 3 bins, before joining it back on the initial table. +# - |MinHashEncoder| on "item" and "model" columns, because they both expose typos +# and text similarities. +# - |OrdinalEncoder| on "make" and "goods_code" columns, because they consist of +# orthogonal categories. # -# This feature answer questions like -# *"How many times has this user given a bad, medium or good rate to movies?"*. -from skrub import AggTarget - -agg_target_user = AggTarget( - main_key="userId", - suffix="_user", - operation="hist(3)", +# We bring this logic into a |TableVectorizer| to vectorize these columns in a +# single step. +# See `this example `_ +# for more details about these encoding choices. 
+from sklearn.preprocessing import OrdinalEncoder + +from skrub import MinHashEncoder, TableVectorizer + +vectorizer = TableVectorizer( + high_cardinality=MinHashEncoder(), # encode ["item", "model"] + specific_transformers=[ + (OrdinalEncoder(), ["make", "goods_code"]), + ], ) -X_transformed = agg_target_user.fit_transform(X, y) +products_transformed = vectorizer.fit_transform(products) +TableReport(products_transformed) -X_transformed.shape -############################################################################### -X_transformed.head() - -############################################################################### -# Similarly, we join on ``movieId`` instead of ``userId``. +# %% +# Our objective is now to aggregate this vectorized product dataframe by +# ``"basket_ID"``, then to merge it on the baskets dataframe, still on +# the ``"basket_ID"``. # -# This feature answer questions like -# *"How many times has this movie received a bad, medium or good rate from users?"*. -agg_target_movie = AggTarget( - main_key="movieId", - suffix="_movie", - operation="hist(3)", -) -X_transformed = agg_target_movie.fit_transform(X, y) -X_transformed.shape -############################################################################### -X_transformed.head() - -############################################################################### -# Chaining everything together in a pipeline -# ------------------------------------------ +# .. image:: ../../_static/08_example_aggjoiner.png +# :width: 900 # -# To perform cross-validation and enable hyper-parameter tuning, we gather -# all elements into a scikit-learn |Pipeline| by using |make_pipeline|, -# and define a scikit-learn |HGBR|. 
-from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.pipeline import make_pipeline - -pipeline = make_pipeline( - table_vectorizer, - agg_target_user, - agg_target_movie, - HistGradientBoostingRegressor(learning_rate=0.1, max_depth=4, max_iter=40), -) - -pipeline - -############################################################################### -# Hyper-parameters tuning and cross validation -# -------------------------------------------- +# | +# +# |AggJoiner| can help us achieve exactly this. We need to pass the product dataframe as +# an auxiliary table argument to |AggJoiner| in ``__init__``. The ``aux_key`` argument +# represents both the columns used to group by and the columns used to join on. # -# We can finally create our hyper-parameter search space, and use a -# |GridSearchCV|. We select the cross validation splitter to be -# the |TimeSeriesSplit| to prevent leakage, since our data are timestamped -# logs. +# The basket dataframe is our main table, and we indicate the columns to join on with +# ``main_key``. Note that we pass the main table during ``fit``, and we discuss the +# limitations of this design in the conclusion at the bottom of this notebook. # -# Note that you need the name of the pipeline elements to assign them -# hyper-parameters search. +# The minimum ("min") is the most appropriate operation to aggregate encodings from +# |MinHashEncoder|, for reasons that are out of the scope of this notebook. # -# You can lookup the name of the pipeline elements by doing: -list(pipeline.named_steps) +from skrub import AggJoiner +from skrub import _selectors as s + +# Skrub selectors allow us to select columns using glob patterns, which reduces +# the boilerplate. 
+minhash_cols_query = s.glob("item*") | s.glob("model*") +minhash_cols = s.select(products_transformed, minhash_cols_query).columns + +agg_joiner = AggJoiner( + aux_table=products_transformed, + aux_key="basket_ID", + main_key="ID", + cols=minhash_cols, + operations=["min"], +) +baskets_products = agg_joiner.fit_transform(baskets) +TableReport(baskets_products) -############################################################################### -# Alternatively, you can use scikit-learn |Pipeline| to name your transformers: -# ``Pipeline([("agg_target_user", agg_target_user), ...])`` +# %% +# Now that we understand how to use the |AggJoiner|, we can now assemble our pipeline by +# chaining two |AggJoiner| together: # -# We now perform the grid search over the ``AggTarget`` transformers to find the -# operation maximizing our validation score. -from sklearn.model_selection import GridSearchCV, TimeSeriesSplit - -operations = ["mean", "hist(3)", "hist(5)", "hist(7)", "value_counts"] -param_grid = [ - { - "aggtarget-2__operation": [op], - } - for op in operations -] - -cv = GridSearchCV(pipeline, param_grid, cv=TimeSeriesSplit(n_splits=10)) -cv.fit(X, y) - -results = pd.DataFrame(cv.cv_results_) - -cols = [f"split{idx}_test_score" for idx in range(10)] -results = results.set_index("param_aggtarget-2__operation")[cols].T -results - -############################################################################### -# The score used in this regression task is the R2. Remember that the R2 -# evaluates the relative performance compared to the naive baseline consisting -# in always predicting the mean value of ``y_test``. -# Therefore, the R2 is 0 when ``y_pred = y_true.mean()`` and is upper bounded -# to 1 when ``y_pred = y_true``. 
+# - the first one to deal with the |MinHashEncoder| vectors as we just saw +# - the second one to deal with all the other columns # -# To get a better sense of the learning performances of our simple pipeline, -# we also compute the average rating of each movie in the training set, -# and uses this average to predict the ratings in the test set. -from sklearn.metrics import r2_score - - -def baseline_r2(X, y, train_idx, test_idx): - """Compute the average rating for all movies in the train set, - and map these averages to the test set as a prediction. - - If a movie in the test set is not present in the training set, - we simply predict the global average rating of the training set. - """ - X_train, y_train = X.iloc[train_idx].copy(), y.iloc[train_idx] - X_test, y_test = X.iloc[test_idx], y.iloc[test_idx] - - X_train["y"] = y_train - - movie_avg_rating = X_train.groupby("movieId")["y"].mean().to_frame().reset_index() - - y_pred = X_test.merge(movie_avg_rating, on="movieId", how="left")["y"] - y_pred = y_pred.fillna(y_pred.mean()) - - return r2_score(y_true=y_test, y_pred=y_pred) +# For the second |AggJoiner|, we use the sum, mean, standard deviation, minimum and +# maximum operations to extract a representative summary of each distribution. +# +# |DropCols| is another skrub transformer which removes the "ID" column, which doesn't +# bring any information after the joining operation. 
+from scipy.stats import loguniform, randint +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.pipeline import make_pipeline +from skrub import DropCols + +model = make_pipeline( + AggJoiner( + aux_table=products_transformed, + aux_key="basket_ID", + main_key="ID", + cols=minhash_cols, + operations=["min"], + ), + AggJoiner( + aux_table=products_transformed, + aux_key="basket_ID", + main_key="ID", + cols=["make", "goods_code", "cash_price", "Nbr_of_prod_purchas"], + operations=["sum", "mean", "std", "min", "max"], + ), + DropCols(["ID"]), + HistGradientBoostingClassifier(), +) +model -all_baseline_r2 = [] -for train_idx, test_idx in TimeSeriesSplit(n_splits=10).split(X, y): - all_baseline_r2.append(baseline_r2(X, y, train_idx, test_idx)) +# %% +# We tune the hyper-parameters of the |HGBC| to get a good performance. +from time import time -results.insert(0, "naive mean estimator", all_baseline_r2) +from sklearn.model_selection import RandomizedSearchCV -# we only keep the 5 out of 10 last results -# because the initial size of the train set is rather small -fig, ax = plt.subplots(layout="constrained") -sns.boxplot(results.tail(5), palette="magma", ax=ax) -ax.set_ylabel("R2 score") -ax.set_title("Hyper parameters grid-search results") -plt.tight_layout() +param_distributions = dict( + histgradientboostingclassifier__learning_rate=loguniform(1e-3, 1), + histgradientboostingclassifier__max_depth=randint(3, 9), + histgradientboostingclassifier__max_leaf_nodes=[None, 10, 30, 60, 90], + histgradientboostingclassifier__max_iter=randint(50, 500), +) -############################################################################### -# The naive estimator has a lower performance than our pipeline, which means -# that our extracted features brought some predictive power. 
+tic = time() +search = RandomizedSearchCV( + model, + param_distributions, + scoring="neg_log_loss", + refit=False, + n_iter=10, + cv=3, + verbose=1, +).fit(X_train, y_train) +print(f"This operation took {time() - tic:.1f}s") +# %% +# The best hyper-parameters are: + +pd.Series(search.best_params_) + +# %% +# To benchmark our performance, we plot the cross-validated log loss of our model +# against the test-set log loss of a dummy model that always outputs the observed +# probability of the two classes. +# +# As this dataset is extremely imbalanced, this dummy model should be a good baseline. +# +# The vertical bar represents one standard deviation around the mean of the cross +# validation log-loss. +import seaborn as sns +from matplotlib import pyplot as plt +from sklearn.dummy import DummyClassifier +from sklearn.metrics import log_loss + +results = search.cv_results_ +best_idx = search.best_index_ +log_loss_model_mean = -results["mean_test_score"][best_idx] +log_loss_model_std = results["std_test_score"][best_idx] + +dummy = DummyClassifier(strategy="prior").fit(X_train, y_train) +y_proba_dummy = dummy.predict_proba(X_test) +log_loss_dummy = log_loss(y_true=y_test, y_pred=y_proba_dummy) + +fig, ax = plt.subplots() +ax.bar( + height=[log_loss_model_mean, log_loss_dummy], + x=["AggJoiner model", "Dummy"], + color=["C0", "C4"], +) +for container in ax.containers: + ax.bar_label(container, padding=4) + +ax.vlines( + x="AggJoiner model", + ymin=log_loss_model_mean - log_loss_model_std, + ymax=log_loss_model_mean + log_loss_model_std, + linestyle="-", + linewidth=1, + color="k", +) +sns.despine() +ax.set_title("Log loss (lower is better)") + +# %% +# Conclusion +# ---------- +# With |AggJoiner|, you can bring the aggregation and joining operations within a +# sklearn pipeline, and train models more efficiently. 
+# +# One known limitation of both the |AggJoiner| and |Joiner| is that the auxiliary data +# to join is passed during the ``__init__`` method instead of the ``fit`` method, and +# is therefore fixed once the model has been trained. +# This limitation causes two main issues: +# +# 1. **Bigger model serialization:** Since the dataset has to be pickled along with +# the model, it can result in a massive file size on disk. # -# It seems that using the ``"value_counts"`` as an aggregation operator for -# |AggTarget| yields better performances than using the mean (which is -# equivalent to using the |TargetEncoder|). +# 2. **Inflexibility with new, unseen data in a production environment:** To use new +# auxiliary data, you would need to replace the auxiliary table in the |AggJoiner| that +# was used during ``fit`` with the updated data, which is a rather hacky approach. # -# Here, the number of bins encoding the target is proportional to the -# performance: computing the mean yields a single statistic, whereas histograms -# yield a density over a reduced set of bins, and ``"value_counts"`` yields an -# exhaustive histogram over all the possible values of ratings -# (here 10 different values, from 0.5 to 5). +# These limitations will be addressed later in skrub. diff --git a/examples/FIXME/08_join_aggregation_full.py b/examples/FIXME/08_join_aggregation_full.py new file mode 100644 index 000000000..e1e363e6f --- /dev/null +++ b/examples/FIXME/08_join_aggregation_full.py @@ -0,0 +1,548 @@ +""" +AggJoiner on a credit fraud dataset +=================================== + +In this example, we are tackling a fraudulent loan detection use case. +Because fraud is rare, this dataset is extremely imbalanced, with a prevalence of around +1.4%. + +Instead of focusing on arbitrary metrics like accuracy, we will derive a cost function +based on (questionable) assumptions about the data. 
In a real-world scenario, we would +need to consult with a domain expert within the company to develop a realistic utility +function. + +The data consists of two distinct concepts: a "basket," which can be tagged as fraud (1) +or not (0), and a list of "products." Each product has several attributes: + +- a category (marked by the column ``"item"``), +- a model (``"model"``), +- a brand (``"make"``), +- a merchant code (``"goods_code"``), +- a price per unit (``"cash_price"``), +- a quantity selected in the basket (``"Nbr_of_prod_purchas"``) + +Since the number of products in each basket varies, the creators of this dataset have +chosen to join all products and their attributes with their respective basket. They have +arbitrarily decided to cut off the basket at the 24th product. However, since most +baskets contain only one or two products, a large proportion of the columns are empty. +Therefore, the dataset is very sparse, which is challenging from a machine learning +perspective and also inefficient in terms of memory usage. + +.. |AggJoiner| replace:: + :class:`~skrub.AggJoiner` + +.. |Joiner| replace:: + :class:`~skrub.Joiner` + +.. |TableVectorizer| replace:: + :class:`~skrub.TableVectorizer` + +.. |MinHashEncoder| replace:: + :class:`~skrub.MinHashEncoder` + +.. |TargetEncoder| replace:: + :class:`~sklearn.preprocessing.TargetEncoder` + +.. |make_pipeline| replace:: + :func:`~sklearn.pipeline.make_pipeline` + +.. |Pipeline| replace:: + :class:`~sklearn.pipeline.Pipeline` + +.. |HGBC| replace:: + :class:`~sklearn.ensemble.HistGradientBoostingClassifier` + +.. |TunedThresholdClassifierCV| replace:: + :class:`~sklearn.model_selection.TunedThresholdClassifierCV` + +.. |CalibrationDisplay| replace:: + :class:`~sklearn.calibration.CalibrationDisplay` + +.. |pandas.melt| replace:: + :func:`~pandas.melt` + +""" + +# %% +# The data +# -------- +# +# We begin with loading the table from figshare. It has around 100k rows. 
+from skrub.datasets import fetch_figshare + +X = fetch_figshare("48931237").X + +# %% +# The total price is the sum of the price per unit of each product in the basket, +# multiplied by their quantity. This will also allow us to define a utility function +# later, in addition of being a useful feature for the learner. +import numpy as np +import pandas as pd + +from skrub import TableReport + + +def total_price(X): + total_price = pd.Series(np.zeros(X.shape[0]), index=X.index, name="total_price") + max_item = 24 + for idx in range(1, max_item + 1): + total_price += X[f"cash_price{idx}"].fillna(0) * X[ + f"Nbr_of_prod_purchas{idx}" + ].fillna(0) + + return total_price + + +X["total_price"] = total_price(X) +TableReport(X) + +# %% +# Metrics +# ------- +# +# To consider the problem from a business perspective, we define our utility function +# by the cost matrix in the function ``credit_gain_score``. False positive and false +# negative predictions incur a negative gain. +# +# Ultimately, we want to maximize this metric. To do so, we can train our learner to +# minimize a proper scoring rule like the log loss. +import sklearn +from sklearn.metrics import log_loss, make_scorer + + +def credit_gain_score(y_true, y_pred, amount): + """Define our utility function. + + These numbers are entirely made-up, don't try this at home! 
+ """ + mask_tn = (y_true == 0) & (y_pred == 0) + mask_fp = (y_true == 0) & (y_pred == 1) + mask_fn = (y_true == 1) & (y_pred == 0) + + # Refusing a fraud yields 0 € + fraudulent_refuse = 0 + + # Accepting a fraud costs its whole amount + fraudulent_accept = -amount[mask_fn].sum() + + # Refusing a legitimate basket transactions cost 5 € + legitimate_refuse = mask_fp.sum() * -5 + + # Accepting a legitimate basket transaction yields 7% of its amount + legitimate_accept = (amount[mask_tn] * 0.07).sum() + + return fraudulent_refuse + fraudulent_accept + legitimate_refuse + legitimate_accept + + +def get_results(model, X_test, y_test, threshold, amount, time_to_fit): + y_proba = model.predict_proba(X_test)[:, 1] + return { + "log_loss": log_loss(y_test, y_proba), + "gain_score": credit_gain_score(y_test, y_proba > threshold, amount), + "y_proba": y_proba, + "y_test": y_test, + "time_to_fit": time_to_fit, + } + + +sklearn.set_config(enable_metadata_routing=True) +gain_score = make_scorer(credit_gain_score).set_score_request(amount=True) + +results = dict() + +# %% +# Dummy model +# ----------- +# +# We first evaluate the performance of a dummy model that always predict the negative +# class (i.e. all transactions are legit). +# This is a good sanity check to make sure our model actually learns something useful. 
+from time import time + +from sklearn.dummy import DummyClassifier +from sklearn.model_selection import train_test_split + +target_col = "fraud_flag" +X_ = X.drop(columns=[target_col]) +y_ = X[target_col] + +X_train, X_test, y_train, y_test = train_test_split( + X_, + y_, + test_size=0.1, + stratify=y_, + random_state=0, +) + +tic = time() +dummy_negative = DummyClassifier(strategy="constant", constant=0).fit(X_train, y_train) +time_to_fit = time() - tic + +results["Dummy Negative"] = get_results( + dummy_negative, + X_test, + y_test, + threshold=0.5, + amount=X_test["total_price"], + time_to_fit=time_to_fit, +) + +# %% +# Low effort estimator +# -------------------- +# +# Next, we use the |TableVectorizer| and a |HGBC| to create a very simple baseline model +# that uses the sparse dataset directly. Note that due to the large number of high +# cardinality columns, we can't use a multi-dimensional encoder like the +# |MinHashEncoder|, because the number of columns would then explode. +# +# Instead, we encode our categories with a |TargetEncoder|. +# +# We also further split the training set into a training and validation set for +# threshold tuning in the post-training phase below. +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import TargetEncoder + +from skrub import TableVectorizer + +X_train_, X_val, y_train_, y_val = train_test_split( + X_train, y_train, test_size=0.1, stratify=y_train, random_state=0 +) + +low_effort = make_pipeline( + TableVectorizer( + high_cardinality=TargetEncoder(), + ), + HistGradientBoostingClassifier(), +) + +tic = time() +low_effort.fit(X_train_, y_train_) +time_to_fit = time() - tic + +# %% +# To maximise our utility function, we have to find the best classification threshold to +# replace the default at 0.5. |TunedThresholdClassifierCV| is a scikit-learn +# meta-estimator that is designed for this exact purpose. 
# More details in this `example from scikit-learn
# <https://scikit-learn.org/stable/auto_examples/model_selection/plot_cost_sensitive_learning.html>`_.
#
# We give it our trained model, and fit it on the validation dataset instead of the
# training dataset to avoid overfitting. Notice that the scoring method is the utility
# function, to which we pass the amount in ``fit``.
from sklearn.model_selection import TunedThresholdClassifierCV

low_effort_tuned = TunedThresholdClassifierCV(
    low_effort, cv="prefit", scoring=gain_score, refit=False
).fit(X_val, y_val, amount=X_val["total_price"])

results["Low effort"] = get_results(
    low_effort,
    X_test,
    y_test,
    threshold=low_effort_tuned.best_threshold_,
    amount=X_test["total_price"],
    time_to_fit=time_to_fit,
)

# %%
# We define some plotting functions to display our results.
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.calibration import CalibrationDisplay


def plot_gain_tradeoff(results):
    """Scatter plot of the score gain (y) vs the fit time (x) for each model.

    Parameters
    ----------
    results : dict
        Maps an estimator name to a dict of results. Each inner dict must provide
        the ``"time_to_fit"`` and ``"gain_score"`` entries. The inner dicts are
        left untouched.
    """
    # Build one row per estimator from a *copy* of its results, so that the caller's
    # dicts do not silently gain an "estimator_name" key.
    rows = [
        {**result, "estimator_name": estimator_name}
        for estimator_name, result in results.items()
    ]
    df = pd.DataFrame(rows)

    names = df["estimator_name"].values
    palette = dict(zip(names, sns.color_palette("colorblind", n_colors=len(names))))

    _, ax = plt.subplots(figsize=(5, 4), dpi=100)
    sns.scatterplot(
        df,
        x="time_to_fit",
        y="gain_score",
        hue="estimator_name",
        style="estimator_name",
        ax=ax,
        palette=palette,
        s=200,
    )
    ax.grid()

    # Put the x ticks exactly on the measured fit times.
    ticks = df["time_to_fit"].round(3).tolist()
    labels = [f"{tick}s" for tick in ticks]
    ax.set_xticks(ticks, labels)

    # Put the y ticks on the measured gains, plus a fixed 650,000 tick, presumably
    # added as an intermediate reference value (TODO confirm).
    ticks = df["gain_score"].round().tolist()
    ticks.insert(1, 650_000)
    labels = [f"{tick:,} €" for tick in ticks]

    ax.set_yticks(ticks, labels)
    ax.set_ylabel("Gain score")
    ax.set_xlabel("Time to fit")
    ax.set_title("Gain score vs Time to fit")
    plt.tight_layout()


def plot_calibration_curve(results):
    """Plot a calibration curve and the log-loss.

    Parameters
    ----------
    results : dict
        Maps an estimator name to a dict of results. Each inner dict must provide
        the ``"log_loss"``, ``"y_test"`` and ``"y_proba"`` entries.
    """
    estimator_names = list(results)
    palette = dict(
        zip(
            estimator_names,
            sns.color_palette("colorblind", n_colors=len(estimator_names)),
        )
    )
    _, ax = plt.subplots(figsize=(6, 4), dpi=100)
    for name, result in results.items():
        log_loss = str(round(result["log_loss"], 4))
        label = f"{name}, log_loss: {log_loss}"
        CalibrationDisplay.from_predictions(
            y_true=result["y_test"],
            y_prob=result["y_proba"],
            strategy="quantile",
            label=label,
            ax=ax,
            color=palette[name],
            n_bins=15,
        )
    # Restrict both axes to the low-probability region.
    ax.set_xlim([-0.001, 0.13])
    ax.set_ylim([-0.001, 0.13])
    ax.set_title("Calibration curve")


# %%
# We see below that the low effort classifier significantly improves our gains compared
# to the dummy baseline. The former is of course slower to train than the latter.

plot_gain_tradeoff(results)


# %%
# We also evaluate the calibration of both models. As very few samples are
# positive, we can expect all probabilities to be close to 0. We therefore have to
# zoom in on that region, and use the "quantile" strategy of |CalibrationDisplay| in
# order to create bins containing an equal number of samples.

plot_calibration_curve(results)


# %%
# Agg-Joiner based estimator
# --------------------------
#
# We first need to split the dataframe between a dataframe representing baskets and a
# dataframe representing products. In other words, we need to revert the join operation
# performed by the creator of this dataset. Conceptually, this is close to a
# |pandas.melt| operation.
#
# Note that we don't keep the product ordering information, which is probably not an
# important feature here.
def get_columns_at(idx, cols_2_idx):
    """Return the positions of the columns describing the idx-th product.

    Parameters
    ----------
    idx : int
        Number of the product slot, used as a suffix of the column names
        (e.g. ``item1``, ``cash_price1``).
    cols_2_idx : dict
        Maps each column name to its position in the table.

    Returns
    -------
    list of int
        Positions of, in this order: ``ID``, the target column, and the ``item``,
        ``cash_price``, ``make``, ``model``, ``goods_code`` and
        ``Nbr_of_prod_purchas`` columns of the idx-th product.

    Raises
    ------
    KeyError
        If one of these columns is missing from ``cols_2_idx``.
    """
    cols = [
        "ID",
        target_col,
        f"item{idx}",
        f"cash_price{idx}",
        f"make{idx}",
        f"model{idx}",
        f"goods_code{idx}",
        f"Nbr_of_prod_purchas{idx}",
    ]
    return [cols_2_idx[col] for col in cols]


def melt_multi_columns(X, max_items=24):
    """Create a dataframe where each product is a row.

    Parameters
    ----------
    X : pandas.DataFrame
        Wide table with one row per basket. It must contain the ``ID``,
        ``Nb_of_items`` and target columns, plus the suffixed product columns
        read by ``get_columns_at``. ``Nb_of_items`` must hold integers, as it is
        used as a loop bound.
    max_items : int, default=24
        Maximum number of product slots read for a basket, whatever its
        ``Nb_of_items`` value. The default is presumably the number of product
        slots available in the dataset (TODO confirm).

    Returns
    -------
    pandas.DataFrame
        One row per (basket, product) pair, with the columns ``ID``, the target,
        ``item``, ``cash_price``, ``make``, ``model``, ``goods_code`` and
        ``Nbr_of_prod_purchas``. Missing ``make`` and ``model`` values are
        replaced by the string ``"None"``.
    """
    cols_2_idx = {col: position for position, col in enumerate(X.columns)}

    # These lookups do not depend on the row, so they are resolved once: the
    # position of the item count, and (lazily) the column positions of each slot.
    n_items_position = cols_2_idx["Nb_of_items"]
    positions_by_slot = {}

    products = []
    for row in X.values:
        n_products = min(row[n_items_position], max_items)
        for idx in range(1, n_products + 1):
            if idx not in positions_by_slot:
                positions_by_slot[idx] = get_columns_at(idx, cols_2_idx)
            products.append(row[positions_by_slot[idx]])

    cols = [
        "ID",
        target_col,
        "item",
        "cash_price",
        "make",
        "model",
        "goods_code",
        "Nbr_of_prod_purchas",
    ]

    products = pd.DataFrame(products, columns=cols)

    for col in ["make", "model"]:
        products[col] = products[col].fillna("None")

    return products


# Re-attach the target to each split, so that it is carried along with the basket rows.
X_train_[target_col] = y_train_
X_val[target_col] = y_val
X_test[target_col] = y_test

# A basket is described by its ID, its total price and the target.
baskets_train = X_train_[["ID", "total_price", target_col]]
baskets_val = X_val[["ID", "total_price", target_col]]
baskets_test = X_test[["ID", "total_price", target_col]]

# NOTE(review): the products table is built from the full ``X``, i.e. from the rows of
# the train, validation and test splits alike, and it carries the target column.
# Confirm this is intended.
products = melt_multi_columns(X)

TableReport(products)

# %%
# We have to aggregate the products dataframe before joining it back to the basket
# dataframe. Prior to that, we need to apply some preprocessing to deal with
# the high cardinality columns. Since these columns have some morphological variations
# and typos, we use the |MinHashEncoder|.
#
# ``goods_code`` is slightly different, as it represents some merchant IDs, which
# co-occur for different products. Therefore, we encode it with a |TargetEncoder| as
# we previously did.
#
# To later perform the join operation, we must keep the basket ``ID`` with
# ``"passthrough"``.
from skrub import MinHashEncoder


def get_X_y(data):
    """Return ``data`` without the target column, and the target column itself."""
    return data.drop(columns=[target_col]), data[target_col]


# The timer covers both the construction and the fit of the vectorizer.
tic = time()
vectorizer = TableVectorizer(
    high_cardinality=MinHashEncoder(),  # applied on ["item", "model", "make"]
    specific_transformers=[
        (TargetEncoder(), ["goods_code"]),
        ("passthrough", ["ID"]),
    ],
)

# The target is passed along with the features to ``fit_transform``.
products_transformed = vectorizer.fit_transform(*get_X_y(products))
time_to_fit = time() - tic

TableReport(products_transformed)

# %%
# Let's now detail how to leverage |AggJoiner| here. We have just encoded each product's
# attributes, and now we need to somehow aggregate these product encodings into their
# respective baskets.
#
# By aggregating instead of concatenating, we obtain an invariant number of columns,
# and we remove the sparsity of the dataset.
#
# But which aggregation operation should we choose? Since the |MinHashEncoder| hashes
# ngrams with different hashing functions and returns their minimum, it makes sense to
# aggregate different product encodings using their **minimum** for each dimension.
# You can view MinHash minimums as activations.
#
# For numeric columns and columns encoded with the |TargetEncoder|, we take the mean,
# sum, standard deviation, minimum and maximum to extract a representative summary of
# each distribution.
#
# We can apply these two sets of operations by chaining together two |AggJoiner| in
# a |Pipeline| using |make_pipeline|. We also make use of skrub selectors to select
# columns with the ``glob`` syntax.
#
# We need to pass the product dataframe as an auxiliary table argument to AggJoiner
# in ``__init__``. The basket dataframe is our main table, and we pass it during
# ``fit``. We discuss the limitations of this design in the conclusion at the bottom
# of this notebook.
#
# Let's display the output of this preprocessing pipeline.
from sklearn.pipeline import make_pipeline

from skrub import AggJoiner
from skrub import _selectors as s

# The join key, plus every column whose name matches ``item_*``, ``model_*`` or
# ``make_*``.
minhash_cols = "ID" | s.glob("item_*") | s.glob("model_*") | s.glob("make_*")
# The join key, plus the remaining product columns.
single_cols = ["ID", "goods_code", "Nbr_of_prod_purchas", "cash_price"]

# Two chained aggregate-then-join steps on the basket ``ID``: the first one only takes
# the minimum, the second one computes several summary statistics.
pipe_agg_joiner = make_pipeline(
    AggJoiner(
        aux_table=s.select(products_transformed, minhash_cols),
        key="ID",
        operations=["min"],
    ),
    AggJoiner(
        aux_table=s.select(products_transformed, single_cols),
        key="ID",
        operations=["mean", "sum", "std", "min", "max"],
    ),
)
basket_train_transformed = pipe_agg_joiner.fit_transform(baskets_train)

TableReport(basket_train_transformed)

# %%
# Now that we get a sense of how the |AggJoiner| can help us, we complete this pipeline
# with a |HGBC| and evaluate our final model.

tic = time()
agg_join_estimator = make_pipeline(
    pipe_agg_joiner,
    HistGradientBoostingClassifier(),
).fit(*get_X_y(baskets_train))
# Added to (not replacing) ``time_to_fit``, which already holds the time spent fitting
# the products vectorizer.
time_to_fit += time() - tic

agg_join_tuned = TunedThresholdClassifierCV(
    agg_join_estimator, cv="prefit", scoring=gain_score, refit=False
).fit(*get_X_y(baskets_val), amount=baskets_val["total_price"])

# NOTE(review): the tuned wrapper is passed here, whereas the "Low effort" entry was
# computed from the untuned pipeline with the tuned threshold. Confirm that
# ``get_results`` handles both the same way.
results["Agg Joiner"] = get_results(
    agg_join_tuned,
    *get_X_y(baskets_test),
    threshold=agg_join_tuned.best_threshold_,
    amount=baskets_test["total_price"],
    time_to_fit=time_to_fit,
)

# %%
# Not only did we improve the gains, but this operation is also much faster than the
# naive low effort!

plot_gain_tradeoff(results)

# %%
# We see that the agg-joiner model is slightly more calibrated, with a lower (better)
# log loss.

plot_calibration_curve(results)

# %%
# Conclusion
# ----------
#
# Many problems involve tables where IDs have a one-to-many relationship. To simplify
# aggregate-then-join operations for machine learning, we can include the |AggJoiner|
# in our pipeline.
+# +# One known limitation of both the |AggJoiner| and |Joiner| is that the auxiliary data +# to join is passed during the ``__init__`` method instead of the ``fit`` method, and +# is therefore fixed once the model has been trained. +# This limitation causes two main issues: +# +# 1. **Inefficient model serialization:** Since the dataset has to be pickled along with +# the model, it can result in a massive file size on disk. +# +# 2. **Inflexibility with new, unseen data in a production environment:** To use new +# auxiliary data, you would need to replace the auxiliary table in the AggJoiner that +# was used during ``fit`` with the updated data, which is a rather hacky approach. +# +# These limitations will be addressed later in skrub. diff --git a/pixi.lock b/pixi.lock index 423522acd..fcb8f57e1 100644 --- a/pixi.lock +++ b/pixi.lock @@ -1873,11 +1873,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240116.2-cxx17_he02047a_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-had3b6fe_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-hf54134d_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240722.0-cxx17_h5888daf_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-hef0f6b3_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_17_cpu.conda + - conda:
https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-he882d9a_17_cpu.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-24_linux64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda @@ -1906,9 +1906,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h435de7b_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.62.2-h15f2491_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h438788a_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.65.5-hf5c653b_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-24_linux64_openblas.conda @@ -1917,12 +1917,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_hac2b453_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_0.conda - - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_17_cpu.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h2d7952a_2.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-4.25.3-hd5b35b9_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-h5a48ba9_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.27.5-h5b01275_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-hbbce691_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.1.0-hc0a3c3a_1.conda @@ -1950,7 +1950,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h669347b_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h690cf93_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py312hf9745cd_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda @@ -1976,7 +1976,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.7.2-hadfd74e_5.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h7f4b329_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h77b4e00_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.3-h7b32b05_0.conda @@ -3038,11 +3038,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240116.2-cxx17_he02047a_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-had3b6fe_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-hf54134d_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240722.0-cxx17_h5888daf_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-hef0f6b3_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-he882d9a_17_cpu.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-24_linux64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda @@ -3071,9 +3071,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h435de7b_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.62.2-h15f2491_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h438788a_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.65.5-hf5c653b_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-24_linux64_openblas.conda @@ -3082,12 +3082,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_hac2b453_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_17_cpu.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h2d7952a_2.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-4.25.3-hd5b35b9_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-h5a48ba9_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.27.5-h5b01275_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-hbbce691_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.20-h4ab18f5_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda @@ -3125,7 +3125,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h669347b_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h690cf93_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/overrides-7.7.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py311h7db5c69_1.conda @@ -3172,7 +3172,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-26.2.0-py311h7deb3e3_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.7.2-hadfd74e_5.conda - - conda: 
https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h7f4b329_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h77b4e00_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.35.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda @@ -4217,11 +4217,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240116.2-cxx17_he02047a_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-had3b6fe_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_16_cpu.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-hf54134d_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240722.0-cxx17_h5888daf_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-hef0f6b3_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-he882d9a_17_cpu.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-24_linux64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda @@ -4250,9 +4250,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h435de7b_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.62.2-h15f2491_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h438788a_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.65.5-hf5c653b_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-24_linux64_openblas.conda @@ -4261,12 +4261,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_hac2b453_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_17_cpu.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h2d7952a_2.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-4.25.3-hd5b35b9_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-h5a48ba9_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.27.5-h5b01275_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-hbbce691_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.20-h4ab18f5_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda @@ -4299,7 +4299,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h669347b_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h690cf93_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/overrides-7.7.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py312hf9745cd_1.conda @@ -4334,7 +4334,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-26.2.0-py312hbf22597_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.7.2-hadfd74e_5.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h7f4b329_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h77b4e00_3.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/referencing-0.35.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda @@ -5591,11 +5591,30 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.12-h4ab18f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.7.31-h57bd9a3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.7.4-hfd43aa1_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.9.28-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.2.19-h756ea98_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.4.3-h29ce20c_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.8.10-h5e77a74_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.14.18-h33ff4e5_10.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.10.6-h02abb05_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.6.6-h834ce55_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.1.19-h756ea98_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.1.20-h756ea98_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.28.3-h469002c_5.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.407-h9f1560d_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.13.0-h935415a_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.8.0-hd126650_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.12.0-hd2e3451_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.7.0-h10ac4d7_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.11.0-h325d260_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py312h2ec8cdc_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.33.1-heb4867d_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-hebfffa5_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2024.8.30-pyhd8ed1ab_0.conda @@ -5620,6 +5639,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.54.1-py312h66e93f0_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-h5888daf_1005.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hda332d3_1.conda @@ 
-5637,6 +5658,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240722.0-cxx17_h5888daf_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-hef0f6b3_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_17_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-he882d9a_17_cpu.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-24_linux64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hb9d3cd8_2.conda @@ -5644,11 +5670,15 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-24_linux64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp19.1-19.1.0-default_hb5137d0_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libclang13-19.1.0-default_h9c6a7e4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.10.1-hbbe4b11_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.123-hb9d3cd8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2 - 
conda: https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.3-h5888daf_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.1.0-h77fa898_1.conda @@ -5661,20 +5691,30 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h438788a_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.65.5-hf5c653b_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-24_linux64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libllvm19-19.1.0-ha7bfdaf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_hac2b453_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libopengl-1.7.0-ha4b6fd6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_17_cpu.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h2d7952a_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.27.5-h5b01275_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-hbbce691_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.1.0-hc0a3c3a_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-h4852527_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.20.0-h0e7cc3e_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-h6565414_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.8.0-h166bdaf_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda @@ -5683,6 +5723,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-he7c6b58_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.39-h76b75d6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py312h66e93f0_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.9.2-py312h7900ff3_1.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.9.2-py312hd3ec401_1.conda @@ -5694,13 +5735,17 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h690cf93_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.3-py312hf9745cd_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pillow-10.4.0-py312h56024de_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/polars-1.8.2-py312hfe7c9be_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-17.0.0-py312h9cebb41_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-17.0.0-py312h9cafe31_1_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda @@ -5716,12 +5761,15 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.7.2-hadfd74e_5.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h77b4e00_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.3-h7b32b05_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/scikit-learn-1.5.2-py312h7a48858_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/scipy-1.14.1-py312h7d485d2_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-ha2e4443_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-8.0.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_0.conda @@ -5766,11 +5814,30 @@ environments: - pypi: . 
osx-64: - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-auth-0.7.31-hb28a666_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-cal-0.7.4-h8128ea2_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-common-0.9.28-h00291cd_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-compression-0.2.19-h8128ea2_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-event-stream-0.4.3-hcd1ed9e_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-http-0.8.10-h2f86973_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-io-0.14.18-hf9a0f1c_10.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-mqtt-0.10.6-h9d7d61c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-s3-0.6.6-hd01826e_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-c-sdkutils-0.1.19-h8128ea2_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-checksums-0.1.20-h8128ea2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-crt-cpp-0.28.3-h21c617a_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/aws-sdk-cpp-1.11.407-h2e282c2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-core-cpp-1.13.0-hf8dbe3c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-identity-cpp-1.8.0-h60298e3_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-storage-blobs-cpp-12.12.0-h646f05d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-storage-common-cpp-12.7.0-hf91904f_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/azure-storage-files-datalake-cpp-12.11.0-h14965f0_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h00291cd_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h00291cd_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.1.0-py312h5861a67_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-hfdf4475_7.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/c-ares-1.33.1-h44e7173_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2024.8.30-h8857fd0_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2024.8.30-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/cffi-1.17.1-py312hf857d28_0.conda @@ -5784,38 +5851,65 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.54.1-py312hb553811_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h60636b9_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/gflags-2.2.2-hac325c4_1005.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/glog-0.7.1-h2790a97_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.0.0-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.0.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/icu-75.1-h120a0e1_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.7-py312hc5c4d5f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/krb5-1.21.3-h37d8d59_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.16-ha2f27b4_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/libabseil-20240116.2-cxx17_hf036a51_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-17.0.0-h74c41f6_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-acero-17.0.0-hac325c4_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-dataset-17.0.0-hac325c4_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libarrow-substrait-17.0.0-hba007a9_16_cpu.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-22_osx64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h00291cd_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h00291cd_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h00291cd_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-22_osx64_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libcrc32c-1.1.2-he49afe7_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/libcurl-8.10.1-h58e7537_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libcxx-19.1.0-hf95d169_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.21-hfdf4475_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libedit-3.1.20191231-h0678c8f_2.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/libev-4.33-h10d778d_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libevent-2.1.12-ha90c15b_1.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.6.3-hac325c4_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.2-h0d85af4_5.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-13_2_0_h97931a8_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-13.2.0-h2873a65_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgoogle-cloud-2.29.0-hecd3d69_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgoogle-cloud-storage-2.29.0-h8126ed0_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libgrpc-1.62.2-h384b2fc_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hd75f5a5_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.0.0-h0dc2134_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-22_osx64_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libnghttp2-1.58.0-h64cf6d3_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libopenblas-0.3.27-openmp_h8869122_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libparquet-17.0.0-hf1b0f52_16_cpu.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.44-h4b8f8c9_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libprotobuf-4.25.3-hd4aba4c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libre2-11-2023.09.01-h81f5012_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.46.1-h4b8f8c9_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libssh2-1.11.0-hd019ec5_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libthrift-0.20.0-h75589b3_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.7.0-h5f227bf_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libutf8proc-2.8.0-hb7f2c08_0.tar.bz2 - conda: 
https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.4.0-h10d778d_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.17.0-hf1f96e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.12.7-heaf3512_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.3.1-h87427d6_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-18.1.8-h15ab845_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/lz4-c-1.9.4-hf0c8a7f_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/markupsafe-2.1.5-py312hb553811_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.9.2-py312hb401068_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.9.2-py312h30cc4df_1.conda @@ -5825,11 +5919,15 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.2-h7310d3a_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/openssl-3.3.2-hd23fc13_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/orc-2.0.2-h22b2039_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.3-py312h98e817e_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/pillow-10.4.0-py312h683ea77_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/polars-1.8.2-py312h088783b_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-h00291cd_1002.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyarrow-17.0.0-py312h0be7463_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/pyarrow-core-17.0.0-py312h63b501a_1_cpu.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda @@ -5843,12 +5941,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.12-5_cp312.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/qhull-2020.2-h3c5361c_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/re2-2023.09.01-hb168e87_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h9e318b2_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/scikit-learn-1.5.2-py312h9d777eb_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/scipy-1.14.1-py312he82a568_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/snappy-1.2.1-he1e6707_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-8.0.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_0.conda @@ -5873,11 +5973,30 @@ environments: - pypi: . 
osx-arm64: - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-auth-0.7.31-hc27b277_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-cal-0.7.4-h41dd001_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-common-0.9.28-hd74edd7_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-compression-0.2.19-h41dd001_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-event-stream-0.4.3-h40a8fc1_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-http-0.8.10-hf5a2c8c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-io-0.14.18-hc3cb426_10.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-mqtt-0.10.6-h3acc7b9_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-s3-0.6.6-hd16c091_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-sdkutils-0.1.19-h41dd001_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-checksums-0.1.20-h41dd001_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-crt-cpp-0.28.3-hdde83a9_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-sdk-cpp-1.11.407-h0455a66_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-core-cpp-1.13.0-hd01fc5c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-identity-cpp-1.8.0-h13ea094_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-storage-blobs-cpp-12.12.0-hfde595f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-storage-common-cpp-12.7.0-hcf3b6fd_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/azure-storage-files-datalake-cpp-12.11.0-h082e32e_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/brotli-1.1.0-hd74edd7_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-bin-1.1.0-hd74edd7_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.1.0-py312hde4cb15_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h99b78c6_7.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.33.1-hd74edd7_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.8.30-hf0a4a13_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2024.8.30-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/cffi-1.17.1-py312h0fad829_0.conda @@ -5891,38 +6010,65 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/fonttools-4.54.1-py312h024a12e_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/freetype-2.12.1-hadb7bae_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/gflags-2.2.2-hf9b8971_1005.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/glog-0.7.1-heb240a5_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/hpack-4.0.0-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.0.1-pyhd8ed1ab_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/icu-75.1-hfee45f7_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/kiwisolver-1.4.7-py312h6142ec9_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.16-ha0e7c42_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-h9a09cb3_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libabseil-20240116.2-cxx17_h00cdb27_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-17.0.0-hc6a7651_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-acero-17.0.0-hf9b8971_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-dataset-17.0.0-hf9b8971_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-substrait-17.0.0-hbf8b706_16_cpu.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libblas-3.9.0-24_osxarm64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlicommon-1.1.0-hd74edd7_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlidec-1.1.0-hd74edd7_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlienc-1.1.0-hd74edd7_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcblas-3.9.0-24_osxarm64_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcrc32c-1.1.2-hbdafb3b_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.10.1-h13a7ad3_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-19.1.0-ha82da77_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libdeflate-1.21-h99b78c6_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libedit-3.1.20191231-hc8eb9b7_2.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libev-4.33-h93a5062_2.conda + 
- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libevent-2.1.12-h2757513_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.3-hf9b8971_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran-5.0.0-13_2_0_hd922786_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran5-13.2.0-hf226fd6_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgoogle-cloud-2.29.0-hfa33a2f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgoogle-cloud-storage-2.29.0-h90fd6fa_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgrpc-1.62.2-h9c18a4f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libiconv-1.17-h0d3ecfb_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libjpeg-turbo-3.0.0-hb547adb_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblapack-3.9.0-24_osxarm64_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libnghttp2-1.58.0-ha4dd798_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopenblas-0.3.27-openmp_h517c56d_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libparquet-17.0.0-hf0ba9ef_16_cpu.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libpng-1.6.44-hc14010f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libprotobuf-4.25.3-hc39d83c_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libre2-11-2023.09.01-h7b2c953_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.46.1-hc14010f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libssh2-1.11.0-h7a5bd25_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libthrift-0.20.0-h64651cc_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libtiff-4.7.0-h9c1d414_0.conda 
+ - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libutf8proc-2.8.0-h1a8c8d9_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libwebp-base-1.4.0-h93a5062_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxcb-1.17.0-hdb1d25a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libxml2-2.12.7-h01dff8b_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-hfb2fe0b_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-18.1.8-hde57baf_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lz4-c-1.9.4-hb7217d7_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/markupsafe-2.1.5-py312h024a12e_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-3.9.2-py312h1f38498_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/matplotlib-base-3.9.2-py312h9bd0bc6_1.conda @@ -5932,11 +6078,15 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openjpeg-2.5.2-h9f1df11_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.2-h8359307_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orc-2.0.2-h75dedd0_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pandas-2.2.3-py312hcd31e36_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pillow-10.4.0-py312h8609ca0_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/polars-1.8.2-py312hcc4db84_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pthread-stubs-0.4-hd74edd7_1002.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-17.0.0-py312ha814d7c_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-core-17.0.0-py312he20ac61_1_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda @@ -5950,12 +6100,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python_abi-3.12-5_cp312.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/qhull-2020.2-h420ef59_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/re2-2023.09.01-h4cba328_2.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scikit-learn-1.5.2-py312h387f99c_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scipy-1.14.1-py312heb3a901_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/snappy-1.2.1-hd02b534_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-8.0.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_0.conda @@ -5981,11 +6133,25 @@ environments: win-64: - conda: https://conda.anaconda.org/conda-forge/win-64/_openmp_mutex-4.5-2_gnu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-1.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-auth-0.7.31-hce3b56f_0.conda + 
- conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-cal-0.7.4-hf1fc857_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-common-0.9.28-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-compression-0.2.19-hf1fc857_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-event-stream-0.4.3-hd0ca3c1_2.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-http-0.8.10-heca9ddf_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-io-0.14.18-h3831a8d_10.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-mqtt-0.10.6-hf27581b_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-s3-0.6.6-h56e9fbd_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-c-sdkutils-0.1.19-hf1fc857_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-checksums-0.1.20-hf1fc857_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-crt-cpp-0.28.3-hd65be8e_5.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/aws-sdk-cpp-1.11.407-h25dd3c2_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-h2466b09_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-h2466b09_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.1.0-py312h275cf98_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h2466b09_7.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/c-ares-1.33.1-h2466b09_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2024.8.30-h56e8100_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/cairo-1.18.0-h32b962e_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/certifi-2024.8.30-pyhd8ed1ab_0.conda @@ -6025,31 +6191,49 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.3-hdf4eb48_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lcms2-2.16-h67d730c_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/win-64/libabseil-20240116.2-cxx17_he0c23c2_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libarrow-17.0.0-h5bcb882_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libarrow-acero-17.0.0-he0c23c2_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libarrow-dataset-17.0.0-he0c23c2_16_cpu.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libarrow-substrait-17.0.0-h1f0e801_16_cpu.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-24_win64_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-h2466b09_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-h2466b09_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-h2466b09_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-24_win64_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libclang13-19.1.0-default_ha5278ca_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcrc32c-1.1.2-h0e60522_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/win-64/libcurl-8.10.1-h1ee3ff0_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.21-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libevent-2.1.12-h3671451_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.6.3-he0c23c2_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/win-64/libgcc-14.1.0-h1383e82_1.conda - conda: 
https://conda.anaconda.org/conda-forge/win-64/libglib-2.82.1-h7025463_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libgomp-14.1.0-h1383e82_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libgoogle-cloud-2.29.0-h5e7cea3_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libgoogle-cloud-storage-2.29.0-he5eb982_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libgrpc-1.62.2-h5273850_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-hcfcfb64_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libintl-0.22.5-h5728263_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-3.0.0-hcfcfb64_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-24_win64_mkl.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libparquet-17.0.0-ha915800_16_cpu.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.44-h3ca93ac_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libprotobuf-4.25.3-h47a098d_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libre2-11-2023.09.01-hf8d8778_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.46.1-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libssh2-1.11.0-h7dfc565_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libthrift-0.20.0-hbe90ef8_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libtiff-4.7.0-hb151862_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libutf8proc-2.8.0-h82a8f57_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.4.0-hcfcfb64_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libwinpthread-12.0.0.r4.gg4f2fc60ca-h57928b3_8.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libxcb-1.17.0-h0e4246c_0.conda - conda: 
https://conda.anaconda.org/conda-forge/win-64/libxml2-2.12.7-h0f24e4e_4.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libxslt-1.1.39-h3df6e99_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/lz4-c-1.9.4-hcfcfb64_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/markupsafe-2.1.5-py312h4389bb4_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.9.2-py312h2e8e312_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.9.2-py312h90004f6_1.conda @@ -6059,13 +6243,17 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.2-h3d672ee_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/openssl-3.3.2-h2466b09_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/orc-2.0.2-h784c2ca_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/pandas-2.2.3-py312h72972c8_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/pcre2-10.44-h3d7b363_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/pillow-10.4.0-py312h381445a_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/pixman-0.43.4-h63175ca_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/polars-1.8.2-py312ha0f2741_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-h0e40799_1002.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/pyarrow-17.0.0-py312h7e22eef_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/pyarrow-core-17.0.0-py312h6a9c419_1_cpu.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda @@ -6081,11 +6269,13 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/qhull-2020.2-hc790b64_5.conda - conda: https://conda.anaconda.org/conda-forge/win-64/qt6-main-6.7.2-h2fedb45_5.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/re2-2023.09.01-hd3b24a8_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/scikit-learn-1.5.2-py312h816cc57_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/scipy-1.14.1-py312h1f4e10d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.1.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/win-64/snappy-1.2.1-h23299a8_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/sphinx-8.0.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_0.conda @@ -12374,6 +12564,27 @@ packages: purls: [] size: 1124364 timestamp: 1720857589333 +- kind: conda + name: libabseil + version: '20240722.0' + build: cxx17_h5888daf_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240722.0-cxx17_h5888daf_1.conda + sha256: 8f91429091183c26950f1e7ffa730e8632f0627ba35d2fccd71df31628c9b4e5 + md5: e1f604644fe8d78e22660e2fec6756bc + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + - libstdcxx >=13 + constrains: + - 
libabseil-static =20240722.0=cxx17* + - abseil-cpp =20240722.0 + license: Apache-2.0 + license_family: Apache + purls: [] + size: 1310521 + timestamp: 1727295454064 - kind: conda name: libarrow version: 15.0.2 @@ -12622,14 +12833,14 @@ packages: - kind: conda name: libarrow version: 17.0.0 - build: had3b6fe_16_cpu + build: hc6a7651_16_cpu build_number: 16 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-had3b6fe_16_cpu.conda - sha256: 9aa5598878cccc29de744ebc4b501c4a5a43332973edfdf0a19ddc521bd7248f - md5: c899e532e16be21570d32bc74ea3d34f + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-17.0.0-hc6a7651_16_cpu.conda + sha256: 1facd5aa7140031be0f68733ab5e413ea1505da40548e27a173b2407046f36b5 + md5: 05fecc4ae5930dc548327980a4bc7a83 depends: - - __glibc >=2.17,<3.0.a0 + - __osx >=11.0 - aws-crt-cpp >=0.28.3,<0.28.4.0a0 - aws-sdk-cpp >=1.11.407,<1.11.408.0a0 - azure-core-cpp >=1.13.0,<1.13.1.0a0 @@ -12637,17 +12848,15 @@ packages: - azure-storage-blobs-cpp >=12.12.0,<12.12.1.0a0 - azure-storage-files-datalake-cpp >=12.11.0,<12.11.1.0a0 - bzip2 >=1.0.8,<2.0a0 - - gflags >=2.2.2,<2.3.0a0 - glog >=0.7.1,<0.8.0a0 - libabseil * cxx17* - libabseil >=20240116.2,<20240117.0a0 - libbrotlidec >=1.1.0,<1.2.0a0 - libbrotlienc >=1.1.0,<1.2.0a0 - - libgcc >=13 + - libcxx >=17 - libgoogle-cloud >=2.29.0,<2.30.0a0 - libgoogle-cloud-storage >=2.29.0,<2.30.0a0 - libre2-11 >=2023.9.1,<2024.0a0 - - libstdcxx >=13 - libutf8proc >=2.8.0,<3.0a0 - libzlib >=1.3.1,<2.0a0 - lz4-c >=1.9.3,<1.10.0a0 @@ -12656,25 +12865,25 @@ packages: - snappy >=1.2.1,<1.3.0a0 - zstd >=1.5.6,<1.6.0a0 constrains: - - parquet-cpp <0.0a0 - - arrow-cpp <0.0a0 - apache-arrow-proc =*=cpu + - arrow-cpp <0.0a0 + - parquet-cpp <0.0a0 license: Apache-2.0 license_family: APACHE purls: [] - size: 8495428 - timestamp: 1726669963852 + size: 5318871 + timestamp: 1726669928492 - kind: conda name: libarrow version: 17.0.0 - build: hc6a7651_16_cpu - 
build_number: 16 - subdir: osx-arm64 - url: https://conda.anaconda.org/conda-forge/osx-arm64/libarrow-17.0.0-hc6a7651_16_cpu.conda - sha256: 1facd5aa7140031be0f68733ab5e413ea1505da40548e27a173b2407046f36b5 - md5: 05fecc4ae5930dc548327980a4bc7a83 + build: hef0f6b3_17_cpu + build_number: 17 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-hef0f6b3_17_cpu.conda + sha256: 923c17caa8d6b56d2bd0bf53640601959b00647a48debd73164b2301715181bf + md5: ae0b3234c958c6071f6523fcde7afe99 depends: - - __osx >=11.0 + - __glibc >=2.17,<3.0.a0 - aws-crt-cpp >=0.28.3,<0.28.4.0a0 - aws-sdk-cpp >=1.11.407,<1.11.408.0a0 - azure-core-cpp >=1.13.0,<1.13.1.0a0 @@ -12682,15 +12891,17 @@ packages: - azure-storage-blobs-cpp >=12.12.0,<12.12.1.0a0 - azure-storage-files-datalake-cpp >=12.11.0,<12.11.1.0a0 - bzip2 >=1.0.8,<2.0a0 + - gflags >=2.2.2,<2.3.0a0 - glog >=0.7.1,<0.8.0a0 - libabseil * cxx17* - - libabseil >=20240116.2,<20240117.0a0 + - libabseil >=20240722.0,<20240723.0a0 - libbrotlidec >=1.1.0,<1.2.0a0 - libbrotlienc >=1.1.0,<1.2.0a0 - - libcxx >=17 + - libgcc >=13 - libgoogle-cloud >=2.29.0,<2.30.0a0 - libgoogle-cloud-storage >=2.29.0,<2.30.0a0 - libre2-11 >=2023.9.1,<2024.0a0 + - libstdcxx >=13 - libutf8proc >=2.8.0,<3.0a0 - libzlib >=1.3.1,<2.0a0 - lz4-c >=1.9.3,<1.10.0a0 @@ -12699,14 +12910,13 @@ packages: - snappy >=1.2.1,<1.3.0a0 - zstd >=1.5.6,<1.6.0a0 constrains: + - parquet-cpp <0.0a0 - apache-arrow-proc =*=cpu - arrow-cpp <0.0a0 - - parquet-cpp <0.0a0 license: Apache-2.0 - license_family: APACHE purls: [] - size: 5318871 - timestamp: 1726669928492 + size: 8530218 + timestamp: 1727706025186 - kind: conda name: libarrow-acero version: 15.0.2 @@ -12785,22 +12995,21 @@ packages: - kind: conda name: libarrow-acero version: 17.0.0 - build: h5888daf_16_cpu - build_number: 16 + build: h5888daf_17_cpu + build_number: 17 subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_16_cpu.conda - sha256: 
0ff4c712c7c61e60708c6ef4f8158200059e0f63c25d0a54c8e4cca7bd153d86 - md5: 18f796aae018a26a20ac51d19de69115 + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_17_cpu.conda + sha256: 6c95ff9332ccade12f839d44f370bed74c0b144fcbad10dab0f1791a4f2a27a8 + md5: bc27c1f44562481b579f5ceae1a3e51e depends: - __glibc >=2.17,<3.0.a0 - - libarrow 17.0.0 had3b6fe_16_cpu + - libarrow 17.0.0 hef0f6b3_17_cpu - libgcc >=13 - libstdcxx >=13 license: Apache-2.0 - license_family: APACHE purls: [] - size: 608267 - timestamp: 1726669999941 + size: 608175 + timestamp: 1727706066011 - kind: conda name: libarrow-acero version: 17.0.0 @@ -12942,24 +13151,23 @@ packages: - kind: conda name: libarrow-dataset version: 17.0.0 - build: h5888daf_16_cpu - build_number: 16 + build: h5888daf_17_cpu + build_number: 17 subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_16_cpu.conda - sha256: e500e0154cf3ebb41bed3bdf41bd0ff5e0a6b7527a46ba755c05e59c8036e442 - md5: 5400efd6bf101674e0ce170906a0f7cb + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_17_cpu.conda + sha256: 0da4757d75ad0cd838810ab8af0ff52a0edd5d78e55aafa218f001413b93eb11 + md5: b45dc76a8a9a23a421b8f166d7e16684 depends: - __glibc >=2.17,<3.0.a0 - - libarrow 17.0.0 had3b6fe_16_cpu - - libarrow-acero 17.0.0 h5888daf_16_cpu + - libarrow 17.0.0 hef0f6b3_17_cpu + - libarrow-acero 17.0.0 h5888daf_17_cpu - libgcc >=13 - - libparquet 17.0.0 h39682fd_16_cpu + - libparquet 17.0.0 h39682fd_17_cpu - libstdcxx >=13 license: Apache-2.0 - license_family: APACHE purls: [] - size: 585061 - timestamp: 1726670063965 + size: 583849 + timestamp: 1727706140203 - kind: conda name: libarrow-dataset version: 17.0.0 @@ -13454,27 +13662,26 @@ packages: - kind: conda name: libarrow-substrait version: 17.0.0 - build: hf54134d_16_cpu - build_number: 16 + build: he882d9a_17_cpu + build_number: 17 subdir: linux-64 - url: 
https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-hf54134d_16_cpu.conda - sha256: 53f3d5f12c9ea557f33a4e1cf9067ce2dbb4211eff0a095574eeb7f0528bc044 - md5: 1cbc3fb1ee28c99e5f8c52920a7717a3 + url: https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-he882d9a_17_cpu.conda + sha256: 3346fb0097fb9c5e2509487ab950cddb388d0458e3a7594ba9989bfdea8868c1 + md5: 3c3084245ba8384c07eb4504c1961497 depends: - __glibc >=2.17,<3.0.a0 - libabseil * cxx17* - - libabseil >=20240116.2,<20240117.0a0 - - libarrow 17.0.0 had3b6fe_16_cpu - - libarrow-acero 17.0.0 h5888daf_16_cpu - - libarrow-dataset 17.0.0 h5888daf_16_cpu + - libabseil >=20240722.0,<20240723.0a0 + - libarrow 17.0.0 hef0f6b3_17_cpu + - libarrow-acero 17.0.0 h5888daf_17_cpu + - libarrow-dataset 17.0.0 h5888daf_17_cpu - libgcc >=13 - - libprotobuf >=4.25.3,<4.25.4.0a0 + - libprotobuf >=5.27.5,<5.27.6.0a0 - libstdcxx >=13 license: Apache-2.0 - license_family: APACHE purls: [] - size: 550960 - timestamp: 1726670093831 + size: 515946 + timestamp: 1727706175064 - kind: conda name: libasprintf version: 0.22.5 @@ -15080,6 +15287,32 @@ packages: purls: [] size: 1241649 timestamp: 1725640926284 +- kind: conda + name: libgoogle-cloud + version: 2.29.0 + build: h438788a_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.29.0-h438788a_1.conda + sha256: cf5c97fb1a270a072faae6decd7e74681e7ead99a1cec6325c8d7a7213bcb2d1 + md5: 3d27459264de681a74c0aebbbd3ecd8f + depends: + - __glibc >=2.17,<3.0.a0 + - libabseil * cxx17* + - libabseil >=20240722.0,<20240723.0a0 + - libcurl >=8.10.1,<9.0a0 + - libgcc >=13 + - libgrpc >=1.65.5,<1.66.0a0 + - libprotobuf >=5.27.5,<5.27.6.0a0 + - libstdcxx >=13 + - openssl >=3.3.2,<4.0a0 + constrains: + - libgoogle-cloud 2.29.0 *_1 + license: Apache-2.0 + license_family: Apache + purls: [] + size: 1200532 + timestamp: 1727245497586 - kind: conda name: libgoogle-cloud version: 2.29.0 @@ -15175,6 +15408,30 @@ 
packages: purls: [] size: 781655 timestamp: 1725641060970 +- kind: conda + name: libgoogle-cloud-storage + version: 2.29.0 + build: h0121fbd_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.29.0-h0121fbd_1.conda + sha256: 78e22048ab9bb554c4269f5e2a4ab9baae2c0f490418e0cdecd04e5c59130805 + md5: ea93fded95ddff7798e28954c446e22f + depends: + - __glibc >=2.17,<3.0.a0 + - libabseil + - libcrc32c >=1.1.2,<1.2.0a0 + - libcurl + - libgcc >=13 + - libgoogle-cloud 2.29.0 h438788a_1 + - libstdcxx >=13 + - libzlib >=1.3.1,<2.0a0 + - openssl + license: Apache-2.0 + license_family: Apache + purls: [] + size: 781418 + timestamp: 1727245657213 - kind: conda name: libgoogle-cloud-storage version: 2.29.0 @@ -15364,6 +15621,33 @@ packages: purls: [] size: 5016525 timestamp: 1713392846329 +- kind: conda + name: libgrpc + version: 1.65.5 + build: hf5c653b_0 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.65.5-hf5c653b_0.conda + sha256: d279abd46262e817c7a00aeb4df9b5ed4de38130130b248e2c50875e982f30fa + md5: 3b0048cabc6815a4d8874a0240519d32 + depends: + - __glibc >=2.17,<3.0.a0 + - c-ares >=1.32.3,<2.0a0 + - libabseil * cxx17* + - libabseil >=20240722.0,<20240723.0a0 + - libgcc >=13 + - libprotobuf >=5.27.5,<5.27.6.0a0 + - libre2-11 >=2023.9.1,<2024.0a0 + - libstdcxx >=13 + - libzlib >=1.3.1,<2.0a0 + - openssl >=3.3.2,<4.0a0 + - re2 + constrains: + - grpc-cpp =1.65.5 + license: Apache-2.0 + license_family: APACHE + purls: [] + size: 7229891 + timestamp: 1727200905306 - kind: conda name: libhwloc version: 2.11.1 @@ -16142,24 +16426,23 @@ packages: - kind: conda name: libparquet version: 17.0.0 - build: h39682fd_16_cpu - build_number: 16 + build: h39682fd_17_cpu + build_number: 17 subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_16_cpu.conda - sha256: 09bc64111e5e1e9f5fee78efdd62592e01c681943fe6e91b369f6580dc8726c4 - md5: 
dd1fee2da0659103080fdd74004656df + url: https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_17_cpu.conda + sha256: c94d49b69a3023cf37cff4e0ede610c45576c0dea5876f0550b210e6d354b092 + md5: dee1563caf816cb8f8f7e84622876487 depends: - __glibc >=2.17,<3.0.a0 - - libarrow 17.0.0 had3b6fe_16_cpu + - libarrow 17.0.0 hef0f6b3_17_cpu - libgcc >=13 - libstdcxx >=13 - libthrift >=0.20.0,<0.20.1.0a0 - openssl >=3.3.2,<4.0a0 license: Apache-2.0 - license_family: APACHE purls: [] - size: 1186069 - timestamp: 1726670048098 + size: 1188999 + timestamp: 1727706121710 - kind: conda name: libparquet version: 17.0.0 @@ -16399,6 +16682,27 @@ packages: purls: [] size: 2883090 timestamp: 1727161327039 +- kind: conda + name: libprotobuf + version: 5.27.5 + build: h5b01275_2 + build_number: 2 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-5.27.5-h5b01275_2.conda + sha256: 79ac9726cd0a1cb1ba335f7fc7ccac5f679a66d71d9553ca88a805b8787d55ce + md5: 66ed3107adbdfc25ba70454ba11e6d1e + depends: + - __glibc >=2.17,<3.0.a0 + - libabseil * cxx17* + - libabseil >=20240722.0,<20240723.0a0 + - libgcc >=13 + - libstdcxx >=13 + - libzlib >=1.3.1,<2.0a0 + license: BSD-3-Clause + license_family: BSD + purls: [] + size: 2940269 + timestamp: 1727424395109 - kind: conda name: libre2-11 version: 2023.09.01 @@ -16461,6 +16765,28 @@ packages: purls: [] size: 184017 timestamp: 1708947106275 +- kind: conda + name: libre2-11 + version: 2023.09.01 + build: hbbce691_3 + build_number: 3 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-hbbce691_3.conda + sha256: 239ca2319645308633ed773bda7ff1f153390ac84ee4e94955e0ed5be7e78967 + md5: f7f3ff4fff310fcac18769ce3f46e40a + depends: + - __glibc >=2.17,<3.0.a0 + - libabseil * cxx17* + - libabseil >=20240722.0,<20240723.0a0 + - libgcc >=13 + - libstdcxx >=13 + constrains: + - re2 2023.09.01.* + license: BSD-3-Clause + license_family: BSD + purls: [] + size: 241063 + 
timestamp: 1727157471101 - kind: conda name: libre2-11 version: 2023.09.01 @@ -19362,6 +19688,30 @@ packages: purls: [] size: 1066349 timestamp: 1723760593232 +- kind: conda + name: orc + version: 2.0.2 + build: h690cf93_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h690cf93_1.conda + sha256: ce023f259ffd93b4678cc582fc4b15a8a991a7b8edd9def8b6838bf7e7962bec + md5: 0044701dd48af57d3d5467a704ef9ebd + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + - libprotobuf >=5.27.5,<5.27.6.0a0 + - libstdcxx >=13 + - libzlib >=1.3.1,<2.0a0 + - lz4-c >=1.9.3,<1.10.0a0 + - snappy >=1.2.1,<1.3.0a0 + - tzdata + - zstd >=1.5.6,<1.6.0a0 + license: Apache-2.0 + license_family: Apache + purls: [] + size: 1184634 + timestamp: 1727242386732 - kind: conda name: orc version: 2.0.2 @@ -19711,7 +20061,7 @@ packages: license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/pandas?source=compressed-mapping + - pkg:pypi/pandas?source=hash-mapping size: 14470437 timestamp: 1726878887799 - kind: conda @@ -23354,6 +23704,22 @@ packages: purls: [] size: 26770 timestamp: 1708947220914 +- kind: conda + name: re2 + version: 2023.09.01 + build: h77b4e00_3 + build_number: 3 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h77b4e00_3.conda + sha256: f3cd9d8c39b2b39da67bbf6630c807e5019dce496b21aea104f97b2264b5474a + md5: 173a62ebf031d6d53462f8f657c800bb + depends: + - libre2-11 2023.09.01 hbbce691_3 + license: BSD-3-Clause + license_family: BSD + purls: [] + size: 26605 + timestamp: 1727157480972 - kind: conda name: re2 version: 2023.09.01 @@ -24615,7 +24981,7 @@ packages: name: skrub version: 0.4.dev0 path: . 
- sha256: b7c67ca1782219d797b575bcda5068fa98117f9e47ffa9f926dc09a4ae4e026b + sha256: b05795442e9930d7741094828e9df85834daa7798eea5a23ba0d3b5316312389 requires_dist: - numpy>=1.23.5 - packaging>=23.1 @@ -24637,6 +25003,7 @@ packages: - sphinx-gallery ; extra == 'doc' - sphinxext-opengraph ; extra == 'doc' - statsmodels ; extra == 'doc' + - pyarrow ; extra == 'doc' - black==23.3.0 ; extra == 'lint' - ruff==0.4.8 ; extra == 'lint' - pre-commit ; extra == 'lint' diff --git a/pyproject.toml b/pyproject.toml index 28fc95e2c..340cacc06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ doc = [ "sphinx-gallery", "sphinxext-opengraph", "statsmodels", + "pyarrow", ] lint = [ "black==23.3.0", @@ -166,7 +167,7 @@ test = { cmd = "pytest -vsl --cov=skrub --cov-report=xml skrub" } [tool.pixi.environments] lint = ["lint"] doc = ["optional", "doc"] -test = ["test"] +test = ["optional", "test"] dev = ["dev", "optional", "doc", "lint", "test"] ci-py309-min-deps = ["py309", "min-dependencies", "test"] ci-py309-min-optional-deps = ["py309", "min-dependencies", "min-optional-dependencies", "test"] diff --git a/skrub/datasets/__init__.py b/skrub/datasets/__init__.py index d64775d3d..ea393dad3 100644 --- a/skrub/datasets/__init__.py +++ b/skrub/datasets/__init__.py @@ -1,6 +1,7 @@ from ._fetching import ( DatasetAll, DatasetInfoOnly, + fetch_credit_fraud, fetch_drug_directory, fetch_employee_salaries, fetch_figshare, @@ -32,6 +33,7 @@ "fetch_traffic_violations", "fetch_world_bank_indicator", "fetch_figshare", + "fetch_credit_fraud", "fetch_movielens", "get_data_dir", "make_deduplication_data", diff --git a/skrub/datasets/_fetching.py b/skrub/datasets/_fetching.py index 8e4b18691..cd8667c4d 100644 --- a/skrub/datasets/_fetching.py +++ b/skrub/datasets/_fetching.py @@ -25,6 +25,7 @@ from sklearn import __version__ as sklearn_version from sklearn.datasets import fetch_openml from sklearn.datasets._base import _sha256 +from sklearn.utils import Bunch from sklearn.utils.fixes 
import parse_version from skrub._utils import import_optional_dependency @@ -1096,3 +1097,45 @@ def fetch_movielens( load_dataframe=load_dataframe, data_directory=data_directory, ) + + +def fetch_credit_fraud(load_dataframe=True, data_directory=None): + """Fetch the credit fraud dataset from figshare. + + This is an imbalanced binary classification use-case. This dataset consists in + two tables: + + - baskets, containing the binary fraud target label + - products + + Baskets contain at least one product each, so aggregation then joining operations + are required to build a design matrix. + + More details on \ + `Figshare `_ + + Parameters + ---------- + load_dataframe : bool, default=True + Whether or not to load the dataset in memory after download. + + data_directory : str, default=None + The directory to which the dataset will be written during the download. + If None, the directory is set to ~/skrub_data. + """ + dataset_name_to_id = { + "products": "49176205", + "baskets": "49176202", + } + bunch = Bunch() + for dataset_name, figshare_id in dataset_name_to_id.items(): + dataset = fetch_figshare( + figshare_id, + load_dataframe=load_dataframe, + data_directory=data_directory, + ) + bunch[dataset_name] = dataset.X + bunch[f"source_{dataset_name}"] = dataset.source + bunch[f"path_{dataset_name}"] = dataset.path + + return bunch diff --git a/skrub/datasets/tests/test_fetching.py b/skrub/datasets/tests/test_fetching.py index 6539ca2db..8cfac5661 100644 --- a/skrub/datasets/tests/test_fetching.py +++ b/skrub/datasets/tests/test_fetching.py @@ -5,6 +5,7 @@ import pandas as pd import pytest +from pandas.testing import assert_frame_equal from skrub.datasets import _fetching @@ -219,3 +220,40 @@ def test_fetch_movielens(): ) mock_urlretrieve.assert_not_called() assert disk_loaded_info == returned_info + + +def test_fetch_credit_fraud(): + pytest.importorskip("pyarrow") + with TemporaryDirectory() as temp_dir: + try: + # Valid call + bunch = 
_fetching.fetch_credit_fraud( + data_directory=temp_dir, + ) + + except (ConnectionError, URLError): + pytest.skip( + "Exception: Skipping this test because we encountered an " + "issue probably related to an Internet connection problem. " + ) + return + + assert ( + bunch.source_products == "https://ndownloader.figshare.com/files/49176205" + ) + assert bunch.source_baskets == "https://ndownloader.figshare.com/files/49176202" + + assert_frame_equal(bunch.products, pd.read_parquet(bunch.path_products)) + assert_frame_equal(bunch.baskets, pd.read_parquet(bunch.path_baskets)) + + # Now that we have verified the file is on disk, we want to test + # whether calling the function again reads it from disk (it should) + # or queries the network again (it shouldn't). + with mock.patch("urllib.request.urlretrieve") as mock_urlretrieve: + # Same valid call as above + disk_bunch = _fetching.fetch_credit_fraud( + data_directory=temp_dir, + ) + mock_urlretrieve.assert_not_called() + assert_frame_equal(bunch.products, disk_bunch.products) + assert_frame_equal(bunch.baskets, disk_bunch.baskets)