Skip to content

Commit

Permalink
CLN: aggregation.transform (pandas-dev#36478)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored Oct 6, 2020
1 parent 98af5d7 commit ebd9906
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 26 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ Reshaping
- Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`)
- Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`)
-

Sparse
Expand All @@ -446,7 +447,6 @@ Other
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
- Fixed metadata propagation in the :class:`Series.dt` accessor (:issue:`28283`)
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`)
- Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`)

.. ---------------------------------------------------------------------------
Expand Down
49 changes: 38 additions & 11 deletions pandas/core/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,17 @@
Sequence,
Tuple,
Union,
cast,
)

from pandas._typing import AggFuncType, Axis, FrameOrSeries, Label
from pandas._typing import (
AggFuncType,
AggFuncTypeBase,
Axis,
FrameOrSeries,
FrameOrSeriesUnion,
Label,
)

from pandas.core.dtypes.common import is_dict_like, is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
Expand Down Expand Up @@ -391,7 +399,7 @@ def validate_func_kwargs(

def transform(
obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs
) -> FrameOrSeries:
) -> FrameOrSeriesUnion:
"""
Transform a DataFrame or Series
Expand Down Expand Up @@ -424,16 +432,20 @@ def transform(
assert not is_series
return transform(obj.T, func, 0, *args, **kwargs).T

if isinstance(func, list):
if is_list_like(func) and not is_dict_like(func):
func = cast(List[AggFuncTypeBase], func)
# Convert func equivalent dict
if is_series:
func = {com.get_callable_name(v) or v: v for v in func}
else:
func = {col: func for col in obj}

if isinstance(func, dict):
if is_dict_like(func):
func = cast(Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]], func)
return transform_dict_like(obj, func, *args, **kwargs)

# func is either str or callable
func = cast(AggFuncTypeBase, func)
try:
result = transform_str_or_callable(obj, func, *args, **kwargs)
except Exception:
Expand All @@ -451,37 +463,52 @@ def transform(
return result


def transform_dict_like(obj, func, *args, **kwargs):
def transform_dict_like(
obj: FrameOrSeries,
func: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]],
*args,
**kwargs,
):
"""
Compute transform in the case of a dict-like func
"""
from pandas.core.reshape.concat import concat

if len(func) == 0:
raise ValueError("No transform functions were provided")

if obj.ndim != 1:
# Check for missing columns on a frame
cols = sorted(set(func.keys()) - set(obj.columns))
if len(cols) > 0:
raise SpecificationError(f"Column(s) {cols} do not exist")

if any(isinstance(v, dict) for v in func.values()):
# Can't use func.values(); wouldn't work for a Series
if any(is_dict_like(v) for _, v in func.items()):
# GH 15931 - deprecation of renaming keys
raise SpecificationError("nested renamer is not supported")

results = {}
results: Dict[Label, FrameOrSeriesUnion] = {}
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
try:
results[name] = transform(colg, how, 0, *args, **kwargs)
except Exception as e:
if str(e) == "Function did not transform":
raise e
except Exception as err:
if (
str(err) == "Function did not transform"
or str(err) == "No transform functions were provided"
):
raise err

# combine results
if len(results) == 0:
raise ValueError("Transform function failed")
return concat(results, axis=1)


def transform_str_or_callable(obj, func, *args, **kwargs):
def transform_str_or_callable(
obj: FrameOrSeries, func: AggFuncTypeBase, *args, **kwargs
) -> FrameOrSeriesUnion:
"""
Compute transform in the case of a string or callable func
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7270,7 +7270,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:

def _gotitem(
self,
key: Union[str, List[str]],
key: Union[Label, List[Label]],
ndim: int,
subset: Optional[FrameOrSeriesUnion] = None,
) -> FrameOrSeriesUnion:
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,16 +265,17 @@
Parameters
----------
func : function, str, list or dict
func : function, str, list-like or dict-like
Function to use for transforming the data. If a function, must either
work when passed a {klass} or when passed to {klass}.apply.
work when passed a {klass} or when passed to {klass}.apply. If func
is both list-like and dict-like, dict-like behavior takes precedence.
Accepted combinations are:
- function
- string function name
- list of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
- dict of axis labels -> functions, function names or list of such.
- list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
- dict-like of axis labels -> functions, function names or list-like of such.
{axis}
*args
Positional arguments to pass to `func`.
Expand Down
39 changes: 34 additions & 5 deletions pandas/tests/frame/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pytest

from pandas import DataFrame, MultiIndex
from pandas import DataFrame, MultiIndex, Series
import pandas._testing as tm
from pandas.core.base import SpecificationError
from pandas.core.groupby.base import transformation_kernels
Expand Down Expand Up @@ -41,9 +41,15 @@ def test_transform_groupby_kernel(axis, float_frame, op):


@pytest.mark.parametrize(
"ops, names", [([np.sqrt], ["sqrt"]), ([np.abs, np.sqrt], ["absolute", "sqrt"])]
"ops, names",
[
([np.sqrt], ["sqrt"]),
([np.abs, np.sqrt], ["absolute", "sqrt"]),
(np.array([np.sqrt]), ["sqrt"]),
(np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
],
)
def test_transform_list(axis, float_frame, ops, names):
def test_transform_listlike(axis, float_frame, ops, names):
# GH 35964
other_axis = 1 if axis in {0, "index"} else 0
with np.errstate(all="ignore"):
Expand All @@ -56,18 +62,41 @@ def test_transform_list(axis, float_frame, ops, names):
tm.assert_frame_equal(result, expected)


def test_transform_dict(axis, float_frame):
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops):
with pytest.raises(ValueError, match="No transform functions were provided"):
float_frame.transform(ops)


@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
# GH 35964
if axis == 0 or axis == "index":
e = float_frame.columns[0]
expected = float_frame[[e]].transform(np.abs)
else:
e = float_frame.index[0]
expected = float_frame.iloc[[0]].transform(np.abs)
result = float_frame.transform({e: np.abs}, axis=axis)
result = float_frame.transform(box({e: np.abs}), axis=axis)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"ops",
[
{},
{"A": []},
{"A": [], "B": "cumsum"},
{"A": "cumsum", "B": []},
{"A": [], "B": ["cumsum"]},
{"A": ["cumsum"], "B": []},
],
)
def test_transform_empty_dictlike(float_frame, ops):
with pytest.raises(ValueError, match="No transform functions were provided"):
float_frame.transform(ops)


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply):
# GH 35964
Expand Down
37 changes: 33 additions & 4 deletions pandas/tests/series/apply/test_series_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,15 @@ def test_transform_groupby_kernel(string_series, op):


@pytest.mark.parametrize(
"ops, names", [([np.sqrt], ["sqrt"]), ([np.abs, np.sqrt], ["absolute", "sqrt"])]
"ops, names",
[
([np.sqrt], ["sqrt"]),
([np.abs, np.sqrt], ["absolute", "sqrt"]),
(np.array([np.sqrt]), ["sqrt"]),
(np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
],
)
def test_transform_list(string_series, ops, names):
def test_transform_listlike(string_series, ops, names):
# GH 35964
with np.errstate(all="ignore"):
expected = concat([op(string_series) for op in ops], axis=1)
Expand All @@ -45,15 +51,38 @@ def test_transform_list(string_series, ops, names):
tm.assert_frame_equal(result, expected)


def test_transform_dict(string_series):
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(string_series, ops):
with pytest.raises(ValueError, match="No transform functions were provided"):
string_series.transform(ops)


@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
# GH 35964
with np.errstate(all="ignore"):
expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1)
expected.columns = ["foo", "bar"]
result = string_series.transform({"foo": np.sqrt, "bar": np.abs})
result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs}))
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"ops",
[
{},
{"A": []},
{"A": [], "B": ["cumsum"]},
{"A": ["cumsum"], "B": []},
{"A": [], "B": "cumsum"},
{"A": "cumsum", "B": []},
],
)
def test_transform_empty_dictlike(string_series, ops):
with pytest.raises(ValueError, match="No transform functions were provided"):
string_series.transform(ops)


def test_transform_udf(axis, string_series):
# GH 35964
# via apply
Expand Down

0 comments on commit ebd9906

Please sign in to comment.