Skip to content

Commit 4b98e0b

Browse files
authored
DEPR: DataFrame.median/mean with numeric_only=None and dt64 columns (#49250)
1 parent 7b39329 commit 4b98e0b

File tree

3 files changed

+13
-39
lines changed

3 files changed

+13
-39
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ Removal of prior version deprecations/changes
237237
- Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`)
238238
- Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
239239
- Removed the ``numeric_only`` keyword from :meth:`Categorical.min` and :meth:`Categorical.max` in favor of ``skipna`` (:issue:`48821`)
240+
- Changed behavior of :meth:`DataFrame.median` and :meth:`DataFrame.mean` with ``numeric_only=None`` to not exclude datetime-like columns. THIS NOTE WILL BE IRRELEVANT ONCE ``numeric_only=None`` DEPRECATION IS ENFORCED (:issue:`29941`)
240241
- Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`)
241242
- Removed ``.ExponentialMovingWindow.vol`` (:issue:`39220`)
242243
- Removed :meth:`Index.get_value` and :meth:`Index.set_value` (:issue:`33907`, :issue:`28621`)

pandas/core/frame.py

-24
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@
124124
is_1d_only_ea_dtype,
125125
is_bool_dtype,
126126
is_dataclass,
127-
is_datetime64_any_dtype,
128127
is_dict_like,
129128
is_dtype_equal,
130129
is_extension_array_dtype,
@@ -10739,29 +10738,6 @@ def _reduce(
1073910738
assert filter_type is None or filter_type == "bool", filter_type
1074010739
out_dtype = "bool" if filter_type == "bool" else None
1074110740

10742-
if numeric_only is None and name in ["mean", "median"]:
10743-
own_dtypes = [arr.dtype for arr in self._mgr.arrays]
10744-
10745-
dtype_is_dt = np.array(
10746-
[is_datetime64_any_dtype(dtype) for dtype in own_dtypes],
10747-
dtype=bool,
10748-
)
10749-
if dtype_is_dt.any():
10750-
warnings.warn(
10751-
"DataFrame.mean and DataFrame.median with numeric_only=None "
10752-
"will include datetime64 and datetime64tz columns in a "
10753-
"future version.",
10754-
FutureWarning,
10755-
stacklevel=find_stack_level(),
10756-
)
10757-
# Non-copy equivalent to
10758-
# dt64_cols = self.dtypes.apply(is_datetime64_any_dtype)
10759-
# cols = self.columns[~dt64_cols]
10760-
# self = self[cols]
10761-
predicate = lambda x: not is_datetime64_any_dtype(x.dtype)
10762-
mgr = self._mgr._get_data_subset(predicate)
10763-
self = type(self)(mgr)
10764-
1076510741
# TODO: Make other agg func handle axis=None properly GH#21597
1076610742
axis = self._get_axis_number(axis)
1076710743
labels = self._get_agg_axis(axis)

pandas/tests/frame/test_reductions.py

+12-15
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,13 @@ def assert_stat_op_calc(
7474
f = getattr(frame, opname)
7575

7676
if check_dates:
77-
expected_warning = FutureWarning if opname in ["mean", "median"] else None
7877
df = DataFrame({"b": date_range("1/1/2001", periods=2)})
79-
with tm.assert_produces_warning(expected_warning):
78+
with tm.assert_produces_warning(None):
8079
result = getattr(df, opname)()
8180
assert isinstance(result, Series)
8281

8382
df["a"] = range(len(df))
84-
with tm.assert_produces_warning(expected_warning):
83+
with tm.assert_produces_warning(None):
8584
result = getattr(df, opname)()
8685
assert isinstance(result, Series)
8786
assert len(result)
@@ -384,21 +383,19 @@ def test_nunique(self):
384383
def test_mean_mixed_datetime_numeric(self, tz):
385384
# https://github.com/pandas-dev/pandas/issues/24752
386385
df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2})
387-
with tm.assert_produces_warning(FutureWarning):
388-
result = df.mean()
389-
expected = Series([1.0], index=["A"])
386+
result = df.mean()
387+
expected = Series([1.0, Timestamp("2000", tz=tz)], index=["A", "B"])
390388
tm.assert_series_equal(result, expected)
391389

392390
@pytest.mark.parametrize("tz", [None, "UTC"])
393-
def test_mean_excludes_datetimes(self, tz):
391+
def test_mean_includes_datetimes(self, tz):
394392
# https://github.com/pandas-dev/pandas/issues/24752
395-
# Our long-term desired behavior is unclear, but the behavior in
396-
# 0.24.0rc1 was buggy.
393+
# Behavior in 0.24.0rc1 was buggy.
394+
# As of 2.0 with numeric_only=None we do *not* drop datetime columns
397395
df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2})
398-
with tm.assert_produces_warning(FutureWarning):
399-
result = df.mean()
396+
result = df.mean()
400397

401-
expected = Series(dtype=np.float64)
398+
expected = Series([Timestamp("2000", tz=tz)], index=["A"])
402399
tm.assert_series_equal(result, expected)
403400

404401
def test_mean_mixed_string_decimal(self):
@@ -851,6 +848,7 @@ def test_mean_corner(self, float_frame, float_string_frame):
851848
def test_mean_datetimelike(self):
852849
# GH#24757 check that datetimelike are handled correctly with
853850
# numeric_only=True (before 2.0 they were also excluded by default)
851+
# As of 2.0, datetimelike are *not* excluded with numeric_only=None
854852

855853
df = DataFrame(
856854
{
@@ -864,10 +862,9 @@ def test_mean_datetimelike(self):
864862
expected = Series({"A": 1.0})
865863
tm.assert_series_equal(result, expected)
866864

867-
with tm.assert_produces_warning(FutureWarning):
868-
# in the future datetime columns will be included
865+
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
869866
result = df.mean()
870-
expected = Series({"A": 1.0, "C": df.loc[1, "C"]})
867+
expected = Series({"A": 1.0, "B": df.loc[1, "B"], "C": df.loc[1, "C"]})
871868
tm.assert_series_equal(result, expected)
872869

873870
def test_mean_datetimelike_numeric_only_false(self):

0 commit comments

Comments
 (0)