Skip to content

Commit c4a2026

Browse files
TST (string dtype): resolve all easy xfails in pandas/tests/groupby (#60314)
1 parent ba4d1cf commit c4a2026

13 files changed

+30
-53
lines changed

pandas/tests/groupby/aggregate/test_aggregate.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas._config import using_string_dtype
13-
1412
from pandas.errors import SpecificationError
1513

1614
from pandas.core.dtypes.common import is_integer_dtype
@@ -296,12 +294,11 @@ def aggfun_1(ser):
296294
assert len(result) == 0
297295

298296

299-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
300297
def test_wrap_agg_out(three_group):
301298
grouped = three_group.groupby(["A", "B"])
302299

303300
def func(ser):
304-
if ser.dtype == object:
301+
if ser.dtype == object or ser.dtype == "string":
305302
raise TypeError("Test error message")
306303
return ser.sum()
307304

@@ -1117,7 +1114,6 @@ def test_lambda_named_agg(func):
11171114
tm.assert_frame_equal(result, expected)
11181115

11191116

1120-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
11211117
def test_aggregate_mixed_types():
11221118
# GH 16916
11231119
df = DataFrame(
@@ -1129,7 +1125,7 @@ def test_aggregate_mixed_types():
11291125
expected = DataFrame(
11301126
expected_data,
11311127
index=Index([2, "group 1"], dtype="object", name="grouping"),
1132-
columns=Index(["X", "Y", "Z"], dtype="object"),
1128+
columns=Index(["X", "Y", "Z"]),
11331129
)
11341130
tm.assert_frame_equal(result, expected)
11351131

pandas/tests/groupby/aggregate/test_cython.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas.core.dtypes.common import (
119
is_float_dtype,
1210
is_integer_dtype,
@@ -92,7 +90,6 @@ def test_cython_agg_boolean():
9290
tm.assert_series_equal(result, expected)
9391

9492

95-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
9693
def test_cython_agg_nothing_to_agg():
9794
frame = DataFrame(
9895
{"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25}
@@ -108,7 +105,9 @@ def test_cython_agg_nothing_to_agg():
108105

109106
result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True)
110107
expected = DataFrame(
111-
[], index=frame["a"].sort_values().drop_duplicates(), columns=[]
108+
[],
109+
index=frame["a"].sort_values().drop_duplicates(),
110+
columns=Index([], dtype="str"),
112111
)
113112
tm.assert_frame_equal(result, expected)
114113

pandas/tests/groupby/aggregate/test_other.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
1311
from pandas.errors import SpecificationError
1412

1513
import pandas as pd
@@ -308,7 +306,6 @@ def test_series_agg_multikey():
308306
tm.assert_series_equal(result, expected)
309307

310308

311-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
312309
def test_series_agg_multi_pure_python():
313310
data = DataFrame(
314311
{
@@ -358,7 +355,8 @@ def test_series_agg_multi_pure_python():
358355
)
359356

360357
def bad(x):
361-
assert len(x.values.base) > 0
358+
if isinstance(x.values, np.ndarray):
359+
assert len(x.values.base) > 0
362360
return "foo"
363361

364362
result = data.groupby(["A", "B"]).agg(bad)

pandas/tests/groupby/methods/test_quantile.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -158,11 +156,10 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby,
158156
tm.assert_frame_equal(result, expected)
159157

160158

161-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
162159
def test_quantile_raises():
163160
df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])
164161

165-
msg = "dtype 'object' does not support operation 'quantile'"
162+
msg = "dtype '(object|str)' does not support operation 'quantile'"
166163
with pytest.raises(TypeError, match=msg):
167164
df.groupby("key").quantile()
168165

pandas/tests/groupby/methods/test_size.py

+2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ def test_size_series_masked_type_returns_Int64(dtype):
7676
tm.assert_series_equal(result, expected)
7777

7878

79+
# TODO(infer_string) when the column is object dtype, groupby size should preserve that dtype
80+
# for the result's index
7981
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
8082
def test_size_strings(any_string_dtype):
8183
# GH#55627

pandas/tests/groupby/test_categorical.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas as pd
97
from pandas import (
108
Categorical,
@@ -322,15 +320,18 @@ def test_apply(ordered):
322320
tm.assert_series_equal(result, expected)
323321

324322

325-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
326-
def test_observed(observed):
323+
def test_observed(request, using_infer_string, observed):
327324
# multiple groupers, don't re-expand the output space
328325
# of the grouper
329326
# gh-14942 (implement)
330327
# gh-10132 (back-compat)
331328
# gh-8138 (back-compat)
332329
# gh-8869
333330

331+
if using_infer_string and not observed:
332+
# TODO(infer_string) this fails because the string column gets filled with 0
333+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
334+
334335
cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True)
335336
cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True)
336337
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})

pandas/tests/groupby/test_groupby.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,6 @@ def test_groupby_two_group_keys_all_nan():
12811281
assert result == {}
12821282

12831283

1284-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
12851284
def test_groupby_2d_malformed():
12861285
d = DataFrame(index=range(2))
12871286
d["group"] = ["g1", "g2"]
@@ -1290,7 +1289,7 @@ def test_groupby_2d_malformed():
12901289
d["label"] = ["l1", "l2"]
12911290
tmp = d.groupby(["group"]).mean(numeric_only=True)
12921291
res_values = np.array([[0.0, 1.0], [0.0, 1.0]])
1293-
tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"]))
1292+
tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"], dtype=object))
12941293
tm.assert_numpy_array_equal(tmp.values, res_values)
12951294

12961295

@@ -2345,7 +2344,6 @@ def test_groupby_all_nan_groups_drop():
23452344
tm.assert_series_equal(result, expected)
23462345

23472346

2348-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
23492347
@pytest.mark.parametrize("numeric_only", [True, False])
23502348
def test_groupby_empty_multi_column(as_index, numeric_only):
23512349
# GH 15106 & GH 41998
@@ -2354,15 +2352,14 @@ def test_groupby_empty_multi_column(as_index, numeric_only):
23542352
result = gb.sum(numeric_only=numeric_only)
23552353
if as_index:
23562354
index = MultiIndex([[], []], [[], []], names=["A", "B"])
2357-
columns = ["C"] if not numeric_only else []
2355+
columns = ["C"] if not numeric_only else Index([], dtype="str")
23582356
else:
23592357
index = RangeIndex(0)
23602358
columns = ["A", "B", "C"] if not numeric_only else ["A", "B"]
23612359
expected = DataFrame([], columns=columns, index=index)
23622360
tm.assert_frame_equal(result, expected)
23632361

23642362

2365-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
23662363
def test_groupby_aggregation_non_numeric_dtype():
23672364
# GH #43108
23682365
df = DataFrame(
@@ -2373,7 +2370,7 @@ def test_groupby_aggregation_non_numeric_dtype():
23732370
{
23742371
"v": [[1, 1], [10, 20]],
23752372
},
2376-
index=Index(["M", "W"], dtype="object", name="MW"),
2373+
index=Index(["M", "W"], name="MW"),
23772374
)
23782375

23792376
gb = df.groupby(by=["MW"])

pandas/tests/groupby/test_groupby_dropna.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas.compat.pyarrow import pa_version_under10p1
75

86
from pandas.core.dtypes.missing import na_value_for_dtype
@@ -99,7 +97,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_two_groups(
9997
tm.assert_frame_equal(grouped, expected)
10098

10199

102-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
103100
@pytest.mark.parametrize(
104101
"dropna, idx, outputs",
105102
[
@@ -126,7 +123,7 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
126123
df = pd.DataFrame(df_list, columns=["a", "b", "c", "d"])
127124
grouped = df.groupby("a", dropna=dropna).sum()
128125

129-
expected = pd.DataFrame(outputs, index=pd.Index(idx, dtype="object", name="a"))
126+
expected = pd.DataFrame(outputs, index=pd.Index(idx, name="a"))
130127

131128
tm.assert_frame_equal(grouped, expected)
132129

pandas/tests/groupby/test_grouping.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
import numpy as np
1111
import pytest
1212

13-
from pandas._config import using_string_dtype
14-
1513
from pandas.errors import SpecificationError
1614

1715
import pandas as pd
@@ -807,7 +805,6 @@ def test_groupby_empty(self):
807805
expected = ["name"]
808806
assert result == expected
809807

810-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
811808
def test_groupby_level_index_value_all_na(self):
812809
# issue 20519
813810
df = DataFrame(
@@ -817,7 +814,7 @@ def test_groupby_level_index_value_all_na(self):
817814
expected = DataFrame(
818815
data=[],
819816
index=MultiIndex(
820-
levels=[Index(["x"], dtype="object"), Index([], dtype="float64")],
817+
levels=[Index(["x"], dtype="str"), Index([], dtype="float64")],
821818
codes=[[], []],
822819
names=["A", "B"],
823820
),
@@ -981,12 +978,13 @@ def test_groupby_with_empty(self):
981978
grouped = series.groupby(grouper)
982979
assert next(iter(grouped), None) is None
983980

984-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
985981
def test_groupby_with_single_column(self):
986982
df = DataFrame({"a": list("abssbab")})
987983
tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]])
988984
# GH 13530
989-
exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[])
985+
exp = DataFrame(
986+
index=Index(["a", "b", "s"], name="a"), columns=Index([], dtype="str")
987+
)
990988
tm.assert_frame_equal(df.groupby("a").count(), exp)
991989
tm.assert_frame_equal(df.groupby("a").sum(), exp)
992990

pandas/tests/groupby/test_pipe.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
import numpy as np
2-
import pytest
3-
4-
from pandas._config import using_string_dtype
52

63
import pandas as pd
74
from pandas import (
@@ -11,7 +8,6 @@
118
import pandas._testing as tm
129

1310

14-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
1511
def test_pipe():
1612
# Test the pipe method of DataFrameGroupBy.
1713
# Issue #17871
@@ -39,7 +35,7 @@ def square(srs):
3935
# NDFrame.pipe methods
4036
result = df.groupby("A").pipe(f).pipe(square)
4137

42-
index = Index(["bar", "foo"], dtype="object", name="A")
38+
index = Index(["bar", "foo"], name="A")
4339
expected = pd.Series([3.749306591013693, 6.717707873081384], name="B", index=index)
4440

4541
tm.assert_series_equal(expected, result)

pandas/tests/groupby/test_reductions.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas._libs.tslibs import iNaT
119

1210
from pandas.core.dtypes.common import pandas_dtype
@@ -470,8 +468,7 @@ def test_max_min_non_numeric():
470468
assert "ss" in result
471469

472470

473-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
474-
def test_max_min_object_multiple_columns():
471+
def test_max_min_object_multiple_columns(using_infer_string):
475472
# GH#41111 case where the aggregation is valid for some columns but not
476473
# others; we split object blocks column-wise, consistent with
477474
# DataFrame._reduce
@@ -484,7 +481,7 @@ def test_max_min_object_multiple_columns():
484481
}
485482
)
486483
df._consolidate_inplace() # should already be consolidated, but double-check
487-
assert len(df._mgr.blocks) == 2
484+
assert len(df._mgr.blocks) == 3 if using_infer_string else 2
488485

489486
gb = df.groupby("A")
490487

pandas/tests/groupby/test_timegrouper.py

+2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):
7676

7777

7878
class TestGroupBy:
79+
# TODO(infer_string) resample sum introduces 0's
80+
# https://github.com/pandas-dev/pandas/issues/60229
7981
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
8082
def test_groupby_with_timegrouper(self):
8183
# GH 4161

pandas/tests/groupby/transform/test_transform.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas._libs import lib
97

108
from pandas.core.dtypes.common import ensure_platform_int
@@ -1034,20 +1032,19 @@ def test_groupby_transform_with_datetimes(func, values):
10341032
tm.assert_series_equal(result, expected)
10351033

10361034

1037-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
10381035
def test_groupby_transform_dtype():
10391036
# GH 22243
10401037
df = DataFrame({"a": [1], "val": [1.35]})
10411038

10421039
result = df["val"].transform(lambda x: x.map(lambda y: f"+{y}"))
1043-
expected1 = Series(["+1.35"], name="val", dtype="object")
1040+
expected1 = Series(["+1.35"], name="val")
10441041
tm.assert_series_equal(result, expected1)
10451042

10461043
result = df.groupby("a")["val"].transform(lambda x: x.map(lambda y: f"+{y}"))
10471044
tm.assert_series_equal(result, expected1)
10481045

10491046
result = df.groupby("a")["val"].transform(lambda x: x.map(lambda y: f"+({y})"))
1050-
expected2 = Series(["+(1.35)"], name="val", dtype="object")
1047+
expected2 = Series(["+(1.35)"], name="val")
10511048
tm.assert_series_equal(result, expected2)
10521049

10531050
df["val"] = df["val"].astype(object)

0 commit comments

Comments
 (0)