Skip to content

Commit c498d20

Browse files
committed
update exception messages
1 parent fa3856c commit c498d20

File tree

14 files changed

+84
-44
lines changed

14 files changed

+84
-44
lines changed

asv_bench/benchmarks/groupby.py

+2
Original file line number | Diff line number | Diff line change
@@ -760,6 +760,8 @@ def setup(self, dtype, method):
760760
)
761761

762762
def time_str_func(self, dtype, method):
763+
if dtype == "string[python]" and method == "sum":
764+
raise NotImplementedError
763765
self.df.groupby("a")[self.df.columns[1:]].agg(method)
764766

765767

pandas/core/arrays/arrow/array.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -2256,7 +2256,9 @@ def _groupby_op(
22562256
"var",
22572257
"skew",
22582258
]:
2259-
raise TypeError(f"{self.dtype} dtype does not support {how} operations")
2259+
raise TypeError(
2260+
f"dtype '{self.dtype}' does not support operation '{how}'"
2261+
)
22602262
return super()._groupby_op(
22612263
how=how,
22622264
has_dropped_na=has_dropped_na,

pandas/core/arrays/base.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -2423,7 +2423,7 @@ def _groupby_op(
24232423
"skew",
24242424
]:
24252425
raise TypeError(
2426-
f"{self.dtype} dtype does not support {op.how} operations"
2426+
f"dtype '{self.dtype}' does not support operation '{how}'"
24272427
)
24282428
if op.how not in ["any", "all"]:
24292429
# Fail early to avoid conversion to object

pandas/core/groupby/groupby.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -4292,7 +4292,7 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
42924292
)
42934293
elif isinstance(vals.dtype, StringDtype):
42944294
raise TypeError(
4295-
f"{vals.dtype} dtype does not support quantile operations"
4295+
f"dtype '{vals.dtype}' does not support operation 'quantile'"
42964296
)
42974297

42984298
inference: DtypeObj | None = None

pandas/tests/frame/test_stack_unstack.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -2126,7 +2126,7 @@ def test_stack_multiple_bug(self, future_stack, using_infer_string):
21262126
unst = multi.unstack("ID")
21272127
msg = re.escape("agg function failed [how->mean,dtype->")
21282128
if using_infer_string:
2129-
msg = "str dtype does not support mean operations"
2129+
msg = "dtype 'str' does not support operation 'mean'"
21302130
with pytest.raises(TypeError, match=msg):
21312131
unst.resample("W-THU").mean()
21322132
down = unst.resample("W-THU").mean(numeric_only=True)

pandas/tests/generic/test_frame.py

+1
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ def test_metadata_propagation_indiv_groupby(self):
6161
"D": np.random.default_rng(2).standard_normal(8),
6262
}
6363
)
64+
df = df.astype({"A": object, "B": object})
6465
result = df.groupby("A").sum()
6566
tm.assert_metadata_equivalent(df, result)
6667

pandas/tests/groupby/aggregate/test_aggregate.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1020,7 +1020,7 @@ def test_groupby_as_index_agg(df):
10201020

10211021
result2 = grouped.agg({"C": "mean", "D": "sum"})
10221022
expected2 = grouped.mean(numeric_only=True)
1023-
expected2["D"] = grouped.sum()["D"]
1023+
expected2["D"] = grouped.sum(numeric_only=True)["D"]
10241024
tm.assert_frame_equal(result2, expected2)
10251025

10261026
grouped = df.groupby("A", as_index=True)

pandas/tests/groupby/methods/test_quantile.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -241,19 +241,20 @@ def test_groupby_quantile_nullable_array(values, q):
241241
tm.assert_series_equal(result, expected)
242242

243243

244-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
244+
# @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
245245
@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
246246
@pytest.mark.parametrize("numeric_only", [True, False])
247-
def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
247+
def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only, using_infer_string):
248248
df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
249249
if numeric_only:
250250
result = df.groupby("a").quantile(q, numeric_only=numeric_only)
251251
expected = df.groupby("a")[["b"]].quantile(q)
252252
tm.assert_frame_equal(result, expected)
253253
else:
254-
with pytest.raises(
255-
TypeError, match="'quantile' cannot be performed against 'object' dtypes!"
256-
):
254+
msg = "'quantile' cannot be performed against 'object' dtypes!"
255+
if using_infer_string:
256+
msg = "dtype 'str' does not support operation 'quantile'"
257+
with pytest.raises(TypeError, match=msg):
257258
df.groupby("a").quantile(q, numeric_only=numeric_only)
258259

259260

pandas/tests/groupby/test_groupby.py

+47 -16
Original file line number | Diff line number | Diff line change
@@ -480,7 +480,7 @@ def test_frame_multi_key_function_list_partial_failure(using_infer_string):
480480
funcs = ["mean", "std"]
481481
msg = re.escape("agg function failed [how->mean,dtype->")
482482
if using_infer_string:
483-
msg = "str dtype does not support mean operations"
483+
msg = "dtype 'str' does not support operation 'mean'"
484484
with pytest.raises(TypeError, match=msg):
485485
grouped.agg(funcs)
486486

@@ -578,6 +578,7 @@ def test_ops_not_as_index(reduction_func):
578578

579579

580580
def test_as_index_series_return_frame(df):
581+
df = df.astype({"A": object, "B": object})
581582
grouped = df.groupby("A", as_index=False)
582583
grouped2 = df.groupby(["A", "B"], as_index=False)
583584

@@ -671,7 +672,7 @@ def test_raises_on_nuisance(df, using_infer_string):
671672
grouped = df.groupby("A")
672673
msg = re.escape("agg function failed [how->mean,dtype->")
673674
if using_infer_string:
674-
msg = "str dtype does not support mean operations"
675+
msg = "dtype 'str' does not support operation 'mean'"
675676
with pytest.raises(TypeError, match=msg):
676677
grouped.agg("mean")
677678
with pytest.raises(TypeError, match=msg):
@@ -717,7 +718,7 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
717718
# Added numeric_only as part of GH#46560; these do not drop nuisance
718719
# columns when numeric_only is False
719720
if using_infer_string:
720-
msg = f"str dtype does not support {agg_function} operations"
721+
msg = f"dtype 'str' does not support operation '{agg_function}'"
721722
klass = TypeError
722723
elif agg_function in ("std", "sem"):
723724
klass = ValueError
@@ -740,18 +741,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
740741
tm.assert_frame_equal(result, expected)
741742

742743

743-
def test_raise_on_nuisance_python_single(df):
744+
def test_raise_on_nuisance_python_single(df, using_infer_string):
744745
# GH 38815
745746
grouped = df.groupby("A")
746-
with pytest.raises(ValueError, match="could not convert"):
747+
748+
err = ValueError
749+
msg = "could not convert"
750+
if using_infer_string:
751+
err = TypeError
752+
msg = "dtype 'str' does not support operation 'skew'"
753+
with pytest.raises(err, match=msg):
747754
grouped.skew()
748755

749756

750757
def test_raise_on_nuisance_python_multiple(three_group, using_infer_string):
751758
grouped = three_group.groupby(["A", "B"])
752759
msg = re.escape("agg function failed [how->mean,dtype->")
753760
if using_infer_string:
754-
msg = "str dtype does not support mean operations"
761+
msg = "dtype 'str' does not support operation 'mean'"
755762
with pytest.raises(TypeError, match=msg):
756763
grouped.agg("mean")
757764
with pytest.raises(TypeError, match=msg):
@@ -798,7 +805,7 @@ def test_wrap_aggregated_output_multindex(
798805
keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
799806
msg = re.escape("agg function failed [how->mean,dtype->")
800807
if using_infer_string:
801-
msg = "str dtype does not support mean operations"
808+
msg = "dtype 'str' does not support operation 'mean'"
802809
with pytest.raises(TypeError, match=msg):
803810
df.groupby(keys).agg("mean")
804811
agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
@@ -976,10 +983,20 @@ def test_groupby_with_hier_columns():
976983
tm.assert_index_equal(result.columns, df.columns[:-1])
977984

978985

979-
def test_grouping_ndarray(df):
986+
def test_grouping_ndarray(df, using_infer_string):
980987
grouped = df.groupby(df["A"].values)
988+
grouped2 = df.groupby(df["A"].rename(None))
989+
990+
if using_infer_string:
991+
msg = "dtype 'str' does not support operation 'sum'"
992+
with pytest.raises(TypeError, match=msg):
993+
grouped.sum()
994+
with pytest.raises(TypeError, match=msg):
995+
grouped2.sum()
996+
return
997+
981998
result = grouped.sum()
982-
expected = df.groupby(df["A"].rename(None)).sum()
999+
expected = grouped2.sum()
9831000
tm.assert_frame_equal(result, expected)
9841001

9851002

@@ -1478,13 +1495,23 @@ def f(group):
14781495
assert names == expected_names
14791496

14801497

1481-
def test_no_dummy_key_names(df):
1498+
def test_no_dummy_key_names(df, using_infer_string):
14821499
# see gh-1291
1483-
result = df.groupby(df["A"].values).sum()
1500+
gb = df.groupby(df["A"].values)
1501+
gb2 = df.groupby([df["A"].values, df["B"].values])
1502+
if using_infer_string:
1503+
msg = "dtype 'str' does not support operation 'sum'"
1504+
with pytest.raises(TypeError, match=msg):
1505+
gb.sum()
1506+
with pytest.raises(TypeError, match=msg):
1507+
gb2.sum()
1508+
return
1509+
1510+
result = gb.sum()
14841511
assert result.index.name is None
14851512

1486-
result = df.groupby([df["A"].values, df["B"].values]).sum()
1487-
assert result.index.names == (None, None)
1513+
result2 = gb2.sum()
1514+
assert result2.index.names == (None, None)
14881515

14891516

14901517
def test_groupby_sort_multiindex_series():
@@ -1820,7 +1847,7 @@ def get_categorical_invalid_expected():
18201847
elif is_per:
18211848
msg = "Period type does not support"
18221849
elif is_str:
1823-
msg = "str dtype does not support"
1850+
msg = f"dtype 'str' does not support operation '{op}'"
18241851
else:
18251852
msg = "category type does not support"
18261853
if op == "skew":
@@ -2750,7 +2777,7 @@ def test_obj_with_exclusions_duplicate_columns():
27502777
def test_groupby_numeric_only_std_no_result(numeric_only):
27512778
# GH 51080
27522779
dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}]
2753-
df = DataFrame(dicts_non_numeric)
2780+
df = DataFrame(dicts_non_numeric, dtype=object)
27542781
dfgb = df.groupby("a", as_index=False, sort=False)
27552782

27562783
if numeric_only:
@@ -2809,10 +2836,14 @@ def test_grouping_with_categorical_interval_columns():
28092836
def test_groupby_sum_on_nan_should_return_nan(bug_var):
28102837
# GH 24196
28112838
df = DataFrame({"A": [bug_var, bug_var, bug_var, np.nan]})
2839+
if isinstance(bug_var, str):
2840+
df = df.astype(object)
28122841
dfgb = df.groupby(lambda x: x)
28132842
result = dfgb.sum(min_count=1)
28142843

2815-
expected_df = DataFrame([bug_var, bug_var, bug_var, None], columns=["A"])
2844+
expected_df = DataFrame(
2845+
[bug_var, bug_var, bug_var, None], columns=["A"], dtype=df["A"].dtype
2846+
)
28162847
tm.assert_frame_equal(result, expected_df)
28172848

28182849

pandas/tests/groupby/test_numeric_only.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def df(self):
2828
"group": [1, 1, 2],
2929
"int": [1, 2, 3],
3030
"float": [4.0, 5.0, 6.0],
31-
"string": list("abc"),
31+
"string": Series(["a", "b", "c"], dtype=object),
3232
"category_string": Series(list("abc")).astype("category"),
3333
"category_int": [7, 8, 9],
3434
"datetime": date_range("20130101", periods=3),
@@ -180,7 +180,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
180180
"category type does not support sum operations",
181181
re.escape(f"agg function failed [how->{method},dtype->object]"),
182182
re.escape(f"agg function failed [how->{method},dtype->string]"),
183-
f"str dtype does not support {method} operations",
183+
f"dtype 'str' does not support operation '{method}'",
184184
]
185185
)
186186
with pytest.raises(exception, match=msg):
@@ -198,7 +198,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
198198
f"Cannot perform {method} with non-ordered Categorical",
199199
re.escape(f"agg function failed [how->{method},dtype->object]"),
200200
re.escape(f"agg function failed [how->{method},dtype->string]"),
201-
f"str dtype does not support {method} operations",
201+
f"dtype 'str' does not support operation '{method}'",
202202
]
203203
)
204204
with pytest.raises(exception, match=msg):

pandas/tests/groupby/test_raises.py

+7 -5
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def test_groupby_raises_string(
194194
"skew",
195195
"quantile",
196196
]:
197-
msg = f"str dtype does not support {groupby_func} operations"
197+
msg = f"dtype 'str' does not support operation '{groupby_func}'"
198198
if groupby_func == "sum":
199199
# The object-dtype allows this, StringArray variants do not.
200200
klass = TypeError
@@ -213,14 +213,13 @@ def test_groupby_raises_string(
213213
# there.
214214
import pyarrow as pa
215215

216+
# TODO(infer_string): avoid bubbling up pyarrow exceptions
216217
klass = pa.lib.ArrowNotImplementedError
217218
msg = "Function 'subtract_checked' has no kernel matching input types"
218219
elif groupby_func in ["cummin", "cummax"]:
219220
msg = msg.replace("object", "str")
220221
elif groupby_func == "corrwith":
221-
msg = (
222-
"'.*NumpySemantics' with dtype str does " "not support operation 'mean'"
223-
)
222+
msg = "'.*NumpySemantics' with dtype str does not support operation 'mean'"
224223

225224
if groupby_func == "fillna":
226225
kind = "Series" if groupby_series else "DataFrame"
@@ -275,7 +274,10 @@ def test_groupby_raises_string_np(
275274
if using_infer_string:
276275
klass = TypeError
277276
if df["d"].dtype.storage == "python":
278-
msg = "Cannot perform reduction 'mean' with string dtype"
277+
msg = (
278+
f"Cannot perform reduction '{groupby_func_np.__name__}' "
279+
"with string dtype"
280+
)
279281
else:
280282
msg = (
281283
"'ArrowStringArrayNumpySemantics' with dtype str does not "

pandas/tests/resample/test_resample_api.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ def tests_raises_on_nuisance(test_frame, using_infer_string):
198198
expected = r[["A", "B", "C"]].mean()
199199
msg = re.escape("agg function failed [how->mean,dtype->")
200200
if using_infer_string:
201-
msg = "str dtype does not support mean operations"
201+
msg = "dtype 'str' does not support operation 'mean'"
202202
with pytest.raises(TypeError, match=msg):
203203
r.mean()
204204
result = r.mean(numeric_only=True)
@@ -903,18 +903,18 @@ def test_frame_downsample_method(
903903
klass = TypeError
904904
msg = re.escape(f"agg function failed [how->{method},dtype->")
905905
if using_infer_string:
906-
msg = f"str dtype does not support {method} operations"
906+
msg = f"dtype 'str' does not support operation '{method}'"
907907
elif method in ["sum", "std", "sem"] and using_infer_string:
908908
klass = TypeError
909-
msg = f"str dtype does not support {method} operations"
909+
msg = f"dtype 'str' does not support operation '{method}'"
910910
else:
911911
klass = ValueError
912912
msg = expected_data
913913
with pytest.raises(klass, match=msg):
914914
_ = func(**kwargs)
915915
elif method == "sum" and using_infer_string and numeric_only is not True:
916916
klass = TypeError
917-
msg = "str dtype does not support sum operations"
917+
msg = f"dtype 'str' does not support operation '{method}'"
918918
with pytest.raises(klass, match=msg):
919919
_ = func(**kwargs)
920920
else:
@@ -965,11 +965,11 @@ def test_series_downsample_method(
965965
elif method == "prod":
966966
msg = re.escape("agg function failed [how->prod,dtype->")
967967
if using_infer_string:
968-
msg = "str dtype does not support prod operations"
968+
msg = "dtype 'str' does not support operation 'prod'"
969969
with pytest.raises(TypeError, match=msg):
970970
func(**kwargs)
971971
elif method == "sum" and using_infer_string and numeric_only is not True:
972-
msg = "str dtype does not support sum operations"
972+
msg = "dtype 'str' does not support operation 'sum'"
973973
with pytest.raises(TypeError, match=msg):
974974
func(**kwargs)
975975

pandas/tests/reshape/merge/test_join.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -632,7 +632,7 @@ def test_mixed_type_join_with_suffix(self, using_infer_string):
632632
grouped = df.groupby("id")
633633
msg = re.escape("agg function failed [how->mean,dtype->")
634634
if using_infer_string:
635-
msg = "str dtype does not support mean operations"
635+
msg = "dtype 'str' does not support operation 'mean'"
636636
with pytest.raises(TypeError, match=msg):
637637
grouped.mean()
638638
mn = grouped.mean(numeric_only=True)

pandas/tests/reshape/test_pivot.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -135,10 +135,11 @@ def test_pivot_table_categorical_observed_equal(self, observed):
135135

136136
tm.assert_frame_equal(result, expected)
137137

138-
def test_pivot_table_nocols(self, using_infer_string):
138+
def test_pivot_table_nocols(self):
139139
df = DataFrame(
140140
{"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
141141
)
142+
df = df.astype({"rows": object, "cols": object})
142143
rs = df.pivot_table(columns="cols", aggfunc="sum")
143144
xp = df.pivot_table(index="cols", aggfunc="sum").T
144145
tm.assert_frame_equal(rs, xp)
@@ -942,7 +943,7 @@ def test_no_col(self, data, using_infer_string):
942943
data.columns = [k * 2 for k in data.columns]
943944
msg = re.escape("agg function failed [how->mean,dtype->")
944945
if using_infer_string:
945-
msg = "str dtype does not support mean operations"
946+
msg = "dtype 'str' does not support operation 'mean'"
946947
with pytest.raises(TypeError, match=msg):
947948
data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
948949
table = data.drop(columns="CC").pivot_table(
@@ -1017,7 +1018,7 @@ def test_margin_with_only_columns_defined(
10171018
if aggfunc != "sum":
10181019
msg = re.escape("agg function failed [how->mean,dtype->")
10191020
if using_infer_string:
1020-
msg = "str dtype does not support mean operations"
1021+
msg = "dtype 'str' does not support operation 'mean'"
10211022
with pytest.raises(TypeError, match=msg):
10221023
df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
10231024
if "B" not in columns:

0 commit comments

Comments
 (0)