@@ -480,7 +480,7 @@ def test_frame_multi_key_function_list_partial_failure(using_infer_string):
480
480
funcs = ["mean" , "std" ]
481
481
msg = re .escape ("agg function failed [how->mean,dtype->" )
482
482
if using_infer_string :
483
- msg = "str dtype does not support mean operations"
483
+ msg = "dtype 'str' does not support operation 'mean'"
484
484
with pytest .raises (TypeError , match = msg ):
485
485
grouped .agg (funcs )
486
486
@@ -578,6 +578,7 @@ def test_ops_not_as_index(reduction_func):
578
578
579
579
580
580
def test_as_index_series_return_frame (df ):
581
+ df = df .astype ({"A" : object , "B" : object })
581
582
grouped = df .groupby ("A" , as_index = False )
582
583
grouped2 = df .groupby (["A" , "B" ], as_index = False )
583
584
@@ -671,7 +672,7 @@ def test_raises_on_nuisance(df, using_infer_string):
671
672
grouped = df .groupby ("A" )
672
673
msg = re .escape ("agg function failed [how->mean,dtype->" )
673
674
if using_infer_string :
674
- msg = "str dtype does not support mean operations"
675
+ msg = "dtype 'str' does not support operation 'mean'"
675
676
with pytest .raises (TypeError , match = msg ):
676
677
grouped .agg ("mean" )
677
678
with pytest .raises (TypeError , match = msg ):
@@ -717,7 +718,7 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
717
718
# Added numeric_only as part of GH#46560; these do not drop nuisance
718
719
# columns when numeric_only is False
719
720
if using_infer_string :
720
- msg = f"str dtype does not support {agg_function} operations"
721
+ msg = f"dtype 'str' does not support operation '{agg_function}'"
721
722
klass = TypeError
722
723
elif agg_function in ("std" , "sem" ):
723
724
klass = ValueError
@@ -740,18 +741,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
740
741
tm .assert_frame_equal (result , expected )
741
742
742
743
743
- def test_raise_on_nuisance_python_single (df ):
744
+ def test_raise_on_nuisance_python_single (df , using_infer_string ):
744
745
# GH 38815
745
746
grouped = df .groupby ("A" )
746
- with pytest .raises (ValueError , match = "could not convert" ):
747
+
748
+ err = ValueError
749
+ msg = "could not convert"
750
+ if using_infer_string :
751
+ err = TypeError
752
+ msg = "dtype 'str' does not support operation 'skew'"
753
+ with pytest .raises (err , match = msg ):
747
754
grouped .skew ()
748
755
749
756
750
757
def test_raise_on_nuisance_python_multiple (three_group , using_infer_string ):
751
758
grouped = three_group .groupby (["A" , "B" ])
752
759
msg = re .escape ("agg function failed [how->mean,dtype->" )
753
760
if using_infer_string :
754
- msg = "str dtype does not support mean operations"
761
+ msg = "dtype 'str' does not support operation 'mean'"
755
762
with pytest .raises (TypeError , match = msg ):
756
763
grouped .agg ("mean" )
757
764
with pytest .raises (TypeError , match = msg ):
@@ -798,7 +805,7 @@ def test_wrap_aggregated_output_multindex(
798
805
keys = [np .array ([0 , 0 , 1 ]), np .array ([0 , 0 , 1 ])]
799
806
msg = re .escape ("agg function failed [how->mean,dtype->" )
800
807
if using_infer_string :
801
- msg = "str dtype does not support mean operations"
808
+ msg = "dtype 'str' does not support operation 'mean'"
802
809
with pytest .raises (TypeError , match = msg ):
803
810
df .groupby (keys ).agg ("mean" )
804
811
agged = df .drop (columns = ("baz" , "two" )).groupby (keys ).agg ("mean" )
@@ -976,10 +983,20 @@ def test_groupby_with_hier_columns():
976
983
tm .assert_index_equal (result .columns , df .columns [:- 1 ])
977
984
978
985
979
- def test_grouping_ndarray (df ):
986
+ def test_grouping_ndarray (df , using_infer_string ):
980
987
grouped = df .groupby (df ["A" ].values )
988
+ grouped2 = df .groupby (df ["A" ].rename (None ))
989
+
990
+ if using_infer_string :
991
+ msg = "dtype 'str' does not support operation 'sum'"
992
+ with pytest .raises (TypeError , match = msg ):
993
+ grouped .sum ()
994
+ with pytest .raises (TypeError , match = msg ):
995
+ grouped2 .sum ()
996
+ return
997
+
981
998
result = grouped .sum ()
982
- expected = df . groupby ( df [ "A" ]. rename ( None )) .sum ()
999
+ expected = grouped2 .sum ()
983
1000
tm .assert_frame_equal (result , expected )
984
1001
985
1002
@@ -1478,13 +1495,23 @@ def f(group):
1478
1495
assert names == expected_names
1479
1496
1480
1497
1481
- def test_no_dummy_key_names (df ):
1498
+ def test_no_dummy_key_names (df , using_infer_string ):
1482
1499
# see gh-1291
1483
- result = df .groupby (df ["A" ].values ).sum ()
1500
+ gb = df .groupby (df ["A" ].values )
1501
+ gb2 = df .groupby ([df ["A" ].values , df ["B" ].values ])
1502
+ if using_infer_string :
1503
+ msg = "dtype 'str' does not support operation 'sum'"
1504
+ with pytest .raises (TypeError , match = msg ):
1505
+ gb .sum ()
1506
+ with pytest .raises (TypeError , match = msg ):
1507
+ gb2 .sum ()
1508
+ return
1509
+
1510
+ result = gb .sum ()
1484
1511
assert result .index .name is None
1485
1512
1486
- result = df . groupby ([ df [ "A" ]. values , df [ "B" ]. values ]) .sum ()
1487
- assert result .index .names == (None , None )
1513
+ result2 = gb2 .sum ()
1514
+ assert result2 .index .names == (None , None )
1488
1515
1489
1516
1490
1517
def test_groupby_sort_multiindex_series ():
@@ -1820,7 +1847,7 @@ def get_categorical_invalid_expected():
1820
1847
elif is_per :
1821
1848
msg = "Period type does not support"
1822
1849
elif is_str :
1823
- msg = "str dtype does not support"
1850
+ msg = f"dtype 'str' does not support operation '{op}'"
1824
1851
else :
1825
1852
msg = "category type does not support"
1826
1853
if op == "skew" :
@@ -2750,7 +2777,7 @@ def test_obj_with_exclusions_duplicate_columns():
2750
2777
def test_groupby_numeric_only_std_no_result (numeric_only ):
2751
2778
# GH 51080
2752
2779
dicts_non_numeric = [{"a" : "foo" , "b" : "bar" }, {"a" : "car" , "b" : "dar" }]
2753
- df = DataFrame (dicts_non_numeric )
2780
+ df = DataFrame (dicts_non_numeric , dtype = object )
2754
2781
dfgb = df .groupby ("a" , as_index = False , sort = False )
2755
2782
2756
2783
if numeric_only :
@@ -2809,10 +2836,14 @@ def test_grouping_with_categorical_interval_columns():
2809
2836
def test_groupby_sum_on_nan_should_return_nan (bug_var ):
2810
2837
# GH 24196
2811
2838
df = DataFrame ({"A" : [bug_var , bug_var , bug_var , np .nan ]})
2839
+ if isinstance (bug_var , str ):
2840
+ df = df .astype (object )
2812
2841
dfgb = df .groupby (lambda x : x )
2813
2842
result = dfgb .sum (min_count = 1 )
2814
2843
2815
- expected_df = DataFrame ([bug_var , bug_var , bug_var , None ], columns = ["A" ])
2844
+ expected_df = DataFrame (
2845
+ [bug_var , bug_var , bug_var , None ], columns = ["A" ], dtype = df ["A" ].dtype
2846
+ )
2816
2847
tm .assert_frame_equal (result , expected_df )
2817
2848
2818
2849
0 commit comments