77import numpy as np
88import pytest
99
10- from pandas ._config import using_string_dtype
11-
12- from pandas .compat import HAS_PYARROW
1310import pandas .util ._test_decorators as td
1411
1512from pandas import (
@@ -276,7 +273,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
276273 return df [keys ].value_counts (normalize = normalize , sort = sort , ascending = ascending )
277274
278275
279- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
280276@pytest .mark .parametrize ("groupby" , ["column" , "array" , "function" ])
281277@pytest .mark .parametrize ("normalize, name" , [(True , "proportion" ), (False , "count" )])
282278@pytest .mark .parametrize (
@@ -289,7 +285,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
289285)
290286@pytest .mark .parametrize ("frame" , [True , False ])
291287def test_against_frame_and_seriesgroupby (
292- education_df , groupby , normalize , name , sort , ascending , as_index , frame , request
288+ education_df ,
289+ groupby ,
290+ normalize ,
291+ name ,
292+ sort ,
293+ ascending ,
294+ as_index ,
295+ frame ,
296+ request ,
297+ using_infer_string ,
293298):
294299 # test all parameters:
295300 # - Use column, array or function as by= parameter
@@ -350,17 +355,24 @@ def test_against_frame_and_seriesgroupby(
350355 index_frame ["gender" ] = index_frame ["both" ].str .split ("-" ).str .get (0 )
351356 index_frame ["education" ] = index_frame ["both" ].str .split ("-" ).str .get (1 )
352357 del index_frame ["both" ]
353- index_frame = index_frame .rename ({0 : None }, axis = 1 )
354- expected .index = MultiIndex .from_frame (index_frame )
358+ index_frame2 = index_frame .rename ({0 : None }, axis = 1 )
359+ expected .index = MultiIndex .from_frame (index_frame2 )
360+
361+ if index_frame2 .columns .isna ()[0 ]:
362+ # with using_infer_string, the columns in index_frame are string
363+ # dtype, which makes the rename({0: None}) above use np.nan
364+ # instead of None, so we need to set None more explicitly.
365+ expected .index .names = [None ] + expected .index .names [1 :]
355366 tm .assert_series_equal (result , expected )
356367 else :
357368 expected .insert (1 , "gender" , expected ["both" ].str .split ("-" ).str .get (0 ))
358369 expected .insert (2 , "education" , expected ["both" ].str .split ("-" ).str .get (1 ))
370+ if using_infer_string :
371+ expected = expected .astype ({"gender" : "str" , "education" : "str" })
359372 del expected ["both" ]
360373 tm .assert_frame_equal (result , expected )
361374
362375
363- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
364376@pytest .mark .parametrize (
365377 "dtype" ,
366378 [
@@ -387,6 +399,7 @@ def test_compound(
387399 expected_count ,
388400 expected_group_size ,
389401 dtype ,
402+ using_infer_string ,
390403):
391404 education_df = education_df .astype (dtype )
392405 education_df .columns = education_df .columns .astype (dtype )
@@ -409,6 +422,11 @@ def test_compound(
409422 expected ["count" ] = expected_count
410423 if dtype == "string[pyarrow]" :
411424 expected ["count" ] = expected ["count" ].convert_dtypes ()
425+ if using_infer_string and dtype == object :
426+ expected = expected .astype (
427+ {"country" : "str" , "gender" : "str" , "education" : "str" }
428+ )
429+
412430 tm .assert_frame_equal (result , expected )
413431
414432
@@ -501,9 +519,6 @@ def test_dropna_combinations(
501519 tm .assert_series_equal (result , expected )
502520
503521
504- @pytest .mark .xfail (
505- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)" , strict = False
506- )
507522@pytest .mark .parametrize (
508523 "dropna, expected_data, expected_index" ,
509524 [
0 commit comments