88import numpy as np
99import pytest
1010
11- from pandas ._config import using_string_dtype
12-
13- from pandas .compat import HAS_PYARROW
1411import pandas .util ._test_decorators as td
1512
1613from pandas import (
@@ -288,7 +285,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
288285 return df [keys ].value_counts (normalize = normalize , sort = sort , ascending = ascending )
289286
290287
291- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
292288@pytest .mark .parametrize ("groupby" , ["column" , "array" , "function" ])
293289@pytest .mark .parametrize ("normalize, name" , [(True , "proportion" ), (False , "count" )])
294290@pytest .mark .parametrize (
@@ -302,7 +298,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
302298@pytest .mark .parametrize ("as_index" , [True , False ])
303299@pytest .mark .parametrize ("frame" , [True , False ])
304300def test_against_frame_and_seriesgroupby (
305- education_df , groupby , normalize , name , sort , ascending , as_index , frame , request
301+ education_df ,
302+ groupby ,
303+ normalize ,
304+ name ,
305+ sort ,
306+ ascending ,
307+ as_index ,
308+ frame ,
309+ request ,
310+ using_infer_string ,
306311):
307312 # test all parameters:
308313 # - Use column, array or function as by= parameter
@@ -366,17 +371,24 @@ def test_against_frame_and_seriesgroupby(
366371 index_frame ["gender" ] = index_frame ["both" ].str .split ("-" ).str .get (0 )
367372 index_frame ["education" ] = index_frame ["both" ].str .split ("-" ).str .get (1 )
368373 del index_frame ["both" ]
369- index_frame = index_frame .rename ({0 : None }, axis = 1 )
370- expected .index = MultiIndex .from_frame (index_frame )
374+ index_frame2 = index_frame .rename ({0 : None }, axis = 1 )
375+ expected .index = MultiIndex .from_frame (index_frame2 )
376+
377+ if index_frame2 .columns .isna ()[0 ]:
378+ # with using_infer_string, the columns in index_frame have string
379+ # dtype, which makes the rename({0: None}) above use np.nan
380+ # instead of None, so we need to set None more explicitly.
381+ expected .index .names = [None ] + expected .index .names [1 :]
371382 tm .assert_series_equal (result , expected )
372383 else :
373384 expected .insert (1 , "gender" , expected ["both" ].str .split ("-" ).str .get (0 ))
374385 expected .insert (2 , "education" , expected ["both" ].str .split ("-" ).str .get (1 ))
386+ if using_infer_string :
387+ expected = expected .astype ({"gender" : "str" , "education" : "str" })
375388 del expected ["both" ]
376389 tm .assert_frame_equal (result , expected )
377390
378391
379- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
380392@pytest .mark .parametrize (
381393 "dtype" ,
382394 [
@@ -403,6 +415,7 @@ def test_compound(
403415 expected_count ,
404416 expected_group_size ,
405417 dtype ,
418+ using_infer_string ,
406419):
407420 education_df = education_df .astype (dtype )
408421 education_df .columns = education_df .columns .astype (dtype )
@@ -425,6 +438,11 @@ def test_compound(
425438 expected ["count" ] = expected_count
426439 if dtype == "string[pyarrow]" :
427440 expected ["count" ] = expected ["count" ].convert_dtypes ()
441+ if using_infer_string and dtype == object :
442+ expected = expected .astype (
443+ {"country" : "str" , "gender" : "str" , "education" : "str" }
444+ )
445+
428446 tm .assert_frame_equal (result , expected )
429447
430448
@@ -537,9 +555,6 @@ def names_with_nulls_df(nulls_fixture):
537555 )
538556
539557
540- @pytest .mark .xfail (
541- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)" , strict = False
542- )
543558@pytest .mark .parametrize (
544559 "dropna, expected_data, expected_index" ,
545560 [
0 commit comments