7
7
import numpy as np
8
8
import pytest
9
9
10
+ from pandas .compat import pa_version_under21p0
11
+
10
12
from pandas import (
11
13
NA ,
12
14
DataFrame ,
13
15
Index ,
14
16
MultiIndex ,
15
17
Series ,
18
+ StringDtype ,
16
19
)
17
20
import pandas ._testing as tm
18
21
from pandas .core .strings .accessor import StringMethods
@@ -240,8 +243,9 @@ def test_ismethods(method, expected, any_string_dtype):
240
243
@pytest .mark .parametrize (
241
244
"method, expected" ,
242
245
[
243
- ("isnumeric" , [False , True , True , False , True , True , False ]),
244
- ("isdecimal" , [False , True , False , False , False , True , False ]),
246
+ ("isnumeric" , [False , True , True , True , False , True , True , False ]),
247
+ ("isdecimal" , [False , True , False , False , False , False , True , False ]),
248
+ ("isdigit" , [False , True , True , False , False , False , True , False ]),
245
249
],
246
250
)
247
251
def test_isnumeric_unicode (method , expected , any_string_dtype ):
@@ -250,18 +254,35 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
250
254
# 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
251
255
# 0xFF13: ３ Em 3 # noqa: RUF003
252
256
ser = Series (
253
- ["A" , "3" , "¼" , "★" , "፸" , "3" , "four" ], dtype = any_string_dtype # noqa: RUF001
257
+ ["A" , "3" , "³" , "¼" , "★" , "፸" , "3" , "four" ], # noqa: RUF001
258
+ dtype = any_string_dtype ,
254
259
)
255
260
expected_dtype = (
256
261
"bool" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
257
262
)
258
263
expected = Series (expected , dtype = expected_dtype )
264
+ if (
265
+ method == "isdigit"
266
+ and isinstance (ser .dtype , StringDtype )
267
+ and ser .dtype .storage == "pyarrow"
268
+ and not pa_version_under21p0
269
+ ):
270
+ # known difference in behavior between python and pyarrow unicode handling
271
+ # pyarrow 21+ treats ¼ and ፸ as digits, while Python's str.isdigit does not
272
+ expected .iloc [3 ] = True
273
+ expected .iloc [5 ] = True
274
+
259
275
result = getattr (ser .str , method )()
260
276
tm .assert_series_equal (result , expected )
261
277
262
278
# compare with standard library
263
- expected = [getattr (item , method )() for item in ser ]
264
- assert list (result ) == expected
279
+ # (only for non-pyarrow storage given the above differences)
280
+ if any_string_dtype == "object" or (
281
+ isinstance (any_string_dtype , StringDtype )
282
+ and any_string_dtype .storage == "python"
283
+ ):
284
+ expected = [getattr (item , method )() for item in ser ]
285
+ assert list (result ) == expected
265
286
266
287
267
288
@pytest .mark .filterwarnings ("ignore:Downcasting object dtype arrays:FutureWarning" )
0 commit comments