Skip to content

Commit 4ee17b3

Browse files
BUG: String[pyarrow] comparison with mixed object (#62424)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent e97a56e commit 4ee17b3

File tree

3 files changed

+35
-14
lines changed

3 files changed

+35
-14
lines changed

doc/source/whatsnew/v2.3.3.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Bug fixes
4747
- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
4848
with a compiled regex and custom flags (:issue:`62240`)
4949
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
50-
50+
- Fix comparison of a :class:`StringDtype` Series with mixed-type object data (e.g. ``[1, "b"]``) raising an error (:issue:`60228`)
5151

5252
Improvements and fixes for Copy-on-Write
5353
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/core/arrays/arrow/array.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -883,22 +883,27 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
883883
ltype = self._pa_array.type
884884

885885
if isinstance(other, (ExtensionArray, np.ndarray, list)):
886-
boxed = self._box_pa(other)
887-
rtype = boxed.type
888-
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
889-
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
890-
):
891-
# GH#62157 match non-pyarrow behavior
892-
result = ops.invalid_comparison(self, other, op)
893-
result = pa.array(result, type=pa.bool_())
886+
try:
887+
boxed = self._box_pa(other)
888+
except pa.lib.ArrowInvalid:
889+
# e.g. GH#60228 [1, "b"] we have to operate pointwise
890+
res_values = [op(x, y) for x, y in zip(self, other)]
891+
result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
894892
else:
895-
try:
896-
result = pc_func(self._pa_array, boxed)
897-
except pa.ArrowNotImplementedError:
898-
# TODO: could this be wrong if other is object dtype?
899-
# in which case we need to operate pointwise?
893+
rtype = boxed.type
894+
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
895+
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
896+
):
897+
# GH#62157 match non-pyarrow behavior
900898
result = ops.invalid_comparison(self, other, op)
901899
result = pa.array(result, type=pa.bool_())
900+
else:
901+
try:
902+
result = pc_func(self._pa_array, boxed)
903+
except pa.ArrowNotImplementedError:
904+
result = ops.invalid_comparison(self, other, op)
905+
result = pa.array(result, type=pa.bool_())
906+
902907
elif is_scalar(other):
903908
if (isinstance(other, datetime) and pa.types.is_date(ltype)) or (
904909
type(other) is date and pa.types.is_timestamp(ltype)

pandas/tests/extension/test_string.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,19 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
288288
)
289289
with pytest.raises(ValueError, match=msg):
290290
arr.searchsorted(b)
291+
292+
293+
def test_mixed_object_comparison(dtype):
    """Check ``==`` between a string Series and a mixed object Series.

    The right-hand operand holds an int and a str, so the comparison is
    expected to yield ``[False, True]`` elementwise instead of raising.
    """
    # GH#60228
    left = pd.Series(["a", "b"], dtype=dtype)
    right = pd.Series([1, "b"], dtype=object)

    # Pick the result dtype this test expects for each string dtype variant:
    # pd.NA-backed dtypes map to their storage's nullable boolean dtype,
    # anything else to plain bool.
    expected_dtype = bool
    if dtype.na_value is pd.NA:
        if dtype.storage == "python":
            expected_dtype = "boolean"
        elif dtype.storage == "pyarrow":
            expected_dtype = "bool[pyarrow]"

    expected = pd.Series([False, True], dtype=expected_dtype)
    tm.assert_series_equal(left == right, expected)

0 commit comments

Comments
 (0)