Skip to content

Commit 058eb2b

Browse files
[backport 2.3.x] BUG: String[pyarrow] comparison with mixed object (#62424) (#62504)
Co-authored-by: jbrockmendel <[email protected]>
1 parent 2ca088d commit 058eb2b

File tree

3 files changed

+29
-7
lines changed

3 files changed

+29
-7
lines changed

doc/source/whatsnew/v2.3.3.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ Bug fixes
4747
- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
4848
with a compiled regex and custom flags (:issue:`62240`)
4949
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
50-
50+
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
5151

5252
Improvements and fixes for Copy-on-Write
5353
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/core/arrays/arrow/array.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -727,12 +727,18 @@ def _cmp_method(self, other, op):
727727
pc_func = ARROW_CMP_FUNCS[op.__name__]
728728
if isinstance(other, (ExtensionArray, np.ndarray, list)):
729729
try:
730-
result = pc_func(self._pa_array, self._box_pa(other))
731-
except pa.ArrowNotImplementedError:
732-
# TODO: could this be wrong if other is object dtype?
733-
# in which case we need to operate pointwise?
734-
result = ops.invalid_comparison(self, other, op)
735-
result = pa.array(result, type=pa.bool_())
730+
boxed = self._box_pa(other)
731+
except pa.lib.ArrowInvalid:
732+
# e.g. GH#60228 [1, "b"] we have to operate pointwise
733+
res_values = [op(x, y) for x, y in zip(self, other)]
734+
result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
735+
else:
736+
try:
737+
result = pc_func(self._pa_array, boxed)
738+
except pa.ArrowNotImplementedError:
739+
result = ops.invalid_comparison(self, other, op)
740+
result = pa.array(result, type=pa.bool_())
741+
736742
elif is_scalar(other):
737743
try:
738744
result = pc_func(self._pa_array, self._box_pa(other))

pandas/tests/extension/test_string.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -275,3 +275,19 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
275275
)
276276
with pytest.raises(ValueError, match=msg):
277277
arr.searchsorted(b)
278+
279+
280+
def test_mixed_object_comparison(dtype):
281+
# GH#60228
282+
ser = pd.Series(["a", "b"], dtype=dtype)
283+
284+
mixed = pd.Series([1, "b"], dtype=object)
285+
286+
result = ser == mixed
287+
expected = pd.Series([False, True], dtype=bool)
288+
if dtype.storage == "python" and dtype.na_value is pd.NA:
289+
expected = expected.astype("boolean")
290+
elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
291+
expected = expected.astype("bool[pyarrow]")
292+
293+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)