From de0d274a756a9b98b91e95bc1e076a96b49e49f5 Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 23 Sep 2025 09:01:38 -0700
Subject: [PATCH 1/2] BUG: String[pyarrow] comparison with mixed object

---
Review notes (commentary only; placed after the "---" separator so it is
not part of the commit message):

* _cmp_method now calls self._box_pa(other) in its own try block. If that
  raises pa.lib.ArrowInvalid (e.g. other == [1, "b"], GH#60228), the
  comparison is evaluated element by element with op(x, y) and the results
  are collected into a pa.bool_() array built with from_pandas=True.
* When boxing succeeds the previous behaviour is kept: pc_func is applied
  to the boxed values, and pa.ArrowNotImplementedError still falls back to
  ops.invalid_comparison.
* NOTE(review): zip(self, other) stops at the shorter input, so the
  pointwise path presumably relies on `other` having the same length as
  `self` -- confirm that a length check happens before this point.
* NOTE(review): line breaks and indentation below were restored from a
  whitespace-collapsed copy of this patch -- verify with
  `git apply --check` before use.

 pandas/core/arrays/arrow/array.py     | 17 +++++++++++------
 pandas/tests/extension/test_string.py | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 653a900fbfe45..45483c60f9b4d 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -829,12 +829,17 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
         pc_func = ARROW_CMP_FUNCS[op.__name__]
         if isinstance(other, (ExtensionArray, np.ndarray, list)):
             try:
-                result = pc_func(self._pa_array, self._box_pa(other))
-            except pa.ArrowNotImplementedError:
-                # TODO: could this be wrong if other is object dtype?
-                # in which case we need to operate pointwise?
-                result = ops.invalid_comparison(self, other, op)
-                result = pa.array(result, type=pa.bool_())
+                boxed = self._box_pa(other)
+            except pa.lib.ArrowInvalid:
+                # e.g. GH#60228 [1, "b"] we have to operate pointwise
+                res_values = [op(x, y) for x, y in zip(self, other)]
+                result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
+            else:
+                try:
+                    result = pc_func(self._pa_array, boxed)
+                except pa.ArrowNotImplementedError:
+                    result = ops.invalid_comparison(self, other, op)
+                    result = pa.array(result, type=pa.bool_())
         elif is_scalar(other):
             try:
                 result = pc_func(self._pa_array, self._box_pa(other))
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index e373ff12c4086..3793f06e4cca9 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -288,3 +288,19 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
     )
     with pytest.raises(ValueError, match=msg):
         arr.searchsorted(b)
+
+
+def test_mixed_object_comparison(dtype):
+    # GH#60228
+    ser = pd.Series(["a", "b"], dtype=dtype)
+
+    mixed = pd.Series([1, "b"], dtype=object)
+
+    result = ser == mixed
+    expected = pd.Series([False, True], dtype=bool)
+    if dtype.storage == "python" and dtype.na_value is pd.NA:
+        expected = expected.astype("boolean")
+    elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
+        expected = expected.astype("bool[pyarrow]")
+
+    tm.assert_series_equal(result, expected)

From 88e9c3666c8464c8361472a2f30ce1c8762c479b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 27 Sep 2025 11:41:14 +0200
Subject: [PATCH 2/2] add whatsnew note

---
 doc/source/whatsnew/v2.3.3.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst
index eeba61a686c73..e04a73fb5d287 100644
--- a/doc/source/whatsnew/v2.3.3.rst
+++ b/doc/source/whatsnew/v2.3.3.rst
@@ -47,7 +47,7 @@ Bug fixes
 - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
   with a compiled regex and custom flags (:issue:`62240`)
 - Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
-
+- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
 
 Improvements and fixes for Copy-on-Write
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~