diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index 0accd07eb366a..323963a4ec4d2 100644 --- a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -47,7 +47,7 @@ Bug fixes - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch`` with a compiled regex and custom flags (:issue:`62240`) - Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`) - +- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`) Improvements and fixes for Copy-on-Write ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0f7cd792f7ee8..b8dd44a58e8ec 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -883,22 +883,27 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray: ltype = self._pa_array.type if isinstance(other, (ExtensionArray, np.ndarray, list)): - boxed = self._box_pa(other) - rtype = boxed.type - if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or ( - pa.types.is_timestamp(rtype) and pa.types.is_date(ltype) - ): - # GH#62157 match non-pyarrow behavior - result = ops.invalid_comparison(self, other, op) - result = pa.array(result, type=pa.bool_()) + try: + boxed = self._box_pa(other) + except pa.lib.ArrowInvalid: + # e.g. GH#60228 [1, "b"] we have to operate pointwise + res_values = [op(x, y) for x, y in zip(self, other)] + result = pa.array(res_values, type=pa.bool_(), from_pandas=True) else: - try: - result = pc_func(self._pa_array, boxed) - except pa.ArrowNotImplementedError: - # TODO: could this be wrong if other is object dtype? - # in which case we need to operate pointwise? + rtype = boxed.type + if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or ( + pa.types.is_timestamp(rtype) and pa.types.is_date(ltype) + ): + # GH#62157 match non-pyarrow behavior result = ops.invalid_comparison(self, other, op) result = pa.array(result, type=pa.bool_()) + else: + try: + result = pc_func(self._pa_array, boxed) + except pa.ArrowNotImplementedError: + result = ops.invalid_comparison(self, other, op) + result = pa.array(result, type=pa.bool_()) + elif is_scalar(other): if (isinstance(other, datetime) and pa.types.is_date(ltype)) or ( type(other) is date and pa.types.is_timestamp(ltype) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index e373ff12c4086..3793f06e4cca9 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -288,3 +288,19 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series): ) with pytest.raises(ValueError, match=msg): arr.searchsorted(b) + + +def test_mixed_object_comparison(dtype): + # GH#60228 + ser = pd.Series(["a", "b"], dtype=dtype) + + mixed = pd.Series([1, "b"], dtype=object) + + result = ser == mixed + expected = pd.Series([False, True], dtype=bool) + if dtype.storage == "python" and dtype.na_value is pd.NA: + expected = expected.astype("boolean") + elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA: + expected = expected.astype("bool[pyarrow]") + + tm.assert_series_equal(result, expected)