# Patch summary (explanatory preamble placed above the `diff --git` header;
# it is not part of the patch payload):
#
# Target: python/pyspark/sql/tests/arrow/test_arrow_python_udf.py, inside
# test_type_coercion_string_to_numeric (per the hunk header).
#
# Change: the two `assertRaises(PythonException)` checks for string -> decimal
# UDF coercion (df_int_value and df_floating_value) are moved under
# `if LooseVersion(pd.__version__) < "3.0.0":`, so they only run on Pandas < 3.
# `pandas` and `LooseVersion` are imported locally, inside the test body.
#
# Rationale (as stated by the comment the patch itself adds): on Pandas 3 the
# string -> decimal conversion via `pa.Array.from_pandas(..., type=decimal128)`
# casts silently instead of raising, so the expected exception no longer occurs.
#
# NOTE(review): this file had all line breaks collapsed; the layout below was
# reconstructed to match the hunk header's line counts (11 old / 21 new).
# Exact leading whitespace, and the placement of the second assertion inside
# the version guard, are presumed from the re-wrapped call and the plural
# "assertions" in the comment -- confirm against the upstream commit.
# NOTE(review): on Pandas >= 3 this path is left with no assertion at all;
# consider whether the new (non-raising) behavior should be checked instead.
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_python_udf.py b/python/pyspark/sql/tests/arrow/test_arrow_python_udf.py
index 87f96a80a7f62..3c3607ea9eaf5 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow_python_udf.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow_python_udf.py
@@ -190,11 +190,21 @@ def test_type_coercion_string_to_numeric(self):
         with self.assertRaises(PythonException):
             df_floating_value.select(udf(lambda x: x, "int")("value").alias("res")).collect()
 
-        with self.assertRaises(PythonException):
-            df_int_value.select(udf(lambda x: x, "decimal")("value").alias("res")).collect()
+        # Pandas 3 backs string Series with an Arrow string array, and
+        # `pa.Array.from_pandas(series, type=pa.decimal128(...))` then silently
+        # casts those strings to decimal instead of raising. Skip the
+        # string -> decimal assertions on Pandas >= 3.
+        import pandas as pd
+        from pyspark.loose_version import LooseVersion
+
+        if LooseVersion(pd.__version__) < "3.0.0":
+            with self.assertRaises(PythonException):
+                df_int_value.select(udf(lambda x: x, "decimal")("value").alias("res")).collect()
 
-        with self.assertRaises(PythonException):
-            df_floating_value.select(udf(lambda x: x, "decimal")("value").alias("res")).collect()
+            with self.assertRaises(PythonException):
+                df_floating_value.select(
+                    udf(lambda x: x, "decimal")("value").alias("res")
+                ).collect()
 
     def test_arrow_udf_int_to_decimal_coercion(self):
         with self.sql_conf(