diff --git a/ibis/backends/tests/conftest.py b/ibis/backends/tests/conftest.py
index a9c69b64b57b..24ea4f01ca4c 100644
--- a/ibis/backends/tests/conftest.py
+++ b/ibis/backends/tests/conftest.py
@@ -78,3 +78,12 @@ def decorator(func):
     pytest.mark.notimpl(["datafusion", "exasol", "mssql", "druid", "oracle"]),
 ]
 NO_JSON_SUPPORT = combine_marks(NO_JSON_SUPPORT_MARKS)
+
+# Marks for tests that need NaN to stay distinct from NULL; the backends
+# listed here are flagged as treating NaN as NULL.
+NAN_TREATED_AS_NULL_MARKS = [
+    pytest.mark.never(
+        ["sqlite", "mssql", "mysql"], reason="Treats NaN as NULL", raises=Exception
+    ),
+]
+NAN_TREATED_AS_NULL = combine_marks(NAN_TREATED_AS_NULL_MARKS)
diff --git a/ibis/backends/tests/test_pyarrow.py b/ibis/backends/tests/test_pyarrow.py
new file mode 100644
index 000000000000..ac8e450410e3
--- /dev/null
+++ b/ibis/backends/tests/test_pyarrow.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+import math
+from collections import Counter
+
+import pytest
+
+import ibis
+from ibis.backends.tests.conftest import NAN_TREATED_AS_NULL
+
+pa = pytest.importorskip("pyarrow")
+
+
+@NAN_TREATED_AS_NULL
+@pytest.mark.notimpl(
+    "exasol", reason="Exasol driver can't handle NaNs during memtable registration"
+)
+@pytest.mark.parametrize(
+    "method",
+    [
+        pytest.param(lambda pa_arr: pa.table({"f": pa_arr}), id="pa_table"),
+        pytest.param(
+            lambda pa_arr: {"f": pa_arr},
+            id="dict_of_pa_arrays",
+            marks=pytest.mark.xfail(
+                # https://github.com/ibis-project/ibis/issues/11700
+                reason="During ops.InMemoryTable creation, we go through pd.DataFrame, losing NaN info"
+            ),
+        ),
+    ],
+)
+def test_nans_roundtrip(con, method):
+    """Check that NaN and NULL stay distinct through a memtable round trip.
+
+    A float64 pyarrow array holding one regular value, one NaN and one null
+    is wrapped by ``method``, turned into a memtable, and read back through
+    ``con`` both as aggregate counts and as a full column.
+    """
+    inp = [1.0, float("nan"), None]
+    t = ibis.memtable(method(pa.array(inp)))
+    assert t.schema()["f"] == ibis.dtype("float64")
+
+    def make_comparable(vals):
+        # NaN != NaN, so replace it with a sentinel that compares equal.
+        # Counting occurrences (instead of building a set) ignores row order
+        # but still catches duplicated or dropped rows.
+        return Counter(
+            "nan" if (isinstance(v, float) and math.isnan(v)) else v for v in vals
+        )
+
+    n_nan = con.execute(t.f.isnan().sum())
+    n_null = con.execute(t.f.isnull().sum())
+    assert (n_nan, n_null) == (1, 1)
+
+    result = make_comparable(con.to_pyarrow(t.f).to_pylist())
+    expected = make_comparable(inp)
+    assert result == expected