Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: fix error in write_dataframe when writing an empty or all-None object column with use_arrow #512

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
### Bug fixes

- Fix WKB writing on big-endian systems (#497).
- Fix writing fids to e.g. GPKG file with use_arrow (#511).
- Fix writing fids to e.g. GPKG file with `use_arrow` (#511).
- Fix error in `write_dataframe` when writing an empty or all-None object
column with `use_arrow` (#512).

### Packaging

Expand Down
9 changes: 9 additions & 0 deletions pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,15 @@ def write_dataframe(

table = pa.Table.from_pandas(df, preserve_index=False)

# Null arrow columns are not supported by GDAL, so convert to string
for field_index, field in enumerate(table.schema):
if field.type == pa.null():
table = table.set_column(
field_index,
field.with_type(pa.string()),
table[field_index].cast(pa.string()),
)

if geometry_column is not None:
# ensure that the geometry column is binary (for all-null geometries,
# this could be a wrong type)
Expand Down
39 changes: 35 additions & 4 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,16 +1146,29 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):


# NOTE(review): `ext not in ".geojsonl"` is a substring test against a string,
# not a membership test against a collection. Besides ".geojsonl" it also
# filters out any extension that is a substring of it (e.g. ".geojson").
# If only ".geojsonl" should be skipped, `ext != ".geojsonl"` says so - confirm.
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
@pytest.mark.parametrize(
"columns, dtype",
[
# no attribute columns at all, then one empty column per dtype
([], None),
(["col_int"], np.int64),
(["col_float"], np.float64),
(["col_object"], object),
],
)
@pytest.mark.requires_arrow_write_api
# NOTE(review): this span is a diff hunk whose +/- markers were lost; the next
# two lines look like the removed signature and fixture, superseded by the
# definition that follows them - confirm against the actual file.
def test_write_empty_dataframe(tmp_path, ext, use_arrow):
expected = gp.GeoDataFrame(geometry=[], crs=4326)
def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
"""Test writing dataframe with no rows.

With use_arrow, object type columns with no rows are converted to null type columns
by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
"""
expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
filename = tmp_path / f"test{ext}"
write_dataframe(expected, filename, use_arrow=use_arrow)

assert filename.exists()
# NOTE(review): the next two lines look like the removed read/compare pair;
# the pair after them reads back with the same use_arrow setting and relaxes
# the index type check.
df = read_dataframe(filename)
assert_geodataframe_equal(df, expected)
df = read_dataframe(filename, use_arrow=use_arrow)
assert_geodataframe_equal(df, expected, check_index_type=False)


def test_write_empty_geometry(tmp_path):
Expand All @@ -1175,6 +1188,24 @@ def test_write_empty_geometry(tmp_path):
assert_geodataframe_equal(df, expected)


@pytest.mark.requires_arrow_write_api
def test_write_None_string_column(tmp_path, use_arrow):
    """Round-trip a frame whose only attribute column contains just None.

    When use_arrow is set, pyarrow turns such an object column into a null
    type column, and GDAL does not support null columns. Regression test for
    the fix for #513.
    """
    out_path = tmp_path / "test.gpkg"
    source = gp.GeoDataFrame(
        {"object_col": [None]},
        geometry=[Point(0, 0)],
        crs=4326,
    )

    write_dataframe(source, out_path, use_arrow=use_arrow)
    assert out_path.exists()

    # The column must come back as an object column, and the frame must
    # compare equal to what was written.
    roundtrip = read_dataframe(out_path, use_arrow=use_arrow)
    assert roundtrip["object_col"].dtype == object
    assert_geodataframe_equal(roundtrip, source)


@pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
@pytest.mark.requires_arrow_write_api
def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
Expand Down
Loading