Skip to content

Commit

Permalink
ogr2ogr: GPKG/FlatGeoBuf -> other format: in Arrow code path, use DATETIME_AS_STRING to preserve origin timezone
Browse files Browse the repository at this point in the history

Fixes OSGeo#11212
  • Loading branch information
rouault committed Nov 7, 2024
1 parent d1ac855 commit ac2995d
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 3 deletions.
32 changes: 29 additions & 3 deletions apps/ogr2ogr_lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3997,7 +3997,8 @@ static int GetArrowGeomFieldIndex(const struct ArrowSchema *psLayerSchema,
/************************************************************************/

static CPLStringList
BuildGetArrowStreamOptions(const GDALVectorTranslateOptions *psOptions,
BuildGetArrowStreamOptions(OGRLayer *poSrcLayer, OGRLayer *poDstLayer,
const GDALVectorTranslateOptions *psOptions,
bool bPreserveFID)
{
CPLStringList aosOptionsGetArrowStream;
Expand All @@ -4021,6 +4022,31 @@ BuildGetArrowStreamOptions(const GDALVectorTranslateOptions *psOptions,
"MAX_FEATURES_IN_BATCH",
CPLSPrintf("%d", psOptions->nGroupTransactions));
}

auto poSrcDS = poSrcLayer->GetDataset();
auto poDstDS = poDstLayer->GetDataset();
if (poSrcDS && poDstDS)
{
auto poSrcDriver = poSrcDS->GetDriver();
auto poDstDriver = poDstDS->GetDriver();

const auto IsArrowNativeDriver = [](GDALDriver *poDriver)
{
return EQUAL(poDriver->GetDescription(), "ARROW") ||
EQUAL(poDriver->GetDescription(), "PARQUET") ||
EQUAL(poDriver->GetDescription(), "ADBC");
};

if (poSrcDriver && poDstDriver && !IsArrowNativeDriver(poSrcDriver) &&
!IsArrowNativeDriver(poDstDriver))
{
// For non-Arrow-native drivers, request DateTime as string, to
// allow mix of timezones
aosOptionsGetArrowStream.SetNameValue(GAS_OPT_DATETIME_AS_STRING,
"YES");
}
}

return aosOptionsGetArrowStream;
}

Expand Down Expand Up @@ -4085,8 +4111,8 @@ bool SetupTargetLayer::CanUseWriteArrowBatch(
}
}

const CPLStringList aosGetArrowStreamOptions(
BuildGetArrowStreamOptions(psOptions, bPreserveFID));
const CPLStringList aosGetArrowStreamOptions(BuildGetArrowStreamOptions(
poSrcLayer, poDstLayer, psOptions, bPreserveFID));
if (poSrcLayer->GetArrowStream(streamSrc.get(),
aosGetArrowStreamOptions.List()))
{
Expand Down
51 changes: 51 additions & 0 deletions autotest/utilities/test_ogr2ogr_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2958,3 +2958,54 @@ def test_ogr2ogr_lib_explodecollections_empty_geoms(input_wkt, expected_output_w
out_lyr = out_ds.GetLayer(0)
f = out_lyr.GetNextFeature()
assert f.GetGeometryRef().ExportToIsoWkt() == expected_output_wkt


###############################################################################


@gdaltest.enable_exceptions()
@pytest.mark.require_driver("GPKG")
def test_ogr2ogr_lib_arrow_datetime_as_string(tmp_vsimem):

src_filename = str(tmp_vsimem / "src.gpkg")
with ogr.GetDriverByName("GPKG").CreateDataSource(src_filename) as src_ds:
src_lyr = src_ds.CreateLayer("test", geom_type=ogr.wkbNone)

field = ogr.FieldDefn("dt", ogr.OFTDateTime)
src_lyr.CreateField(field)

f = ogr.Feature(src_lyr.GetLayerDefn())
src_lyr.CreateFeature(f)

f = ogr.Feature(src_lyr.GetLayerDefn())
f.SetField("dt", "2022-05-31T12:34:56.789Z")
src_lyr.CreateFeature(f)

f = ogr.Feature(src_lyr.GetLayerDefn())
f.SetField("dt", "2022-05-31T12:34:56")
src_lyr.CreateFeature(f)

f = ogr.Feature(src_lyr.GetLayerDefn())
f.SetField("dt", "2022-05-31T12:34:56+12:30")
src_lyr.CreateFeature(f)

got_msg = []

def my_handler(errorClass, errno, msg):
got_msg.append(msg)
return

with gdaltest.error_handler(my_handler), gdaltest.config_options(
{"CPL_DEBUG": "ON", "OGR2OGR_USE_ARROW_API": "YES"}
):
dst_ds = gdal.VectorTranslate("", src_filename, format="Memory")

assert "OGR2OGR: Using WriteArrowBatch()" in got_msg

dst_lyr = dst_ds.GetLayer(0)
assert [f.GetField("dt") for f in dst_lyr] == [
None,
"2022/05/31 12:34:56.789+00",
"2022/05/31 12:34:56",
"2022/05/31 12:34:56+1230",
]

0 comments on commit ac2995d

Please sign in to comment.