From ac2995d5f4d713a9f80fde315f03ecad497e25cf Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 7 Nov 2024 13:15:02 +0100 Subject: [PATCH] ogr2ogr: GPKG/FlatGeoBuf -> other format: in Arrow code path, use DATETIME_AS_STRING to preserve origin timezone Fixes #11212 --- apps/ogr2ogr_lib.cpp | 32 ++++++++++++++-- autotest/utilities/test_ogr2ogr_lib.py | 51 ++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/apps/ogr2ogr_lib.cpp b/apps/ogr2ogr_lib.cpp index 27969eb6cf07..66a96be02740 100644 --- a/apps/ogr2ogr_lib.cpp +++ b/apps/ogr2ogr_lib.cpp @@ -3997,7 +3997,8 @@ static int GetArrowGeomFieldIndex(const struct ArrowSchema *psLayerSchema, /************************************************************************/ static CPLStringList -BuildGetArrowStreamOptions(const GDALVectorTranslateOptions *psOptions, +BuildGetArrowStreamOptions(OGRLayer *poSrcLayer, OGRLayer *poDstLayer, + const GDALVectorTranslateOptions *psOptions, bool bPreserveFID) { CPLStringList aosOptionsGetArrowStream; @@ -4021,6 +4022,31 @@ BuildGetArrowStreamOptions(const GDALVectorTranslateOptions *psOptions, "MAX_FEATURES_IN_BATCH", CPLSPrintf("%d", psOptions->nGroupTransactions)); } + + auto poSrcDS = poSrcLayer->GetDataset(); + auto poDstDS = poDstLayer->GetDataset(); + if (poSrcDS && poDstDS) + { + auto poSrcDriver = poSrcDS->GetDriver(); + auto poDstDriver = poDstDS->GetDriver(); + + const auto IsArrowNativeDriver = [](GDALDriver *poDriver) + { + return EQUAL(poDriver->GetDescription(), "ARROW") || + EQUAL(poDriver->GetDescription(), "PARQUET") || + EQUAL(poDriver->GetDescription(), "ADBC"); + }; + + if (poSrcDriver && poDstDriver && !IsArrowNativeDriver(poSrcDriver) && + !IsArrowNativeDriver(poDstDriver)) + { + // For non-Arrow-native drivers, request DateTime as string, to + // allow mix of timezones + aosOptionsGetArrowStream.SetNameValue(GAS_OPT_DATETIME_AS_STRING, + "YES"); + } + } + return aosOptionsGetArrowStream; } @@ -4085,8 +4111,8 @@ bool SetupTargetLayer::CanUseWriteArrowBatch( } } - const CPLStringList aosGetArrowStreamOptions( - BuildGetArrowStreamOptions(psOptions, bPreserveFID)); + const CPLStringList aosGetArrowStreamOptions(BuildGetArrowStreamOptions( + poSrcLayer, poDstLayer, psOptions, bPreserveFID)); if (poSrcLayer->GetArrowStream(streamSrc.get(), aosGetArrowStreamOptions.List())) { diff --git a/autotest/utilities/test_ogr2ogr_lib.py b/autotest/utilities/test_ogr2ogr_lib.py index 2526163097a2..ca6b776b524c 100755 --- a/autotest/utilities/test_ogr2ogr_lib.py +++ b/autotest/utilities/test_ogr2ogr_lib.py @@ -2958,3 +2958,54 @@ def test_ogr2ogr_lib_explodecollections_empty_geoms(input_wkt, expected_output_w out_lyr = out_ds.GetLayer(0) f = out_lyr.GetNextFeature() assert f.GetGeometryRef().ExportToIsoWkt() == expected_output_wkt + + +############################################################################### + + +@gdaltest.enable_exceptions() +@pytest.mark.require_driver("GPKG") +def test_ogr2ogr_lib_arrow_datetime_as_string(tmp_vsimem): + + src_filename = str(tmp_vsimem / "src.gpkg") + with ogr.GetDriverByName("GPKG").CreateDataSource(src_filename) as src_ds: + src_lyr = src_ds.CreateLayer("test", geom_type=ogr.wkbNone) + + field = ogr.FieldDefn("dt", ogr.OFTDateTime) + src_lyr.CreateField(field) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + src_lyr.CreateFeature(f) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56.789Z") + src_lyr.CreateFeature(f) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56") + src_lyr.CreateFeature(f) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56+12:30") + src_lyr.CreateFeature(f) + + got_msg = [] + + def my_handler(errorClass, errno, msg): + got_msg.append(msg) + return + + with gdaltest.error_handler(my_handler), gdaltest.config_options( + {"CPL_DEBUG": "ON", "OGR2OGR_USE_ARROW_API": "YES"} + ): + dst_ds = gdal.VectorTranslate("", src_filename, format="Memory") + + assert "OGR2OGR: Using WriteArrowBatch()" in got_msg + + dst_lyr = dst_ds.GetLayer(0) + assert [f.GetField("dt") for f in dst_lyr] == [ + None, + "2022/05/31 12:34:56.789+00", + "2022/05/31 12:34:56", + "2022/05/31 12:34:56+1230", + ]