From d1ac855d0b975001142624321d390f9b15493249 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 7 Nov 2024 12:28:52 +0100 Subject: [PATCH] CreateFieldFromArrowSchema(): take into account GDAL:OGR:Type=DateTime when ArrowSchema.format='u' (string) --- autotest/ogr/ogr_mem.py | 52 +++++++++++++++++++++++ ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp | 25 ++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/autotest/ogr/ogr_mem.py b/autotest/ogr/ogr_mem.py index c5918ea20085..aa0cbe9a521e 100755 --- a/autotest/ogr/ogr_mem.py +++ b/autotest/ogr/ogr_mem.py @@ -1022,6 +1022,58 @@ def test_ogr_mem_arrow_stream_numpy_datetime_as_string(): assert batch["datetime"][3] == b"2022-05-31T12:34:56+12:30" +############################################################################### +# Test CreateFieldFromArrowSchema() when there is a GDAL:OGR:type=DateTime +# Arrow schema metadata. + + +@gdaltest.enable_exceptions() +def test_ogr_mem_arrow_write_with_datetime_as_string(): + + src_ds = ogr.GetDriverByName("Memory").CreateDataSource("") + src_lyr = src_ds.CreateLayer("src_lyr", geom_type=ogr.wkbNone) + + field = ogr.FieldDefn("dt", ogr.OFTDateTime) + src_lyr.CreateField(field) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + src_lyr.CreateFeature(f) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56.789Z") + src_lyr.CreateFeature(f) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56") + src_lyr.CreateFeature(f) + + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("dt", "2022-05-31T12:34:56+12:30") + src_lyr.CreateFeature(f) + + ds = ogr.GetDriverByName("Memory").CreateDataSource("") + dst_lyr = ds.CreateLayer("dst_lyr") + + stream = src_lyr.GetArrowStream(["DATETIME_AS_STRING=YES"]) + schema = stream.GetSchema() + + for i in range(schema.GetChildrenCount()): + dst_lyr.CreateFieldFromArrowSchema(schema.GetChild(i)) + + while True: + array = stream.GetNextRecordBatch() + if array is None: + 
break + dst_lyr.WriteArrowBatch(schema, array) + + assert [f.GetField("dt") for f in dst_lyr] == [ + None, + "2022/05/31 12:34:56.789+00", + "2022/05/31 12:34:56", + "2022/05/31 12:34:56+1230", + ] + + ############################################################################### diff --git a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp index a0100c9ed738..2a999fd91a91 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp @@ -6160,7 +6160,23 @@ bool OGRLayer::CreateFieldFromArrowSchemaInternal( const auto oMetadata = OGRParseArrowMetadata(schema->metadata); for (const auto &oIter : oMetadata) { - if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME) + if (oIter.first == MD_GDAL_OGR_TYPE) + { + const auto &osType = oIter.second; + for (auto eType = OFTInteger; eType <= OFTMaxType;) + { + if (OGRFieldDefn::GetFieldTypeName(eType) == osType) + { + oFieldDefn.SetType(eType); + break; + } + if (eType == OFTMaxType) + break; + else + eType = static_cast<OGRFieldType>(eType + 1); + } + } + else if (oIter.first == MD_GDAL_OGR_ALTERNATIVE_NAME) oFieldDefn.SetAlternativeName(oIter.second.c_str()); else if (oIter.first == MD_GDAL_OGR_COMMENT) oFieldDefn.SetComment(oIter.second); @@ -6615,6 +6631,13 @@ static bool BuildOGRFieldInfo( bTypeOK = true; break; } + else if (eOGRType == OFTDateTime && + sType.eType == OFTString) + { + bFallbackTypesUsed = true; + bTypeOK = true; + break; + } else { CPLError(CE_Failure, CPLE_AppDefined,