From d722580b162cca5324b2a9b6ed2df7a67b7915d1 Mon Sep 17 00:00:00 2001 From: Eric Wang Date: Wed, 20 May 2026 02:43:09 -0700 Subject: [PATCH] feat(csharp): apply adbc.spark.data_type_conv on SEA path (PECO-3060) [SEA] Honor data_type_conv on the Statement Execution (REST/SEA) path so it matches the Thrift path's HiveServer2SchemaParser semantics: - scalar (default): DATE -> Date32, DECIMAL -> Decimal128, TIMESTAMP -> Timestamp, FLOAT -> Float (native Arrow types, unchanged from current behaviour). - none: DATE / DECIMAL / TIMESTAMP -> String; FLOAT -> Double (widened). StatementExecutionConnection now parses SparkParameters.DataTypeConv via DataTypeConversionParser (same precedence as SparkHttpConnection) and exposes the resulting DataTypeConversion to StatementExecutionStatement. The schema mapping in ArrowTypeParser.MapPrimitiveType branches on the flag for the four conversion-sensitive scalars; a new ScalarConversionStream wrapper, wrapped around the reader immediately after the ComplexTypeSerializingStream step when the mode is none, converts the native Date32/Timestamp/Decimal128/Float arrays into matching StringArray / DoubleArray so the schema and batch data agree. E2E coverage: ExecuteQuery_DataTypeConv_None_SerializesScalarTypesToStrings proves DATE/TIMESTAMP/DECIMAL columns surface as StringType under adbc.spark.data_type_conv=none; ExecuteQuery_DataTypeConv_Scalar_KeepsNativeTypes guards the default mode. 
PECO-3060 --- csharp/src/ArrowTypeParser.cs | 52 +++- csharp/src/ScalarConversionStream.cs | 274 ++++++++++++++++++ .../StatementExecutionConnection.cs | 15 + .../StatementExecutionStatement.cs | 15 +- .../StatementExecutionDriverE2ETests.cs | 97 ++++++- 5 files changed, 443 insertions(+), 10 deletions(-) create mode 100644 csharp/src/ScalarConversionStream.cs diff --git a/csharp/src/ArrowTypeParser.cs b/csharp/src/ArrowTypeParser.cs index 4b2a44ef3..867c34af7 100644 --- a/csharp/src/ArrowTypeParser.cs +++ b/csharp/src/ArrowTypeParser.cs @@ -19,6 +19,7 @@ using Apache.Arrow; using Apache.Arrow.Types; using AdbcDrivers.Databricks.StatementExecution; +using AdbcDrivers.HiveServer2.Hive2; namespace AdbcDrivers.Databricks { @@ -57,9 +58,21 @@ internal static class ArrowTypeParser /// true: native nested Arrow types parsed from /// the manifest's type_text (see ). /// - /// Primitives (including INTERVAL, which is always string-typed) ignore the flag. + /// + /// controls how the conversion-sensitive + /// scalar types — DATE / DECIMAL / TIMESTAMP / FLOAT — are surfaced, mirroring + /// : + /// + /// scalar (default): DATE→Date32, DECIMAL→Decimal128, + /// TIMESTAMP→Timestamp, FLOAT→Float (native Arrow types). + /// none: DATE/DECIMAL/TIMESTAMP→String, + /// FLOAT→Double (widening). Paired with + /// which converts the native arrays to match. + /// + /// Other primitives (BOOLEAN/INT/BIGINT/STRING/BINARY/INTERVAL/NULL) ignore the flag. + /// /// - internal static IArrowType MapToArrowType(string typeText, bool enableComplexDatatypeSupport) + internal static IArrowType MapToArrowType(string typeText, bool enableComplexDatatypeSupport, DataTypeConversion dataTypeConversion) { var baseType = ColumnMetadataHelper.GetBaseTypeName(typeText).ToUpperInvariant(); if (baseType is "ARRAY" or "MAP" or "STRUCT") @@ -68,9 +81,16 @@ internal static IArrowType MapToArrowType(string typeText, bool enableComplexDat ? 
ParseComplexType(typeText) : StringType.Default; } - return MapPrimitiveType(typeText); + return MapPrimitiveType(typeText, dataTypeConversion); } + /// + /// Backward-compatible overload that defaults to . + /// Existing callers (e.g. unit tests that pre-date PECO-3060) keep their current behaviour. + /// + internal static IArrowType MapToArrowType(string typeText, bool enableComplexDatatypeSupport) + => MapToArrowType(typeText, enableComplexDatatypeSupport, DataTypeConversion.Scalar); + /// /// Parses into a native Arrow type. Returns /// on any parse failure — callers can rely on this, @@ -89,8 +109,11 @@ internal static IArrowType ParseComplexType(string typeText) /// Used by for top-level columns and by /// for primitive leaves inside ARRAY/MAP/STRUCT. /// - private static IArrowType MapPrimitiveType(string typeText) + /// The manifest type text (may include parameters like DECIMAL(10,2)). + /// Controls DATE/DECIMAL/TIMESTAMP/FLOAT handling — see . + private static IArrowType MapPrimitiveType(string typeText, DataTypeConversion dataTypeConversion) { + bool convertScalar = dataTypeConversion.HasFlag(DataTypeConversion.Scalar); var baseType = ColumnMetadataHelper.GetBaseTypeName(typeText).ToUpperInvariant(); return baseType switch { @@ -99,13 +122,17 @@ private static IArrowType MapPrimitiveType(string typeText) "SHORT" or "SMALLINT" => Int16Type.Default, "INT" or "INTEGER" => Int32Type.Default, "LONG" or "BIGINT" => Int64Type.Default, - "FLOAT" or "REAL" => FloatType.Default, + // FLOAT: scalar→Float (native), none→Double (widening), matching HiveServer2SchemaParser. + "FLOAT" or "REAL" => convertScalar ? FloatType.Default : DoubleType.Default, "DOUBLE" => DoubleType.Default, - "DECIMAL" or "NUMERIC" => ParseDecimalType(typeText), + // DECIMAL: scalar→Decimal128, none→String, matching HiveServer2SchemaParser. + "DECIMAL" or "NUMERIC" => convertScalar ? 
ParseDecimalType(typeText) : StringType.Default, "STRING" or "VARCHAR" or "CHAR" => StringType.Default, "BINARY" or "VARBINARY" => BinaryType.Default, - "DATE" => Date32Type.Default, - "TIMESTAMP" or "TIMESTAMP_NTZ" or "TIMESTAMP_LTZ" => TimestampType.Default, + // DATE: scalar→Date32, none→String, matching HiveServer2SchemaParser. + "DATE" => convertScalar ? Date32Type.Default : StringType.Default, + // TIMESTAMP: scalar→Timestamp, none→String, matching HiveServer2SchemaParser. + "TIMESTAMP" or "TIMESTAMP_NTZ" or "TIMESTAMP_LTZ" => convertScalar ? TimestampType.Default : StringType.Default, // INTERVAL is converted to string by IntervalSerializingStream; StringType is the output contract. "INTERVAL" => StringType.Default, "NULL" or "VOID" => NullType.Default, @@ -113,6 +140,15 @@ private static IArrowType MapPrimitiveType(string typeText) }; } + /// + /// Backward-compatible overload defaulting to + /// for the recursive complex-type parser, which always uses native scalar mapping + /// for leaves regardless of the user's flag (Thrift behaves the same — the flag + /// only governs the schema's top-level type for the affected scalars). + /// + private static IArrowType MapPrimitiveType(string typeText) + => MapPrimitiveType(typeText, DataTypeConversion.Scalar); + private static IArrowType ParseDecimalType(string typeText) { int precision = 38; diff --git a/csharp/src/ScalarConversionStream.cs b/csharp/src/ScalarConversionStream.cs new file mode 100644 index 000000000..00e1414e4 --- /dev/null +++ b/csharp/src/ScalarConversionStream.cs @@ -0,0 +1,274 @@ +/* +* Copyright (c) 2025 ADBC Drivers Contributors +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Threading; +using System.Threading.Tasks; +using Apache.Arrow; +using Apache.Arrow.Ipc; +using Apache.Arrow.Types; +using AdbcDrivers.Databricks.StatementExecution; +using AdbcDrivers.HiveServer2.Hive2; + +namespace AdbcDrivers.Databricks +{ + /// + /// PECO-3060: Wraps an and converts native Arrow + /// arrays for the conversion-sensitive scalar types into the shape Thrift's + /// data_type_conv=none mode produces, so SEA and Thrift agree on the output + /// contract regardless of protocol. + /// + /// Active only when the user sets adbc.spark.data_type_conv=none + /// (default is scalar, which keeps native types and bypasses this stream). + /// Mirrors : + /// + /// + /// DATE (Date32Array) → "yyyy-MM-dd" string + /// TIMESTAMP (TimestampArray) → "yyyy-MM-dd HH:mm:ss[.fffffff]" string + /// DECIMAL (Decimal128Array) → plain string (preserves precision) + /// FLOAT (FloatArray) → widened to DoubleArray + /// + /// + /// The declared schema (from TryGetSchemaFromManifest) already reports + /// StringType / DoubleType for these columns; this stream only converts the data + /// arrays so Arrow's strongly-typed contract holds. + /// + /// Column detection: Uses the same Spark:DataType:SqlName + /// metadata pattern as and + /// — the manifest schema embeds the raw + /// SQL type text on every field, which is reliable across inline / CloudFetch / empty paths. 
+ /// + internal sealed class ScalarConversionStream : IArrowArrayStream + { + private readonly IArrowArrayStream _inner; + private readonly Schema _schema; + // index -> kind of conversion to apply. We do this once up front so per-batch + // work is just a dictionary lookup. + private readonly Dictionary _conversions; + + private enum ScalarConversion + { + DateToString, + TimestampToString, + DecimalToString, + FloatToDouble, + } + + public ScalarConversionStream(IArrowArrayStream inner) + { + _inner = inner ?? throw new ArgumentNullException(nameof(inner)); + _schema = inner.Schema; + _conversions = DetectConversions(_schema); + } + + public Schema Schema => _schema; + + public async ValueTask ReadNextRecordBatchAsync(CancellationToken cancellationToken = default) + { + RecordBatch? batch = await _inner.ReadNextRecordBatchAsync(cancellationToken).ConfigureAwait(false); + if (batch == null) return null; + if (_conversions.Count == 0) return batch; + return ConvertColumns(batch); + } + + public void Dispose() => _inner.Dispose(); + + private static Dictionary DetectConversions(Schema schema) + { + var result = new Dictionary(); + for (int i = 0; i < schema.FieldsList.Count; i++) + { + Field field = schema.FieldsList[i]; + if (field.Metadata == null) continue; + if (!field.Metadata.TryGetValue(ColumnMetadataHelper.ArrowMetadataKey, out string? sqlName) || sqlName == null) + continue; + + // Match by SQL name prefix to keep the detection logic in sync with + // HiveServer2SchemaParser (which switches on TTypeId). Parametrised types + // like DECIMAL(10,2) and TIMESTAMP_NTZ are handled by prefix checks. 
+ var upper = sqlName.ToUpperInvariant(); + if (upper.StartsWith("DATE", StringComparison.Ordinal) && !upper.StartsWith("DATETIME", StringComparison.Ordinal)) + { + result[i] = ScalarConversion.DateToString; + } + else if (upper.StartsWith("TIMESTAMP", StringComparison.Ordinal)) + { + result[i] = ScalarConversion.TimestampToString; + } + else if (upper.StartsWith("DECIMAL", StringComparison.Ordinal) || upper.StartsWith("NUMERIC", StringComparison.Ordinal)) + { + result[i] = ScalarConversion.DecimalToString; + } + else if (upper.Equals("FLOAT", StringComparison.Ordinal) || upper.Equals("REAL", StringComparison.Ordinal)) + { + result[i] = ScalarConversion.FloatToDouble; + } + } + return result; + } + + private RecordBatch ConvertColumns(RecordBatch batch) + { + var arrays = new IArrowArray[batch.ColumnCount]; + for (int i = 0; i < batch.ColumnCount; i++) + { + arrays[i] = _conversions.TryGetValue(i, out var conv) + ? Convert(batch.Column(i), conv) + : batch.Column(i); + } + return new RecordBatch(_schema, arrays, batch.Length); + } + + private static IArrowArray Convert(IArrowArray array, ScalarConversion conv) + { + return conv switch + { + ScalarConversion.DateToString => ConvertDateToString(array), + ScalarConversion.TimestampToString => ConvertTimestampToString(array), + ScalarConversion.DecimalToString => ConvertDecimalToString(array), + ScalarConversion.FloatToDouble => ConvertFloatToDouble(array), + _ => array, + }; + } + + private static StringArray ConvertDateToString(IArrowArray array) + { + var builder = new StringArray.Builder(); + if (array is Date32Array d32) + { + for (int i = 0; i < d32.Length; i++) + { + if (d32.IsNull(i)) { builder.AppendNull(); continue; } + DateTime? dt = d32.GetDateTime(i); + builder.Append(dt?.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)); + } + } + else if (array is Date64Array d64) + { + for (int i = 0; i < d64.Length; i++) + { + if (d64.IsNull(i)) { builder.AppendNull(); continue; } + DateTime? 
dt = d64.GetDateTime(i); + builder.Append(dt?.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)); + } + } + else + { + // Defensive: column was detected as DATE via SqlName metadata but the + // underlying array isn't a Date{32,64}Array. Null-fill to keep row counts + // consistent rather than throw. + NullFill(builder, array.Length); + } + return builder.Build(); + } + + private static StringArray ConvertTimestampToString(IArrowArray array) + { + var builder = new StringArray.Builder(); + if (array is TimestampArray ts) + { + var unit = ((TimestampType)ts.Data.DataType).Unit; + for (int i = 0; i < ts.Length; i++) + { + if (ts.IsNull(i)) { builder.AppendNull(); continue; } + DateTimeOffset? dto = ts.GetTimestamp(i); + builder.Append(dto.HasValue ? FormatTimestamp(dto.Value, unit) : null); + } + } + else + { + NullFill(builder, array.Length); + } + return builder.Build(); + } + + /// + /// Formats a timestamp as Thrift would emit it for none mode: "yyyy-MM-dd HH:mm:ss" + /// with a fractional-seconds suffix matching the column unit (omitted for second + /// precision). The value is rendered in UTC because SEA returns TIMESTAMP / TIMESTAMP_NTZ + /// values without timezone info and Thrift renders them as wall-clock strings. + /// + internal static string FormatTimestamp(DateTimeOffset value, TimeUnit unit) + { + // Use UTC wall-clock representation (matches Thrift's TIMESTAMP_NTZ rendering). 
+ var utc = value.UtcDateTime; + string format = unit switch + { + TimeUnit.Second => "yyyy-MM-dd HH:mm:ss", + TimeUnit.Millisecond => "yyyy-MM-dd HH:mm:ss.fff", + TimeUnit.Nanosecond => "yyyy-MM-dd HH:mm:ss.fffffff", + _ => "yyyy-MM-dd HH:mm:ss.ffffff", // Microsecond (SEA default) and unknown + }; + return utc.ToString(format, CultureInfo.InvariantCulture); + } + + private static StringArray ConvertDecimalToString(IArrowArray array) + { + var builder = new StringArray.Builder(); + if (array is Decimal128Array dec) + { + for (int i = 0; i < dec.Length; i++) + { + if (dec.IsNull(i)) { builder.AppendNull(); continue; } + // Decimal128Array.GetString preserves the full precision/scale of the + // declared type — exactly what Thrift returns for none mode. + builder.Append(dec.GetString(i)); + } + } + else + { + NullFill(builder, array.Length); + } + return builder.Build(); + } + + private static DoubleArray ConvertFloatToDouble(IArrowArray array) + { + var builder = new DoubleArray.Builder().Reserve(array.Length); + if (array is FloatArray f) + { + for (int i = 0; i < f.Length; i++) + { + if (f.IsNull(i)) { builder.AppendNull(); continue; } + float? v = f.GetValue(i); + builder.Append(v.HasValue ? (double)v.Value : (double?)null); + } + } + else if (array is DoubleArray d) + { + // Already double — return as-is by rebuilding so the caller's contract + // is preserved (DoubleArray output regardless of input). 
+ for (int i = 0; i < d.Length; i++) + { + if (d.IsNull(i)) { builder.AppendNull(); continue; } + builder.Append(d.GetValue(i)); + } + } + else + { + for (int i = 0; i < array.Length; i++) builder.AppendNull(); + } + return builder.Build(); + } + + private static void NullFill(StringArray.Builder builder, int length) + { + for (int i = 0; i < length; i++) builder.AppendNull(); + } + } +} diff --git a/csharp/src/StatementExecution/StatementExecutionConnection.cs b/csharp/src/StatementExecution/StatementExecutionConnection.cs index 7b6361ae0..1bb7b46eb 100644 --- a/csharp/src/StatementExecution/StatementExecutionConnection.cs +++ b/csharp/src/StatementExecution/StatementExecutionConnection.cs @@ -73,6 +73,7 @@ internal class StatementExecutionConnection : TracingConnection, IGetObjectsData private readonly string _traceParentHeaderName; private readonly bool _traceStateEnabled; private readonly bool _enableComplexDatatypeSupport; + private readonly DataTypeConversion _dataTypeConversion; // Authentication support private readonly string? _identityFederationClientId; @@ -242,6 +243,13 @@ private StatementExecutionConnection( _traceStateEnabled = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.TraceStateEnabled, false); _enableComplexDatatypeSupport = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnableComplexDatatypeSupport, false); + // PECO-3060: Mirror the Thrift path's handling of adbc.spark.data_type_conv + // (see SparkHttpConnection.ValidateOptions). scalar (default) keeps native + // Arrow types for DATE/DECIMAL/TIMESTAMP/FLOAT; none surfaces them as strings + // (and widens FLOAT to DOUBLE), matching HiveServer2SchemaParser.GetArrowType. + properties.TryGetValue(SparkParameters.DataTypeConv, out string? 
dataTypeConv); + _dataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv); + // Authentication configuration if (properties.TryGetValue(DatabricksParameters.IdentityFederationClientId, out string? identityFederationClientId)) { @@ -936,6 +944,13 @@ public override void Dispose() // TracingConnection provides IActivityTracer implementation internal bool EnableComplexDatatypeSupport => _enableComplexDatatypeSupport; + /// + /// The parsed scalar data-type conversion mode. Mirrors HiveServer2Connection.DataTypeConversion — + /// scalar (default) keeps native types, none surfaces DATE/DECIMAL/TIMESTAMP as strings (and + /// widens FLOAT to DOUBLE) so SEA matches Thrift's behaviour. See PECO-3060. + /// + internal DataTypeConversion DataTypeConversion => _dataTypeConversion; + public override string AssemblyVersion => GetType().Assembly.GetName().Version?.ToString() ?? "1.0.0"; public override string AssemblyName => "AdbcDrivers.Databricks"; } diff --git a/csharp/src/StatementExecution/StatementExecutionStatement.cs b/csharp/src/StatementExecution/StatementExecutionStatement.cs index b910097af..1661a79ef 100644 --- a/csharp/src/StatementExecution/StatementExecutionStatement.cs +++ b/csharp/src/StatementExecution/StatementExecutionStatement.cs @@ -69,6 +69,9 @@ internal class StatementExecutionStatement : TracingStatement // Complex type configuration private readonly bool _enableComplexDatatypeSupport; + // Scalar data-type conversion mode (PECO-3060) — mirrors the connection setting. + private readonly DataTypeConversion _dataTypeConversion; + // Connection reference for metadata queries private readonly StatementExecutionConnection _connection; @@ -186,6 +189,7 @@ public StatementExecutionStatement( _lz4BufferPool = lz4BufferPool ?? throw new ArgumentNullException(nameof(lz4BufferPool)); _httpClient = httpClient ?? 
throw new ArgumentNullException(nameof(httpClient)); _enableComplexDatatypeSupport = connection.EnableComplexDatatypeSupport; + _dataTypeConversion = connection.DataTypeConversion; // Match Thrift: statement starts with connection's default catalog. // When enableMultipleCatalogSupport=true, this is the catalog from config (e.g. "main"). @@ -402,6 +406,15 @@ private async Task ExecuteQueryInternalAsync(CancellationToken canc reader = new ComplexTypeSerializingStream(reader); } + // PECO-3060: when data_type_conv=none, surface DATE/DECIMAL/TIMESTAMP as strings + // and widen FLOAT to DOUBLE so SEA matches Thrift's HiveServer2SchemaParser semantics. + // Bypass when scalar (default) — the native types declared in the manifest schema + // are already the desired output. + if (!_dataTypeConversion.HasFlag(DataTypeConversion.Scalar)) + { + reader = new ScalarConversionStream(reader); + } + // Get schema from reader var schema = reader.Schema; @@ -615,7 +628,7 @@ private Schema GetSchemaFromManifest(ResultManifest manifest) foreach (var column in manifest.Schema.Columns) { var typeText = column.TypeText ?? 
string.Empty; - var arrowType = ArrowTypeParser.MapToArrowType(typeText, _enableComplexDatatypeSupport); + var arrowType = ArrowTypeParser.MapToArrowType(typeText, _enableComplexDatatypeSupport, _dataTypeConversion); var metadata = new Dictionary { [ColumnMetadataHelper.ArrowMetadataKey] = typeText diff --git a/csharp/test/E2E/StatementExecution/StatementExecutionDriverE2ETests.cs b/csharp/test/E2E/StatementExecution/StatementExecutionDriverE2ETests.cs index 88d0ea7cc..13ae9f258 100644 --- a/csharp/test/E2E/StatementExecution/StatementExecutionDriverE2ETests.cs +++ b/csharp/test/E2E/StatementExecution/StatementExecutionDriverE2ETests.cs @@ -16,8 +16,11 @@ using System; using System.Collections.Generic; +using Apache.Arrow; using Apache.Arrow.Adbc; +using Apache.Arrow.Types; using AdbcDrivers.HiveServer2.Spark; +using AdbcDrivers.HiveServer2.Hive2; using Apache.Arrow.Adbc.Tests; using Xunit; using Xunit.Abstractions; @@ -43,12 +46,19 @@ private void SkipIfNotConfigured() // and OAuth M2M (client_credentials) flow (implemented in PECO-2857). } - private AdbcConnection CreateRestConnection() + private AdbcConnection CreateRestConnection(IReadOnlyDictionary? extraProperties = null) { var properties = new Dictionary { [DatabricksParameters.Protocol] = "rest", }; + if (extraProperties != null) + { + foreach (var kv in extraProperties) + { + properties[kv.Key] = kv.Value; + } + } // Use URI if available (connection will parse host and warehouse ID from it) if (!string.IsNullOrEmpty(TestConfiguration.Uri)) @@ -432,5 +442,90 @@ public void ExecuteQuery_WithNullValues_HandlesNullsCorrectly() Assert.NotNull(batch); Assert.Equal(1, batch.Length); } + + /// + /// PECO-3060: Verifies that the SEA path honors adbc.spark.data_type_conv=none + /// (the M3 parameter gap). When set, DATE / DECIMAL / TIMESTAMP columns must surface + /// as StringType — matching the Thrift behaviour driven by + /// HiveServer2SchemaParser.GetArrowType (none → StringType for those types). 
+ /// + [SkippableFact] + public void ExecuteQuery_DataTypeConv_None_SerializesScalarTypesToStrings() + { + SkipIfNotConfigured(); + + var extra = new Dictionary + { + [SparkParameters.DataTypeConv] = DataTypeConversionOptions.None, + }; + using var connection = CreateRestConnection(extra); + using var statement = connection.CreateStatement(); + + // Single-row SELECT covering each conversion-sensitive scalar type. + // Cheap query — no warehouse scan, INLINE result. + statement.SqlQuery = + "SELECT " + + "CAST('2024-01-15' AS DATE) AS date_col, " + + "CAST('2024-01-15 10:20:30.123' AS TIMESTAMP) AS ts_col, " + + "CAST(123.456 AS DECIMAL(10,3)) AS dec_col"; + + var result = statement.ExecuteQuery(); + Assert.NotNull(result); + using var reader = result.Stream; + Assert.NotNull(reader); + + var schema = reader.Schema; + Assert.NotNull(schema); + Assert.Equal(3, schema.FieldsList.Count); + + // Honest signal: with data_type_conv=none, all three conversion-sensitive + // columns must be exposed as StringType — same as Thrift's HiveServer2SchemaParser + // does for none mode. + Assert.Equal(ArrowTypeId.String, schema.GetFieldByName("date_col").DataType.TypeId); + Assert.Equal(ArrowTypeId.String, schema.GetFieldByName("ts_col").DataType.TypeId); + Assert.Equal(ArrowTypeId.String, schema.GetFieldByName("dec_col").DataType.TypeId); + + // The data arrays must agree with the declared schema (Arrow contract). + var batch = reader.ReadNextRecordBatchAsync().Result; + Assert.NotNull(batch); + Assert.Equal(1, batch!.Length); + Assert.IsType(batch.Column("date_col")); + Assert.IsType(batch.Column("ts_col")); + Assert.IsType(batch.Column("dec_col")); + } + + /// + /// PECO-3060: Sanity check the default (scalar) mode on SEA — DATE / DECIMAL / + /// TIMESTAMP columns must continue to surface as their native Arrow types so + /// existing callers are unaffected. 
+ /// + [SkippableFact] + public void ExecuteQuery_DataTypeConv_Scalar_KeepsNativeTypes() + { + SkipIfNotConfigured(); + + var extra = new Dictionary + { + [SparkParameters.DataTypeConv] = DataTypeConversionOptions.Scalar, + }; + using var connection = CreateRestConnection(extra); + using var statement = connection.CreateStatement(); + + statement.SqlQuery = + "SELECT " + + "CAST('2024-01-15' AS DATE) AS date_col, " + + "CAST('2024-01-15 10:20:30.123' AS TIMESTAMP) AS ts_col, " + + "CAST(123.456 AS DECIMAL(10,3)) AS dec_col"; + + var result = statement.ExecuteQuery(); + Assert.NotNull(result); + using var reader = result.Stream; + Assert.NotNull(reader); + + var schema = reader.Schema; + Assert.Equal(ArrowTypeId.Date32, schema.GetFieldByName("date_col").DataType.TypeId); + Assert.Equal(ArrowTypeId.Timestamp, schema.GetFieldByName("ts_col").DataType.TypeId); + Assert.Equal(ArrowTypeId.Decimal128, schema.GetFieldByName("dec_col").DataType.TypeId); + } } }