From bc271b475b6c5dbb377bb78ec2d17040ecf3e877 Mon Sep 17 00:00:00 2001 From: Jade Wang Date: Thu, 14 May 2026 20:48:11 +0000 Subject: [PATCH 1/3] feat(csharp): add enable_fast_metadata_query flag for DESC TABLE EXTENDED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the SQL modifier STATIC ONLY (runtime PR #198486) to DESC TABLE EXTENDED AS JSON so a DBSQL warehouse can skip Delta log / Mesa RPCs / other expensive I/O. STATIC ONLY only takes effect when paired with off-WLM routing, so the driver also flips the matching protocol signal: - Thrift: RunAsync=false on the descStmt — gated on warehouse path (/sql/1.0/warehouses/{id} or /sql/1.0/endpoints/{id}) so the signal is never sent to general-purpose clusters. - SEA: ExecuteMetadataSqlAsync already sends x-databricks-sea-can-run-fully-sync; SEA is always-warehouse, so the flag alone gates the SQL change there. Older servers that don't recognise STATIC ONLY return parse error 42601, caught by the existing fallback in GetColumnsExtendedAsync. Co-authored-by: Isaac --- csharp/doc/sea-metadata-design.md | 31 +++++++ csharp/src/DatabricksConnection.cs | 51 +++++++++++ csharp/src/DatabricksParameters.cs | 20 +++++ csharp/src/DatabricksStatement.cs | 33 ++++++- .../StatementExecutionConnection.cs | 12 +++ .../StatementExecutionStatement.cs | 9 +- .../Unit/DatabricksConnectionUnitTests.cs | 89 +++++++++++++++++++ csharp/test/Unit/DatabricksParametersTests.cs | 6 ++ 8 files changed, 246 insertions(+), 5 deletions(-) diff --git a/csharp/doc/sea-metadata-design.md b/csharp/doc/sea-metadata-design.md index c0f135422..94ac037c4 100644 --- a/csharp/doc/sea-metadata-design.md +++ b/csharp/doc/sea-metadata-design.md @@ -145,3 +145,34 @@ Each IGetObjectsDataProvider method makes one server call. Total RPCs by depth: | DbSchemas | + GetSchemasAsync | 2 | | Tables | + GetTablesAsync | 3 | | All | + PopulateColumnInfoAsync | 4 | + +## Fast Metadata Query (`DESC TABLE EXTENDED ... 
STATIC ONLY`) + +`GetColumnsExtended` runs `DESC TABLE EXTENDED AS JSON` to fetch column + +key metadata in a single round-trip. Runtime PR #198486 added a `STATIC ONLY` +modifier to that command which makes the server return catalog metadata only +(no Delta log access, no Mesa RPCs, no other expensive I/O). When opted in via +`adbc.databricks.enable_fast_metadata_query`, the driver emits the new modifier +**and** pairs it with the protocol-specific off-WLM routing signal so the +fast-metadata path takes effect end-to-end: + +| Protocol | SQL emitted | Off-WLM signal | Where | +|---|---|---|---| +| SEA | `DESC TABLE EXTENDED STATIC ONLY AS JSON` | HTTP header `x-databricks-sea-can-run-fully-sync: true` | Header is unconditionally set on metadata calls via `ExecuteMetadataSqlAsync` → `IsMetadata=true` → `StatementExecutionClient.cs:225`. SEA always targets a warehouse, so the flag alone gates the SQL change. | +| Thrift | `DESC TABLE EXTENDED STATIC ONLY AS JSON` | `TExecuteStatementReq.RunAsync = false` on the descStmt | `DatabricksStatement.GetColumnsExtendedAsync` flips both when `adbc.databricks.enable_fast_metadata_query=true` AND the connection path matches `/sql/1.0/(warehouses\|endpoints)/{id}` (general clusters: flag is ignored). | + +Both signals together are required: + +- `STATIC ONLY` without off-WLM routing → server uses the lightweight metadata + path but the request is still queued through WLM. +- Off-WLM routing without `STATIC ONLY` → request bypasses WLM but the server + still does the full metadata scan. + +### Fallback safety + +`STATIC ONLY` requires `AS JSON` per the runtime grammar; older servers without +PR #198486 reject the new keyword with parse error `INVALID_STATIC_ONLY_USAGE` +(SQL state `42601`). 
The existing `catch (HiveServer2Exception ex) when +(ex.SqlState == "42601" || ex.SqlState == "20000")` in +`DatabricksStatement.GetColumnsExtendedAsync` already handles this and falls back +to the base `GetColumns + GetPrimaryKeys + GetCrossReference` implementation. diff --git a/csharp/src/DatabricksConnection.cs b/csharp/src/DatabricksConnection.cs index 5ec2ecbf3..de7f4e385 100644 --- a/csharp/src/DatabricksConnection.cs +++ b/csharp/src/DatabricksConnection.cs @@ -90,6 +90,7 @@ internal class DatabricksConnection : SparkHttpConnection private const bool DefaultRateLimitRetry = true; private const bool DefaultTransportErrorRetry = true; private bool _useDescTableExtended = false; + private bool _enableFastMetadataQuery = false; // Trace propagation configuration private bool _tracePropagationEnabled = true; @@ -207,6 +208,7 @@ private void ValidateProperties() _useCloudFetch = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.UseCloudFetch, _useCloudFetch); _canDecompressLz4 = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.CanDecompressLz4, _canDecompressLz4); _useDescTableExtended = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.UseDescTableExtended, _useDescTableExtended); + _enableFastMetadataQuery = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.EnableFastMetadataQuery, _enableFastMetadataQuery); _runAsyncInThrift = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.EnableRunAsyncInThriftOp, _runAsyncInThrift); _enableComplexDatatypeSupport = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.EnableComplexDatatypeSupport, _enableComplexDatatypeSupport); @@ -374,6 +376,55 @@ protected internal override bool TrySetGetDirectResults(IRequest request) /// internal bool CanUseDescTableExtended => _useDescTableExtended && ServerProtocolVersion != null && 
FeatureVersionNegotiator.SupportsDESCTableExtended(ServerProtocolVersion.Value); + private static readonly System.Text.RegularExpressions.Regex s_warehousePathPattern = + new System.Text.RegularExpressions.Regex(@"^/sql/1\.0/(warehouses|endpoints)/[^/]+/?$"); + + private bool? _isWarehousePathCached; + + /// + /// True when the configured connection path targets a DBSQL warehouse + /// (/sql/1.0/warehouses/{id} or /sql/1.0/endpoints/{id}). False for general + /// clusters (/sql/protocolv1/o/{orgId}/{clusterId}) or when no path is set. + /// + internal bool IsWarehousePath + { + get + { + if (_isWarehousePathCached.HasValue) + { + return _isWarehousePathCached.Value; + } + + string? path = null; + if (Properties.TryGetValue(SparkParameters.Path, out string? rawPath) && !string.IsNullOrEmpty(rawPath)) + { + path = rawPath; + } + else if (Properties.TryGetValue(AdbcOptions.Uri, out string? uri) + && !string.IsNullOrEmpty(uri) + && Uri.TryCreate(uri, UriKind.Absolute, out Uri? parsedUri)) + { + path = parsedUri.AbsolutePath; + } + + if (!string.IsNullOrEmpty(path)) + { + int q = path!.IndexOf('?'); + if (q >= 0) path = path.Substring(0, q); + } + + _isWarehousePathCached = !string.IsNullOrEmpty(path) && s_warehousePathPattern.IsMatch(path!); + return _isWarehousePathCached.Value; + } + } + + /// + /// True when the driver should opt into the fast metadata query path. Requires + /// both the connection flag and a DBSQL warehouse path; otherwise false. + /// See . 
+ /// + internal bool UseFastMetadataQuery => _enableFastMetadataQuery && IsWarehousePath; + /// /// Gets whether PK/FK metadata call is enabled /// diff --git a/csharp/src/DatabricksParameters.cs b/csharp/src/DatabricksParameters.cs index 891c82c8c..43d435f71 100644 --- a/csharp/src/DatabricksParameters.cs +++ b/csharp/src/DatabricksParameters.cs @@ -283,6 +283,26 @@ public class DatabricksParameters : SparkParameters /// public const string UseDescTableExtended = "adbc.databricks.use_desc_table_extended"; + /// + /// Whether to opt into the fast metadata query path for DESC TABLE EXTENDED. + /// When enabled, the driver emits the SQL modifier STATIC ONLY on + /// DESC TABLE EXTENDED AS JSON (runtime PR #198486), which tells the server to + /// skip Delta log access, Mesa RPCs, and other expensive I/O. The driver also + /// pairs the SQL change with the matching off-WLM signal per protocol: + /// + /// Thrift: sets RunAsync=false on the descStmt, only when the connection + /// targets a DBSQL warehouse (/sql/1.0/warehouses/{id} or /sql/1.0/endpoints/{id}). + /// On general clusters the flag is ignored entirely. + /// SEA: relies on the existing x-databricks-sea-can-run-fully-sync + /// header that ExecuteMetadataSqlAsync already sends. SEA always targets + /// a warehouse, so the flag alone gates the SQL change. + /// + /// Both signals (SQL keyword + off-WLM routing) are required together — STATIC ONLY + /// alone still goes through WLM; off-WLM routing alone still does the full scan. + /// Default value is false if not specified. + /// + public const string EnableFastMetadataQuery = "adbc.databricks.enable_fast_metadata_query"; + /// /// Whether to enable RunAsync flag in Thrift operation /// Default value is true if not specified. 
diff --git a/csharp/src/DatabricksStatement.cs b/csharp/src/DatabricksStatement.cs index a5313de89..c9e9e6fbc 100644 --- a/csharp/src/DatabricksStatement.cs +++ b/csharp/src/DatabricksStatement.cs @@ -70,6 +70,14 @@ internal class DatabricksStatement : SparkStatement, IHiveServer2Statement private QueryResult? _lastQueryResult; // Track last query result for telemetry chunk metrics internal bool IsInternalCall { get; set; } // Marks if this is a driver-internal operation (e.g., USE SCHEMA) + /// + /// Optional override for the Thrift RunAsync flag on a single statement. When non-null, + /// takes precedence over the connection-level . + /// Pairs with the SQL-level STATIC ONLY modifier on DESC TABLE EXTENDED: RunAsync=false + /// is what tells the warehouse to route the command off the WLM path. + /// + internal bool? RunAsyncOverride { get; set; } + /// /// Telemetry context for the current statement execution, pending emission on Dispose. /// Set before calling base.ExecuteQueryAsync()/ExecuteQuery() so that @@ -445,7 +453,7 @@ protected override void SetStatementProperties(TExecuteStatementReq statement) statement.CanDownloadResult = useCloudFetch; statement.CanDecompressLZ4Result = canDecompressLz4; statement.MaxBytesPerFile = maxBytesPerFile; - statement.RunAsync = runAsyncInThrift; + statement.RunAsync = RunAsyncOverride ?? 
runAsyncInThrift; Connection.TrySetGetDirectResults(statement); @@ -471,7 +479,11 @@ protected override void SetStatementProperties(TExecuteStatementReq statement) Activity.Current?.SetTag("statement.cloudfetch.can_decompress_lz4", canDecompressLz4); Activity.Current?.SetTag("statement.cloudfetch.max_bytes_per_file", maxBytesPerFile); Activity.Current?.SetTag("statement.cloudfetch.max_bytes_per_file_mb", maxBytesPerFile / 1024.0 / 1024.0); - Activity.Current?.SetTag("statement.property.run_async", runAsyncInThrift); + Activity.Current?.SetTag("statement.property.run_async", statement.RunAsync); + if (RunAsyncOverride.HasValue) + { + Activity.Current?.SetTag("statement.property.run_async.source", "override"); + } Activity.Current?.AddEvent("statement.set_properties.complete"); } @@ -993,13 +1005,26 @@ protected override async Task GetColumnsExtendedAsync(CancellationT return baseResult; } - string query = $"DESC TABLE EXTENDED {fullTableName} AS JSON"; + // Fast metadata: STATIC ONLY (runtime PR #198486) skips Delta log / Mesa RPCs, + // and RunAsync=false routes the request off the WLM path. Both signals are required — + // STATIC ONLY without RunAsync=false still goes through WLM; RunAsync=false without + // STATIC ONLY still does the full metadata scan. + bool useFastMetadataQuery = ((DatabricksConnection)Connection).UseFastMetadataQuery; + string query = useFastMetadataQuery + ? $"DESC TABLE EXTENDED {fullTableName} STATIC ONLY AS JSON" + : $"DESC TABLE EXTENDED {fullTableName} AS JSON"; activity?.AddEvent("statement.desc_table_extended.executing_query", [ - new("query_summary", query.Length > 100 ? query.Substring(0, 100) + "..." : query) + new("query_summary", query.Length > 100 ? query.Substring(0, 100) + "..." 
: query), + new("fast_metadata_query", useFastMetadataQuery) ]); using var descStmt = Connection.CreateStatement(); descStmt.SqlQuery = query; + + if (useFastMetadataQuery && descStmt is DatabricksStatement databricksDescStmt) + { + databricksDescStmt.RunAsyncOverride = false; + } QueryResult descResult; try diff --git a/csharp/src/StatementExecution/StatementExecutionConnection.cs b/csharp/src/StatementExecution/StatementExecutionConnection.cs index bd57dd1de..b4fae600d 100644 --- a/csharp/src/StatementExecution/StatementExecutionConnection.cs +++ b/csharp/src/StatementExecution/StatementExecutionConnection.cs @@ -64,6 +64,7 @@ internal class StatementExecutionConnection : TracingConnection, IGetObjectsData private bool _enablePKFK; private bool _enableMultipleCatalogSupport; private bool _useDescTableExtended; + private bool _enableFastMetadataQuery; private bool _applySSPWithQueries; // Connection bring-up timeout (PECO-3059). Mirrors the Thrift path's @@ -305,6 +306,7 @@ private void ValidateProperties() _enablePKFK = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnablePKFK, true); _enableMultipleCatalogSupport = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnableMultipleCatalogSupport, true); _useDescTableExtended = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.UseDescTableExtended, true); + _enableFastMetadataQuery = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnableFastMetadataQuery, false); // When true, SSPs (adbc.databricks.ssp_*) are applied via post-open SET statements // rather than CreateSession.session_confs — mirrors Thrift's behavior so callers // who depend on the SET-statement path (e.g., for audit visibility or for SSPs @@ -957,6 +959,16 @@ internal async Task> ExecuteShowColumnsAsync( /// internal bool UseDescTableExtended => _useDescTableExtended; + /// + /// Whether to append the STATIC ONLY modifier 
to DESC TABLE EXTENDED AS JSON. + /// SEA always targets a DBSQL warehouse, so the flag alone is sufficient (no + /// warehouse-path check needed). The metadata-query header is already sent by + /// , which provides the SEA equivalent of + /// Thrift's RunAsync=false signal. + /// Default: false. + /// + internal bool EnableFastMetadataQuery => _enableFastMetadataQuery; + /// /// Returns the session's default catalog. Used by statements when /// enableMultipleCatalogSupport=false and no catalog was specified. diff --git a/csharp/src/StatementExecution/StatementExecutionStatement.cs b/csharp/src/StatementExecution/StatementExecutionStatement.cs index b910097af..a8dbbcacb 100644 --- a/csharp/src/StatementExecution/StatementExecutionStatement.cs +++ b/csharp/src/StatementExecution/StatementExecutionStatement.cs @@ -1351,7 +1351,14 @@ private async Task GetColumnsExtendedViaDescTableAsync(string? cata string? fullTableName = MetadataUtilities.BuildQualifiedTableName( catalogForTableName, _metadataSchemaName, _metadataTableName); - string query = $"DESC TABLE EXTENDED {fullTableName} AS JSON"; + // Fast metadata: STATIC ONLY (runtime PR #198486) skips Delta log / Mesa RPCs. + // SEA's ExecuteMetadataSqlAsync already sends the x-databricks-sea-can-run-fully-sync + // header — the SEA equivalent of Thrift's RunAsync=false — so the flag alone is enough + // here to enable the fast-metadata path end-to-end. + bool useFastMetadataQuery = _connection.EnableFastMetadataQuery; + string query = useFastMetadataQuery + ? 
$"DESC TABLE EXTENDED {fullTableName} STATIC ONLY AS JSON" + : $"DESC TABLE EXTENDED {fullTableName} AS JSON"; List batches; try diff --git a/csharp/test/Unit/DatabricksConnectionUnitTests.cs b/csharp/test/Unit/DatabricksConnectionUnitTests.cs index 4f6795127..fbfc45ac1 100644 --- a/csharp/test/Unit/DatabricksConnectionUnitTests.cs +++ b/csharp/test/Unit/DatabricksConnectionUnitTests.cs @@ -168,6 +168,95 @@ public void IsValidPropertyName_StartsWithNumber_CurrentlyAllowed(string propert Assert.True(result, $"Property name '{propertyName}' is currently accepted by the regex"); } + /// + /// Warehouse-style paths (/sql/1.0/warehouses/{id}, /sql/1.0/endpoints/{id}) — with or + /// without query strings — must be classified as DBSQL warehouse so the fast-metadata + /// flag can take effect on them. + /// + [Theory] + [InlineData("/sql/1.0/warehouses/abc123")] + [InlineData("/sql/1.0/warehouses/abc123/")] + [InlineData("/sql/1.0/warehouses/abc123?o=987654")] + [InlineData("/sql/1.0/endpoints/abc123")] + [InlineData("/sql/1.0/endpoints/abc123?o=111&foo=bar")] + public void IsWarehousePath_WarehousePaths_ReturnsTrue(string path) + { + var properties = new Dictionary + { + [SparkParameters.HostName] = "test.databricks.com", + [SparkParameters.Token] = "test-token", + [SparkParameters.Path] = path + }; + using var connection = new DatabricksConnection(properties); + Assert.True(connection.IsWarehousePath, $"Path '{path}' should be classified as a warehouse"); + } + + /// + /// General-cluster paths and empty paths must NOT be classified as warehouses, so the + /// fast-metadata flag is a no-op there even when enabled. 
+ /// + [Theory] + [InlineData("/sql/protocolv1/o/123456789/0123-456789-abcdef")] + [InlineData("/sql/protocolv1/o/987/cluster-id")] + [InlineData("/some/other/path")] + [InlineData("")] + public void IsWarehousePath_NonWarehousePaths_ReturnsFalse(string path) + { + var properties = new Dictionary + { + [SparkParameters.HostName] = "test.databricks.com", + [SparkParameters.Token] = "test-token" + }; + if (!string.IsNullOrEmpty(path)) + { + properties[SparkParameters.Path] = path; + } + using var connection = new DatabricksConnection(properties); + Assert.False(connection.IsWarehousePath, $"Path '{path}' should NOT be classified as a warehouse"); + } + + /// + /// IsWarehousePath should also resolve a path embedded in the AdbcOptions.Uri property + /// (the JDBC-style "uri" form) so users who configure that way still get the optimization. + /// + [Fact] + public void IsWarehousePath_PathFromUri_ReturnsTrue() + { + var properties = new Dictionary + { + [SparkParameters.Token] = "test-token", + [AdbcOptions.Uri] = "https://test.databricks.com/sql/1.0/warehouses/abc123?o=555" + }; + using var connection = new DatabricksConnection(properties); + Assert.True(connection.IsWarehousePath); + } + + /// + /// UseFastMetadataQuery requires BOTH the opt-in flag and a warehouse path. Verify the + /// AND-gating: flag without warehouse path stays false, warehouse without flag stays false, + /// only flag-and-warehouse-together returns true. + /// + [Theory] + [InlineData("true", "/sql/1.0/warehouses/abc123", true)] + [InlineData("true", "/sql/protocolv1/o/123/cluster", false)] // flag set but general cluster + [InlineData("false", "/sql/1.0/warehouses/abc123", false)] // warehouse but flag off + [InlineData(null, "/sql/1.0/warehouses/abc123", false)] // flag absent (default false) + public void UseFastMetadataQuery_RequiresFlagAndWarehousePath(string? 
flagValue, string path, bool expected) + { + var properties = new Dictionary + { + [SparkParameters.HostName] = "test.databricks.com", + [SparkParameters.Token] = "test-token", + [SparkParameters.Path] = path + }; + if (flagValue != null) + { + properties[DatabricksParameters.EnableFastMetadataQuery] = flagValue; + } + using var connection = new DatabricksConnection(properties); + Assert.Equal(expected, connection.UseFastMetadataQuery); + } + /// /// Tests that GetInfo returns correct driver information for DriverName and DriverArrowVersion. /// Note: VendorName and VendorVersion require a server connection and should be tested in E2E tests. diff --git a/csharp/test/Unit/DatabricksParametersTests.cs b/csharp/test/Unit/DatabricksParametersTests.cs index 2cc4e2f7c..1a077809a 100644 --- a/csharp/test/Unit/DatabricksParametersTests.cs +++ b/csharp/test/Unit/DatabricksParametersTests.cs @@ -72,6 +72,12 @@ public void TestQueryTagsParameterExists() Assert.Equal("adbc.databricks.query_tags", DatabricksParameters.QueryTags); } + [Fact] + public void TestEnableFastMetadataQueryParameterExists() + { + Assert.Equal("adbc.databricks.enable_fast_metadata_query", DatabricksParameters.EnableFastMetadataQuery); + } + [Fact] public void TestAllRestParametersUseCorrectPrefix() { From 15e8caaf88252b7a5f73faa8ddefa4f8a857e946 Mon Sep 17 00:00:00 2001 From: Jade Wang Date: Fri, 22 May 2026 18:58:17 +0000 Subject: [PATCH 2/3] fix(csharp): address PR #456 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop `_isWarehousePathCached` field; `IsWarehousePath` getter now recomputes on each call. Removes the `Nullable` non-atomic read/write concern and simplifies the property. - Scope the `IndexOf('?')` query-string strip to the raw-Path branch only; `Uri.AbsolutePath` already strips it for the URI branch. 
- Replace the defensive `descStmt is DatabricksStatement` pattern in `GetColumnsExtendedAsync` with an unconditional cast — if a future refactor breaks the invariant, a loud `InvalidCastException` is better than silently emitting a fast-metadata query that still goes through WLM. - Extract `var connection = (DatabricksConnection)Connection;` to reuse a single local for `CanUseDescTableExtended` and `UseFastMetadataQuery` instead of casting twice. - Drop overly-strict `Assert.DoesNotContain(":", actualHostUrl)` from the host_url regression test; the equality check and `"://"` check together enforce the bare-hostname contract without rejecting IPv6 literals. Co-authored-by: Isaac --- csharp/src/DatabricksConnection.cs | 19 ++++--------------- csharp/src/DatabricksStatement.cs | 17 ++++++++--------- .../Telemetry/ConnectionParametersTests.cs | 1 - 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/csharp/src/DatabricksConnection.cs b/csharp/src/DatabricksConnection.cs index de7f4e385..bb8a75a61 100644 --- a/csharp/src/DatabricksConnection.cs +++ b/csharp/src/DatabricksConnection.cs @@ -379,8 +379,6 @@ protected internal override bool TrySetGetDirectResults(IRequest request) private static readonly System.Text.RegularExpressions.Regex s_warehousePathPattern = new System.Text.RegularExpressions.Regex(@"^/sql/1\.0/(warehouses|endpoints)/[^/]+/?$"); - private bool? _isWarehousePathCached; - /// /// True when the configured connection path targets a DBSQL warehouse /// (/sql/1.0/warehouses/{id} or /sql/1.0/endpoints/{id}). False for general @@ -390,15 +388,13 @@ internal bool IsWarehousePath { get { - if (_isWarehousePathCached.HasValue) - { - return _isWarehousePathCached.Value; - } - string? path = null; if (Properties.TryGetValue(SparkParameters.Path, out string? rawPath) && !string.IsNullOrEmpty(rawPath)) { path = rawPath; + // Only the raw-Path branch can carry a query string; Uri.AbsolutePath strips it. 
+ int q = path!.IndexOf('?'); + if (q >= 0) path = path.Substring(0, q); } else if (Properties.TryGetValue(AdbcOptions.Uri, out string? uri) && !string.IsNullOrEmpty(uri) @@ -407,14 +403,7 @@ internal bool IsWarehousePath path = parsedUri.AbsolutePath; } - if (!string.IsNullOrEmpty(path)) - { - int q = path!.IndexOf('?'); - if (q >= 0) path = path.Substring(0, q); - } - - _isWarehousePathCached = !string.IsNullOrEmpty(path) && s_warehousePathPattern.IsMatch(path!); - return _isWarehousePathCached.Value; + return !string.IsNullOrEmpty(path) && s_warehousePathPattern.IsMatch(path!); } } diff --git a/csharp/src/DatabricksStatement.cs b/csharp/src/DatabricksStatement.cs index c9e9e6fbc..2530bd151 100644 --- a/csharp/src/DatabricksStatement.cs +++ b/csharp/src/DatabricksStatement.cs @@ -984,7 +984,8 @@ protected override async Task GetColumnsExtendedAsync(CancellationT { activity?.AddEvent("statement.get_columns_extended.start"); string? fullTableName = BuildTableName(); - var canUseDescTableExtended = ((DatabricksConnection)Connection).CanUseDescTableExtended; + var connection = (DatabricksConnection)Connection; + var canUseDescTableExtended = connection.CanUseDescTableExtended; activity?.SetTag("statement.catalog_name", CatalogName ?? "(none)"); activity?.SetTag("statement.schema_name", SchemaName ?? "(none)"); @@ -1005,11 +1006,9 @@ protected override async Task GetColumnsExtendedAsync(CancellationT return baseResult; } - // Fast metadata: STATIC ONLY (runtime PR #198486) skips Delta log / Mesa RPCs, - // and RunAsync=false routes the request off the WLM path. Both signals are required — - // STATIC ONLY without RunAsync=false still goes through WLM; RunAsync=false without - // STATIC ONLY still does the full metadata scan. - bool useFastMetadataQuery = ((DatabricksConnection)Connection).UseFastMetadataQuery; + // Fast metadata: STATIC ONLY (runtime PR #198486) bypasses the server's WLM + // path. Both the SQL keyword and RunAsync=false are required to take effect. 
+ bool useFastMetadataQuery = connection.UseFastMetadataQuery; string query = useFastMetadataQuery ? $"DESC TABLE EXTENDED {fullTableName} STATIC ONLY AS JSON" : $"DESC TABLE EXTENDED {fullTableName} AS JSON"; @@ -1018,12 +1017,12 @@ protected override async Task GetColumnsExtendedAsync(CancellationT new("fast_metadata_query", useFastMetadataQuery) ]); - using var descStmt = Connection.CreateStatement(); + using var descStmt = (DatabricksStatement)connection.CreateStatement(); descStmt.SqlQuery = query; - if (useFastMetadataQuery && descStmt is DatabricksStatement databricksDescStmt) + if (useFastMetadataQuery) { - databricksDescStmt.RunAsyncOverride = false; + descStmt.RunAsyncOverride = false; } QueryResult descResult; diff --git a/csharp/test/E2E/Telemetry/ConnectionParametersTests.cs b/csharp/test/E2E/Telemetry/ConnectionParametersTests.cs index 0383adb03..b1aafd4e5 100644 --- a/csharp/test/E2E/Telemetry/ConnectionParametersTests.cs +++ b/csharp/test/E2E/Telemetry/ConnectionParametersTests.cs @@ -210,7 +210,6 @@ public async Task ConnectionParams_HostInfoHostUrl_IsBareHostname() Assert.False(string.IsNullOrEmpty(actualHostUrl), "host_url should be populated"); Assert.Equal(expectedHost, actualHostUrl); Assert.DoesNotContain("://", actualHostUrl); - Assert.DoesNotContain(":", actualHostUrl); OutputHelper?.WriteLine($"✓ host_info.host_url: {actualHostUrl}"); } From 739dfdf7ff6d23da6e5df9f20057484ec7e1410a Mon Sep 17 00:00:00 2001 From: Jade Wang Date: Fri, 29 May 2026 20:46:07 +0000 Subject: [PATCH 3/3] fix(csharp): emit `AS JSON STATIC ONLY` in the runtime-required order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime grammar (SqlBaseParser.g4) accepts modifiers in this order: identifierReference partitionSpec? describeColName? (AS JSON)? (STATIC ONLY)? The driver was emitting `STATIC ONLY AS JSON` (modifiers swapped), which the parser rejects with `42601 PARSE_SYNTAX_ERROR at 'AS'`. 
The existing `42601` catch in `GetColumnsExtendedAsync` would have masked the bug — falling back silently to the slow path even on warehouses that *have* the runtime patch (PECO-3022 / runtime PR #198486), defeating the optimization with no signal to the operator. E2E-verified against a custom DBR image built from ~/runtime on the `stack/describe-table-static-only` branch (commit 485e630), deployed to a SQL warehouse on benchmarking-staging-aws-us-west-2: - flag=true + warehouse: emits `DESC TABLE EXTENDED AS JSON STATIC ONLY`, status FINISHED, returns JSON metadata. - flag=false + warehouse: emits `DESC TABLE EXTENDED AS JSON`, status FINISHED. - flag=true + cluster path: `UseFastMetadataQuery` returns false (existing unit test `UseFastMetadataQuery_RequiresFlagAndWarehousePath`). Also adds `FastMetadataQueryE2ETest.cs` with three E2E checks (driver emission, runtime acceptance of the keyword, flag-disabled control) and tightens docstrings to spell out the emitted SQL explicitly so the order is harder to flip in a future refactor. 
Co-authored-by: Isaac --- csharp/doc/sea-metadata-design.md | 4 +- csharp/src/DatabricksParameters.cs | 8 +- csharp/src/DatabricksStatement.cs | 2 +- .../StatementExecutionConnection.cs | 10 +- .../StatementExecutionStatement.cs | 2 +- csharp/test/E2E/FastMetadataQueryE2ETest.cs | 188 ++++++++++++++++++ 6 files changed, 201 insertions(+), 13 deletions(-) create mode 100644 csharp/test/E2E/FastMetadataQueryE2ETest.cs diff --git a/csharp/doc/sea-metadata-design.md b/csharp/doc/sea-metadata-design.md index 94ac037c4..e712cbe6b 100644 --- a/csharp/doc/sea-metadata-design.md +++ b/csharp/doc/sea-metadata-design.md @@ -158,8 +158,8 @@ fast-metadata path takes effect end-to-end: | Protocol | SQL emitted | Off-WLM signal | Where | |---|---|---|---| -| SEA | `DESC TABLE EXTENDED STATIC ONLY AS JSON` | HTTP header `x-databricks-sea-can-run-fully-sync: true` | Header is unconditionally set on metadata calls via `ExecuteMetadataSqlAsync` → `IsMetadata=true` → `StatementExecutionClient.cs:225`. SEA always targets a warehouse, so the flag alone gates the SQL change. | -| Thrift | `DESC TABLE EXTENDED STATIC ONLY AS JSON` | `TExecuteStatementReq.RunAsync = false` on the descStmt | `DatabricksStatement.GetColumnsExtendedAsync` flips both when `adbc.databricks.enable_fast_metadata_query=true` AND the connection path matches `/sql/1.0/(warehouses\|endpoints)/{id}` (general clusters: flag is ignored). | +| SEA | `DESC TABLE EXTENDED AS JSON STATIC ONLY` | HTTP header `x-databricks-sea-can-run-fully-sync: true` | Header is unconditionally set on metadata calls via `ExecuteMetadataSqlAsync` → `IsMetadata=true` → `StatementExecutionClient.cs:225`. SEA always targets a warehouse, so the flag alone gates the SQL change. 
| +| Thrift | `DESC TABLE EXTENDED AS JSON STATIC ONLY` | `TExecuteStatementReq.RunAsync = false` on the descStmt | `DatabricksStatement.GetColumnsExtendedAsync` flips both when `adbc.databricks.enable_fast_metadata_query=true` AND the connection path matches `/sql/1.0/(warehouses\|endpoints)/{id}` (general clusters: flag is ignored). | Both signals together are required: diff --git a/csharp/src/DatabricksParameters.cs b/csharp/src/DatabricksParameters.cs index 43d435f71..c65f14e42 100644 --- a/csharp/src/DatabricksParameters.cs +++ b/csharp/src/DatabricksParameters.cs @@ -285,10 +285,10 @@ public class DatabricksParameters : SparkParameters /// /// Whether to opt into the fast metadata query path for DESC TABLE EXTENDED. - /// When enabled, the driver emits the SQL modifier STATIC ONLY on - /// DESC TABLE EXTENDED AS JSON (runtime PR #198486), which tells the server to - /// skip Delta log access, Mesa RPCs, and other expensive I/O. The driver also - /// pairs the SQL change with the matching off-WLM signal per protocol: + /// When enabled, the driver emits DESC TABLE EXTENDED <t> AS JSON STATIC ONLY + /// (runtime PR #198486), which tells the server to skip Delta log access, Mesa RPCs, + /// and other expensive I/O. The driver also pairs the SQL change with the matching + /// off-WLM signal per protocol: /// /// Thrift: sets RunAsync=false on the descStmt, only when the connection /// targets a DBSQL warehouse (/sql/1.0/warehouses/{id} or /sql/1.0/endpoints/{id}). diff --git a/csharp/src/DatabricksStatement.cs b/csharp/src/DatabricksStatement.cs index 2530bd151..141202988 100644 --- a/csharp/src/DatabricksStatement.cs +++ b/csharp/src/DatabricksStatement.cs @@ -1010,7 +1010,7 @@ protected override async Task GetColumnsExtendedAsync(CancellationT // path. Both the SQL keyword and RunAsync=false are required to take effect. bool useFastMetadataQuery = connection.UseFastMetadataQuery; string query = useFastMetadataQuery - ? 
$"DESC TABLE EXTENDED {fullTableName} STATIC ONLY AS JSON" + ? $"DESC TABLE EXTENDED {fullTableName} AS JSON STATIC ONLY" : $"DESC TABLE EXTENDED {fullTableName} AS JSON"; activity?.AddEvent("statement.desc_table_extended.executing_query", [ new("query_summary", query.Length > 100 ? query.Substring(0, 100) + "..." : query), diff --git a/csharp/src/StatementExecution/StatementExecutionConnection.cs b/csharp/src/StatementExecution/StatementExecutionConnection.cs index b4fae600d..e7e9d0e89 100644 --- a/csharp/src/StatementExecution/StatementExecutionConnection.cs +++ b/csharp/src/StatementExecution/StatementExecutionConnection.cs @@ -960,11 +960,11 @@ internal async Task> ExecuteShowColumnsAsync( internal bool UseDescTableExtended => _useDescTableExtended; /// - /// Whether to append the STATIC ONLY modifier to DESC TABLE EXTENDED AS JSON. - /// SEA always targets a DBSQL warehouse, so the flag alone is sufficient (no - /// warehouse-path check needed). The metadata-query header is already sent by - /// , which provides the SEA equivalent of - /// Thrift's RunAsync=false signal. + /// Whether to emit DESC TABLE EXTENDED <t> AS JSON STATIC ONLY in place of + /// the base DESC TABLE EXTENDED <t> AS JSON. SEA always targets a DBSQL + /// warehouse, so the flag alone is sufficient (no warehouse-path check needed). The + /// metadata-query header is already sent by , + /// which provides the SEA equivalent of Thrift's RunAsync=false signal. /// Default: false. /// internal bool EnableFastMetadataQuery => _enableFastMetadataQuery; diff --git a/csharp/src/StatementExecution/StatementExecutionStatement.cs b/csharp/src/StatementExecution/StatementExecutionStatement.cs index a8dbbcacb..574d40222 100644 --- a/csharp/src/StatementExecution/StatementExecutionStatement.cs +++ b/csharp/src/StatementExecution/StatementExecutionStatement.cs @@ -1357,7 +1357,7 @@ private async Task GetColumnsExtendedViaDescTableAsync(string? cata // here to enable the fast-metadata path end-to-end. 
bool useFastMetadataQuery = _connection.EnableFastMetadataQuery; string query = useFastMetadataQuery - ? $"DESC TABLE EXTENDED {fullTableName} STATIC ONLY AS JSON" + ? $"DESC TABLE EXTENDED {fullTableName} AS JSON STATIC ONLY" : $"DESC TABLE EXTENDED {fullTableName} AS JSON"; List batches;
diff --git a/csharp/test/E2E/FastMetadataQueryE2ETest.cs b/csharp/test/E2E/FastMetadataQueryE2ETest.cs
new file mode 100644
index 000000000..ed93a7fb3
--- /dev/null
+++ b/csharp/test/E2E/FastMetadataQueryE2ETest.cs
@@ -0,0 +1,200 @@
+/*
+* Copyright (c) 2025 ADBC Drivers Contributors
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Threading.Tasks;
+using AdbcDrivers.HiveServer2;
+using Apache.Arrow;
+using Apache.Arrow.Adbc;
+using Apache.Arrow.Adbc.Tests;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace AdbcDrivers.Databricks.Tests.E2E
+{
+    /// <summary>
+    /// Manual live verification for PECO-3021 (fast metadata query path). Each test runs
+    /// against a real endpoint and logs a UTC time window that can be used to find the
+    /// emitted statement in <c>system.query.history</c>; the tests assert only that rows
+    /// come back, not the SQL text that was sent.
+    /// </summary>
+    public class FastMetadataQueryE2ETest : TestBase<DatabricksTestConfiguration, DatabricksTestEnvironment>
+    {
+        public FastMetadataQueryE2ETest(ITestOutputHelper? outputHelper)
+            : base(outputHelper, new DatabricksTestEnvironment.Factory())
+        {
+            // Skip every test in the class when no live test configuration is available.
+            Skip.IfNot(Utils.CanExecuteTestConfig(TestConfigVariable));
+        }
+
+        /// <summary>
+        /// Runs <c>GetColumnsExtended</c> with
+        /// <c>adbc.databricks.enable_fast_metadata_query=true</c> and logs the UTC window to
+        /// grep in the query history for <c>STATIC ONLY</c>.
+        /// </summary>
+        [SkippableFact]
+        public async Task FastMetadataQuery_LivelyEmitsStaticOnly()
+        {
+            var extra = new Dictionary<string, string>
+            {
+                [DatabricksParameters.UseDescTableExtended] = "true",
+                [DatabricksParameters.EnableFastMetadataQuery] = "true",
+            };
+
+            using AdbcConnection connection = NewConnection(TestConfiguration, extra);
+            using AdbcStatement statement = connection.CreateStatement();
+            ConfigureGetColumnsExtended(statement);
+
+            var metadata = TestConfiguration.Metadata;
+            OutputHelper?.WriteLine($"PECO-3021 fast-metadata window START (UTC): {DateTime.UtcNow:O}");
+            OutputHelper?.WriteLine($"Target table: {metadata.Catalog}.{metadata.Schema}.{metadata.Table}");
+
+            QueryResult result = await statement.ExecuteQueryAsync();
+            var (rowCount, _) = await DrainAsync(result);
+
+            OutputHelper?.WriteLine($"PECO-3021 fast-metadata window END (UTC): {DateTime.UtcNow:O}");
+            OutputHelper?.WriteLine($"Rows returned: {rowCount}");
+            Assert.True(rowCount > 0, "GetColumnsExtended should return at least one column row");
+        }
+
+        /// <summary>
+        /// Negative control: with <c>adbc.databricks.enable_fast_metadata_query</c> unset the
+        /// driver is expected to send the base <c>DESC TABLE EXTENDED ... AS JSON</c> form (no
+        /// <c>STATIC ONLY</c>). The SQL text is confirmed manually in the query history using
+        /// the logged UTC window.
+        /// </summary>
+        [SkippableFact]
+        public async Task FastMetadataQuery_Disabled_EmitsBaseAsJson()
+        {
+            var extra = new Dictionary<string, string>
+            {
+                // EnableFastMetadataQuery is deliberately NOT set: default must be base AS JSON.
+                [DatabricksParameters.UseDescTableExtended] = "true",
+            };
+
+            using AdbcConnection connection = NewConnection(TestConfiguration, extra);
+            using AdbcStatement statement = connection.CreateStatement();
+            ConfigureGetColumnsExtended(statement);
+
+            OutputHelper?.WriteLine($"PECO-3021 flag-disabled window START (UTC): {DateTime.UtcNow:O}");
+
+            QueryResult result = await statement.ExecuteQueryAsync();
+            var (rowCount, _) = await DrainAsync(result);
+
+            OutputHelper?.WriteLine($"PECO-3021 flag-disabled window END (UTC): {DateTime.UtcNow:O}");
+            OutputHelper?.WriteLine($"Rows returned: {rowCount}");
+            Assert.True(rowCount > 0, "GetColumnsExtended should return at least one column row");
+        }
+
+        /// <summary>
+        /// Verifies that the runtime behind the target endpoint accepts the <c>STATIC ONLY</c>
+        /// modifier. Sends <c>DESC TABLE EXTENDED ... AS JSON STATIC ONLY</c> directly as a SQL
+        /// statement (not via <c>GetColumnsExtended</c>), so the driver's warehouse-path gating
+        /// is bypassed and the runtime's own response is what the test observes.
+        /// </summary>
+        /// <remarks>
+        /// Pass: runtime has PR #198486 (returns a row of JSON).
+        /// Fail with SQLSTATE=42601 PARSE_SYNTAX_ERROR at 'ONLY': runtime is pre-rollout.
+        /// </remarks>
+        [SkippableFact]
+        public async Task StaticOnly_DirectSql_RuntimeAcceptsKeyword()
+        {
+            using AdbcConnection connection = NewConnection();
+            using AdbcStatement statement = connection.CreateStatement();
+
+            var metadata = TestConfiguration.Metadata;
+            string sql = "DESC TABLE EXTENDED "
+                + $"{QuoteIdentifier(metadata.Catalog)}.{QuoteIdentifier(metadata.Schema)}.{QuoteIdentifier(metadata.Table)}"
+                + " AS JSON STATIC ONLY";
+
+            statement.SqlQuery = sql;
+            OutputHelper?.WriteLine($"Probing runtime with: {sql}");
+            DateTime startUtc = DateTime.UtcNow;
+
+            QueryResult result = await statement.ExecuteQueryAsync();
+            var (rowCount, firstCell) = await DrainAsync(result);
+
+            DateTime endUtc = DateTime.UtcNow;
+            OutputHelper?.WriteLine($"Runtime accepted STATIC ONLY — rows={rowCount}, took {(endUtc - startUtc).TotalMilliseconds:0}ms");
+            if (firstCell != null)
+            {
+                int previewLength = Math.Min(200, firstCell.Length);
+                OutputHelper?.WriteLine($"Result preview (first {previewLength} chars): {firstCell.Substring(0, previewLength)}");
+            }
+
+            Assert.True(rowCount > 0, "Runtime should return JSON metadata when STATIC ONLY is supported");
+        }
+
+        /// <summary>
+        /// Points <paramref name="statement"/> at the configured metadata table and selects
+        /// the <c>GetColumnsExtended</c> metadata command.
+        /// </summary>
+        private void ConfigureGetColumnsExtended(AdbcStatement statement)
+        {
+            statement.SetOption(ApacheParameters.IsMetadataCommand, "true");
+            statement.SetOption(ApacheParameters.CatalogName, TestConfiguration.Metadata.Catalog);
+            statement.SetOption(ApacheParameters.SchemaName, TestConfiguration.Metadata.Schema);
+            statement.SetOption(ApacheParameters.TableName, TestConfiguration.Metadata.Table);
+            statement.SqlQuery = "GetColumnsExtended";
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="identifier"/> in backticks, doubling any embedded backtick so
+        /// the configured name cannot terminate the quoted identifier early.
+        /// </summary>
+        private static string QuoteIdentifier(string identifier)
+            => "`" + identifier.Replace("`", "``") + "`";
+
+        /// <summary>
+        /// Reads <paramref name="result"/> to the end so the server actually executes the
+        /// statement, disposing every batch and the stream.
+        /// </summary>
+        /// <returns>
+        /// The total number of rows read and, when the first non-empty batch has a string
+        /// first column, that column's first value (otherwise <c>null</c>).
+        /// </returns>
+        private static async Task<(int RowCount, string? FirstCell)> DrainAsync(QueryResult result)
+        {
+            Assert.NotNull(result.Stream);
+
+            int rowCount = 0;
+            string? firstCell = null;
+            using (var stream = result.Stream)
+            {
+                while (true)
+                {
+                    // Using declaration: the batch is released even if reading a cell throws.
+                    using var batch = await stream.ReadNextRecordBatchAsync();
+                    if (batch == null)
+                    {
+                        break;
+                    }
+
+                    if (rowCount == 0 && batch.Length > 0 && batch.ColumnCount > 0
+                        && batch.Column(0) is StringArray cells)
+                    {
+                        firstCell = cells.GetString(0);
+                    }
+
+                    rowCount += batch.Length;
+                }
+            }
+
+            return (rowCount, firstCell);
+        }
+    }
+}