Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions csharp/doc/sea-metadata-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,34 @@ Each IGetObjectsDataProvider method makes one server call. Total RPCs by depth:
| DbSchemas | + GetSchemasAsync | 2 |
| Tables | + GetTablesAsync | 3 |
| All | + PopulateColumnInfoAsync | 4 |

## Fast Metadata Query (`DESC TABLE EXTENDED ... STATIC ONLY`)

`GetColumnsExtended` runs `DESC TABLE EXTENDED <table> AS JSON` to fetch column and
key metadata in a single round-trip. Runtime PR #198486 added a `STATIC ONLY`
modifier to that command that makes the server return catalog metadata only
(no Delta log access, no Mesa RPCs, no other expensive I/O). When the user opts in
via `adbc.databricks.enable_fast_metadata_query`, the driver emits the new modifier
**and** pairs it with the protocol-specific off-WLM routing signal so that the
fast-metadata path takes effect end-to-end:

| Protocol | SQL emitted | Off-WLM signal | Where |
|---|---|---|---|
| SEA | `DESC TABLE EXTENDED <t> AS JSON STATIC ONLY` | HTTP header `x-databricks-sea-can-run-fully-sync: true` | Header is unconditionally set on metadata calls via `ExecuteMetadataSqlAsync` → `IsMetadata=true` → `StatementExecutionClient.cs:225`. SEA always targets a warehouse, so the flag alone gates the SQL change. |
| Thrift | `DESC TABLE EXTENDED <t> AS JSON STATIC ONLY` | `TExecuteStatementReq.RunAsync = false` on the descStmt | `DatabricksStatement.GetColumnsExtendedAsync` applies both the SQL modifier and `RunAsync = false` when `adbc.databricks.enable_fast_metadata_query=true` AND the connection path matches `/sql/1.0/(warehouses\|endpoints)/{id}`. On general clusters the flag is ignored. |

Both signals together are required:

- `STATIC ONLY` without off-WLM routing → server uses the lightweight metadata
path but the request is still queued through WLM.
- Off-WLM routing without `STATIC ONLY` → request bypasses WLM but the server
still does the full metadata scan.

### Fallback safety

`STATIC ONLY` requires `AS JSON` per the runtime grammar, which the driver always
emits. Older servers without PR #198486 reject the new keyword with parse error
`INVALID_STATIC_ONLY_USAGE` (SQL state `42601`). The existing
`catch (HiveServer2Exception ex) when (ex.SqlState == "42601" || ex.SqlState == "20000")`
in `DatabricksStatement.GetColumnsExtendedAsync` already handles this and falls back
to the base `GetColumns + GetPrimaryKeys + GetCrossReference` implementation.
40 changes: 40 additions & 0 deletions csharp/src/DatabricksConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ internal class DatabricksConnection : SparkHttpConnection
private const bool DefaultRateLimitRetry = true;
private const bool DefaultTransportErrorRetry = true;
private bool _useDescTableExtended = false;
private bool _enableFastMetadataQuery = false;

// Trace propagation configuration
private bool _tracePropagationEnabled = true;
Expand Down Expand Up @@ -207,6 +208,7 @@ private void ValidateProperties()
_useCloudFetch = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.UseCloudFetch, _useCloudFetch);
_canDecompressLz4 = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.CanDecompressLz4, _canDecompressLz4);
_useDescTableExtended = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.UseDescTableExtended, _useDescTableExtended);
_enableFastMetadataQuery = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.EnableFastMetadataQuery, _enableFastMetadataQuery);
_runAsyncInThrift = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.EnableRunAsyncInThriftOp, _runAsyncInThrift);
_enableComplexDatatypeSupport = PropertyHelper.GetBooleanPropertyWithValidation(Properties, DatabricksParameters.EnableComplexDatatypeSupport, _enableComplexDatatypeSupport);

Expand Down Expand Up @@ -374,6 +376,44 @@ protected internal override bool TrySetGetDirectResults(IRequest request)
/// </summary>
internal bool CanUseDescTableExtended => _useDescTableExtended && ServerProtocolVersion != null && FeatureVersionNegotiator.SupportsDESCTableExtended(ServerProtocolVersion.Value);

private static readonly System.Text.RegularExpressions.Regex s_warehousePathPattern =
Comment thread
jadewang-db marked this conversation as resolved.
new System.Text.RegularExpressions.Regex(@"^/sql/1\.0/(warehouses|endpoints)/[^/]+/?$");

/// <summary>
/// Reports whether this connection points at a DBSQL warehouse, i.e. its HTTP path has
/// the form /sql/1.0/warehouses/{id} or /sql/1.0/endpoints/{id}. General-cluster paths
/// (/sql/protocolv1/o/{orgId}/{clusterId}) and connections with no usable path yield false.
/// </summary>
/// <remarks>
/// A non-empty <c>SparkParameters.Path</c> property takes priority; the
/// <c>AdbcOptions.Uri</c> property is consulted only when no path is configured.
/// </remarks>
internal bool IsWarehousePath
{
    get
    {
        // Preferred source: the explicitly configured HTTP path.
        if (Properties.TryGetValue(SparkParameters.Path, out string? configuredPath)
            && !string.IsNullOrEmpty(configuredPath))
        {
            // A raw path may still carry a query string; drop everything from '?' onward.
            int queryStart = configuredPath!.IndexOf('?');
            string pathOnly = queryStart < 0
                ? configuredPath
                : configuredPath.Substring(0, queryStart);

            return pathOnly.Length > 0 && s_warehousePathPattern.IsMatch(pathOnly);
        }

        // Fallback source: the full connection URI, which must parse as an absolute URI.
        if (!Properties.TryGetValue(AdbcOptions.Uri, out string? configuredUri)
            || string.IsNullOrEmpty(configuredUri)
            || !Uri.TryCreate(configuredUri, UriKind.Absolute, out Uri? endpoint))
        {
            return false;
        }

        // Uri.AbsolutePath already excludes the query string, so no trimming is needed here.
        string absolutePath = endpoint.AbsolutePath;
        return absolutePath.Length > 0 && s_warehousePathPattern.IsMatch(absolutePath);
    }
}

/// <summary>
/// Indicates whether the fast metadata query path should be used on this connection.
/// It is true only when the connection flag is enabled and the connection targets a
/// DBSQL warehouse path; in every other case it is false.
/// See <see cref="DatabricksParameters.EnableFastMetadataQuery"/>.
/// </summary>
internal bool UseFastMetadataQuery
{
    get
    {
        // Check the cheap flag first so the path is only inspected when the user opted in.
        if (!_enableFastMetadataQuery)
        {
            return false;
        }

        return IsWarehousePath;
    }
}

/// <summary>
/// Gets whether PK/FK metadata call is enabled
/// </summary>
Expand Down
20 changes: 20 additions & 0 deletions csharp/src/DatabricksParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,26 @@ public class DatabricksParameters : SparkParameters
/// </summary>
public const string UseDescTableExtended = "adbc.databricks.use_desc_table_extended";

/// <summary>
/// Whether to opt into the fast metadata query path for DESC TABLE EXTENDED.
/// When enabled, the driver emits <c>DESC TABLE EXTENDED &lt;t&gt; AS JSON STATIC ONLY</c>
/// (runtime PR #198486), which tells the server to skip Delta log access, Mesa RPCs,
/// and other expensive I/O. The driver also pairs the SQL change with the matching
/// off-WLM signal for each protocol:
/// <list type="bullet">
/// <item>Thrift: sets <c>RunAsync=false</c> on the internal DESC statement, and only when
/// the connection targets a DBSQL warehouse (/sql/1.0/warehouses/{id} or
/// /sql/1.0/endpoints/{id}). On general clusters the flag is ignored entirely.</item>
/// <item>SEA: relies on the existing <c>x-databricks-sea-can-run-fully-sync</c>
/// header that <c>ExecuteMetadataSqlAsync</c> already sends. SEA always targets
/// a warehouse, so the flag alone gates the SQL change.</item>
/// </list>
/// Both signals (SQL keyword + off-WLM routing) are required together: STATIC ONLY
/// alone still goes through WLM, and off-WLM routing alone still does the full scan.
/// Default value is false if not specified.
/// </summary>
public const string EnableFastMetadataQuery = "adbc.databricks.enable_fast_metadata_query";

/// <summary>
/// Whether to enable RunAsync flag in Thrift operation
/// Default value is true if not specified.
Expand Down
36 changes: 30 additions & 6 deletions csharp/src/DatabricksStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ internal class DatabricksStatement : SparkStatement, IHiveServer2Statement
private QueryResult? _lastQueryResult; // Track last query result for telemetry chunk metrics
internal bool IsInternalCall { get; set; } // Marks if this is a driver-internal operation (e.g., USE SCHEMA)

/// <summary>
/// Optional per-statement override for the Thrift RunAsync flag. When non-null, it
/// takes precedence over the connection-level <see cref="DatabricksConnection.RunAsyncInThrift"/>
/// setting for this statement only; when null, the connection-level value is used.
/// Internal to the driver: GetColumnsExtendedAsync sets it to false on its DESC TABLE
/// EXTENDED statement when the fast metadata query path is active, pairing RunAsync=false
/// with the SQL-level STATIC ONLY modifier. RunAsync=false is what tells the warehouse
/// to route the command off the WLM path.
/// </summary>
internal bool? RunAsyncOverride { get; set; }
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this extra flag? Who is passing this in?
Can we just rely on runAsyncInThrift?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

runAsyncInThrift is the user's connection-level setting (adbc.databricks.enable_run_async_in_thrift_op, defaults to true) — flipping it on the connection would affect all their statements, and mutating it around the desc-stmt call would race with any concurrent statements on the same connection.

RunAsyncOverride is an internal per-statement mechanism — no user surface — used exclusively by GetColumnsExtendedAsync to flip RunAsync=false on its own internal descStmt while the user's other statements keep their configured setting. The pairing with the STATIC ONLY SQL modifier is what tells the warehouse to route the desc-stmt off the WLM path; the connection-level flag alone can't express "this single statement, no others."

Open to a different shape if you have one in mind — happy to refactor if e.g. you'd prefer a constructor param or a dedicated ExecuteWithRunAsyncOff() helper instead of the override property.


/// <summary>
/// Telemetry context for the current statement execution, pending emission on Dispose.
/// Set before calling base.ExecuteQueryAsync()/ExecuteQuery() so that
Expand Down Expand Up @@ -445,7 +453,7 @@ protected override void SetStatementProperties(TExecuteStatementReq statement)
statement.CanDownloadResult = useCloudFetch;
statement.CanDecompressLZ4Result = canDecompressLz4;
statement.MaxBytesPerFile = maxBytesPerFile;
statement.RunAsync = runAsyncInThrift;
statement.RunAsync = RunAsyncOverride ?? runAsyncInThrift;

Connection.TrySetGetDirectResults(statement);

Expand All @@ -471,7 +479,11 @@ protected override void SetStatementProperties(TExecuteStatementReq statement)
Activity.Current?.SetTag("statement.cloudfetch.can_decompress_lz4", canDecompressLz4);
Activity.Current?.SetTag("statement.cloudfetch.max_bytes_per_file", maxBytesPerFile);
Activity.Current?.SetTag("statement.cloudfetch.max_bytes_per_file_mb", maxBytesPerFile / 1024.0 / 1024.0);
Activity.Current?.SetTag("statement.property.run_async", runAsyncInThrift);
Activity.Current?.SetTag("statement.property.run_async", statement.RunAsync);
Comment thread
jadewang-db marked this conversation as resolved.
if (RunAsyncOverride.HasValue)
{
Activity.Current?.SetTag("statement.property.run_async.source", "override");
}

Activity.Current?.AddEvent("statement.set_properties.complete");
}
Expand Down Expand Up @@ -972,7 +984,8 @@ protected override async Task<QueryResult> GetColumnsExtendedAsync(CancellationT
{
activity?.AddEvent("statement.get_columns_extended.start");
string? fullTableName = BuildTableName();
var canUseDescTableExtended = ((DatabricksConnection)Connection).CanUseDescTableExtended;
var connection = (DatabricksConnection)Connection;
var canUseDescTableExtended = connection.CanUseDescTableExtended;

activity?.SetTag("statement.catalog_name", CatalogName ?? "(none)");
activity?.SetTag("statement.schema_name", SchemaName ?? "(none)");
Expand All @@ -993,13 +1006,24 @@ protected override async Task<QueryResult> GetColumnsExtendedAsync(CancellationT
return baseResult;
}

string query = $"DESC TABLE EXTENDED {fullTableName} AS JSON";
// Fast metadata: STATIC ONLY (runtime PR #198486) makes the server return catalog
// metadata only; RunAsync=false (set below) routes the statement off the WLM path.
// Both are required for the fast path to take effect end-to-end.
bool useFastMetadataQuery = connection.UseFastMetadataQuery;
string query = useFastMetadataQuery
? $"DESC TABLE EXTENDED {fullTableName} AS JSON STATIC ONLY"
: $"DESC TABLE EXTENDED {fullTableName} AS JSON";
activity?.AddEvent("statement.desc_table_extended.executing_query", [
new("query_summary", query.Length > 100 ? query.Substring(0, 100) + "..." : query)
new("query_summary", query.Length > 100 ? query.Substring(0, 100) + "..." : query),
new("fast_metadata_query", useFastMetadataQuery)
]);

using var descStmt = Connection.CreateStatement();
using var descStmt = (DatabricksStatement)connection.CreateStatement();
descStmt.SqlQuery = query;

if (useFastMetadataQuery)
{
descStmt.RunAsyncOverride = false;
}
QueryResult descResult;

try
Expand Down
12 changes: 12 additions & 0 deletions csharp/src/StatementExecution/StatementExecutionConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ internal class StatementExecutionConnection : TracingConnection, IGetObjectsData
private bool _enablePKFK;
private bool _enableMultipleCatalogSupport;
private bool _useDescTableExtended;
private bool _enableFastMetadataQuery;
private bool _applySSPWithQueries;

// Connection bring-up timeout (PECO-3059). Mirrors the Thrift path's
Expand Down Expand Up @@ -305,6 +306,7 @@ private void ValidateProperties()
_enablePKFK = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnablePKFK, true);
_enableMultipleCatalogSupport = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnableMultipleCatalogSupport, true);
_useDescTableExtended = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.UseDescTableExtended, true);
_enableFastMetadataQuery = PropertyHelper.GetBooleanPropertyWithValidation(properties, DatabricksParameters.EnableFastMetadataQuery, false);
// When true, SSPs (adbc.databricks.ssp_*) are applied via post-open SET statements
// rather than CreateSession.session_confs — mirrors Thrift's behavior so callers
// who depend on the SET-statement path (e.g., for audit visibility or for SSPs
Expand Down Expand Up @@ -957,6 +959,16 @@ internal async Task<List<RecordBatch>> ExecuteShowColumnsAsync(
/// </summary>
internal bool UseDescTableExtended => _useDescTableExtended;

/// <summary>
/// Indicates whether <c>DESC TABLE EXTENDED &lt;t&gt; AS JSON STATIC ONLY</c> should be
/// emitted instead of the plain <c>DESC TABLE EXTENDED &lt;t&gt; AS JSON</c>. Because SEA
/// always targets a DBSQL warehouse, the flag alone decides this and no warehouse-path
/// check is performed. <see cref="ExecuteMetadataSqlAsync"/> already sends the
/// metadata-query header, and that header is the SEA equivalent of Thrift's
/// RunAsync=false signal.
/// Default: false.
/// </summary>
internal bool EnableFastMetadataQuery
{
    get { return _enableFastMetadataQuery; }
}

/// <summary>
/// Returns the session's default catalog. Used by statements when
/// enableMultipleCatalogSupport=false and no catalog was specified.
Expand Down
9 changes: 8 additions & 1 deletion csharp/src/StatementExecution/StatementExecutionStatement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1351,7 +1351,14 @@ private async Task<QueryResult> GetColumnsExtendedViaDescTableAsync(string? cata
string? fullTableName = MetadataUtilities.BuildQualifiedTableName(
catalogForTableName, _metadataSchemaName, _metadataTableName);

string query = $"DESC TABLE EXTENDED {fullTableName} AS JSON";
// Fast metadata: STATIC ONLY (runtime PR #198486) skips Delta log / Mesa RPCs.
// SEA's ExecuteMetadataSqlAsync already sends the x-databricks-sea-can-run-fully-sync
// header — the SEA equivalent of Thrift's RunAsync=false — so the flag alone is enough
// here to enable the fast-metadata path end-to-end.
bool useFastMetadataQuery = _connection.EnableFastMetadataQuery;
string query = useFastMetadataQuery
? $"DESC TABLE EXTENDED {fullTableName} AS JSON STATIC ONLY"
: $"DESC TABLE EXTENDED {fullTableName} AS JSON";

List<RecordBatch> batches;
try
Expand Down
Loading
Loading