From b4f71d5fd84daea20f3e71aaa91f06c36d0c3f10 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Wed, 18 Mar 2026 12:31:46 -0400 Subject: [PATCH 1/2] feat(driver_manager): search system lib dirs with platform-aware filenames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhances `LOAD_FLAG_SEARCH_SYSTEM` to also search well-known system library directories, and teaches `search_path_list` to try the platform-aware filename (e.g. `duckdb` → `libduckdb.dylib`) in each search directory. ## Changes **`system_lib_dirs()`** (new) — returns existing well-known lib paths: - macOS: `/opt/homebrew/lib`, `/usr/local/lib` - Linux: `/usr/lib`, arch-specific multiarch path, `/usr/local/lib` - Windows: empty (uses registry) **`get_search_paths()`** — extended the `LOAD_FLAG_SEARCH_SYSTEM` block to append `system_lib_dirs()` after the ADBC config dir. **`search_path_list()`** — after the bare-name attempt, also tries the platform-aware filename via `libloading::library_filename()`. Without this, searching `/opt/homebrew/lib` for `duckdb` would never find `libduckdb.dylib`. The comment explains the motivating constraint: macOS enforces matching Team IDs across all shared libraries in a process, so a CDN-bundled driver (signed with one key) blocks user-installed DuckDB extensions (signed with the DuckDB key); using the system library avoids the mismatch. **`driver_manifests.rst`** — documents the new system lib dir search step under `LOAD_FLAG_SEARCH_SYSTEM`. **Tests** — updates `test_get_search_paths` for the new behaviour; adds `test_system_lib_dirs_returns_expected_paths` and `test_search_path_list_uses_platform_filename`. ## Search order after this change (Unix/macOS) 1. `ADBC_DRIVER_PATH` env var (`LOAD_FLAG_SEARCH_ENV`) 2. Caller-provided `additional_search_paths` 3. `$CONDA_PREFIX/etc/adbc/drivers` (conda builds, `LOAD_FLAG_SEARCH_ENV`) 4. User config dir (`LOAD_FLAG_SEARCH_USER`) 5. System config dir (`LOAD_FLAG_SEARCH_SYSTEM`) 6. 
**NEW** System lib dirs — `/opt/homebrew/lib`, `/usr/local/lib`, etc. 7. OS dynamic linker fallback (`load_dynamic_from_name`) ## Motivation / downstream impact This change was motivated by dbt-labs/fs#8693, which adds ~170 lines of DuckDB-specific system library discovery to the `fs` repo because the driver manager didn't search standard lib paths. After this lands, that PR can be simplified to a version bump plus a one-call replacement of its custom discovery logic: ```rust // Before (~105 lines: try_discover_system_duckdb_driver, // try_load_duckdb_from_env_paths, system_duckdb_search_paths, // duckdb_library_filename, plus a bespoke test): if let Some(driver) = Self::try_discover_system_duckdb_driver(adbc_version) { return Ok(driver); } // After (single call; system lib dirs + platform filename handled in adbc): if let Ok(driver) = ManagedAdbcDriver::load_from_name( backend, "duckdb", entrypoint, adbc_version, LOAD_FLAG_SEARCH_SYSTEM, None, ) { return Ok(driver); } ``` The one minor behavioural difference: `fs` currently explicitly walks `DYLD_LIBRARY_PATH`/`LD_LIBRARY_PATH` before the well-known paths; the driver manager's `LOAD_FLAG_SEARCH_ENV` only covers `ADBC_DRIVER_PATH`. In practice this is not a regression — the OS dynamic-linker fallback (step 7) naturally honours those env vars — the only loss is a per-path tracing log line. Co-Authored-By: Claude Sonnet 4.6 --- docs/source/format/driver_manifests.rst | 6 +- rust/driver_manager/src/lib.rs | 153 ++++++++++++++++++++++-- 2 files changed, 149 insertions(+), 10 deletions(-) diff --git a/docs/source/format/driver_manifests.rst b/docs/source/format/driver_manifests.rst index f2d46137a4..bfeb86b79f 100644 --- a/docs/source/format/driver_manifests.rst +++ b/docs/source/format/driver_manifests.rst @@ -460,10 +460,10 @@ the given order: * On Linux (and other Unix-like platforms), the ``XDG_CONFIG_HOME`` environment variable is checked first. 
If it is set, the driver manager will search ``$XDG_CONFIG_HOME/adbc/drivers``, otherwise it will search ``~/.config/adbc/drivers`` -#. If the ``LOAD_FLAG_SEARCH_SYSTEM`` load option is set, then a system-level configuration directory will be searched +#. If the ``LOAD_FLAG_SEARCH_SYSTEM`` load option is set, then a system-level configuration directory will be searched, followed by well-known system library directories - * On macOS, this will be ``/Library/Application Support/ADBC/Drivers`` if it exists - * On Linux (and other Unix-like platforms), this will be ``/etc/adbc/drivers`` if it exists + * On macOS, this will be ``/Library/Application Support/ADBC/Drivers`` if it exists, then ``/opt/homebrew/lib`` (if it exists) and ``/usr/local/lib`` (if it exists) + * On Linux (and other Unix-like platforms), this will be ``/etc/adbc/drivers`` if it exists, then ``/usr/lib``, the architecture-specific multiarch path (e.g. ``/usr/lib/x86_64-linux-gnu``), and ``/usr/local/lib`` (each if they exist) Windows ^^^^^^^ diff --git a/rust/driver_manager/src/lib.rs b/rust/driver_manager/src/lib.rs index 67c1c1706d..f75c075881 100644 --- a/rust/driver_manager/src/lib.rs +++ b/rust/driver_manager/src/lib.rs @@ -492,7 +492,29 @@ impl ManagedDriver { } full_path.set_extension(""); // Remove the extension to try loading as a dynamic library. - if let Ok(result) = Self::load_dynamic_from_filename(full_path, entrypoint, version) { + if let Ok(result) = + Self::load_dynamic_from_filename(&full_path, entrypoint, version) + { + return Ok(result); + } + + // Try the platform-aware filename (e.g. "duckdb" -> "libduckdb.dylib"). + // + // This is required for system library directories (e.g. /opt/homebrew/lib) + // that store libraries under their canonical platform names rather than the + // bare driver name. 
On macOS in particular, preferring a system-installed + // library over a CDN-bundled one is important for code-signing: macOS + // enforces that every shared library loaded into a process shares the same + // Team ID. A CDN-bundled DuckDB driver signed with one key will prevent + // user-installed DuckDB extensions (signed with the DuckDB key) from loading. + // Using the system library avoids this mismatch. + // + // See: docs/source/format/driver_manifests.rst, LOAD_FLAG_SEARCH_SYSTEM section. + let lib_filename = libloading::library_filename(driver_path); + let platform_path = path.join(&lib_filename); + if let Ok(result) = + Self::load_dynamic_from_filename(&platform_path, entrypoint, version) + { return Ok(result); } } @@ -1849,6 +1871,53 @@ fn system_config_dir() -> Option<PathBuf> { } } +fn system_lib_dirs() -> Vec<PathBuf> { + let mut result = Vec::new(); + + #[cfg(target_os = "macos")] + { + let homebrew = PathBuf::from("/opt/homebrew/lib"); + if homebrew.is_dir() { + result.push(homebrew); + } + let usr_local = PathBuf::from("/usr/local/lib"); + if usr_local.is_dir() { + result.push(usr_local); + } + } + + #[cfg(all(unix, not(target_os = "macos")))] + { + let usr_lib = PathBuf::from("/usr/lib"); + if usr_lib.is_dir() { + result.push(usr_lib); + } + + // Architecture-specific multiarch path + #[cfg(target_arch = "x86_64")] + { + let multiarch = PathBuf::from("/usr/lib/x86_64-linux-gnu"); + if multiarch.is_dir() { + result.push(multiarch); + } + } + #[cfg(target_arch = "aarch64")] + { + let multiarch = PathBuf::from("/usr/lib/aarch64-linux-gnu"); + if multiarch.is_dir() { + result.push(multiarch); + } + } + + let usr_local = PathBuf::from("/usr/local/lib"); + if usr_local.is_dir() { + result.push(usr_local); + } + } + + result +} + fn get_search_paths(lvls: LoadFlags) -> Vec<PathBuf> { let mut result = Vec::new(); if lvls & LOAD_FLAG_SEARCH_ENV != 0 { @@ -1875,6 +1944,7 @@ fn get_search_paths(lvls: LoadFlags) -> Vec<PathBuf> { result.push(path); } } + result.extend(system_lib_dirs()); } 
result @@ -2368,15 +2438,84 @@ mod tests { #[cfg_attr(not(windows), ignore)] fn test_get_search_paths() { #[cfg(target_os = "macos")] - let system_path = PathBuf::from("/Library/Application Support/ADBC/Drivers"); + let system_config = PathBuf::from("/Library/Application Support/ADBC/Drivers"); #[cfg(not(target_os = "macos"))] - let system_path = PathBuf::from("/etc/adbc/drivers"); + let system_config = PathBuf::from("/etc/adbc/drivers"); let search_paths = get_search_paths(LOAD_FLAG_SEARCH_SYSTEM); - if system_path.exists() { - assert_eq!(search_paths, vec![system_path]); - } else { - assert_eq!(search_paths, Vec::<PathBuf>::new()); + + // The config dir is included only when it exists; system lib dirs follow. + if system_config.exists() { + assert!(search_paths.contains(&system_config)); + } + + // All returned paths must exist. + for p in &search_paths { + assert!(p.is_dir(), "search path does not exist: {}", p.display()); + } + + // system lib dirs should be a subset of the returned paths. + for p in system_lib_dirs() { + assert!( + search_paths.contains(&p), + "expected system lib dir in search paths: {}", + p.display() + ); + } + } + + #[cfg(not(windows))] + #[test] + fn test_system_lib_dirs_returns_expected_paths() { + let dirs = system_lib_dirs(); + + // Every returned path must exist and be a directory. + for p in &dirs { + assert!(p.is_dir(), "system_lib_dirs returned non-existent path: {}", p.display()); + } + + #[cfg(target_os = "macos")] + { + // On macOS the only candidates are /opt/homebrew/lib and /usr/local/lib. + let candidates = [ + PathBuf::from("/opt/homebrew/lib"), + PathBuf::from("/usr/local/lib"), + ]; + for p in &dirs { + assert!(candidates.contains(p), "unexpected path on macOS: {}", p.display()); + } + } + + #[cfg(all(unix, not(target_os = "macos")))] + { + // On Linux /usr/lib is almost always present. 
+ if PathBuf::from("/usr/lib").is_dir() { + assert!( + dirs.contains(&PathBuf::from("/usr/lib")), + "/usr/lib exists but was not returned" + ); + } } } + + #[cfg(not(windows))] + #[test] + fn test_search_path_list_uses_platform_filename() { + // Verify that library_filename produces a platform-aware name. + // We just check the naming convention rather than loading a real library. + let name = libloading::library_filename("duckdb"); + let name_str = name.to_string_lossy(); + + #[cfg(target_os = "macos")] + assert!( + name_str.starts_with("lib") && name_str.ends_with(".dylib"), + "unexpected library filename on macOS: {name_str}" + ); + + #[cfg(all(unix, not(target_os = "macos")))] + assert!( + name_str.starts_with("lib") && name_str.contains(".so"), + "unexpected library filename on Linux: {name_str}" + ); + } } From 2ba9302206bed61fe9df42db0a05aa879e4bcf51 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Mon, 6 Apr 2026 11:57:44 -0400 Subject: [PATCH 2/2] feat(go/adbc/driver/bigquery): expose bytes processed stats after ExecuteUpdate After ExecuteUpdate completes (e.g. CREATE TABLE AS SELECT), store TotalBytesProcessed and TotalBytesBilled from the BigQuery JobStatistics on the statement struct. These are accessible via GetOptionInt with the new OptionIntStatBytesProcessed and OptionIntStatBytesBilled keys. This allows consumers like dbt-fusion to log `X.X GiB processed` annotations in execution output, matching dbt-core's bigquery adapter behavior. 
Co-Authored-By: Claude Sonnet 4.6 --- go/adbc/driver/bigquery/driver.go | 7 +++++++ go/adbc/driver/bigquery/record_reader.go | 22 +++++++++++++++++++--- go/adbc/driver/bigquery/statement.go | 23 +++++++++++++++++++++-- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/go/adbc/driver/bigquery/driver.go b/go/adbc/driver/bigquery/driver.go index c8c329c636..2ac2cb006e 100644 --- a/go/adbc/driver/bigquery/driver.go +++ b/go/adbc/driver/bigquery/driver.go @@ -123,6 +123,13 @@ const ( OptionJsonAuthorizeViewToDatasets = "adbc.bigquery.dataset.authorize_view_to_datasets" OptionStringUpdateTableDescriptionValue = "adbc.bigquery.table.update_description" + // OptionIntStatBytesProcessed is a read-only post-execution option that returns the + // total bytes processed by the last ExecuteUpdate call (e.g. CREATE TABLE AS SELECT). + OptionIntStatBytesProcessed = "adbc.bigquery.sql.stat.bytes_processed" + // OptionIntStatBytesBilled is a read-only post-execution option that returns the + // total bytes billed for the last ExecuteUpdate call. + OptionIntStatBytesBilled = "adbc.bigquery.sql.stat.bytes_billed" + // WithAppDefaultCredentials instructs the driver to authenticate using // Application Default Credentials (ADC). OptionValueAuthTypeAppDefaultCredentials = "adbc.bigquery.sql.auth_type.app_default_credentials" diff --git a/go/adbc/driver/bigquery/record_reader.go b/go/adbc/driver/bigquery/record_reader.go index 301003c0fd..79e3501b5e 100644 --- a/go/adbc/driver/bigquery/record_reader.go +++ b/go/adbc/driver/bigquery/record_reader.go @@ -42,6 +42,12 @@ const ( MetadataKeyBigqueryQueryID = "BIGQUERY:query_id" ) +// jobStats holds post-execution statistics from a BigQuery job. 
+type jobStats struct { + BytesProcessed int64 + BytesBilled int64 +} + type reader struct { refCount int64 schema *arrow.Schema @@ -64,12 +70,22 @@ func checkContext(ctx context.Context, maybeErr error) error { return ctx.Err() } -func runQuery(ctx context.Context, query *bigquery.Query, executeUpdate bool, linkFailedJob bool, alloc memory.Allocator) (bigquery.ArrowIterator, int64, error) { +func runQuery(ctx context.Context, query *bigquery.Query, executeUpdate bool, linkFailedJob bool, alloc memory.Allocator, outStats *jobStats) (bigquery.ArrowIterator, int64, error) { job, err := query.Run(ctx) if err != nil { return nil, -1, err } if executeUpdate { + status, waitErr := job.Wait(ctx) + if waitErr != nil { + return nil, -1, waitErr + } + if outStats != nil && status != nil && status.Statistics != nil { + outStats.BytesProcessed = status.Statistics.TotalBytesProcessed + if qs, ok := status.Statistics.Details.(*bigquery.QueryStatistics); ok { + outStats.BytesBilled = qs.TotalBytesBilled + } + } return nil, 0, nil } @@ -135,7 +151,7 @@ func getQueryParameter(values arrow.RecordBatch, row int, parameterMode string) } func runPlainQuery(ctx context.Context, query *bigquery.Query, alloc memory.Allocator, resultRecordBufferSize int, linkFailedJob bool) (bigqueryRdr *reader, totalRows int64, err error) { - arrowIterator, totalRows, err := runQuery(ctx, query, false, linkFailedJob, alloc) + arrowIterator, totalRows, err := runQuery(ctx, query, false, linkFailedJob, alloc, nil) if err != nil { return nil, -1, err } @@ -192,7 +208,7 @@ func queryRecordWithSchemaCallback(ctx context.Context, group *errgroup.Group, q query.Parameters = parameters } - arrowIterator, rows, err := runQuery(ctx, query, false, linkFailedJob, alloc) + arrowIterator, rows, err := runQuery(ctx, query, false, linkFailedJob, alloc, nil) if err != nil { return -1, err } diff --git a/go/adbc/driver/bigquery/statement.go b/go/adbc/driver/bigquery/statement.go index 5db769d919..ac846abb66 100644 --- 
a/go/adbc/driver/bigquery/statement.go +++ b/go/adbc/driver/bigquery/statement.go @@ -122,6 +122,10 @@ type statement struct { // Wrap errors with a link to failed job linkFailedJob bool + + // Post-execution statistics populated by ExecuteUpdate + lastBytesProcessed int64 + lastBytesBilled int64 } func (st *statement) GetOptionBytes(key string) ([]byte, error) { @@ -279,6 +283,10 @@ func (st *statement) GetOption(key string) (string, error) { func (st *statement) GetOptionInt(key string) (int64, error) { switch key { + case OptionIntStatBytesProcessed: + return st.lastBytesProcessed, nil + case OptionIntStatBytesBilled: + return st.lastBytesBilled, nil case OptionIntQueryMaxBillingTier: return int64(st.queryConfig.MaxBillingTier), nil case OptionIntQueryMaxBytesBilled: @@ -580,17 +588,25 @@ func (st *statement) ExecuteQuery(ctx context.Context) (array.RecordReader, int6 // ExecuteUpdate executes a statement that does not generate a result // set. It returns the number of rows affected if known, otherwise -1. +// After a successful call, bytes processed are available via GetOptionInt +// with OptionIntStatBytesProcessed and OptionIntStatBytesBilled. 
func (st *statement) ExecuteUpdate(ctx context.Context) (int64, error) { boundParameters, err := st.getBoundParameterReader() if err != nil { return -1, err } + st.lastBytesProcessed = 0 + st.lastBytesBilled = 0 + if boundParameters == nil { - _, totalRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc) + var stats jobStats + _, totalRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc, &stats) if err != nil { return -1, err } + st.lastBytesProcessed = stats.BytesProcessed + st.lastBytesBilled = stats.BytesBilled return totalRows, nil } else { totalRows := int64(0) @@ -605,11 +621,14 @@ func (st *statement) ExecuteUpdate(ctx context.Context) (int64, error) { st.queryConfig.Parameters = parameters } - _, currentRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc) + var stats jobStats + _, currentRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc, &stats) if err != nil { return -1, err } totalRows += currentRows + st.lastBytesProcessed += stats.BytesProcessed + st.lastBytesBilled += stats.BytesBilled } } return totalRows, nil