diff --git a/docs/source/format/driver_manifests.rst b/docs/source/format/driver_manifests.rst index f2d46137a4..bfeb86b79f 100644 --- a/docs/source/format/driver_manifests.rst +++ b/docs/source/format/driver_manifests.rst @@ -460,10 +460,10 @@ the given order: * On Linux (and other Unix-like platforms), the ``XDG_CONFIG_HOME`` environment variable is checked first. If it is set, the driver manager will search ``$XDG_CONFIG_HOME/adbc/drivers``, otherwise it will search ``~/.config/adbc/drivers`` -#. If the ``LOAD_FLAG_SEARCH_SYSTEM`` load option is set, then a system-level configuration directory will be searched +#. If the ``LOAD_FLAG_SEARCH_SYSTEM`` load option is set, then a system-level configuration directory will be searched, followed by well-known system library directories - * On macOS, this will be ``/Library/Application Support/ADBC/Drivers`` if it exists - * On Linux (and other Unix-like platforms), this will be ``/etc/adbc/drivers`` if it exists + * On macOS, this will be ``/Library/Application Support/ADBC/Drivers`` if it exists, then ``/opt/homebrew/lib`` (if it exists) and ``/usr/local/lib`` (if it exists) + * On Linux (and other Unix-like platforms), this will be ``/etc/adbc/drivers`` if it exists, then ``/usr/lib``, the architecture-specific multiarch path (e.g. ``/usr/lib/x86_64-linux-gnu``), and ``/usr/local/lib`` (each if they exist) Windows ^^^^^^^ diff --git a/go/adbc/driver/bigquery/driver.go b/go/adbc/driver/bigquery/driver.go index c8c329c636..2ac2cb006e 100644 --- a/go/adbc/driver/bigquery/driver.go +++ b/go/adbc/driver/bigquery/driver.go @@ -123,6 +123,13 @@ const ( OptionJsonAuthorizeViewToDatasets = "adbc.bigquery.dataset.authorize_view_to_datasets" OptionStringUpdateTableDescriptionValue = "adbc.bigquery.table.update_description" + // OptionIntStatBytesProcessed is a read-only post-execution option that returns the + // total bytes processed by the last ExecuteUpdate call (e.g. CREATE TABLE AS SELECT). + OptionIntStatBytesProcessed = "adbc.bigquery.sql.stat.bytes_processed" + // OptionIntStatBytesBilled is a read-only post-execution option that returns the + // total bytes billed for the last ExecuteUpdate call. + OptionIntStatBytesBilled = "adbc.bigquery.sql.stat.bytes_billed" + // WithAppDefaultCredentials instructs the driver to authenticate using // Application Default Credentials (ADC). OptionValueAuthTypeAppDefaultCredentials = "adbc.bigquery.sql.auth_type.app_default_credentials" diff --git a/go/adbc/driver/bigquery/record_reader.go b/go/adbc/driver/bigquery/record_reader.go index 301003c0fd..79e3501b5e 100644 --- a/go/adbc/driver/bigquery/record_reader.go +++ b/go/adbc/driver/bigquery/record_reader.go @@ -42,6 +42,12 @@ const ( MetadataKeyBigqueryQueryID = "BIGQUERY:query_id" ) +// jobStats holds post-execution statistics from a BigQuery job. +type jobStats struct { + BytesProcessed int64 + BytesBilled int64 +} + type reader struct { refCount int64 schema *arrow.Schema @@ -64,12 +70,22 @@ func checkContext(ctx context.Context, maybeErr error) error { return ctx.Err() } -func runQuery(ctx context.Context, query *bigquery.Query, executeUpdate bool, linkFailedJob bool, alloc memory.Allocator) (bigquery.ArrowIterator, int64, error) { +func runQuery(ctx context.Context, query *bigquery.Query, executeUpdate bool, linkFailedJob bool, alloc memory.Allocator, outStats *jobStats) (bigquery.ArrowIterator, int64, error) { job, err := query.Run(ctx) if err != nil { return nil, -1, err } if executeUpdate { + status, waitErr := job.Wait(ctx) + if waitErr != nil { + return nil, -1, waitErr + } + if outStats != nil && status != nil && status.Statistics != nil { + outStats.BytesProcessed = status.Statistics.TotalBytesProcessed + if qs, ok := status.Statistics.Details.(*bigquery.QueryStatistics); ok { + outStats.BytesBilled = qs.TotalBytesBilled + } + } return nil, 0, nil } @@ -135,7 +151,7 @@ func getQueryParameter(values arrow.RecordBatch, row int, parameterMode string) } func runPlainQuery(ctx context.Context, query *bigquery.Query, alloc memory.Allocator, resultRecordBufferSize int, linkFailedJob bool) (bigqueryRdr *reader, totalRows int64, err error) { - arrowIterator, totalRows, err := runQuery(ctx, query, false, linkFailedJob, alloc) + arrowIterator, totalRows, err := runQuery(ctx, query, false, linkFailedJob, alloc, nil) if err != nil { return nil, -1, err } @@ -192,7 +208,7 @@ func queryRecordWithSchemaCallback(ctx context.Context, group *errgroup.Group, q query.Parameters = parameters } - arrowIterator, rows, err := runQuery(ctx, query, false, linkFailedJob, alloc) + arrowIterator, rows, err := runQuery(ctx, query, false, linkFailedJob, alloc, nil) if err != nil { return -1, err } diff --git a/go/adbc/driver/bigquery/statement.go b/go/adbc/driver/bigquery/statement.go index 5db769d919..ac846abb66 100644 --- a/go/adbc/driver/bigquery/statement.go +++ b/go/adbc/driver/bigquery/statement.go @@ -122,6 +122,10 @@ type statement struct { // Wrap errors with a link to failed job linkFailedJob bool + + // Post-execution statistics populated by ExecuteUpdate + lastBytesProcessed int64 + lastBytesBilled int64 } func (st *statement) GetOptionBytes(key string) ([]byte, error) { @@ -279,6 +283,10 @@ func (st *statement) GetOption(key string) (string, error) { func (st *statement) GetOptionInt(key string) (int64, error) { switch key { + case OptionIntStatBytesProcessed: + return st.lastBytesProcessed, nil + case OptionIntStatBytesBilled: + return st.lastBytesBilled, nil case OptionIntQueryMaxBillingTier: return int64(st.queryConfig.MaxBillingTier), nil case OptionIntQueryMaxBytesBilled: @@ -580,17 +588,25 @@ func (st *statement) ExecuteQuery(ctx context.Context) (array.RecordReader, int6 // ExecuteUpdate executes a statement that does not generate a result // set. It returns the number of rows affected if known, otherwise -1. +// After a successful call, bytes processed are available via GetOptionInt +// with OptionIntStatBytesProcessed and OptionIntStatBytesBilled. func (st *statement) ExecuteUpdate(ctx context.Context) (int64, error) { boundParameters, err := st.getBoundParameterReader() if err != nil { return -1, err } + st.lastBytesProcessed = 0 + st.lastBytesBilled = 0 + if boundParameters == nil { - _, totalRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc) + var stats jobStats + _, totalRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc, &stats) if err != nil { return -1, err } + st.lastBytesProcessed = stats.BytesProcessed + st.lastBytesBilled = stats.BytesBilled return totalRows, nil } else { totalRows := int64(0) @@ -605,11 +621,14 @@ func (st *statement) ExecuteUpdate(ctx context.Context) (int64, error) { st.queryConfig.Parameters = parameters } - _, currentRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc) + var stats jobStats + _, currentRows, err := runQuery(ctx, st.query(), true, st.linkFailedJob, st.alloc, &stats) if err != nil { return -1, err } totalRows += currentRows + st.lastBytesProcessed += stats.BytesProcessed + st.lastBytesBilled += stats.BytesBilled } } return totalRows, nil diff --git a/rust/driver_manager/src/lib.rs b/rust/driver_manager/src/lib.rs index 67c1c1706d..f75c075881 100644 --- a/rust/driver_manager/src/lib.rs +++ b/rust/driver_manager/src/lib.rs @@ -492,7 +492,29 @@ impl ManagedDriver { } full_path.set_extension(""); // Remove the extension to try loading as a dynamic library. - if let Ok(result) = Self::load_dynamic_from_filename(full_path, entrypoint, version) { + if let Ok(result) = + Self::load_dynamic_from_filename(&full_path, entrypoint, version) + { + return Ok(result); + } + + // Try the platform-aware filename (e.g. "duckdb" -> "libduckdb.dylib"). + // + // This is required for system library directories (e.g. /opt/homebrew/lib) + // that store libraries under their canonical platform names rather than the + // bare driver name. On macOS in particular, preferring a system-installed + // library over a CDN-bundled one is important for code-signing: macOS + // enforces that every shared library loaded into a process shares the same + // Team ID. A CDN-bundled DuckDB driver signed with one key will prevent + // user-installed DuckDB extensions (signed with the DuckDB key) from loading. + // Using the system library avoids this mismatch. + // + // See: docs/source/format/driver_manifests.rst, LOAD_FLAG_SEARCH_SYSTEM section. + let lib_filename = libloading::library_filename(driver_path); + let platform_path = path.join(&lib_filename); + if let Ok(result) = + Self::load_dynamic_from_filename(&platform_path, entrypoint, version) + { return Ok(result); } } @@ -1849,6 +1871,53 @@ fn system_config_dir() -> Option { } } +fn system_lib_dirs() -> Vec { + let mut result = Vec::new(); + + #[cfg(target_os = "macos")] + { + let homebrew = PathBuf::from("/opt/homebrew/lib"); + if homebrew.is_dir() { + result.push(homebrew); + } + let usr_local = PathBuf::from("/usr/local/lib"); + if usr_local.is_dir() { + result.push(usr_local); + } + } + + #[cfg(all(unix, not(target_os = "macos")))] + { + let usr_lib = PathBuf::from("/usr/lib"); + if usr_lib.is_dir() { + result.push(usr_lib); + } + + // Architecture-specific multiarch path + #[cfg(target_arch = "x86_64")] + { + let multiarch = PathBuf::from("/usr/lib/x86_64-linux-gnu"); + if multiarch.is_dir() { + result.push(multiarch); + } + } + #[cfg(target_arch = "aarch64")] + { + let multiarch = PathBuf::from("/usr/lib/aarch64-linux-gnu"); + if multiarch.is_dir() { + result.push(multiarch); + } + } + + let usr_local = PathBuf::from("/usr/local/lib"); + if usr_local.is_dir() { + result.push(usr_local); + } + } + + result +} + fn get_search_paths(lvls: LoadFlags) -> Vec { let mut result = Vec::new(); if lvls & LOAD_FLAG_SEARCH_ENV != 0 { @@ -1875,6 +1944,7 @@ fn get_search_paths(lvls: LoadFlags) -> Vec { result.push(path); } } + result.extend(system_lib_dirs()); } result @@ -2368,15 +2438,84 @@ mod tests { #[cfg_attr(not(windows), ignore)] fn test_get_search_paths() { #[cfg(target_os = "macos")] - let system_path = PathBuf::from("/Library/Application Support/ADBC/Drivers"); + let system_config = PathBuf::from("/Library/Application Support/ADBC/Drivers"); #[cfg(not(target_os = "macos"))] - let system_path = PathBuf::from("/etc/adbc/drivers"); + let system_config = PathBuf::from("/etc/adbc/drivers"); let search_paths = get_search_paths(LOAD_FLAG_SEARCH_SYSTEM); - if system_path.exists() { - assert_eq!(search_paths, vec![system_path]); - } else { - assert_eq!(search_paths, Vec::::new()); + + // The config dir is included only when it exists; system lib dirs follow. + if system_config.exists() { + assert!(search_paths.contains(&system_config)); + } + + // All returned paths must exist. + for p in &search_paths { + assert!(p.is_dir(), "search path does not exist: {}", p.display()); + } + + // system lib dirs should be a subset of the returned paths. + for p in system_lib_dirs() { + assert!( + search_paths.contains(&p), + "expected system lib dir in search paths: {}", + p.display() + ); + } + } + + #[cfg(not(windows))] + #[test] + fn test_system_lib_dirs_returns_expected_paths() { + let dirs = system_lib_dirs(); + + // Every returned path must exist and be a directory. + for p in &dirs { + assert!(p.is_dir(), "system_lib_dirs returned non-existent path: {}", p.display()); + } + + #[cfg(target_os = "macos")] + { + // On macOS the only candidates are /opt/homebrew/lib and /usr/local/lib. + let candidates = [ + PathBuf::from("/opt/homebrew/lib"), + PathBuf::from("/usr/local/lib"), + ]; + for p in &dirs { + assert!(candidates.contains(p), "unexpected path on macOS: {}", p.display()); + } + } + + #[cfg(all(unix, not(target_os = "macos")))] + { + // On Linux /usr/lib is almost always present. + if PathBuf::from("/usr/lib").is_dir() { + assert!( + dirs.contains(&PathBuf::from("/usr/lib")), + "/usr/lib exists but was not returned" + ); + } } } + + #[cfg(not(windows))] + #[test] + fn test_search_path_list_uses_platform_filename() { + // Verify that library_filename produces a platform-aware name. + // We just check the naming convention rather than loading a real library. + let name = libloading::library_filename("duckdb"); + let name_str = name.to_string_lossy(); + + #[cfg(target_os = "macos")] + assert!( + name_str.starts_with("lib") && name_str.ends_with(".dylib"), + "unexpected library filename on macOS: {name_str}" + ); + + #[cfg(all(unix, not(target_os = "macos")))] + assert!( + name_str.starts_with("lib") && name_str.contains(".so"), + "unexpected library filename on Linux: {name_str}" + ); + } }