diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index c4088d41c8f..b95ea57b480 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -289,7 +289,7 @@ impl DbUrlOptions { let db_config = db::Config { url: db_url.clone() }; let pool = - Arc::new(db::Pool::new_single_host(&log.clone(), &db_config)); + Arc::new(db::PoolBuilder::new_single_host(&log, db_config).build()); // Being a dev tool, we want to try this operation even if the schema // doesn't match what we expect. So we use `DataStore::new_unchecked()` diff --git a/dev-tools/reconfigurator-exec-unsafe/src/main.rs b/dev-tools/reconfigurator-exec-unsafe/src/main.rs index 9e0f2b52523..f3495ae1487 100644 --- a/dev-tools/reconfigurator-exec-unsafe/src/main.rs +++ b/dev-tools/reconfigurator-exec-unsafe/src/main.rs @@ -108,7 +108,7 @@ impl ReconfiguratorExec { internal_dns_resolver::QorbResolver::new(vec![self.dns_server]); info!(&log, "setting up database pool"); - let pool = Arc::new(db::Pool::new(&log, &qorb_resolver)); + let pool = Arc::new(db::PoolBuilder::new(&log, &qorb_resolver).build()); let datastore = Arc::new( DataStore::new_failfast(&log, pool) .await diff --git a/live-tests/tests/common/mod.rs b/live-tests/tests/common/mod.rs index 6bbbaf9b564..da35fbc92ab 100644 --- a/live-tests/tests/common/mod.rs +++ b/live-tests/tests/common/mod.rs @@ -134,8 +134,10 @@ async fn create_datastore( .context("failed to parse constructed postgres URL")?; let db_config = nexus_db_queries::db::Config { url }; - let pool = - Arc::new(nexus_db_queries::db::Pool::new_single_host(log, &db_config)); + let pool = Arc::new( + nexus_db_queries::db::PoolBuilder::new_single_host(&log, db_config) + .build(), + ); DataStore::new_failfast(log, pool) .await .context("creating DataStore") diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index a91d98dcaa8..d1213f7cb8f 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -273,6 +273,7 @@ pub struct MgdConfig { struct UnvalidatedTunables { max_vpc_ipv4_subnet_prefix: u8, load_timeout: Option, + collect_backtraces: Option, } /// Configuration for HTTP clients to external services. @@ -301,6 +302,12 @@ pub struct Tunables { /// /// If "None", nexus loops forever during initialization. pub load_timeout: Option, + + /// Should backtraces be collected for database connections? + /// + /// If "None", uses the default configured by the `PoolBuilder` + /// in nexus_db_queries. + pub collect_backtraces: Option, } // Convert from the unvalidated tunables, verifying each parameter as needed. @@ -312,6 +319,7 @@ impl TryFrom for Tunables { Ok(Tunables { max_vpc_ipv4_subnet_prefix: unvalidated.max_vpc_ipv4_subnet_prefix, load_timeout: unvalidated.load_timeout, + collect_backtraces: unvalidated.collect_backtraces, }) } } @@ -363,6 +371,7 @@ impl Default for Tunables { Tunables { max_vpc_ipv4_subnet_prefix: MAX_VPC_IPV4_SUBNET_PREFIX, load_timeout: None, + collect_backtraces: None, } } } @@ -1201,7 +1210,8 @@ mod test { schema: None, tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27, - load_timeout: None + load_timeout: None, + collect_backtraces: None, }, dendrite: HashMap::from([( SwitchLocation::Switch0, diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index 411ed3d4cce..2d7d556873d 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -46,6 +46,7 @@ pub use config::Config; pub use datastore::DataStore; pub use on_conflict_ext::IncompleteOnConflictExt; pub use pool::Pool; +pub use pool::PoolBuilder; pub use saga_types::SecId; pub use sec_store::CockroachDbSecStore; diff --git a/nexus/db-queries/src/db/pool.rs b/nexus/db-queries/src/db/pool.rs index 65473aad14e..a67f4dd470a 100644 --- a/nexus/db-queries/src/db/pool.rs +++ b/nexus/db-queries/src/db/pool.rs @@ -28,6 +28,8 @@ use tokio::sync::watch; /// Wrapper around a database connection pool. /// /// Expected to be used as the primary interface to the database. +/// +/// Can be constructed with a [`PoolBuilder`]. pub struct Pool { // IDs are assigned to each connection, acting as keys within the Quiesce // state. These are used to track the set of in-use connections and @@ -37,6 +39,7 @@ pub struct Pool { log: Logger, terminated: std::sync::atomic::AtomicBool, quiesce: watch::Sender, + collect_backtraces: bool, } // Provides an alternative to the DNS resolver for cases where we want to @@ -46,7 +49,7 @@ struct SingleHostResolver { } impl SingleHostResolver { - fn new(config: &DbConfig) -> Self { + fn new(config: DbConfig) -> Self { let backends = Arc::new(BTreeMap::from([( backend::Name::new("singleton"), backend::Backend { address: config.url.address() }, @@ -63,7 +66,7 @@ impl Resolver for SingleHostResolver { } fn make_single_host_resolver( - config: &DbConfig, + config: DbConfig, ) -> qorb::resolver::BoxedResolver { Box::new(SingleHostResolver::new(config)) } @@ -85,81 +88,60 @@ fn make_postgres_connector( )) } -impl Pool { - /// Creates a new qorb-backed connection pool to the database. - /// - /// Creating this pool does not necessarily wait for connections to become - /// available, as backends may shift over time. - pub fn new(log: &Logger, resolver: &QorbResolver) -> Self { - let resolver = resolver.for_service(ServiceName::Cockroach); - let connector = make_postgres_connector(log); - let policy = Policy::default(); - let inner = match qorb::pool::Pool::new( - "crdb".to_string(), - resolver, - connector, - policy, - ) { - Ok(pool) => { - debug!(log, "registered USDT probes"); - pool - } - Err(err) => { - error!(log, "failed to register USDT probes"); - err.into_inner() - } - }; - Self::new_common(inner, log.clone()) +enum ConnectWith<'a> { + Resolver(&'a QorbResolver), + SingleHost(Box), +} + +/// Utility for building [`Pool`]s. +pub struct PoolBuilder<'a> { + log: &'a Logger, + connect_with: ConnectWith<'a>, + policy: Option, + collect_backtraces: Option, +} + +impl<'a> PoolBuilder<'a> { + /// Uses a resolver to connect to the database. + pub fn new(log: &'a Logger, resolver: &'a QorbResolver) -> Self { + let connect_with = ConnectWith::Resolver(resolver); + Self { log, connect_with, policy: None, collect_backtraces: None } } - /// Creates a new qorb-backed connection pool to a single instance of the - /// database. - /// - /// This is intended for tests that want to skip DNS resolution, relying - /// on a single instance of the database. - /// - /// In production, [Self::new] should be preferred. - pub fn new_single_host(log: &Logger, db_config: &DbConfig) -> Self { - let resolver = make_single_host_resolver(db_config); - let connector = make_postgres_connector(log); - let policy = Policy::default(); - let inner = match qorb::pool::Pool::new( - "crdb-single-host".to_string(), - resolver, - connector, - policy, - ) { - Ok(pool) => { - debug!(log, "registered USDT probes"); - pool + /// Connects to a single hard-coded database node. + pub fn new_single_host(log: &'a Logger, config: DbConfig) -> Self { + let connect_with = ConnectWith::SingleHost(Box::new(config)); + Self { log, connect_with, policy: None, collect_backtraces: None } + } + + pub fn policy(mut self, policy: Policy) -> Self { + self.policy = Some(policy); + self + } + + pub fn collect_backtraces(mut self, collect_backtraces: bool) -> Self { + self.collect_backtraces = Some(collect_backtraces); + self + } + + pub fn build(self) -> Pool { + let Self { log, connect_with, policy, collect_backtraces } = self; + + let (resolver, name) = match connect_with { + ConnectWith::Resolver(resolver) => { + (resolver.for_service(ServiceName::Cockroach), "crdb") } - Err(err) => { - error!(log, "failed to register USDT probes"); - err.into_inner() + ConnectWith::SingleHost(config) => { + (make_single_host_resolver(*config), "crdb-single-host") } }; - Self::new_common(inner, log.clone()) - } - - /// Creates a new qorb-backed connection pool which returns an error - /// if claims are not available within one millisecond. - /// - /// This is intended for test-only usage, in particular for tests where - /// claim requests should rapidly return errors when a backend has been - /// intentionally disabled. - #[cfg(any(test, feature = "testing"))] - pub fn new_single_host_failfast( - log: &Logger, - db_config: &DbConfig, - ) -> Self { - let resolver = make_single_host_resolver(db_config); let connector = make_postgres_connector(log); - let policy = Policy { - claim_timeout: tokio::time::Duration::from_millis(1), - ..Default::default() - }; + + let policy = policy.unwrap_or_else(|| Policy::default()); + let collect_backtraces = collect_backtraces.unwrap_or(true); + let inner = match qorb::pool::Pool::new( - "crdb-single-host-failfast".to_string(), + name.to_string(), resolver, connector, policy, @@ -173,12 +155,15 @@ impl Pool { err.into_inner() } }; - Self::new_common(inner, log.clone()) + Pool::new(inner, log.clone(), collect_backtraces) } +} - fn new_common( +impl Pool { + fn new( inner: qorb::pool::Pool, log: Logger, + collect_backtraces: bool, ) -> Self { let (quiesce, _) = watch::channel(Quiesce { new_claims_allowed: ClaimsAllowed::Allowed, @@ -190,6 +175,7 @@ impl Pool { log, terminated: std::sync::atomic::AtomicBool::new(false), quiesce, + collect_backtraces, } } @@ -197,7 +183,11 @@ impl Pool { pub async fn claim(&self) -> Result { let id = self.next_id.fetch_add(1, Ordering::Relaxed); let held_since = Utc::now(); - let debug = Backtrace::force_capture().to_string(); + let debug = if self.collect_backtraces { + Backtrace::force_capture().to_string() + } else { + "".to_string() + }; let allowed = self.quiesce.send_if_modified(|q| { if let ClaimsAllowed::Disallowed = q.new_claims_allowed { false @@ -366,7 +356,9 @@ mod test { let mut db = crdb::test_setup_database(log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; { - let pool = Pool::new_single_host(&log, &cfg); + let pool = PoolBuilder::new_single_host(&log, cfg) + .collect_backtraces(false) + .build(); pool.terminate().await; } db.cleanup().await.unwrap(); @@ -383,7 +375,9 @@ mod test { let mut db = crdb::test_setup_database(log).await; let cfg = crate::db::Config { url: db.pg_config().clone() }; { - let pool = Pool::new_single_host(&log, &cfg); + let pool = PoolBuilder::new_single_host(&log, cfg) + .collect_backtraces(false) + .build(); drop(pool); } db.cleanup().await.unwrap(); diff --git a/nexus/db-queries/src/db/pub_test_utils/mod.rs b/nexus/db-queries/src/db/pub_test_utils/mod.rs index 6662fe8cc06..4dd21052bd5 100644 --- a/nexus/db-queries/src/db/pub_test_utils/mod.rs +++ b/nexus/db-queries/src/db/pub_test_utils/mod.rs @@ -35,7 +35,12 @@ enum Interface { fn new_pool(log: &Logger, db: &CockroachInstance) -> Arc { let cfg = db::Config { url: db.pg_config().clone() }; - Arc::new(db::Pool::new_single_host(log, &cfg)) + + Arc::new( + db::PoolBuilder::new_single_host(&log, cfg) + .collect_backtraces(false) + .build(), + ) } struct TestDatabaseBuilder { @@ -322,7 +327,11 @@ async fn datastore_test( use crate::authn; let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new_single_host(&log, &cfg)); + let pool = Arc::new( + db::PoolBuilder::new_single_host(&log, cfg) + .collect_backtraces(false) + .build(), + ); let datastore = Arc::new( DataStore::new(&log, pool, None, IdentityCheckPolicy::DontCare) .await diff --git a/nexus/src/bin/schema-updater.rs b/nexus/src/bin/schema-updater.rs index e6d10ef18c1..92f86f59a28 100644 --- a/nexus/src/bin/schema-updater.rs +++ b/nexus/src/bin/schema-updater.rs @@ -79,7 +79,8 @@ async fn main_impl() -> anyhow::Result<()> { let all_versions = AllSchemaVersions::load(&schema_config.schema_dir)?; let crdb_cfg = db::Config { url: args.url }; - let pool = Arc::new(db::Pool::new_single_host(&log, &crdb_cfg)); + let pool = + Arc::new(db::PoolBuilder::new_single_host(&log, crdb_cfg).build()); // We use the unchecked constructor of the datastore because we // don't want to block on someone else applying an upgrade. diff --git a/nexus/src/context.rs b/nexus/src/context.rs index 91f0db6c31e..b672a7d120f 100644 --- a/nexus/src/context.rs +++ b/nexus/src/context.rs @@ -119,9 +119,9 @@ pub(crate) struct ConsoleConfig { } impl ServerContext { - /// Create a new context with the given rack id and log. This creates the - /// underlying nexus as well. - pub async fn new( + // Create a new context with the given rack id and log. This creates the + // underlying nexus as well. + async fn new( rack_id: Uuid, log: Logger, config: &NexusConfig, @@ -265,27 +265,36 @@ impl ServerContext { // // It must be explicitly terminated, so be cautious about returning // results beyond this point. - let pool = match &config.deployment.database { + let pool_builder = match &config.deployment.database { nexus_config::Database::FromUrl { url } => { info!( log, "Setting up qorb database pool from a single host"; "url" => #?url, ); - db::Pool::new_single_host( - &log, - &db::Config { url: url.clone() }, - ) + let cfg = db::Config { url: url.clone() }; + + db::PoolBuilder::new_single_host(&log, cfg) } nexus_config::Database::FromDns => { info!( log, "Setting up qorb database pool from DNS"; "dns_addrs" => ?qorb_resolver.bootstrap_dns_ips(), ); - db::Pool::new(&log, &qorb_resolver) + db::PoolBuilder::new(&log, &qorb_resolver) } }; - let pool = Arc::new(pool); + // If explicitly requested, backtrace collection of database connections can be + // overwritten. + let pool_builder = + if let Some(collect) = config.pkg.tunables.collect_backtraces { + pool_builder.collect_backtraces(collect) + } else { + pool_builder + }; + + let pool = Arc::new(pool_builder.build()); + let nexus = match Nexus::new_with_id( rack_id, log.new(o!("component" => "nexus")), diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index eddb0520761..89a3beba2bc 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -70,8 +70,8 @@ pub struct InternalServer { } impl InternalServer { - /// Start a nexus server. - pub async fn start( + // Start a nexus server. + async fn start( config: &NexusConfig, log: &Logger, ) -> Result { diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 93632d77c10..59554d0df1a 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -363,7 +363,11 @@ mod test { let logctx = dev::test_setup_log("test_populator"); let db = TestDatabase::new_populate_schema_only(&logctx.log).await; let cfg = db::Config { url: db.crdb().pg_config().clone() }; - let pool = Arc::new(db::Pool::new_single_host(&logctx.log, &cfg)); + let pool = Arc::new( + db::PoolBuilder::new_single_host(&logctx.log, cfg.clone()) + .collect_backtraces(false) + .build(), + ); let datastore = Arc::new( db::DataStore::new( &logctx.log, @@ -418,8 +422,15 @@ mod test { // // If we try again with a broken database, we should get a // ServiceUnavailable error, which indicates a transient failure. - let pool = - Arc::new(db::Pool::new_single_host_failfast(&logctx.log, &cfg)); + let pool = Arc::new( + db::PoolBuilder::new_single_host(&logctx.log, cfg) + .policy(qorb::policy::Policy { + claim_timeout: tokio::time::Duration::from_millis(1), + ..Default::default() + }) + .collect_backtraces(false) + .build(), + ); // We need to create the datastore before tearing down the database, as // it verifies the schema version of the DB while booting. let datastore = Arc::new( diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 37640ab2e8a..0b795787948 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -828,6 +828,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { planner_enabled: false, planner_config: PlannerConfig::default(), }); + // In tests, disable backtrace collection. + self.config.pkg.tunables.collect_backtraces = Some(false); self.config.deployment.internal_dns = InternalDns::FromAddress { address: self .internal_dns diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index ee00f37ad6a..994a5647341 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -826,12 +826,13 @@ impl MigrationContext<'_> { // Typically called as a part of a "before" function, to set up a connection // before a schema migration. async fn populate_pool_and_connection(&self, version: Version) { - let pool = nexus_db_queries::db::Pool::new_single_host( + use nexus_db_queries::db; + let pool = db::PoolBuilder::new_single_host( self.log, - &nexus_db_queries::db::Config { - url: self.crdb.pg_config().clone(), - }, - ); + db::Config { url: self.crdb.pg_config().clone() }, + ) + .collect_backtraces(false) + .build(); let conn = pool.claim().await.expect("failed to get pooled connection"); let mut map = self.pool_and_conn.lock().unwrap(); diff --git a/openapi/nexus-lockstep.json b/openapi/nexus-lockstep.json index 8dbc8b3c484..a03852f4d5a 100644 --- a/openapi/nexus-lockstep.json +++ b/openapi/nexus-lockstep.json @@ -7087,6 +7087,21 @@ "type" ] }, + { + "description": "Waiting on zones to propagate to inventory.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "inventory_propagation" + ] + } + }, + "required": [ + "type" + ] + }, { "description": "Waiting on updates to RoT / SP / Host OS / etc.", "type": "object",