diff --git a/Cargo.lock b/Cargo.lock index 7f0f523e70039e..a82967fb4b29ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,10 +13,10 @@ dependencies = [ ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aead" @@ -524,6 +524,7 @@ name = "agave-votor" version = "4.0.0-alpha.0" dependencies = [ "agave-logger", + "agave-votor", "agave-votor-messages", "anyhow", "bincode", @@ -586,7 +587,11 @@ version = "4.0.0-alpha.0" dependencies = [ "agave-feature-set", "agave-logger", + "agave-votor-messages", + "bitvec", + "bytemuck", "log", + "num_enum", "serde", "solana-address 2.0.0", "solana-bls-signatures", @@ -596,6 +601,7 @@ dependencies = [ "solana-frozen-abi-macro", "solana-hash 3.1.0", "solana-pubkey 4.0.0", + "tempfile", ] [[package]] @@ -1888,9 +1894,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -3130,9 +3136,9 @@ checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e" [[package]] name = "flate2" -version = "1.0.31" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -4284,9 +4290,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -4769,11 +4775,12 @@ checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ - "adler", + "adler2", + "simd-adler32", ] [[package]] @@ -6950,6 +6957,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "simpl" version = "0.1.0" @@ -7494,9 +7507,9 @@ dependencies = [ [[package]] name = "solana-bls-signatures" -version = "2.0.0-alpha.1" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acb2f8e2ce54f8798c8a5eb5d06f4cb4f596a5879be5ad81e95a60a1bde9359" +checksum = "e21cad136370a83c91bbe9348c69a510222d8d70144154ca37edff59df789661" dependencies = [ "base64 0.22.1", "blst", @@ -7754,7 +7767,7 @@ dependencies = [ "solana-signature", "solana-signer", "solana-system-interface 3.0.0", - "solana-zk-sdk", + "solana-zk-sdk 5.0.0", "tempfile", "thiserror 2.0.17", "tiny-bip39", @@ -7837,7 +7850,7 @@ dependencies = [ "solana-signer", "solana-slot-history", "solana-stake-interface", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-sysvar", "solana-test-validator", "solana-tps-client", @@ -7943,7 +7956,6 @@ dependencies = [ "solana-pubkey 4.0.0", "solana-pubsub-client", "solana-quic-client", - "solana-quic-definitions", "solana-rpc-client", "solana-rpc-client-api", 
"solana-rpc-client-nonce-utils", @@ -7951,6 +7963,7 @@ dependencies = [ "solana-signer", "solana-streamer", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client", "solana-transaction", "solana-transaction-error", @@ -8274,7 +8287,6 @@ dependencies = [ "solana-program-runtime", "solana-pubkey 4.0.0", "solana-quic-client", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rent", "solana-rpc", @@ -8628,7 +8640,7 @@ dependencies = [ "solana-packet 4.0.0", "solana-pubkey 4.0.0", "solana-signer", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-system-transaction", "solana-transaction", "solana-version", @@ -8911,7 +8923,6 @@ dependencies = [ "solana-packet 4.0.0", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rpc-client", "solana-runtime", @@ -9999,7 +10010,6 @@ dependencies = [ "solana-perf", "solana-pubkey 4.0.0", "solana-quic-client", - "solana-quic-definitions", "solana-rpc-client-api", "solana-signer", "solana-streamer", @@ -10010,15 +10020,6 @@ dependencies = [ "tokio-util 0.7.18", ] -[[package]] -name = "solana-quic-definitions" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15319accf7d3afd845817aeffa6edd8cc185f135cefbc6b985df29cfd8c09609" -dependencies = [ - "solana-keypair", -] - [[package]] name = "solana-rayon-threadlimit" version = "4.0.0-alpha.0" @@ -10141,7 +10142,6 @@ dependencies = [ "solana-program-pack", "solana-program-runtime", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rent", "solana-rpc", @@ -10162,6 +10162,7 @@ dependencies = [ "solana-system-transaction", "solana-sysvar", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client", "solana-transaction", "solana-transaction-context", @@ -10657,13 +10658,13 @@ dependencies = [ "solana-nonce", "solana-nonce-account", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-runtime", 
"solana-signature", "solana-signer", "solana-system-interface 3.0.0", "solana-system-transaction", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client-next", "tokio", "tokio-util 0.7.18", @@ -10965,7 +10966,6 @@ dependencies = [ "solana-packet 4.0.0", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-signature", "solana-signer", "solana-streamer", @@ -11884,9 +11884,9 @@ dependencies = [ "solana-net-utils", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-signer", "solana-streamer", + "solana-tls-utils", "solana-transaction-metrics-tracker", "solana-version", "thiserror 2.0.17", @@ -12052,7 +12052,7 @@ dependencies = [ "solana-program-runtime", "solana-sdk-ids", "solana-svm-log-collector", - "solana-zk-sdk", + "solana-zk-sdk 5.0.0", ] [[package]] @@ -12071,7 +12071,7 @@ dependencies = [ "solana-system-interface 3.0.0", "solana-transaction", "solana-transaction-error", - "solana-zk-sdk", + "solana-zk-sdk 5.0.0", ] [[package]] @@ -12111,6 +12111,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = "solana-zk-sdk" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89042b5867c7440526d47085db2cd11a7ae557461a4f41a3b3a569799dd9d6" +dependencies = [ + "aes-gcm-siv", + "base64 0.22.1", + "bincode", + "bytemuck", + "bytemuck_derive", + "curve25519-dalek 4.1.3", + "itertools 0.14.0", + "merlin", + "num-derive", + "num-traits", + "rand 0.8.5", + "serde", + "serde_derive", + "serde_json", + "sha3", + "solana-address 2.0.0", + "solana-derivation-path", + "solana-instruction", + "solana-sdk-ids", + "solana-seed-derivable", + "solana-seed-phrase", + "solana-signature", + "solana-signer", + "subtle", + "thiserror 2.0.17", + "zeroize", +] + [[package]] name = "solana-zk-token-proof-program" version = "4.0.0-alpha.0" @@ -12216,7 +12250,7 @@ dependencies = [ "solana-program-error", "solana-program-option", "solana-pubkey 3.0.0", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", 
"thiserror 2.0.17", ] @@ -12238,7 +12272,7 @@ dependencies = [ "solana-program-pack", "solana-pubkey 3.0.0", "solana-sdk-ids", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "spl-pod", "spl-token-confidential-transfer-proof-extraction", "spl-token-confidential-transfer-proof-generation", @@ -12263,7 +12297,7 @@ dependencies = [ "solana-program-error", "solana-pubkey 3.0.0", "solana-sdk-ids", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "spl-pod", "thiserror 2.0.17", ] @@ -12275,7 +12309,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63a2b41095945dc15274b924b21ccae9b3ec9dc2fdd43dbc08de8c33bbcd915" dependencies = [ "curve25519-dalek 4.1.3", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "thiserror 2.0.17", ] @@ -13541,9 +13575,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if 1.0.4", "once_cell", @@ -13566,9 +13600,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -13576,9 +13610,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -13589,9 +13623,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] diff --git a/Cargo.toml b/Cargo.toml index 87658a563d0a80..1c0721a6f904a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -250,7 +250,7 @@ caps = "0.5.6" cargo_metadata = "0.15.4" cfg-if = "1.0.4" cfg_eval = "0.1.2" -chrono = { version = "0.4.42", default-features = false } +chrono = { version = "0.4.43", default-features = false } chrono-humanize = "0.2.3" clap = "2.33.1" console = "0.16.2" @@ -282,7 +282,7 @@ env_logger = "0.11.8" fast-math = "0.1" fd-lock = "4.0.4" five8_const = "0.1.4" -flate2 = "1.0.31" +flate2 = "1.1.8" fnv = "1.0.7" fs_extra = "1.3.0" futures = "0.3.31" @@ -308,7 +308,7 @@ itertools = "0.14.0" jemallocator = { package = "tikv-jemallocator", version = "0.6.0", features = [ "unprefixed_malloc_on_supported_platforms", ] } -js-sys = "0.3.83" +js-sys = "0.3.85" json5 = "1.3.0" jsonrpc-core = "18.0.0" jsonrpc-core-client = "18.0.0" @@ -408,7 +408,7 @@ solana-big-mod-exp = "3.0.0" solana-bincode = "3.1.0" solana-blake3-hasher = "3.1.0" solana-bloom = { path = "bloom", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } -solana-bls-signatures = { version = "2.0.0-alpha.1", features = ["serde"] } #TODO: change to 2.0.0 before this gets into 4.0 +solana-bls-signatures = { version = "3.0.0", features = ["serde"] } solana-bn254 = "3.1.2" solana-borsh = "3.0.0" solana-bpf-loader-program = { path = "programs/bpf_loader", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } @@ -505,7 +505,6 @@ solana-program-test = { path = "program-test", version = "=4.0.0-alpha.0", featu solana-pubkey = { version = "4.0.0", default-features = false } solana-pubsub-client = { path = "pubsub-client", version = "=4.0.0-alpha.0" } 
solana-quic-client = { path = "quic-client", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } -solana-quic-definitions = "3.0.0" solana-rayon-threadlimit = { path = "rayon-threadlimit", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } solana-remote-wallet = { path = "remote-wallet", version = "=4.0.0-alpha.0", default-features = false, features = ["agave-unstable-api"] } solana-rent = "3.0.0" @@ -581,7 +580,7 @@ solana-vote-interface = "5.0.0" solana-vote-program = { path = "programs/vote", version = "=4.0.0-alpha.0", default-features = false, features = ["agave-unstable-api"] } solana-wen-restart = { path = "wen-restart", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } solana-zk-elgamal-proof-program = { path = "programs/zk-elgamal-proof", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } -solana-zk-sdk = "4.0.0" +solana-zk-sdk = "5.0.0" solana-zk-token-proof-program = { path = "programs/zk-token-proof", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } spl-associated-token-account-interface = "2.0.0" spl-generic-token = "2.0.0" diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index dae637ac81cb3e..3eec9926f84af0 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -1029,11 +1029,6 @@ impl AccountsDb { let accounts_index_config = accounts_db_config.index.unwrap_or_default(); let accounts_index = AccountsIndex::new(&accounts_index_config, exit); - let bank_hash_details_dir = accounts_db_config.bank_hash_details_dir.unwrap_or_else(|| { - warn!("bank hash details dir is unset"); - PathBuf::new() - }); - let (paths, temp_paths) = if paths.is_empty() { // Create a temporary set of accounts directories, used primarily // for testing @@ -1083,7 +1078,7 @@ impl AccountsDb { let new = Self { accounts_index, paths, - bank_hash_details_dir, + bank_hash_details_dir: accounts_db_config.bank_hash_details_dir, temp_paths, shrink_paths, 
skip_initial_hash_calc: accounts_db_config.skip_initial_hash_calc, @@ -3674,7 +3669,7 @@ impl AccountsDb { // | | // V | // F3 store_accounts_frozen()/ | index - // update_index() | (replaces existing store_id, offset in caches) + // update_index_stored_accounts()| (replaces existing store_id, offset in caches) // | | // V | // F4 accounts_cache.remove_slot() | map of caches (removes old entry) @@ -3694,7 +3689,7 @@ impl AccountsDb { // | | // V | // S3 store_accounts_frozen()/ | index - // update_index() | (replaces existing store_id, offset in stores) + // update_index_stored_accounts()| (replaces existing store_id, offset in stores) // | | // V | // S4 do_shrink_slot_store()/ | map of stores (removes old entry) @@ -4605,10 +4600,12 @@ impl AccountsDb { excess_slot_count = old_slots.len(); let mut flush_stats = FlushStats::default(); old_slots.into_iter().for_each(|old_slot| { - // Don't flush slots that are known to be unrooted + // Only flush unrooted slots > max_flushed_root. Slots older < max_flushed_root + // cannot have max_flushed_root as an ancestor, and thus will never become rooted. + // The unrootable slots will get purged later. 
if old_slot > max_flushed_root { if self.should_aggressively_flush_cache() { - if let Some(stats) = self.flush_slot_cache(old_slot) { + if let Some(stats) = self.flush_unrooted_slot_cache(old_slot) { flush_stats.accumulate(&stats); } } @@ -4859,8 +4856,19 @@ impl AccountsDb { flush_stats } - /// flush all accounts in this slot - fn flush_slot_cache(&self, slot: Slot) -> Option { + /// Flushes an unrooted slot from the write cache to storage to free up memory + #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] + fn flush_unrooted_slot_cache(&self, slot: Slot) -> Option { + assert!( + !self + .accounts_index + .roots_tracker + .read() + .unwrap() + .alive_roots + .contains(&slot), + "slot: {slot}" + ); self.flush_slot_cache_with_clean(slot, None::<&mut fn(&_) -> bool>, None) } @@ -5119,9 +5127,60 @@ impl AccountsDb { } /// Updates the accounts index with the given `infos` and `accounts`. + /// Used for cached accounts only. + fn update_index_cached_accounts<'a>( + &self, + infos: Vec, + accounts: &impl StorableAccounts<'a>, + update_index_thread_selection: UpdateIndexThreadSelection, + ) { + let target_slot = accounts.target_slot(); + let len = std::cmp::min(accounts.len(), infos.len()); + + let update = |start, end| { + (start..end).for_each(|i| { + accounts.account(i, |account| { + let info = infos[i]; + debug_assert!(info.is_cached()); + self.accounts_index.upsert( + target_slot, + target_slot, + account.pubkey(), + &account, + &self.account_indexes, + info, + ReclaimsSlotList::default().as_mut(), + UpsertReclaim::PreviousSlotEntryWasCached, + ); + }); + }); + }; + + let threshold = 1; + if matches!( + update_index_thread_selection, + UpdateIndexThreadSelection::PoolWithThreshold, + ) && len > threshold + { + let chunk_size = std::cmp::max(1, len / quarter_thread_count()); // # pubkeys/thread + let batches = 1 + len / chunk_size; + self.thread_pool_foreground.install(|| { + (0..batches).into_par_iter().for_each(|batch| { + let start = batch * 
chunk_size; + let end = std::cmp::min(start + chunk_size, len); + update(start, end) + }) + }); + } else { + update(0, len); + } + } + + /// Updates the accounts index with the given `infos` and `accounts`. + /// Used when storing accounts to storage. /// Returns a vector of `SlotList` containing the reclaims for each batch processed. /// The element of the returned vector is guaranteed to be non-empty. - fn update_index<'a>( + fn update_index_stored_accounts<'a>( &self, infos: Vec, accounts: &impl StorableAccounts<'a>, @@ -5144,7 +5203,8 @@ impl AccountsDb { let mut reclaims = ReclaimsSlotList::with_capacity((end - start) / 2); (start..end).for_each(|i| { - let info = infos[i]; + let info: AccountInfo = infos[i]; + debug_assert!(!info.is_cached()); accounts.account(i, |account| { let old_slot = accounts.slot(i); self.accounts_index.upsert( @@ -5571,13 +5631,7 @@ impl AccountsDb { // Update the index let mut update_index_time = Measure::start("update_index"); - self.update_index( - infos, - &accounts, - UpsertReclaim::PreviousSlotEntryWasCached, - update_index_thread_selection, - &self.thread_pool_foreground, - ); + self.update_index_cached_accounts(infos, &accounts, update_index_thread_selection); update_index_time.stop(); self.stats @@ -5646,7 +5700,7 @@ impl AccountsDb { // after the account are stored by the above `store_accounts_to` // call and all the accounts are stored, all reads after this point // will know to not check the cache anymore - let reclaims = self.update_index( + let reclaims = self.update_index_stored_accounts( infos, &accounts, reclaim_handling, @@ -6999,7 +7053,16 @@ impl AccountsDb { } pub fn flush_accounts_cache_slot_for_tests(&self, slot: Slot) { - self.flush_slot_cache(slot); + assert!( + self.accounts_index + .roots_tracker + .read() + .unwrap() + .alive_roots + .contains(&slot), + "slot: {slot}" + ); + self.flush_slot_cache_with_clean(slot, None::<&mut fn(&_) -> bool>, None); } /// useful to adapt tests written prior to introduction 
of the write cache @@ -7053,8 +7116,37 @@ impl AccountsDb { } } - /// callers used to call store_uncached. But, this is not allowed anymore. + // Store accounts for tests. For zero-lamport accounts, first store a single-lamport + // placeholder, then store the actual account. This is to ensure that an index entry is created + // for zero-lamport accounts. pub fn store_for_tests<'a>(&self, accounts: impl StorableAccounts<'a>) { + let slot = accounts.target_slot(); + + let placeholder = AccountSharedData::new(1, 0, &Pubkey::default()); + + // Build a list of zero-lamport accounts not present in the index + let mut pre_populate_zero_lamport = Vec::new(); + for i in 0..accounts.len() { + if accounts.is_zero_lamport(i) { + let key = *accounts.pubkey(i); + if self + .accounts_index + .get_and_then(&key, |account| (true, account.is_none())) + { + // Account is not in the index, need to pre-populate with placeholder + pre_populate_zero_lamport.push((key, placeholder.clone())); + } + } + } + + // Pre-populate new zero-lamport accounts with single-lamport placeholders. + self.store_accounts_unfrozen( + (slot, pre_populate_zero_lamport.as_slice()), + None, + UpdateIndexThreadSelection::PoolWithThreshold, + ); + + // Then store the actual accounts provided by the caller. self.store_accounts_unfrozen( accounts, None, diff --git a/accounts-db/src/accounts_db/accounts_db_config.rs b/accounts-db/src/accounts_db/accounts_db_config.rs index 7b18c8b2d1cd39..302bd02831f481 100644 --- a/accounts-db/src/accounts_db/accounts_db_config.rs +++ b/accounts-db/src/accounts_db/accounts_db_config.rs @@ -19,11 +19,7 @@ use { pub struct AccountsDbConfig { pub index: Option, pub account_indexes: Option, - // We need the Option wrapper until we're on Rust 1.91 or newer, - // because PathBuf::new() is non-const until then. - // For now, the only way for ACCOUNTS_DB_CONFIG_FOR_TESTING/BENCHMARKS - // to indicate they do not use bank_hash_details_dir is to use None. 
- pub bank_hash_details_dir: Option, + pub bank_hash_details_dir: PathBuf, pub shrink_paths: Option>, pub shrink_ratio: AccountShrinkThreshold, /// The low and high watermark sizes for the read cache, in bytes. @@ -57,7 +53,7 @@ pub struct AccountsDbConfig { pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig { index: Some(ACCOUNTS_INDEX_CONFIG_FOR_TESTING), account_indexes: None, - bank_hash_details_dir: None, // tests don't use bank hash details + bank_hash_details_dir: PathBuf::new(), // tests don't use bank hash details shrink_paths: None, shrink_ratio: DEFAULT_ACCOUNTS_SHRINK_THRESHOLD_OPTION, read_cache_limit_bytes: None, @@ -80,7 +76,7 @@ pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig { pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig { index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS), account_indexes: None, - bank_hash_details_dir: None, // benches don't use bank hash details + bank_hash_details_dir: PathBuf::new(), // benches don't use bank hash details shrink_paths: None, shrink_ratio: DEFAULT_ACCOUNTS_SHRINK_THRESHOLD_OPTION, read_cache_limit_bytes: None, diff --git a/accounts-db/src/accounts_db/tests.rs b/accounts-db/src/accounts_db/tests.rs index 9b1ac3ff61863d..ce62b2e2189c65 100644 --- a/accounts-db/src/accounts_db/tests.rs +++ b/accounts-db/src/accounts_db/tests.rs @@ -4496,7 +4496,7 @@ fn test_cache_flush_delayed_remove_unrooted_race() { if flush_trial_start_receiver.recv().is_err() { return; } - db.flush_slot_cache(10); + db.flush_unrooted_slot_cache(10); flush_done_sender.send(()).unwrap(); }) .unwrap() @@ -4558,7 +4558,7 @@ fn test_cache_flush_remove_unrooted_race_multiple_slots() { return; } for slot in 0..num_cached_slots { - db.flush_slot_cache(slot); + db.flush_unrooted_slot_cache(slot); } flush_done_sender.send(()).unwrap(); }) @@ -6478,10 +6478,13 @@ fn test_mark_obsolete_accounts_at_startup_purge_slot() { // Store the same pubkey in multiple slots // 
Store other pubkey in slot0 to ensure slot is not purged accounts_db.store_for_tests((0, [(&pubkey1, &account), (&pubkey2, &account)].as_slice())); + accounts_db.add_root(0); accounts_db.flush_accounts_cache_slot_for_tests(0); accounts_db.store_for_tests((1, [(&pubkey1, &account)].as_slice())); + accounts_db.add_root(1); accounts_db.flush_accounts_cache_slot_for_tests(1); accounts_db.store_for_tests((2, [(&pubkey1, &account)].as_slice())); + accounts_db.add_root(2); accounts_db.flush_accounts_cache_slot_for_tests(2); let pubkeys_with_duplicates_by_bin = vec![vec![pubkey1]]; @@ -6514,6 +6517,7 @@ fn test_mark_obsolete_accounts_at_startup_multiple_bins() { slot, [(&pubkey1, &account), (&pubkey2, &account)].as_slice(), )); + accounts_db.add_root(slot); accounts_db.flush_accounts_cache_slot_for_tests(slot); } diff --git a/bloom/Cargo.toml b/bloom/Cargo.toml index 250450e69763cf..bafa976eb2f217 100644 --- a/bloom/Cargo.toml +++ b/bloom/Cargo.toml @@ -7,7 +7,7 @@ authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } license = { workspace = true } -edition = { workspace = true } +edition = "2024" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] diff --git a/bloom/benches/bloom.rs b/bloom/benches/bloom.rs index afd6ac896b0f17..dd3ccc1e1470d0 100644 --- a/bloom/benches/bloom.rs +++ b/bloom/benches/bloom.rs @@ -1,7 +1,7 @@ #![allow(clippy::arithmetic_side_effects)] use { - bencher::{benchmark_group, benchmark_main, Bencher}, + bencher::{Bencher, benchmark_group, benchmark_main}, bv::BitVec, fnv::FnvHasher, rand::Rng, diff --git a/ci/test-checks.sh b/ci/test-checks.sh index f2839ee61ad932..047b923573de8a 100755 --- a/ci/test-checks.sh +++ b/ci/test-checks.sh @@ -24,6 +24,20 @@ fi export RUST_BACKTRACE=1 export RUSTFLAGS="-D warnings -A incomplete_features" +# sort +if [[ -n $CI ]]; then + # exclude from printing "Checking xxx ..." 
+ _ scripts/cargo-for-all-lock-files.sh -- "+${rust_nightly}" sort --workspace --check > /dev/null +else + _ scripts/cargo-for-all-lock-files.sh -- "+${rust_nightly}" sort --workspace --check +fi + +# check dev-context-only-utils isn't used in normal dependencies +_ scripts/check-dev-context-only-utils.sh tree + +# fmt +_ scripts/cargo-for-all-lock-files.sh -- "+${rust_nightly}" fmt --all -- --check + # run cargo check for all rust files in this monorepo for faster turnaround in # case of any compilation/build error for nightly @@ -47,17 +61,6 @@ _ ci/order-crates-for-publishing.py _ scripts/cargo-clippy.sh -if [[ -n $CI ]]; then - # exclude from printing "Checking xxx ..." - _ scripts/cargo-for-all-lock-files.sh -- "+${rust_nightly}" sort --workspace --check > /dev/null -else - _ scripts/cargo-for-all-lock-files.sh -- "+${rust_nightly}" sort --workspace --check -fi - -_ scripts/check-dev-context-only-utils.sh tree - -_ scripts/cargo-for-all-lock-files.sh -- "+${rust_nightly}" fmt --all -- --check - _ ci/do-audit.sh if [[ -n $CI ]] && [[ $CHANNEL = "stable" ]]; then diff --git a/clap-utils/src/input_parsers.rs b/clap-utils/src/input_parsers.rs index 3c239e2263e362..5cf57ec93ef7a9 100644 --- a/clap-utils/src/input_parsers.rs +++ b/clap-utils/src/input_parsers.rs @@ -438,8 +438,8 @@ mod tests { #[test] fn test_bls_pubkeys_of() { - let bls_pubkey1: BLSPubkey = BLSKeypair::new().public; - let bls_pubkey2: BLSPubkey = BLSKeypair::new().public; + let bls_pubkey1: BLSPubkey = BLSKeypair::new().public.into(); + let bls_pubkey2: BLSPubkey = BLSKeypair::new().public.into(); let bls_pubkey1_compressed: BLSPubkeyCompressed = bls_pubkey1.try_into().unwrap(); let bls_pubkey2_compressed: BLSPubkeyCompressed = bls_pubkey2.try_into().unwrap(); let matches = app().get_matches_from(vec![ diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 36215b2f8b786a..f14fd43b87bb9e 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -91,7 +91,7 @@ solana-signature = { version = "=3.1.0", 
default-features = false } solana-signer = "=3.0.0" solana-slot-history = "=3.0.0" solana-stake-interface = "=2.0.2" -solana-system-interface = { version = "=2.0", features = ["bincode"] } +solana-system-interface = { version = "=3.0", features = ["bincode"] } solana-sysvar = "=3.1.1" solana-tps-client = { workspace = true } solana-tpu-client = { workspace = true, features = ["default"] } diff --git a/client/Cargo.toml b/client/Cargo.toml index 18f0312cd76962..ea083c6c2e4b85 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -41,7 +41,6 @@ solana-net-utils = { workspace = true } solana-pubkey = { workspace = true } solana-pubsub-client = { workspace = true } solana-quic-client = { workspace = true } -solana-quic-definitions = { workspace = true } solana-rpc-client = { workspace = true, features = ["default"] } solana-rpc-client-api = { workspace = true } solana-rpc-client-nonce-utils = { workspace = true } @@ -49,6 +48,7 @@ solana-signature = { workspace = true } solana-signer = { workspace = true } solana-streamer = { workspace = true } solana-time-utils = { workspace = true } +solana-tls-utils = { workspace = true } solana-tpu-client = { workspace = true, features = ["default"] } solana-transaction = { workspace = true } solana-transaction-error = { workspace = true } diff --git a/client/src/connection_cache.rs b/client/src/connection_cache.rs index 1fdc27ac922979..7358ca057d5013 100644 --- a/client/src/connection_cache.rs +++ b/client/src/connection_cache.rs @@ -10,8 +10,8 @@ use { solana_keypair::Keypair, solana_pubkey::Pubkey, solana_quic_client::{QuicConfig, QuicConnectionManager, QuicPool}, - solana_quic_definitions::NotifyKeyUpdate, solana_streamer::streamer::StakedNodes, + solana_tls_utils::NotifyKeyUpdate, solana_transaction_error::TransportResult, solana_udp_client::{UdpConfig, UdpConnectionManager, UdpPool}, std::{ diff --git a/core/Cargo.toml b/core/Cargo.toml index 282f37af168dbe..1c2b43a2caf5c1 100644 --- a/core/Cargo.toml +++ 
b/core/Cargo.toml @@ -135,7 +135,6 @@ solana-poh = { workspace = true } solana-poh-config = { workspace = true } solana-pubkey = { workspace = true } solana-quic-client = { workspace = true } -solana-quic-definitions = { workspace = true } solana-rayon-threadlimit = { workspace = true } solana-rent = { workspace = true } solana-rpc = { workspace = true } diff --git a/core/src/admin_rpc_post_init.rs b/core/src/admin_rpc_post_init.rs index 1081fec6099a27..03e1e0375dca3a 100644 --- a/core/src/admin_rpc_post_init.rs +++ b/core/src/admin_rpc_post_init.rs @@ -6,8 +6,8 @@ use { }, solana_gossip::{cluster_info::ClusterInfo, node::NodeMultihoming}, solana_pubkey::Pubkey, - solana_quic_definitions::NotifyKeyUpdate, solana_runtime::{bank_forks::BankForks, snapshot_controller::SnapshotController}, + solana_tls_utils::NotifyKeyUpdate, std::{ collections::{HashMap, HashSet}, net::UdpSocket, diff --git a/core/src/forwarding_stage.rs b/core/src/forwarding_stage.rs index e3f88ba73e573d..779a04c8e18e9d 100644 --- a/core/src/forwarding_stage.rs +++ b/core/src/forwarding_stage.rs @@ -18,7 +18,6 @@ use { solana_packet as packet, solana_perf::data_budget::DataBudget, solana_poh::poh_recorder::PohRecorder, - solana_quic_definitions::NotifyKeyUpdate, solana_runtime::{ bank::{Bank, CollectorFeeDetails}, bank_forks::SharableBanks, @@ -27,6 +26,7 @@ use { runtime_transaction::RuntimeTransaction, transaction_meta::StaticMeta, }, solana_streamer::sendmmsg::{batch_send, SendPktsError}, + solana_tls_utils::NotifyKeyUpdate, solana_tpu_client_next::{ connection_workers_scheduler::{ BindTarget, ConnectionWorkersSchedulerConfig, Fanout, StakeIdentity, diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 51980b6dcf2865..56d690cdab614b 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -4139,7 +4139,6 @@ impl ReplayStage { ) }, ) - .expect("rooting must succeed") } // To avoid code duplication and keep compatibility with alpenglow, we add this diff --git 
a/core/src/tpu.rs b/core/src/tpu.rs index 35e1db997eca34..7fadf649cb2bb7 100644 --- a/core/src/tpu.rs +++ b/core/src/tpu.rs @@ -101,6 +101,10 @@ impl SigVerifier { // Conservatively allow 20 TPS per validator. pub const MAX_VOTES_PER_SECOND: u64 = 20; +/// Size of the channel between streamer and TPU sigverify stage. The values have been selected to +/// be conservative max of obsersed on mnb during high-load events. +const TPU_CHANNEL_SIZE: usize = 50_000; + pub struct Tpu { fetch_stage: FetchStage, sig_verifier: SigVerifier, @@ -173,7 +177,7 @@ impl Tpu { vortexor_receivers, } = sockets; - let (packet_sender, packet_receiver) = unbounded(); + let (packet_sender, packet_receiver) = bounded(TPU_CHANNEL_SIZE); let (vote_packet_sender, vote_packet_receiver) = unbounded(); let (forwarded_packet_sender, forwarded_packet_receiver) = unbounded(); let fetch_stage = FetchStage::new_with_sender( diff --git a/core/src/validator.rs b/core/src/validator.rs index bfdbf83968b849..3d3d2887e6b235 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -2871,6 +2871,12 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo ); } } + datapoint_info!( + "wfsm_gossip", + ("online_stake", online_stake, i64), + ("offline_stake", offline_stake, i64), + ("total_activated_stake", total_activated_stake, i64), + ); } online_stake_percentage as u64 diff --git a/dev-bins/Cargo.lock b/dev-bins/Cargo.lock index 7c8b03fd89c2fb..4a0dc75403edc9 100644 --- a/dev-bins/Cargo.lock +++ b/dev-bins/Cargo.lock @@ -181,7 +181,7 @@ dependencies = [ "solana-svm-feature-set", "solana-svm-log-collector", "solana-svm-type-overrides", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-transaction", "solana-transaction-context", "solana-transaction-status", @@ -298,7 +298,7 @@ dependencies = [ "solana-account", "solana-accounts-db", "solana-pubkey 4.0.0", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-version", ] @@ 
-409,6 +409,7 @@ dependencies = [ "solana-signature", "solana-signer", "solana-signer-store", + "solana-streamer", "solana-time-utils", "solana-transaction", "solana-transaction-error", @@ -423,7 +424,10 @@ version = "4.0.0-alpha.0" dependencies = [ "agave-feature-set", "agave-logger", + "bitvec", + "bytemuck", "log", + "num_enum", "serde", "solana-address 2.0.0", "solana-bls-signatures", @@ -1512,9 +1516,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -2577,9 +2581,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -6269,7 +6273,7 @@ dependencies = [ "solana-runtime", "solana-signature", "solana-signer", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-system-transaction", "solana-time-utils", "solana-tpu-client", @@ -6406,7 +6410,7 @@ dependencies = [ "solana-signature", "solana-signer", "solana-streamer", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-test-validator", "solana-time-utils", "solana-tps-client", @@ -6467,9 +6471,9 @@ dependencies = [ [[package]] name = "solana-bls-signatures" -version = "2.0.0-alpha.1" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acb2f8e2ce54f8798c8a5eb5d06f4cb4f596a5879be5ad81e95a60a1bde9359" +checksum = 
"e21cad136370a83c91bbe9348c69a510222d8d70144154ca37edff59df789661" dependencies = [ "base64 0.22.1", "blst", @@ -6701,7 +6705,6 @@ dependencies = [ "solana-pubkey 4.0.0", "solana-pubsub-client", "solana-quic-client", - "solana-quic-definitions", "solana-rpc-client", "solana-rpc-client-api", "solana-rpc-client-nonce-utils", @@ -6709,6 +6712,7 @@ dependencies = [ "solana-signer", "solana-streamer", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client", "solana-transaction", "solana-transaction-error", @@ -6951,7 +6955,6 @@ dependencies = [ "solana-poh-config", "solana-pubkey 4.0.0", "solana-quic-client", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rent", "solana-rpc", @@ -7140,7 +7143,7 @@ dependencies = [ "solana-signature", "solana-signer", "solana-stake-interface", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-time-utils", "solana-tps-client", "solana-tpu-client", @@ -7270,7 +7273,7 @@ dependencies = [ "solana-packet 4.0.0", "solana-pubkey 4.0.0", "solana-signer", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-system-transaction", "solana-transaction", "solana-version", @@ -7502,7 +7505,6 @@ dependencies = [ "solana-packet 4.0.0", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rpc-client", "solana-runtime", @@ -8380,7 +8382,6 @@ dependencies = [ "solana-metrics", "solana-net-utils", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rpc-client-api", "solana-signer", "solana-streamer", @@ -8390,15 +8391,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "solana-quic-definitions" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15319accf7d3afd845817aeffa6edd8cc185f135cefbc6b985df29cfd8c09609" -dependencies = [ - "solana-keypair", -] - [[package]] name = "solana-rayon-threadlimit" version = "4.0.0-alpha.0" @@ -8502,7 +8494,6 @@ dependencies = [ 
"solana-poh-config", "solana-program-pack", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rpc-client-api", "solana-runtime", @@ -8517,6 +8508,7 @@ dependencies = [ "solana-system-transaction", "solana-sysvar", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client", "solana-transaction", "solana-transaction-context", @@ -8907,10 +8899,10 @@ dependencies = [ "solana-metrics", "solana-nonce-account", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-runtime", "solana-signature", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client-next", "tokio", "tokio-util 0.7.18", @@ -9163,7 +9155,6 @@ dependencies = [ "solana-packet 4.0.0", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-signature", "solana-signer", "solana-time-utils", @@ -9916,7 +9907,7 @@ dependencies = [ "solana-program-runtime", "solana-sdk-ids", "solana-svm-log-collector", - "solana-zk-sdk", + "solana-zk-sdk 5.0.0", ] [[package]] @@ -9956,6 +9947,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = "solana-zk-sdk" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89042b5867c7440526d47085db2cd11a7ae557461a4f41a3b3a569799dd9d6" +dependencies = [ + "aes-gcm-siv", + "base64 0.22.1", + "bincode", + "bytemuck", + "bytemuck_derive", + "curve25519-dalek 4.1.3", + "itertools 0.14.0", + "merlin", + "num-derive", + "num-traits", + "rand 0.8.5", + "serde", + "serde_derive", + "serde_json", + "sha3", + "solana-address 2.0.0", + "solana-derivation-path", + "solana-instruction", + "solana-sdk-ids", + "solana-seed-derivable", + "solana-seed-phrase", + "solana-signature", + "solana-signer", + "subtle", + "thiserror 2.0.17", + "zeroize", +] + [[package]] name = "solana-zk-token-proof-program" version = "4.0.0-alpha.0" @@ -10073,7 +10098,7 @@ dependencies = [ "solana-program-error", "solana-program-option", "solana-pubkey 3.0.0", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", 
"thiserror 2.0.17", ] @@ -10095,7 +10120,7 @@ dependencies = [ "solana-program-pack", "solana-pubkey 3.0.0", "solana-sdk-ids", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "spl-pod", "spl-token-confidential-transfer-proof-extraction", "spl-token-confidential-transfer-proof-generation", @@ -10120,7 +10145,7 @@ dependencies = [ "solana-program-error", "solana-pubkey 3.0.0", "solana-sdk-ids", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "spl-pod", "thiserror 2.0.17", ] @@ -10132,7 +10157,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63a2b41095945dc15274b924b21ccae9b3ec9dc2fdd43dbc08de8c33bbcd915" dependencies = [ "curve25519-dalek 4.1.3", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "thiserror 2.0.17", ] diff --git a/dev-bins/Cargo.toml b/dev-bins/Cargo.toml index f86c1828a64dd3..f21950a7d74498 100644 --- a/dev-bins/Cargo.toml +++ b/dev-bins/Cargo.toml @@ -146,7 +146,7 @@ solana-svm-callback = { path = "../svm-callback", version = "=4.0.0-alpha.0", fe solana-svm-feature-set = { path = "../svm-feature-set", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } solana-svm-log-collector = { path = "../svm-log-collector", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } solana-svm-type-overrides = { path = "../svm-type-overrides", version = "=4.0.0-alpha.0", features = ["agave-unstable-api"] } -solana-system-interface = "2.0" +solana-system-interface = "3.0" solana-system-transaction = "3.0.0" solana-test-validator = { path = "../test-validator", version = "=4.0.0-alpha.0" } solana-time-utils = "3.0.0" diff --git a/faucet/Cargo.toml b/faucet/Cargo.toml index 1ce5b2c0f5be2b..d2c64d03039022 100644 --- a/faucet/Cargo.toml +++ b/faucet/Cargo.toml @@ -43,7 +43,7 @@ solana-net-utils = { workspace = true } solana-packet = "=4.0.0" solana-pubkey = { version = "=4.0.0", features = ["rand"] } solana-signer = "=3.0.0" -solana-system-interface = "=2.0" +solana-system-interface = "=3.0" solana-system-transaction = "=3.0.0" 
solana-transaction = "=3.0.2" solana-version = { workspace = true } diff --git a/feature-set/src/lib.rs b/feature-set/src/lib.rs index 3839ee64fb2111..ff78a9e80a1e9a 100644 --- a/feature-set/src/lib.rs +++ b/feature-set/src/lib.rs @@ -1133,7 +1133,7 @@ pub mod disable_zk_elgamal_proof_program { } pub mod reenable_zk_elgamal_proof_program { - solana_pubkey::declare_id!("zkesAyFB19sTkX8i9ReoKaMNDA4YNTPYJpZKPDt7FMW"); + solana_pubkey::declare_id!("zkexuyPRdyTVbZqEAREueqL2xvvoBhRgth9xGSc1tMN"); } pub mod raise_block_limits_to_100m { diff --git a/genesis/src/main.rs b/genesis/src/main.rs index d7ebcadc0745f5..835afff85dc325 100644 --- a/genesis/src/main.rs +++ b/genesis/src/main.rs @@ -1356,7 +1356,7 @@ mod tests { let generate_bls_pubkey = || { if add_bls_pubkey { - let bls_pubkey = BLSKeypair::new().public; + let bls_pubkey: BLSPubkey = BLSKeypair::new().public.into(); if use_compressed_pubkey { let bls_pubkey_compressed: BLSPubkeyCompressed = bls_pubkey.try_into().unwrap(); Some(bls_pubkey_compressed.to_string()) diff --git a/gossip/Cargo.toml b/gossip/Cargo.toml index 97a5a64749a9f8..98d19c9ec9ad1c 100644 --- a/gossip/Cargo.toml +++ b/gossip/Cargo.toml @@ -7,7 +7,7 @@ description = { workspace = true } repository = { workspace = true } homepage = { workspace = true } license = { workspace = true } -edition = { workspace = true } +edition = "2024" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] @@ -78,7 +78,6 @@ solana-net-utils = { workspace = true, features = ["agave-unstable-api"] } solana-packet = { workspace = true } solana-perf = { workspace = true } solana-pubkey = { workspace = true, features = ["rand"] } -solana-quic-definitions = { workspace = true } solana-rayon-threadlimit = { workspace = true } solana-rpc-client = { workspace = true } solana-runtime = { workspace = true } diff --git a/gossip/benches/crds.rs b/gossip/benches/crds.rs index e1f29edbc8839f..afbdc73ffc3d73 100644 --- a/gossip/benches/crds.rs +++ b/gossip/benches/crds.rs @@ 
-1,10 +1,10 @@ use { - criterion::{criterion_group, criterion_main, Criterion}, - rand::{rng, Rng}, + criterion::{Criterion, criterion_group, criterion_main}, + rand::{Rng, rng}, rayon::ThreadPoolBuilder, solana_gossip::{ crds::{Crds, GossipRoute}, - crds_gossip_pull::{CrdsTimeouts, CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS}, + crds_gossip_pull::{CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, CrdsTimeouts}, crds_value::CrdsValue, }, solana_pubkey::Pubkey, diff --git a/gossip/benches/crds_gossip_pull.rs b/gossip/benches/crds_gossip_pull.rs index b8c4e82c80f506..d3b8e8670fc82f 100644 --- a/gossip/benches/crds_gossip_pull.rs +++ b/gossip/benches/crds_gossip_pull.rs @@ -1,6 +1,6 @@ use { - criterion::{criterion_group, criterion_main, Criterion}, - rand::{rng, Rng}, + criterion::{Criterion, criterion_group, criterion_main}, + rand::{Rng, rng}, rayon::ThreadPoolBuilder, solana_gossip::{ crds::{Crds, GossipRoute}, diff --git a/gossip/benches/crds_shards.rs b/gossip/benches/crds_shards.rs index 10fb19cccc39d6..e7ee10cb0aaa5a 100644 --- a/gossip/benches/crds_shards.rs +++ b/gossip/benches/crds_shards.rs @@ -1,6 +1,6 @@ use { - criterion::{criterion_group, criterion_main, Criterion}, - rand::{rng, Rng}, + criterion::{Criterion, criterion_group, criterion_main}, + rand::{Rng, rng}, solana_gossip::{ crds::{Crds, GossipRoute, VersionedCrdsValue}, crds_shards::CrdsShards, diff --git a/gossip/benches/weighted_shuffle.rs b/gossip/benches/weighted_shuffle.rs index e3c28721162710..80e328ded89e11 100644 --- a/gossip/benches/weighted_shuffle.rs +++ b/gossip/benches/weighted_shuffle.rs @@ -1,5 +1,5 @@ use { - criterion::{criterion_group, criterion_main, Criterion}, + criterion::{Criterion, criterion_group, criterion_main}, rand::{Rng, SeedableRng}, rand_chacha::ChaChaRng, solana_gossip::weighted_shuffle::WeightedShuffle, diff --git a/gossip/src/cluster_info.rs b/gossip/src/cluster_info.rs index 1f690994634859..bcc8d81b4969fe 100644 --- a/gossip/src/cluster_info.rs +++ b/gossip/src/cluster_info.rs @@ -18,13 
+18,13 @@ use { cluster_info_metrics::{Counter, GossipStats, ScopedTimer, TimedGuard}, contact_info::{self, ContactInfo, ContactInfoQuery, Error as ContactInfoError}, crds::{Crds, Cursor, GossipRoute}, - crds_data::{self, CrdsData, EpochSlotsIndex, LowestSlot, SnapshotHashes, Vote, MAX_VOTES}, - crds_filter::{should_retain_crds_value, GossipFilterDirection}, + crds_data::{self, CrdsData, EpochSlotsIndex, LowestSlot, MAX_VOTES, SnapshotHashes, Vote}, + crds_filter::{GossipFilterDirection, should_retain_crds_value}, crds_gossip::CrdsGossip, crds_gossip_error::CrdsGossipError, crds_gossip_pull::{ - get_max_bloom_filter_bytes, CrdsFilter, CrdsTimeouts, ProcessPullStats, PullRequest, - CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, + CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, CrdsFilter, CrdsTimeouts, ProcessPullStats, + PullRequest, get_max_bloom_filter_bytes, }, crds_value::{CrdsValue, CrdsValueLabel}, duplicate_shred::DuplicateShred, @@ -33,10 +33,10 @@ use { gossip_error::GossipError, ping_pong::Pong, protocol::{ - split_gossip_messages, Ping, PingCache, Protocol, PruneData, DUPLICATE_SHRED_MAX_PAYLOAD_SIZE, MAX_INCREMENTAL_SNAPSHOT_HASHES, MAX_PRUNE_DATA_NODES, PULL_RESPONSE_MAX_PAYLOAD_SIZE, - PULL_RESPONSE_MIN_SERIALIZED_SIZE, PUSH_MESSAGE_MAX_PAYLOAD_SIZE, + PULL_RESPONSE_MIN_SERIALIZED_SIZE, PUSH_MESSAGE_MAX_PAYLOAD_SIZE, Ping, PingCache, + Protocol, PruneData, split_gossip_messages, }, restart_crds_values::{ RestartHeaviestFork, RestartLastVotedForkSlots, RestartLastVotedForkSlotsError, @@ -46,17 +46,16 @@ use { arc_swap::ArcSwap, crossbeam_channel::{Receiver, TrySendError}, itertools::{Either, Itertools}, - rand::{prelude::IndexedMutRandom, CryptoRng, Rng}, - rayon::{prelude::*, ThreadPool, ThreadPoolBuilder}, - solana_clock::{Slot, DEFAULT_MS_PER_SLOT, DEFAULT_SLOTS_PER_EPOCH}, + rand::{CryptoRng, Rng, prelude::IndexedMutRandom}, + rayon::{ThreadPool, ThreadPoolBuilder, prelude::*}, + solana_clock::{DEFAULT_MS_PER_SLOT, DEFAULT_SLOTS_PER_EPOCH, Slot}, solana_hash::Hash, - 
solana_keypair::{signable::Signable, Keypair}, + solana_keypair::{Keypair, signable::Signable}, solana_ledger::shred::Shred, solana_net_utils::{ - bind_in_range, + PortRange, SocketAddrSpace, VALIDATOR_PORT_RANGE, bind_in_range, multihomed_sockets::BindIpAddrs, sockets::{bind_gossip_port_in_range, bind_to_localhost_unique}, - PortRange, SocketAddrSpace, VALIDATOR_PORT_RANGE, }, solana_perf::{ data_budget::DataBudget, @@ -89,10 +88,10 @@ use { rc::Rc, result::Result, sync::{ - atomic::{AtomicBool, Ordering}, Arc, Mutex, RwLock, RwLockReadGuard, + atomic::{AtomicBool, Ordering}, }, - thread::{sleep, Builder, JoinHandle}, + thread::{Builder, JoinHandle, sleep}, time::{Duration, Instant}, }, thiserror::Error, @@ -1205,15 +1204,14 @@ impl ClusterInfo { return Either::Left(pulls); } entrypoint.set_wallclock(now); - if let Some(entrypoint_gossip) = entrypoint.gossip() { - if self + if let Some(entrypoint_gossip) = entrypoint.gossip() + && self .time_gossip_read_lock("entrypoint", &self.stats.entrypoint) .get_nodes_contact_info() .any(|node| node.gossip() == Some(entrypoint_gossip)) - { - // Found the entrypoint, no need to pull from it. - return Either::Left(pulls); - } + { + // Found the entrypoint, no need to pull from it. 
+ return Either::Left(pulls); } } let Some(entrypoint) = entrypoint.gossip() else { @@ -1378,12 +1376,12 @@ impl ClusterInfo { ) .filter_map(|(addr, data)| make_gossip_packet(addr, &data, &self.stats)) .for_each(|pkt| packet_batch.push(pkt)); - if !packet_batch.is_empty() { - if let Err(TrySendError::Full(packet_batch)) = sender.try_send(packet_batch.into()) { - self.stats - .gossip_transmit_packets_dropped_count - .add_relaxed(packet_batch.len() as u64); - } + if !packet_batch.is_empty() + && let Err(TrySendError::Full(packet_batch)) = sender.try_send(packet_batch.into()) + { + self.stats + .gossip_transmit_packets_dropped_count + .add_relaxed(packet_batch.len() as u64); } self.stats .gossip_transmit_loop_iterations_since_last_report @@ -1610,13 +1608,12 @@ impl ClusterInfo { let _st = ScopedTimer::from(&self.stats.handle_batch_pull_requests_time); if !requests.is_empty() { let response = self.handle_pull_requests(thread_pool, recycler, requests, stakes); - if !response.is_empty() { - if let Err(TrySendError::Full(response)) = response_sender.try_send(response.into()) - { - self.stats - .gossip_packets_dropped_count - .add_relaxed(response.len() as u64); - } + if !response.is_empty() + && let Err(TrySendError::Full(response)) = response_sender.try_send(response.into()) + { + self.stats + .gossip_packets_dropped_count + .add_relaxed(response.len() as u64); } } } @@ -1892,14 +1889,13 @@ impl ClusterInfo { self.new_push_requests(stakes) .filter_map(|(addr, data)| make_gossip_packet(addr, &data, &self.stats)) .for_each(|pkt| packet_batch.push(pkt)); - if !packet_batch.is_empty() { - if let Err(TrySendError::Full(packet_batch)) = + if !packet_batch.is_empty() + && let Err(TrySendError::Full(packet_batch)) = response_sender.try_send(packet_batch.into()) - { - self.stats - .gossip_packets_dropped_count - .add_relaxed(packet_batch.len() as u64); - } + { + self.stats + .gossip_packets_dropped_count + .add_relaxed(packet_batch.len() as u64); } } @@ -2471,7 +2467,7 @@ fn 
discard_different_shred_version( Protocol::PullRequest(..) => return, // No CRDS values in Prune, Ping and Pong messages. Protocol::PruneMessage(_, _) | Protocol::PingMessage(_) | Protocol::PongMessage(_) => { - return + return; } }; let num_values = values.len(); @@ -2593,7 +2589,7 @@ mod tests { solana_streamer::quic::DEFAULT_QUIC_ENDPOINTS, solana_vote_program::{ vote_instruction, - vote_state::{Vote, MAX_LOCKOUT_HISTORY}, + vote_state::{MAX_LOCKOUT_HISTORY, Vote}, }, std::{ iter::repeat_with, @@ -3236,12 +3232,14 @@ mod tests { tower.clear(); tower.extend(0..=slot); let vote = new_vote_transaction(vec![slot]); - assert!(panic::catch_unwind(|| cluster_info.push_vote(&tower, vote)) - .err() - .and_then(|a| a - .downcast_ref::() - .map(|s| { s.starts_with("Submitting old vote") })) - .unwrap_or_default()); + assert!( + panic::catch_unwind(|| cluster_info.push_vote(&tower, vote)) + .err() + .and_then(|a| a + .downcast_ref::() + .map(|s| { s.starts_with("Submitting old vote") })) + .unwrap_or_default() + ); } #[test] @@ -3273,9 +3271,11 @@ mod tests { { let mut gossip_crds = cluster_info.gossip.crds.write().unwrap(); for entry in entries { - assert!(gossip_crds - .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage) - .is_ok()); + assert!( + gossip_crds + .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage) + .is_ok() + ); } } // Should exclude other node's epoch-slot because of different @@ -3410,9 +3410,11 @@ mod tests { let (pings, pulls) = cluster_info.old_pull_requests(&thread_pool, None, &stakes); assert!(pings.is_empty()); assert_eq!(pulls.len(), MIN_NUM_BLOOM_FILTERS); - assert!(pulls - .into_iter() - .all(|(addr, _)| addr == other_node.gossip().unwrap())); + assert!( + pulls + .into_iter() + .all(|(addr, _)| addr == other_node.gossip().unwrap()) + ); // Pull request 2: pretend it's been a while since we've pulled from `entrypoint`. 
There should // now be two pull requests @@ -3434,9 +3436,11 @@ mod tests { let (pings, pulls) = cluster_info.old_pull_requests(&thread_pool, None, &stakes); assert!(pings.is_empty()); assert_eq!(pulls.len(), MIN_NUM_BLOOM_FILTERS); - assert!(pulls - .into_iter() - .all(|(addr, _)| addr == other_node.gossip().unwrap())); + assert!( + pulls + .into_iter() + .all(|(addr, _)| addr == other_node.gossip().unwrap()) + ); } #[test] @@ -3591,9 +3595,11 @@ mod tests { let leader = Arc::new(Keypair::new()); let shred1 = new_rand_shred(&mut rng, next_shred_index, &shredder, &leader); let shred2 = new_rand_shred(&mut rng, next_shred_index, &shredder, &leader); - assert!(cluster_info - .push_duplicate_shred(&shred1, shred2.payload()) - .is_ok()); + assert!( + cluster_info + .push_duplicate_shred(&shred1, shred2.payload()) + .is_ok() + ); cluster_info.flush_push_queue(); let entries = cluster_info.get_duplicate_shreds(&mut cursor); // One duplicate shred proof is split into 3 chunks. @@ -3609,9 +3615,11 @@ mod tests { let next_shred_index = 354; let shred3 = new_rand_shred(&mut rng, next_shred_index, &shredder, &leader); let shred4 = new_rand_shred(&mut rng, next_shred_index, &shredder, &leader); - assert!(cluster_info - .push_duplicate_shred(&shred3, shred4.payload()) - .is_ok()); + assert!( + cluster_info + .push_duplicate_shred(&shred3, shred4.payload()) + .is_ok() + ); cluster_info.flush_push_queue(); let entries1 = cluster_info.get_duplicate_shreds(&mut cursor); // One duplicate shred proof is split into 3 chunks. 
@@ -3636,9 +3644,11 @@ mod tests { update.push(i * 1050 + j); } } - assert!(cluster_info - .push_restart_last_voted_fork_slots(&update, Hash::default()) - .is_ok()); + assert!( + cluster_info + .push_restart_last_voted_fork_slots(&update, Hash::default()) + .is_ok() + ); cluster_info.flush_push_queue(); let mut cursor = Cursor::default(); @@ -3665,9 +3675,11 @@ mod tests { { let mut gossip_crds = cluster_info.gossip.crds.write().unwrap(); for entry in entries { - assert!(gossip_crds - .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage) - .is_ok()); + assert!( + gossip_crds + .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage) + .is_ok() + ); } } // Should exclude other node's last-voted-fork-slot because of different @@ -3681,9 +3693,11 @@ mod tests { let mut node = cluster_info.my_contact_info.write().unwrap(); node.set_shred_version(42); } - assert!(cluster_info - .push_restart_last_voted_fork_slots(&update, Hash::default()) - .is_ok()); + assert!( + cluster_info + .push_restart_last_voted_fork_slots(&update, Hash::default()) + .is_ok() + ); cluster_info.flush_push_queue(); // Should now include both slots. 
let slots = cluster_info.get_restart_last_voted_fork_slots(&mut Cursor::default()); @@ -3742,9 +3756,11 @@ mod tests { { let mut gossip_crds = cluster_info.gossip.crds.write().unwrap(); for entry in entries { - assert!(gossip_crds - .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage) - .is_ok()); + assert!( + gossip_crds + .insert(entry, /*now=*/ 0, GossipRoute::LocalMessage) + .is_ok() + ); } } // Should exclude other node's heaviest_fork because of different @@ -3884,9 +3900,10 @@ mod tests { )), &keypair2, ); - assert!(crds - .insert(ci_wrong_pubkey, /*now=*/ 0, GossipRoute::LocalMessage) - .is_ok()); + assert!( + crds.insert(ci_wrong_pubkey, /*now=*/ 0, GossipRoute::LocalMessage) + .is_ok() + ); // Test insert EpochSlot w/ previous ContactInfo w/ matching shred version but different pubkey -> should be rejected let epoch_slots = EpochSlots::new_rand(&mut rng, Some(keypair.pubkey())); @@ -3898,9 +3915,10 @@ mod tests { } // Now insert ContactInfo with same pubkey as EpochSlot - assert!(crds - .insert(ci.clone(), /*now=*/ 0, GossipRoute::LocalMessage) - .is_ok()); + assert!( + crds.insert(ci.clone(), /*now=*/ 0, GossipRoute::LocalMessage) + .is_ok() + ); let mut msg = Protocol::PushMessage(keypair.pubkey(), vec![es]); discard_different_shred_version(&mut msg, self_shred_version, &crds, &stats); diff --git a/gossip/src/contact_info.rs b/gossip/src/contact_info.rs index d5bf67f80a7f8e..9b5543e034abf2 100644 --- a/gossip/src/contact_info.rs +++ b/gossip/src/contact_info.rs @@ -681,9 +681,7 @@ macro_rules! 
socketaddr { ($ip:expr, $port:expr) => { std::net::SocketAddr::from((std::net::Ipv4Addr::from($ip), $port)) }; - ($str:expr) => {{ - $str.parse::().unwrap() - }}; + ($str:expr) => {{ $str.parse::().unwrap() }}; } #[macro_export] @@ -698,8 +696,8 @@ mod tests { use { super::*, rand::{ - prelude::{IndexedRandom as _, SliceRandom as _}, Rng, + prelude::{IndexedRandom as _, SliceRandom as _}, }, solana_keypair::Keypair, solana_signer::Signer, @@ -959,19 +957,22 @@ mod tests { sockets.values().map(SocketAddr::ip).collect::>(), ); // Assert that all sockets reference a valid IP address. - assert!(node - .sockets - .iter() - .map(|entry| node.addrs.get(usize::from(entry.index))) - .all(|addr| addr.is_some())); - // Assert that port offsets don't overflow. - assert!(u16::try_from( + assert!( node.sockets .iter() - .map(|entry| u64::from(entry.offset)) - .sum::() - ) - .is_ok()); + .map(|entry| node.addrs.get(usize::from(entry.index))) + .all(|addr| addr.is_some()) + ); + // Assert that port offsets don't overflow. + assert!( + u16::try_from( + node.sockets + .iter() + .map(|entry| u64::from(entry.offset)) + .sum::() + ) + .is_ok() + ); // Assert that serde round trips. 
let bytes = bincode::serialize(&node).unwrap(); let other: ContactInfo = bincode::deserialize(&bytes).unwrap(); diff --git a/gossip/src/crds.rs b/gossip/src/crds.rs index abe180b75eb096..2d2d75f2401c94 100644 --- a/gossip/src/crds.rs +++ b/gossip/src/crds.rs @@ -37,17 +37,17 @@ use { }, assert_matches::debug_assert_matches, indexmap::{ - map::{rayon::ParValues, Entry, IndexMap}, + map::{Entry, IndexMap, rayon::ParValues}, set::IndexSet, }, lru::LruCache, - rayon::{prelude::*, ThreadPool}, + rayon::{ThreadPool, prelude::*}, solana_clock::Slot, solana_hash::Hash, solana_pubkey::Pubkey, std::{ cmp::Ordering, - collections::{hash_map, BTreeMap, HashMap, VecDeque}, + collections::{BTreeMap, HashMap, VecDeque, hash_map}, ops::{Bound, Index, IndexMut}, sync::Mutex, }, @@ -192,12 +192,11 @@ fn overrides(value: &CrdsValue, other: &VersionedCrdsValue) -> bool { // Contact-infos are special cased so that if there are // two running instances of the same node, the more recent start is // propagated through gossip regardless of wallclocks. - if let CrdsData::ContactInfo(value) = value.data() { - if let CrdsData::ContactInfo(other) = other.value.data() { - if let Some(out) = value.overrides(other) { - return out; - } - } + if let CrdsData::ContactInfo(value) = value.data() + && let CrdsData::ContactInfo(other) = other.value.data() + && let Some(out) = value.overrides(other) + { + return out; } match value.wallclock().cmp(&other.value.wallclock()) { Ordering::Less => false, @@ -518,16 +517,15 @@ impl Crds { // If the origin's contact-info hasn't expired yet then preserve // all associated values. let origin = CrdsValueLabel::ContactInfo(*pubkey); - if let Some(origin) = self.table.get(&origin) { - if origin + if let Some(origin) = self.table.get(&origin) + && origin .value .wallclock() .min(origin.local_timestamp) .saturating_add(timeout) > now - { - return vec![]; - } + { + return vec![]; } // Otherwise check each value's timestamp individually. 
index @@ -698,11 +696,11 @@ impl Default for CrdsDataStats { impl CrdsDataStats { fn record_insert(&mut self, entry: &VersionedCrdsValue, route: GossipRoute) { self.counts[Self::ordinal(entry)] += 1; - if let CrdsData::Vote(_, vote) = entry.value.data() { - if let Some(slot) = vote.slot() { - let num_nodes = self.votes.get(&slot).copied().unwrap_or_default(); - self.votes.put(slot, num_nodes + 1); - } + if let CrdsData::Vote(_, vote) = entry.value.data() + && let Some(slot) = vote.slot() + { + let num_nodes = self.votes.get(&slot).copied().unwrap_or_default(); + self.votes.put(slot, num_nodes + 1); } let GossipRoute::PushMessage(from) = route else { @@ -781,8 +779,8 @@ impl CrdsStats { mod tests { use { super::*, - crate::crds_data::{new_rand_timestamp, AccountsHashes}, - rand::{rng, Rng}, + crate::crds_data::{AccountsHashes, new_rand_timestamp}, + rand::{Rng, rng}, rayon::ThreadPoolBuilder, solana_keypair::Keypair, solana_signer::Signer, diff --git a/gossip/src/crds_data.rs b/gossip/src/crds_data.rs index 216c78b75edfa3..7de2ee7f55c0eb 100644 --- a/gossip/src/crds_data.rs +++ b/gossip/src/crds_data.rs @@ -8,7 +8,7 @@ use { restart_crds_values::{RestartHeaviestFork, RestartLastVotedForkSlots}, }, rand::Rng, - serde::{de::Deserializer, Deserialize, Serialize}, + serde::{Deserialize, Serialize, de::Deserializer}, solana_clock::Slot, solana_hash::Hash, solana_pubkey::{self, Pubkey}, diff --git a/gossip/src/crds_gossip_pull.rs b/gossip/src/crds_gossip_pull.rs index 6a0421334b520e..aafa28aca1b202 100644 --- a/gossip/src/crds_gossip_pull.rs +++ b/gossip/src/crds_gossip_pull.rs @@ -23,10 +23,10 @@ use { }, itertools::Itertools, rand::{ - distr::{weighted::WeightedIndex, Distribution}, Rng, + distr::{Distribution, weighted::WeightedIndex}, }, - rayon::{prelude::*, ThreadPool}, + rayon::{ThreadPool, prelude::*}, serde::{Deserialize, Serialize}, solana_bloom::bloom::{Bloom, ConcurrentBloom}, solana_hash::Hash, @@ -43,8 +43,8 @@ use { net::SocketAddr, ops::Index, sync::{ - 
atomic::{AtomicI64, AtomicUsize, Ordering}, LazyLock, Mutex, RwLock, + atomic::{AtomicI64, AtomicUsize, Ordering}, }, time::Duration, }, @@ -680,7 +680,7 @@ pub(crate) mod tests { protocol::Protocol, }, itertools::Itertools, - rand::{prelude::IndexedRandom as _, SeedableRng}, + rand::{SeedableRng, prelude::IndexedRandom as _}, rand_chacha::ChaChaRng, rayon::ThreadPoolBuilder, solana_hash::HASH_BYTES, diff --git a/gossip/src/crds_gossip_push.rs b/gossip/src/crds_gossip_push.rs index 57ad91e12380f8..c24a586a91e9fc 100644 --- a/gossip/src/crds_gossip_push.rs +++ b/gossip/src/crds_gossip_push.rs @@ -34,8 +34,8 @@ use { net::SocketAddr, ops::{DerefMut, RangeBounds}, sync::{ - atomic::{AtomicUsize, Ordering}, Mutex, RwLock, + atomic::{AtomicUsize, Ordering}, }, }, }; @@ -348,9 +348,10 @@ mod tests { assert_eq!(crds.read().unwrap().get::<&CrdsValue>(&label), Some(&value)); // push it again - assert!(push - .process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) - .is_empty()); + assert!( + push.process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) + .is_empty() + ); } #[test] fn test_process_push_old_version() { @@ -369,9 +370,10 @@ mod tests { // push an old version ci.set_wallclock(0); let value = CrdsValue::new_unsigned(CrdsData::from(ci)); - assert!(push - .process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) - .is_empty()); + assert!( + push.process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) + .is_empty() + ); } #[test] fn test_process_push_timeout() { @@ -383,16 +385,18 @@ mod tests { // push a version to far in the future ci.set_wallclock(timeout + 1); let value = CrdsValue::new_unsigned(CrdsData::from(&ci)); - assert!(push - .process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) - .is_empty()); + assert!( + push.process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) + .is_empty() + ); // push a version to far in the past ci.set_wallclock(0); let value = 
CrdsValue::new_unsigned(CrdsData::from(ci)); - assert!(push - .process_push_message(&crds, vec![(Pubkey::default(), vec![value])], timeout + 1) - .is_empty()); + assert!( + push.process_push_message(&crds, vec![(Pubkey::default(), vec![value])], timeout + 1) + .is_empty() + ); } #[test] fn test_process_push_update() { @@ -641,13 +645,15 @@ mod tests { ); // push it again - assert!(push - .process_push_message(&crds, vec![(Pubkey::default(), vec![value.clone()])], 0) - .is_empty()); + assert!( + push.process_push_message(&crds, vec![(Pubkey::default(), vec![value.clone()])], 0) + .is_empty() + ); // push it again - assert!(push - .process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) - .is_empty()); + assert!( + push.process_push_message(&crds, vec![(Pubkey::default(), vec![value])], 0) + .is_empty() + ); } } diff --git a/gossip/src/crds_shards.rs b/gossip/src/crds_shards.rs index c38ac234bd84c2..c1faaa9256819e 100644 --- a/gossip/src/crds_shards.rs +++ b/gossip/src/crds_shards.rs @@ -137,7 +137,7 @@ mod test { crds::{Crds, GossipRoute}, crds_value::CrdsValue, }, - rand::{rng, Rng}, + rand::{Rng, rng}, solana_time_utils::timestamp, std::{collections::HashSet, iter::repeat_with, ops::Index}, }; diff --git a/gossip/src/crds_value.rs b/gossip/src/crds_value.rs index bf5ab2824530d1..c11c7ebac81c73 100644 --- a/gossip/src/crds_value.rs +++ b/gossip/src/crds_value.rs @@ -8,9 +8,9 @@ use { arrayvec::ArrayVec, bincode::serialize, rand::Rng, - serde::{de::Deserializer, Deserialize, Serialize}, + serde::{Deserialize, Serialize, de::Deserializer}, solana_hash::Hash, - solana_keypair::{signable::Signable, Keypair}, + solana_keypair::{Keypair, signable::Signable}, solana_packet::PACKET_DATA_SIZE, solana_pubkey::Pubkey, solana_sanitize::{Sanitize, SanitizeError}, diff --git a/gossip/src/duplicate_shred.rs b/gossip/src/duplicate_shred.rs index c52e9aed6b9e69..4f22ee9aadbcf3 100644 --- a/gossip/src/duplicate_shred.rs +++ b/gossip/src/duplicate_shred.rs @@ -11,7 
+11,7 @@ use { solana_pubkey::Pubkey, solana_sanitize::{Sanitize, SanitizeError}, std::{ - collections::{hash_map::Entry, HashMap}, + collections::{HashMap, hash_map::Entry}, convert::TryFrom, num::TryFromIntError, }, diff --git a/gossip/src/duplicate_shred_handler.rs b/gossip/src/duplicate_shred_handler.rs index d7d4da27ae4b2c..6c74fb00d2b7fd 100644 --- a/gossip/src/duplicate_shred_handler.rs +++ b/gossip/src/duplicate_shred_handler.rs @@ -238,7 +238,7 @@ mod tests { itertools::Itertools, solana_keypair::Keypair, solana_ledger::{ - genesis_utils::{create_genesis_config_with_leader, GenesisConfigInfo}, + genesis_utils::{GenesisConfigInfo, create_genesis_config_with_leader}, get_tmp_ledger_path_auto_delete, shred::Shredder, }, diff --git a/gossip/src/duplicate_shred_listener.rs b/gossip/src/duplicate_shred_listener.rs index 2c7be1e56e4267..3a45e1db2ed7e8 100644 --- a/gossip/src/duplicate_shred_listener.rs +++ b/gossip/src/duplicate_shred_listener.rs @@ -6,10 +6,10 @@ use { }, std::{ sync::{ - atomic::{AtomicBool, Ordering}, Arc, + atomic::{AtomicBool, Ordering}, }, - thread::{self, sleep, Builder, JoinHandle}, + thread::{self, Builder, JoinHandle, sleep}, time::Duration, }, }; @@ -77,8 +77,8 @@ mod tests { solana_net_utils::SocketAddrSpace, solana_signer::Signer, std::sync::{ - atomic::{AtomicU32, Ordering}, Arc, + atomic::{AtomicU32, Ordering}, }, }; struct FakeHandler { @@ -118,9 +118,11 @@ mod tests { let leader = Arc::new(Keypair::new()); let shred1 = new_rand_shred(&mut rng, next_shred_index, &shredder, &leader); let shred2 = new_rand_shred(&mut rng, next_shred_index, &shredder, &leader); - assert!(cluster_info - .push_duplicate_shred(&shred1, shred2.payload()) - .is_ok()); + assert!( + cluster_info + .push_duplicate_shred(&shred1, shred2.payload()) + .is_ok() + ); cluster_info.flush_push_queue(); sleep(Duration::from_millis(GOSSIP_SLEEP_MILLIS)); assert_eq!(count.load(Ordering::Relaxed), 3); diff --git a/gossip/src/epoch_specs.rs b/gossip/src/epoch_specs.rs 
index cd40bf5be67273..ac3a62e01c8f4c 100644 --- a/gossip/src/epoch_specs.rs +++ b/gossip/src/epoch_specs.rs @@ -1,5 +1,5 @@ use { - solana_clock::{Epoch, DEFAULT_MS_PER_SLOT}, + solana_clock::{DEFAULT_MS_PER_SLOT, Epoch}, solana_epoch_schedule::EpochSchedule, solana_pubkey::Pubkey, solana_runtime::{ @@ -87,7 +87,7 @@ mod tests { use { super::*, solana_clock::Slot, - solana_runtime::genesis_utils::{create_genesis_config, GenesisConfigInfo}, + solana_runtime::genesis_utils::{GenesisConfigInfo, create_genesis_config}, }; #[test] diff --git a/gossip/src/gossip_service.rs b/gossip/src/gossip_service.rs index 97178aaeb4ae3e..7a5cf126f84d33 100644 --- a/gossip/src/gossip_service.rs +++ b/gossip/src/gossip_service.rs @@ -8,10 +8,10 @@ use { epoch_specs::EpochSpecs, }, crossbeam_channel::Sender, - rand::{rng, Rng}, + rand::{Rng, rng}, solana_client::{connection_cache::ConnectionCache, tpu_client::TpuClientWrapper}, solana_keypair::Keypair, - solana_net_utils::{SocketAddrSpace, DEFAULT_IP_ECHO_SERVER_THREADS}, + solana_net_utils::{DEFAULT_IP_ECHO_SERVER_THREADS, SocketAddrSpace}, solana_perf::recycler::Recycler, solana_pubkey::Pubkey, solana_rpc_client::rpc_client::RpcClient, @@ -26,10 +26,10 @@ use { collections::HashSet, net::{SocketAddr, TcpListener, UdpSocket}, sync::{ - atomic::{AtomicBool, Ordering}, Arc, RwLock, + atomic::{AtomicBool, Ordering}, }, - thread::{self, sleep, Builder, JoinHandle}, + thread::{self, Builder, JoinHandle, sleep}, time::{Duration, Instant}, }, }; @@ -385,7 +385,7 @@ mod tests { use { super::*, crate::{cluster_info::ClusterInfo, contact_info::ContactInfo, node::Node}, - std::sync::{atomic::AtomicBool, Arc}, + std::sync::{Arc, atomic::AtomicBool}, }; #[test] diff --git a/gossip/src/node.rs b/gossip/src/node.rs index b5c29eeaa1da0f..190c96ec4330ae 100644 --- a/gossip/src/node.rs +++ b/gossip/src/node.rs @@ -10,9 +10,9 @@ use { find_available_ports_in_range, multihomed_sockets::BindIpAddrs, sockets::{ - bind_gossip_port_in_range, 
bind_in_range_with_config, bind_more_with_config, - bind_to_with_config, localhost_port_range_for_tests, multi_bind_in_range_with_config, - SocketConfiguration as SocketConfig, + SocketConfiguration as SocketConfig, bind_gossip_port_in_range, + bind_in_range_with_config, bind_more_with_config, bind_to_with_config, + localhost_port_range_for_tests, multi_bind_in_range_with_config, }, }, solana_pubkey::Pubkey, diff --git a/gossip/src/ping_pong.rs b/gossip/src/ping_pong.rs index b695040b4ed2d2..73945e7fb01f79 100644 --- a/gossip/src/ping_pong.rs +++ b/gossip/src/ping_pong.rs @@ -6,7 +6,7 @@ use { serde_big_array::BigArray, siphasher::sip::SipHasher24, solana_hash::Hash, - solana_keypair::{signable::Signable, Keypair}, + solana_keypair::{Keypair, signable::Signable}, solana_pubkey::Pubkey, solana_sanitize::{Sanitize, SanitizeError}, solana_signature::Signature, @@ -187,18 +187,18 @@ impl PingCache { return false; }; self.pongs.put(remote_node, now); - if let Some(sent_time) = self.ping_times.pop(&socket.ip()) { - if should_report_message_signature( + if let Some(sent_time) = self.ping_times.pop(&socket.ip()) + && should_report_message_signature( pong.signature(), PONG_SIGNATURE_SAMPLE_LEADING_ZEROS, - ) { - let rtt = now.saturating_duration_since(sent_time); - datapoint_info!( - "ping_rtt", - ("peer_ip", socket.ip().to_string(), String), - ("rtt_us", rtt.as_micros() as i64, i64), - ); - } + ) + { + let rtt = now.saturating_duration_since(sent_time); + datapoint_info!( + "ping_rtt", + ("peer_ip", socket.ip().to_string(), String), + ("rtt_us", rtt.as_micros() as i64, i64), + ); } true } diff --git a/gossip/src/protocol.rs b/gossip/src/protocol.rs index 7a789421f15cb4..b09771e2b0ec5c 100644 --- a/gossip/src/protocol.rs +++ b/gossip/src/protocol.rs @@ -230,25 +230,27 @@ pub(crate) fn split_gossip_messages( let mut data_feed = data_feed.into_iter().fuse(); let mut buffer = vec![]; let mut buffer_size = 0; // Serialized size of buffered values. 
- std::iter::from_fn(move || loop { - let Some(data) = data_feed.next() else { - return (!buffer.is_empty()).then(|| std::mem::take(&mut buffer)); - }; - let data_size = match bincode::serialized_size(&data) { - Ok(size) => size as usize, - Err(err) => { - error!("serialized_size failed: {err:?}"); - continue; + std::iter::from_fn(move || { + loop { + let Some(data) = data_feed.next() else { + return (!buffer.is_empty()).then(|| std::mem::take(&mut buffer)); + }; + let data_size = match bincode::serialized_size(&data) { + Ok(size) => size as usize, + Err(err) => { + error!("serialized_size failed: {err:?}"); + continue; + } + }; + if buffer_size + data_size <= max_chunk_size { + buffer_size += data_size; + buffer.push(data); + } else if data_size <= max_chunk_size { + buffer_size = data_size; + return Some(std::mem::replace(&mut buffer, vec![data])); + } else { + error!("dropping data larger than the maximum chunk size {data:?}",); } - }; - if buffer_size + data_size <= max_chunk_size { - buffer_size += data_size; - buffer.push(data); - } else if data_size <= max_chunk_size { - buffer_size = data_size; - return Some(std::mem::replace(&mut buffer, vec![data])); - } else { - error!("dropping data larger than the maximum chunk size {data:?}",); } }) } @@ -262,7 +264,7 @@ pub(crate) mod tests { crds_data::{ self, AccountsHashes, CrdsData, LowestSlot, SnapshotHashes, Vote as CrdsVote, }, - duplicate_shred::{self, tests::new_rand_shred, MAX_DUPLICATE_SHREDS}, + duplicate_shred::{self, MAX_DUPLICATE_SHREDS, tests::new_rand_shred}, }, rand::Rng, solana_clock::Slot, diff --git a/gossip/src/push_active_set.rs b/gossip/src/push_active_set.rs index bce579628e5571..89692ec22f5bd1 100644 --- a/gossip/src/push_active_set.rs +++ b/gossip/src/push_active_set.rs @@ -226,39 +226,55 @@ mod tests { } let other = &nodes[5]; let origin = &nodes[17]; - assert!(active_set - .get_nodes(&pubkey, origin, &stakes) - .eq([13, 5, 18, 16, 0].into_iter().map(|k| &nodes[k]))); - assert!(active_set 
- .get_nodes(&pubkey, other, &stakes) - .eq([13, 18, 16, 0].into_iter().map(|k| &nodes[k]))); + assert!( + active_set + .get_nodes(&pubkey, origin, &stakes) + .eq([13, 5, 18, 16, 0].into_iter().map(|k| &nodes[k])) + ); + assert!( + active_set + .get_nodes(&pubkey, other, &stakes) + .eq([13, 18, 16, 0].into_iter().map(|k| &nodes[k])) + ); active_set.prune(&pubkey, &nodes[5], &[*origin], &stakes); active_set.prune(&pubkey, &nodes[3], &[*origin], &stakes); active_set.prune(&pubkey, &nodes[16], &[*origin], &stakes); - assert!(active_set - .get_nodes(&pubkey, origin, &stakes) - .eq([13, 18, 0].into_iter().map(|k| &nodes[k]))); - assert!(active_set - .get_nodes(&pubkey, other, &stakes) - .eq([13, 18, 16, 0].into_iter().map(|k| &nodes[k]))); + assert!( + active_set + .get_nodes(&pubkey, origin, &stakes) + .eq([13, 18, 0].into_iter().map(|k| &nodes[k])) + ); + assert!( + active_set + .get_nodes(&pubkey, other, &stakes) + .eq([13, 18, 16, 0].into_iter().map(|k| &nodes[k])) + ); active_set.rotate(&mut rng, 7, CLUSTER_SIZE, &nodes, &stakes); assert!(active_set.0.iter().all(|entry| entry.0.len() == 7)); - assert!(active_set - .get_nodes(&pubkey, origin, &stakes) - .eq([18, 0, 7, 15, 11].into_iter().map(|k| &nodes[k]))); - assert!(active_set - .get_nodes(&pubkey, other, &stakes) - .eq([18, 16, 0, 7, 15, 11].into_iter().map(|k| &nodes[k]))); + assert!( + active_set + .get_nodes(&pubkey, origin, &stakes) + .eq([18, 0, 7, 15, 11].into_iter().map(|k| &nodes[k])) + ); + assert!( + active_set + .get_nodes(&pubkey, other, &stakes) + .eq([18, 16, 0, 7, 15, 11].into_iter().map(|k| &nodes[k])) + ); let origins = [*origin, *other]; active_set.prune(&pubkey, &nodes[18], &origins, &stakes); active_set.prune(&pubkey, &nodes[0], &origins, &stakes); active_set.prune(&pubkey, &nodes[15], &origins, &stakes); - assert!(active_set - .get_nodes(&pubkey, origin, &stakes) - .eq([7, 11].into_iter().map(|k| &nodes[k]))); - assert!(active_set - .get_nodes(&pubkey, other, &stakes) - .eq([16, 7, 
11].into_iter().map(|k| &nodes[k]))); + assert!( + active_set + .get_nodes(&pubkey, origin, &stakes) + .eq([7, 11].into_iter().map(|k| &nodes[k])) + ); + assert!( + active_set + .get_nodes(&pubkey, other, &stakes) + .eq([16, 7, 11].into_iter().map(|k| &nodes[k])) + ); } #[test] @@ -284,9 +300,11 @@ mod tests { if !keys.contains(&origin) { assert!(entry.get_nodes(pubkey, origin).eq(keys)); } else { - assert!(entry - .get_nodes(pubkey, origin) - .eq(keys.into_iter().filter(|&key| key != origin))); + assert!( + entry + .get_nodes(pubkey, origin) + .eq(keys.into_iter().filter(|&key| key != origin)) + ); } } // Assert that each filter already prunes the key. @@ -294,9 +312,11 @@ mod tests { assert!(filter.contains(node)); } for (pubkey, origin) in iproduct!(&nodes, keys) { - assert!(entry - .get_nodes(pubkey, origin) - .eq(keys.into_iter().filter(|&node| node != origin))); + assert!( + entry + .get_nodes(pubkey, origin) + .eq(keys.into_iter().filter(|&node| node != origin)) + ); } // Assert that prune excludes node from get. let origin = &nodes[3]; @@ -304,9 +324,11 @@ mod tests { entry.prune(&nodes[14], origin); entry.prune(&nodes[19], origin); for pubkey in &nodes { - assert!(entry.get_nodes(pubkey, origin).eq(keys - .into_iter() - .filter(|&&node| pubkey == origin || (node != nodes[11] && node != nodes[14])))); + assert!( + entry.get_nodes(pubkey, origin).eq(keys + .into_iter() + .filter(|&&node| pubkey == origin || (node != nodes[11] && node != nodes[14]))) + ); } // Assert that rotate adds new nodes. entry.rotate(&mut rng, 5, NUM_BLOOM_FILTER_ITEMS, &nodes, &weights); diff --git a/gossip/src/weighted_shuffle.rs b/gossip/src/weighted_shuffle.rs index 0ce74648688db7..ece56349361d79 100644 --- a/gossip/src/weighted_shuffle.rs +++ b/gossip/src/weighted_shuffle.rs @@ -266,16 +266,20 @@ mod tests { })); assert!(mask.iter().all(|&x| x)); // Assert that the random shuffle is weighted. 
- assert!(shuffle - .chunks(shuffle.len() / 10) - .map(|chunk| chunk.iter().map(|&i| weights[i]).sum::()) - .tuple_windows() - .all(|(a, b)| a > b)); + assert!( + shuffle + .chunks(shuffle.len() / 10) + .map(|chunk| chunk.iter().map(|&i| weights[i]).sum::()) + .tuple_windows() + .all(|(a, b)| a > b) + ); // Assert that zero weights only appear at the end of the shuffle. - assert!(shuffle - .iter() - .tuple_windows() - .all(|(&i, &j)| weights[i] != 0 || weights[j] == 0)); + assert!( + shuffle + .iter() + .tuple_windows() + .all(|(&i, &j)| weights[i] != 0 || weights[j] == 0) + ); } fn weighted_shuffle_slow(rng: &mut R, mut weights: Vec) -> Vec @@ -445,7 +449,9 @@ mod tests { let mut shuffle = WeightedShuffle::new("", weights); assert_eq!( shuffle.clone().shuffle(&mut rng).collect::>(), - [10, 3, 14, 18, 0, 9, 19, 6, 2, 1, 17, 7, 13, 15, 20, 12, 4, 8, 5, 16, 11] + [ + 10, 3, 14, 18, 0, 9, 19, 6, 2, 1, 17, 7, 13, 15, 20, 12, 4, 8, 5, 16, 11 + ] ); let mut rng = ChaChaRng::from_seed(seed); assert_eq!(shuffle.first(&mut rng), Some(10)); @@ -465,7 +471,9 @@ mod tests { let mut shuffle = WeightedShuffle::new("", weights); assert_eq!( shuffle.clone().shuffle(&mut rng).collect::>(), - [3, 15, 10, 6, 19, 17, 2, 0, 9, 20, 1, 14, 7, 8, 12, 18, 4, 13, 5, 11, 16] + [ + 3, 15, 10, 6, 19, 17, 2, 0, 9, 20, 1, 14, 7, 8, 12, 18, 4, 13, 5, 11, 16 + ] ); let mut rng = ChaChaRng::from_seed(seed); assert_eq!(shuffle.first(&mut rng), Some(3)); diff --git a/gossip/tests/crds_gossip.rs b/gossip/tests/crds_gossip.rs index 570c6768d1d2e2..ff970bf1f529b9 100644 --- a/gossip/tests/crds_gossip.rs +++ b/gossip/tests/crds_gossip.rs @@ -3,7 +3,7 @@ use { bincode::serialized_size, itertools::Itertools, log::*, - rayon::{prelude::*, ThreadPool, ThreadPoolBuilder}, + rayon::{ThreadPool, ThreadPoolBuilder, prelude::*}, serial_test::serial, solana_gossip::{ cluster_info_metrics::GossipStats, @@ -13,7 +13,7 @@ use { crds_gossip::*, crds_gossip_error::CrdsGossipError, crds_gossip_pull::{ - CrdsTimeouts, 
ProcessPullStats, PullRequest, CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, + CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS, CrdsTimeouts, ProcessPullStats, PullRequest, }, crds_gossip_push::CRDS_GOSSIP_PUSH_MSG_TIMEOUT_MS, crds_value::{CrdsValue, CrdsValueLabel}, diff --git a/gossip/tests/gossip.rs b/gossip/tests/gossip.rs index 3522188e824012..0d80511fbb8e5c 100644 --- a/gossip/tests/gossip.rs +++ b/gossip/tests/gossip.rs @@ -18,15 +18,15 @@ use { solana_pubkey::Pubkey, solana_runtime::bank_forks::BankForks, solana_signer::Signer, - solana_streamer::sendmmsg::{multi_target_send, SendPktsError}, + solana_streamer::sendmmsg::{SendPktsError, multi_target_send}, solana_time_utils::timestamp, solana_transaction::Transaction, solana_vote_program::{vote_instruction, vote_state::Vote}, std::{ net::UdpSocket, sync::{ - atomic::{AtomicBool, Ordering}, Arc, RwLock, + atomic::{AtomicBool, Ordering}, }, thread::sleep, time::Duration, @@ -290,7 +290,7 @@ pub fn cluster_info_scale() { solana_perf::test_tx::test_tx, solana_runtime::{ bank::Bank, - genesis_utils::{create_genesis_config_with_vote_accounts, ValidatorVoteKeypairs}, + genesis_utils::{ValidatorVoteKeypairs, create_genesis_config_with_vote_accounts}, }, }; agave_logger::setup(); diff --git a/keygen/src/keygen.rs b/keygen/src/keygen.rs index d8466e66fae80d..b1cc992eb43fc6 100644 --- a/keygen/src/keygen.rs +++ b/keygen/src/keygen.rs @@ -532,7 +532,7 @@ fn do_main(matches: &ArgMatches) -> Result<(), Box> { ("bls_pubkey", matches) => { let keypair = get_keypair_from_matches(matches, config, &mut wallet_manager)?; let bls_keypair = BLSKeypair::derive_from_signer(&keypair, BLS_KEYPAIR_DERIVE_SEED)?; - let bls_pubkey: BLSPubkey = bls_keypair.public; + let bls_pubkey: BLSPubkey = bls_keypair.public.into(); if matches.try_contains_id("outfile")? 
{ let outfile = matches.get_one::("outfile").unwrap(); @@ -1269,7 +1269,7 @@ mod tests { fn test_read_write_bls_pubkey() -> Result<(), std::boxed::Box> { let filename = "test_bls_pubkey.json"; let bls_keypair = BLSKeypair::new(); - let bls_pubkey = bls_keypair.public; + let bls_pubkey: BLSPubkey = bls_keypair.public.into(); write_bls_pubkey_file(filename, bls_pubkey)?; let read = read_bls_pubkey_file(filename)?; assert_eq!(read, bls_pubkey); @@ -1300,6 +1300,6 @@ mod tests { let bls_keypair = BLSKeypair::derive_from_signer(&my_keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap(); let read_bls_pubkey = read_bls_pubkey_file(&outfile_path).unwrap(); - assert_eq!(read_bls_pubkey, bls_keypair.public); + assert_eq!(read_bls_pubkey, bls_keypair.public.into()); } } diff --git a/ledger-tool/src/args.rs b/ledger-tool/src/args.rs index ee1627b3c2d930..647df2bfd078d4 100644 --- a/ledger-tool/src/args.rs +++ b/ledger-tool/src/args.rs @@ -328,7 +328,7 @@ pub fn get_accounts_db_config( AccountsDbConfig { index: Some(accounts_index_config), - bank_hash_details_dir: Some(ledger_tool_ledger_path), + bank_hash_details_dir: ledger_tool_ledger_path, ancient_append_vec_offset: value_t!(arg_matches, "accounts_db_ancient_append_vecs", i64) .ok(), ancient_storage_ideal_size: value_t!( diff --git a/net-utils/Cargo.toml b/net-utils/Cargo.toml index 18159296c96b06..0fb2e0f273c1ef 100644 --- a/net-utils/Cargo.toml +++ b/net-utils/Cargo.toml @@ -7,7 +7,7 @@ authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } license = { workspace = true } -edition = { workspace = true } +edition = "2024" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] diff --git a/net-utils/benches/token_bucket.rs b/net-utils/benches/token_bucket.rs index 803a373e6eb15f..36d47e712589ab 100644 --- a/net-utils/benches/token_bucket.rs +++ b/net-utils/benches/token_bucket.rs @@ -24,34 +24,38 @@ fn bench_token_bucket() { std::thread::scope(|scope| { for _ in 0..workers { - 
scope.spawn(|| loop { - if start.elapsed() > run_duration { - break; + scope.spawn(|| { + loop { + if start.elapsed() > run_duration { + break; + } + match tb.consume_tokens(request_size) { + Ok(_) => accepted.fetch_add(1, Ordering::Relaxed), + Err(_) => rejected.fetch_add(1, Ordering::Relaxed), + }; } - match tb.consume_tokens(request_size) { - Ok(_) => accepted.fetch_add(1, Ordering::Relaxed), - Err(_) => rejected.fetch_add(1, Ordering::Relaxed), - }; }); } // periodically check for races - let jh = scope.spawn(|| loop { - std::thread::sleep(Duration::from_millis(100)); - let elapsed = start.elapsed(); - if elapsed > run_duration { - break; + let jh = scope.spawn(|| { + loop { + std::thread::sleep(Duration::from_millis(100)); + let elapsed = start.elapsed(); + if elapsed > run_duration { + break; + } + let acc = accepted.load(Ordering::Relaxed); + let rate = acc as f64 / elapsed.as_secs_f64(); + assert!( + tb.current_tokens() < request_size * 2, + "bucket should have no spare tokens" + ); + assert!( + // allow 1% error + (rate - target_rate).abs() < target_rate / 100.0, + "Accepted rate should be about {target_rate}, actual {rate}" + ); } - let acc = accepted.load(Ordering::Relaxed); - let rate = acc as f64 / elapsed.as_secs_f64(); - assert!( - tb.current_tokens() < request_size * 2, - "bucket should have no spare tokens" - ); - assert!( - // allow 1% error - (rate - target_rate).abs() < target_rate / 100.0, - "Accepted rate should be about {target_rate}, actual {rate}" - ); }); jh.join().expect("Rate checks should pass"); }); diff --git a/net-utils/src/ip_echo_client.rs b/net-utils/src/ip_echo_client.rs index c1ed42ebc5e50a..7045720a50ce0a 100644 --- a/net-utils/src/ip_echo_client.rs +++ b/net-utils/src/ip_echo_client.rs @@ -1,7 +1,7 @@ use { crate::{ - ip_echo_server::{IpEchoServerMessage, IpEchoServerResponse}, HEADER_LENGTH, IP_ECHO_SERVER_RESPONSE_LENGTH, MAX_PORT_COUNT_PER_MESSAGE, + ip_echo_server::{IpEchoServerMessage, IpEchoServerResponse}, }, 
anyhow::bail, bytes::{BufMut, BytesMut}, diff --git a/net-utils/src/ip_echo_server.rs b/net-utils/src/ip_echo_server.rs index 0a32d2b8d215db..77f427985a8241 100644 --- a/net-utils/src/ip_echo_server.rs +++ b/net-utils/src/ip_echo_server.rs @@ -1,5 +1,5 @@ use { - crate::{bind_to_unspecified, HEADER_LENGTH, IP_ECHO_SERVER_RESPONSE_LENGTH}, + crate::{HEADER_LENGTH, IP_ECHO_SERVER_RESPONSE_LENGTH, bind_to_unspecified}, log::*, serde::{Deserialize, Serialize}, solana_serde::default_on_eof, diff --git a/net-utils/src/lib.rs b/net-utils/src/lib.rs index 4c0ebf5bb24f58..466e46944b7cde 100644 --- a/net-utils/src/lib.rs +++ b/net-utils/src/lib.rs @@ -30,7 +30,7 @@ pub mod tooling_for_tests; use { ip_echo_client::{ip_echo_server_request, ip_echo_server_request_with_binding}, ip_echo_server::IpEchoServerMessage, - rand::{rng, Rng}, + rand::{Rng, rng}, std::{ io::{self}, net::{IpAddr, Ipv4Addr, SocketAddr, TcpListener, ToSocketAddrs, UdpSocket}, @@ -39,8 +39,8 @@ use { }; pub use { ip_echo_server::{ - ip_echo_server, IpEchoServer, DEFAULT_IP_ECHO_SERVER_THREADS, MAX_PORT_COUNT_PER_MESSAGE, - MINIMUM_IP_ECHO_SERVER_THREADS, + DEFAULT_IP_ECHO_SERVER_THREADS, IpEchoServer, MAX_PORT_COUNT_PER_MESSAGE, + MINIMUM_IP_ECHO_SERVER_THREADS, ip_echo_server, }, socket_addr_space::SocketAddrSpace, }; diff --git a/net-utils/src/multihomed_sockets.rs b/net-utils/src/multihomed_sockets.rs index 3162e65685d76f..8299b58c992f91 100644 --- a/net-utils/src/multihomed_sockets.rs +++ b/net-utils/src/multihomed_sockets.rs @@ -3,8 +3,8 @@ use std::{ net::{IpAddr, Ipv4Addr, UdpSocket}, ops::Deref, sync::{ - atomic::{AtomicUsize, Ordering}, Arc, + atomic::{AtomicUsize, Ordering}, }, }; diff --git a/net-utils/src/sockets.rs b/net-utils/src/sockets.rs index 7c538e87b489c4..bdf662b292ae8b 100644 --- a/net-utils/src/sockets.rs +++ b/net-utils/src/sockets.rs @@ -340,11 +340,11 @@ mod tests { use { super::*, crate::{ - bind_in_range, get_cluster_shred_version, get_public_ip_addr_with_binding, - 
ip_echo_client, ip_echo_server, parse_host, + DEFAULT_IP_ECHO_SERVER_THREADS, MAX_PORT_VERIFY_THREADS, bind_in_range, + get_cluster_shred_version, get_public_ip_addr_with_binding, ip_echo_client, + ip_echo_server, parse_host, sockets::{localhost_port_range_for_tests, unique_port_range_for_tests}, - verify_all_reachable_tcp, verify_all_reachable_udp, DEFAULT_IP_ECHO_SERVER_THREADS, - MAX_PORT_VERIFY_THREADS, + verify_all_reachable_tcp, verify_all_reachable_udp, }, itertools::Itertools, std::{net::Ipv4Addr, time::Duration}, @@ -448,14 +448,16 @@ mod tests { ) { assert!(port2 == port1 + offset); } - assert!(bind_two_in_range_with_offset_and_config( - ip_addr, - (port_range.start, port_range.start + 5), - offset, - config, - config - ) - .is_err()); + assert!( + bind_two_in_range_with_offset_and_config( + ip_addr, + (port_range.start, port_range.start + 5), + offset, + config, + config + ) + .is_err() + ); } #[test] diff --git a/net-utils/src/token_bucket.rs b/net-utils/src/token_bucket.rs index a5c3f8ccc4b2db..04dcf8e3c661ae 100644 --- a/net-utils/src/token_bucket.rs +++ b/net-utils/src/token_bucket.rs @@ -4,7 +4,7 @@ //! as connections. 
use { cfg_if::cfg_if, - dashmap::{mapref::entry::Entry, DashMap}, + dashmap::{DashMap, mapref::entry::Entry}, solana_svm_type_overrides::sync::atomic::{AtomicU64, AtomicUsize, Ordering}, std::{borrow::Borrow, cmp::Reverse, hash::Hash, time::Instant}, }; diff --git a/programs/bpf_loader/src/lib.rs b/programs/bpf_loader/src/lib.rs index f60d80b55d4c06..eef391342faa08 100644 --- a/programs/bpf_loader/src/lib.rs +++ b/programs/bpf_loader/src/lib.rs @@ -1211,7 +1211,18 @@ fn process_loader_upgradeable_instruction( &[], )?; - if upgrade_authority_address.is_none() { + if let Some(upgrade_authority_address) = upgrade_authority_address { + if migration_authority::check_id(&provided_authority_address) { + invoke_context.native_invoke( + solana_loader_v4_interface::instruction::transfer_authority( + &program_address, + &provided_authority_address, + &upgrade_authority_address, + ), + &[], + )?; + } + } else { invoke_context.native_invoke( solana_loader_v4_interface::instruction::finalize( &program_address, @@ -1220,15 +1231,6 @@ fn process_loader_upgradeable_instruction( ), &[], )?; - } else if migration_authority::check_id(&provided_authority_address) { - invoke_context.native_invoke( - solana_loader_v4_interface::instruction::transfer_authority( - &program_address, - &provided_authority_address, - &upgrade_authority_address.unwrap(), - ), - &[], - )?; } } diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index 0665690fa78f58..a9f0300606eed5 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -13,10 +13,10 @@ dependencies = [ ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aead" @@ -392,6 +392,7 @@ dependencies = [ "solana-signature", "solana-signer", 
"solana-signer-store", + "solana-streamer", "solana-time-utils", "solana-transaction", "solana-transaction-error", @@ -406,7 +407,10 @@ version = "4.0.0-alpha.0" dependencies = [ "agave-feature-set", "agave-logger", + "bitvec", + "bytemuck", "log", + "num_enum", "serde", "solana-address 2.0.0", "solana-bls-signatures", @@ -1488,9 +1492,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -2482,9 +2486,9 @@ checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e" [[package]] name = "flate2" -version = "1.0.31" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -4052,11 +4056,12 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ - "adler", + "adler2", + "simd-adler32", ] [[package]] @@ -5855,6 +5860,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "simpl" version = "0.1.0" @@ -6276,9 +6287,9 @@ dependencies = [ [[package]] name = "solana-bls-signatures" -version = 
"2.0.0-alpha.1" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acb2f8e2ce54f8798c8a5eb5d06f4cb4f596a5879be5ad81e95a60a1bde9359" +checksum = "e21cad136370a83c91bbe9348c69a510222d8d70144154ca37edff59df789661" dependencies = [ "base64 0.22.1", "blst", @@ -6510,7 +6521,6 @@ dependencies = [ "solana-pubkey 4.0.0", "solana-pubsub-client", "solana-quic-client", - "solana-quic-definitions", "solana-rpc-client", "solana-rpc-client-api", "solana-rpc-client-nonce-utils", @@ -6518,6 +6528,7 @@ dependencies = [ "solana-signer", "solana-streamer", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client", "solana-transaction", "solana-transaction-error", @@ -6760,7 +6771,6 @@ dependencies = [ "solana-poh-config", "solana-pubkey 4.0.0", "solana-quic-client", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rent", "solana-rpc", @@ -7068,7 +7078,7 @@ dependencies = [ "solana-packet 4.0.0", "solana-pubkey 4.0.0", "solana-signer", - "solana-system-interface 2.0.0", + "solana-system-interface 3.0.0", "solana-system-transaction", "solana-transaction", "solana-version", @@ -7253,7 +7263,6 @@ dependencies = [ "solana-packet 4.0.0", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rpc-client", "solana-runtime", @@ -7920,11 +7929,11 @@ dependencies = [ [[package]] name = "solana-program-memory" -version = "3.0.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10e5660c60749c7bfb30b447542529758e4dbcecd31b1e8af1fdc92e2bdde90a" +checksum = "4068648649653c2c50546e9a7fb761791b5ab0cda054c771bb5808d3a4b9eb52" dependencies = [ - "solana-define-syscall 3.0.0", + "solana-define-syscall 4.0.1", ] [[package]] @@ -8109,7 +8118,6 @@ dependencies = [ "solana-metrics", "solana-net-utils", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rpc-client-api", "solana-signer", "solana-streamer", @@ -8119,15 +8127,6 @@ 
dependencies = [ "tokio", ] -[[package]] -name = "solana-quic-definitions" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15319accf7d3afd845817aeffa6edd8cc185f135cefbc6b985df29cfd8c09609" -dependencies = [ - "solana-keypair", -] - [[package]] name = "solana-rayon-threadlimit" version = "4.0.0-alpha.0" @@ -8231,7 +8230,6 @@ dependencies = [ "solana-poh-config", "solana-program-pack", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-rayon-threadlimit", "solana-rpc-client-api", "solana-runtime", @@ -8246,6 +8244,7 @@ dependencies = [ "solana-system-transaction", "solana-sysvar", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client", "solana-transaction", "solana-transaction-context", @@ -9398,10 +9397,10 @@ dependencies = [ "solana-metrics", "solana-nonce-account", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-runtime", "solana-signature", "solana-time-utils", + "solana-tls-utils", "solana-tpu-client-next", "tokio", "tokio-util 0.7.18", @@ -9652,7 +9651,6 @@ dependencies = [ "solana-packet 4.0.0", "solana-perf", "solana-pubkey 4.0.0", - "solana-quic-definitions", "solana-signature", "solana-signer", "solana-time-utils", @@ -10422,7 +10420,7 @@ dependencies = [ "solana-program-runtime", "solana-sdk-ids", "solana-svm-log-collector", - "solana-zk-sdk", + "solana-zk-sdk 5.0.0", ] [[package]] @@ -10462,6 +10460,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = "solana-zk-sdk" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d89042b5867c7440526d47085db2cd11a7ae557461a4f41a3b3a569799dd9d6" +dependencies = [ + "aes-gcm-siv", + "base64 0.22.1", + "bincode", + "bytemuck", + "bytemuck_derive", + "curve25519-dalek 4.1.3", + "itertools 0.14.0", + "merlin", + "num-derive", + "num-traits", + "rand 0.8.5", + "serde", + "serde_derive", + "serde_json", + "sha3", + "solana-address 2.0.0", + "solana-derivation-path", + "solana-instruction", + 
"solana-sdk-ids", + "solana-seed-derivable", + "solana-seed-phrase", + "solana-signature", + "solana-signer", + "subtle", + "thiserror 2.0.17", + "zeroize", +] + [[package]] name = "solana-zk-token-proof-program" version = "4.0.0-alpha.0" @@ -10567,7 +10599,7 @@ dependencies = [ "solana-program-error", "solana-program-option", "solana-pubkey 3.0.0", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "thiserror 2.0.17", ] @@ -10589,7 +10621,7 @@ dependencies = [ "solana-program-pack", "solana-pubkey 3.0.0", "solana-sdk-ids", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "spl-pod", "spl-token-confidential-transfer-proof-extraction", "spl-token-confidential-transfer-proof-generation", @@ -10614,7 +10646,7 @@ dependencies = [ "solana-program-error", "solana-pubkey 3.0.0", "solana-sdk-ids", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "spl-pod", "thiserror 2.0.17", ] @@ -10626,7 +10658,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63a2b41095945dc15274b924b21ccae9b3ec9dc2fdd43dbc08de8c33bbcd915" dependencies = [ "curve25519-dalek 4.1.3", - "solana-zk-sdk", + "solana-zk-sdk 4.0.0", "thiserror 2.0.17", ] diff --git a/programs/sbf/Cargo.toml b/programs/sbf/Cargo.toml index d489dd4a8aba79..563d3f7014fdd2 100644 --- a/programs/sbf/Cargo.toml +++ b/programs/sbf/Cargo.toml @@ -140,7 +140,7 @@ solana-poseidon = "=4.0.0" solana-program = "=3.0.0" solana-program-entrypoint = "=3.1.1" solana-program-error = "=3.0.0" -solana-program-memory = "=3.0.0" +solana-program-memory = "=3.1.0" solana-program-runtime = { path = "../../program-runtime", version = "=4.0.0-alpha.0" } solana-pubkey = { version = "=4.0.0", default-features = false } solana-runtime = { path = "../../runtime", version = "=4.0.0-alpha.0" } diff --git a/programs/vote/src/vote_state/mod.rs b/programs/vote/src/vote_state/mod.rs index 36c7731f2f7444..3c34498196492f 100644 --- a/programs/vote/src/vote_state/mod.rs +++ b/programs/vote/src/vote_state/mod.rs @@ -11,11 +11,7 @@ use { 
handler::{VoteStateHandle, VoteStateHandler, VoteStateTargetVersion}, log::*, solana_account::{AccountSharedData, WritableAccount}, - solana_bls_signatures::{ - keypair::Keypair as BLSKeypair, ProofOfPossession as BLSProofOfPossession, - ProofOfPossessionCompressed as BLSProofOfPossessionCompressed, Pubkey as BLSPubkey, - PubkeyCompressed as BLSPubkeyCompressed, VerifiableProofOfPossession, - }, + solana_bls_signatures::{keypair::Keypair as BLSKeypair, VerifiableProofOfPossession}, solana_clock::{Clock, Epoch, Slot}, solana_epoch_schedule::EpochSchedule, solana_hash::Hash, @@ -920,26 +916,15 @@ pub(crate) fn generate_pop_message( message } -// TODO(sam): use custom payload for PoP once solana-bls-signatures v2.0.0 is published. pub fn verify_bls_proof_of_possession( vote_account_pubkey: &Pubkey, bls_pubkey_compressed_bytes: &[u8; BLS_PUBLIC_KEY_COMPRESSED_SIZE], bls_proof_of_possession_compressed_bytes: &[u8; BLS_PROOF_OF_POSSESSION_COMPRESSED_SIZE], ) -> Result<(), InstructionError> { - let bls_pubkey_compressed = BLSPubkeyCompressed(*bls_pubkey_compressed_bytes); - let bls_pubkey = BLSPubkey::try_from(bls_pubkey_compressed) - .map_err(|_| InstructionError::InvalidArgument)?; - let bls_proof_of_possession_compressed = - BLSProofOfPossessionCompressed(*bls_proof_of_possession_compressed_bytes); - let bls_proof_of_possession = - BLSProofOfPossession::try_from(bls_proof_of_possession_compressed) - .map_err(|_| InstructionError::InvalidArgument)?; let message = generate_pop_message(vote_account_pubkey, bls_pubkey_compressed_bytes); - if Ok(true) == bls_proof_of_possession.verify(&bls_pubkey, Some(&message)) { - Ok(()) - } else { - Err(InstructionError::InvalidArgument) - } + bls_proof_of_possession_compressed_bytes + .verify(bls_pubkey_compressed_bytes, Some(&message)) + .map_err(|_| InstructionError::InvalidArgument) } /// Withdraw funds from the vote account @@ -1266,13 +1251,13 @@ pub fn create_bls_proof_of_possession( [u8; BLS_PUBLIC_KEY_COMPRESSED_SIZE], [u8; 
BLS_PROOF_OF_POSSESSION_COMPRESSED_SIZE], ) { - let bls_pubkey_compressed: BLSPubkeyCompressed = bls_keypair.public.try_into().unwrap(); - let message = generate_pop_message(vote_account_pubkey, &bls_pubkey_compressed.0); + let bls_pubkey_bytes = bls_keypair.public.to_bytes_compressed(); + let message = generate_pop_message(vote_account_pubkey, &bls_pubkey_bytes); + let proof_of_possession = bls_keypair.proof_of_possession(Some(&message)); - let proof_of_possession: BLSProofOfPossession = proof_of_possession.into(); - let proof_of_possession_compressed: BLSProofOfPossessionCompressed = - proof_of_possession.try_into().unwrap(); - (bls_pubkey_compressed.0, proof_of_possession_compressed.0) + let proof_of_possession_bytes = proof_of_possession.to_bytes_compressed(); + + (bls_pubkey_bytes, proof_of_possession_bytes) } #[allow(clippy::arithmetic_side_effects)] diff --git a/programs/zk-elgamal-proof-tests/tests/process_transaction.rs b/programs/zk-elgamal-proof-tests/tests/process_transaction.rs index adfa96c68c410d..7c726a08219b02 100644 --- a/programs/zk-elgamal-proof-tests/tests/process_transaction.rs +++ b/programs/zk-elgamal-proof-tests/tests/process_transaction.rs @@ -11,7 +11,7 @@ use { solana_transaction_error::TransactionError, solana_zk_sdk::{ encryption::{ - elgamal::{ElGamalKeypair, ElGamalSecretKey}, + elgamal::{ElGamalKeypair, ElGamalPubkey, ElGamalSecretKey}, grouped_elgamal::GroupedElGamal, pedersen::{Pedersen, PedersenOpening}, }, @@ -43,12 +43,12 @@ async fn test_zero_balance() { let success_proof_data = ZeroCiphertextProofData::new(&elgamal_keypair, &zero_ciphertext).unwrap(); - let incorrect_pubkey = elgamal_keypair.pubkey(); - let incorrect_secret = ElGamalSecretKey::new_rand(); - let incorrect_keypair = ElGamalKeypair::new_for_tests(*incorrect_pubkey, incorrect_secret); - - let fail_proof_data = - ZeroCiphertextProofData::new(&incorrect_keypair, &zero_ciphertext).unwrap(); + let mut fail_proof_context = success_proof_data.context; + 
fail_proof_context.pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = ZeroCiphertextProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; test_verify_proof_without_context( ProofInstruction::VerifyZeroCiphertext, @@ -104,19 +104,12 @@ async fn test_ciphertext_ciphertext_equality() { ) .unwrap(); - let incorrect_pubkey = source_keypair.pubkey(); - let incorrect_secret = ElGamalSecretKey::new_rand(); - let incorrect_keypair = ElGamalKeypair::new_for_tests(*incorrect_pubkey, incorrect_secret); - - let fail_proof_data = CiphertextCiphertextEqualityProofData::new( - &incorrect_keypair, - destination_keypair.pubkey(), - &source_ciphertext, - &destination_ciphertext, - &destination_opening, - amount, - ) - .unwrap(); + let mut fail_proof_context = success_proof_data.context; + fail_proof_context.first_pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = CiphertextCiphertextEqualityProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; test_verify_proof_without_context( ProofInstruction::VerifyCiphertextCiphertextEquality, @@ -379,18 +372,12 @@ async fn test_ciphertext_commitment_equality() { ) .unwrap(); - let incorrect_pubkey = keypair.pubkey(); - let incorrect_secret = ElGamalSecretKey::new_rand(); - let incorrect_keypair = ElGamalKeypair::new_for_tests(*incorrect_pubkey, incorrect_secret); - - let fail_proof_data = CiphertextCommitmentEqualityProofData::new( - &incorrect_keypair, - &ciphertext, - &commitment, - &opening, - amount, - ) - .unwrap(); + let mut fail_proof_context = success_proof_data.context; + fail_proof_context.pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = CiphertextCommitmentEqualityProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; test_verify_proof_without_context( ProofInstruction::VerifyCiphertextCommitmentEquality, @@ -445,15 +432,12 @@ async fn test_grouped_ciphertext_2_handles_validity() { ) .unwrap(); - let 
incorrect_opening = PedersenOpening::new_rand(); - let fail_proof_data = GroupedCiphertext2HandlesValidityProofData::new( - destination_pubkey, - auditor_pubkey, - &grouped_ciphertext, - amount, - &incorrect_opening, - ) - .unwrap(); + let mut fail_proof_context = success_proof_data.context; + fail_proof_context.first_pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = GroupedCiphertext2HandlesValidityProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; test_verify_proof_without_context( ProofInstruction::VerifyGroupedCiphertext2HandlesValidity, @@ -517,18 +501,12 @@ async fn test_batched_grouped_ciphertext_2_handles_validity() { ) .unwrap(); - let incorrect_opening = PedersenOpening::new_rand(); - let fail_proof_data = BatchedGroupedCiphertext2HandlesValidityProofData::new( - destination_pubkey, - auditor_pubkey, - &grouped_ciphertext_lo, - &grouped_ciphertext_hi, - amount_lo, - amount_hi, - &incorrect_opening, - &opening_hi, - ) - .unwrap(); + let mut fail_proof_context = success_proof_data.context; + fail_proof_context.first_pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = BatchedGroupedCiphertext2HandlesValidityProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; test_verify_proof_without_context( ProofInstruction::VerifyBatchedGroupedCiphertext2HandlesValidity, @@ -590,16 +568,12 @@ async fn test_grouped_ciphertext_3_handles_validity() { ) .unwrap(); - let incorrect_opening = PedersenOpening::new_rand(); - let fail_proof_data = GroupedCiphertext3HandlesValidityProofData::new( - source_pubkey, - destination_pubkey, - auditor_pubkey, - &grouped_ciphertext, - amount, - &incorrect_opening, - ) - .unwrap(); + let mut fail_proof_context = success_proof_data.context; + fail_proof_context.first_pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = GroupedCiphertext3HandlesValidityProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; 
test_verify_proof_without_context( ProofInstruction::VerifyGroupedCiphertext3HandlesValidity, @@ -665,19 +639,12 @@ async fn test_batched_grouped_ciphertext_3_handles_validity() { ) .unwrap(); - let incorrect_opening = PedersenOpening::new_rand(); - let fail_proof_data = BatchedGroupedCiphertext3HandlesValidityProofData::new( - source_pubkey, - destination_pubkey, - auditor_pubkey, - &grouped_ciphertext_lo, - &grouped_ciphertext_hi, - amount_lo, - amount_hi, - &incorrect_opening, - &opening_hi, - ) - .unwrap(); + let mut fail_proof_context = success_proof_data.context; + fail_proof_context.first_pubkey = ElGamalPubkey::default().into(); + let fail_proof_data = BatchedGroupedCiphertext3HandlesValidityProofData { + context: fail_proof_context, + proof: success_proof_data.proof, + }; test_verify_proof_without_context( ProofInstruction::VerifyBatchedGroupedCiphertext3HandlesValidity, diff --git a/quic-client/Cargo.toml b/quic-client/Cargo.toml index a1c30363c339d4..4adfa595a2b47f 100644 --- a/quic-client/Cargo.toml +++ b/quic-client/Cargo.toml @@ -27,7 +27,6 @@ solana-measure = { workspace = true } solana-metrics = { workspace = true } solana-net-utils = { workspace = true } solana-pubkey = { workspace = true, default-features = false } -solana-quic-definitions = { workspace = true } solana-rpc-client-api = { workspace = true } solana-signer = { workspace = true } solana-streamer = { workspace = true } diff --git a/quic-client/src/nonblocking/quic_client.rs b/quic-client/src/nonblocking/quic_client.rs index 276011d0e56a0d..415c0784cb57ef 100644 --- a/quic-client/src/nonblocking/quic_client.rs +++ b/quic-client/src/nonblocking/quic_client.rs @@ -18,11 +18,8 @@ use { solana_keypair::Keypair, solana_measure::measure::Measure, solana_net_utils::sockets, - solana_quic_definitions::{ - QUIC_CONNECTION_HANDSHAKE_TIMEOUT, QUIC_KEEP_ALIVE, QUIC_MAX_TIMEOUT, QUIC_SEND_FAIRNESS, - }, solana_rpc_client_api::client_error::ErrorKind as ClientErrorKind, - 
solana_streamer::nonblocking::quic::ALPN_TPU_PROTOCOL_ID, + solana_streamer::{nonblocking::quic::ALPN_TPU_PROTOCOL_ID, quic::QUIC_MAX_TIMEOUT}, solana_tls_utils::{ new_dummy_x509_certificate, socket_addr_to_quic_server_name, tls_client_config_builder, QuicClientCertificate, @@ -32,11 +29,19 @@ use { net::{SocketAddr, UdpSocket}, sync::{atomic::Ordering, Arc}, thread, + time::Duration, }, thiserror::Error, tokio::{sync::OnceCell, time::timeout}, }; +const QUIC_KEEP_ALIVE: Duration = Duration::from_secs(1); + +// Based on commonly-used handshake timeouts for various TCP +// applications. Different applications vary, but most seem to +// be in the 30-60 second range +pub const QUIC_CONNECTION_HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(60); + /// A lazy-initialized Quic Endpoint pub struct QuicLazyInitializedEndpoint { endpoint: OnceCell>, @@ -107,7 +112,7 @@ impl QuicLazyInitializedEndpoint { let timeout = IdleTimeout::try_from(QUIC_MAX_TIMEOUT).unwrap(); transport_config.max_idle_timeout(Some(timeout)); transport_config.keep_alive_interval(Some(QUIC_KEEP_ALIVE)); - transport_config.send_fairness(QUIC_SEND_FAIRNESS); + transport_config.send_fairness(false); config.transport_config(Arc::new(transport_config)); endpoint.set_default_client_config(config); diff --git a/rpc/Cargo.toml b/rpc/Cargo.toml index 4f05b430ce9241..c8d90ec4da000f 100644 --- a/rpc/Cargo.toml +++ b/rpc/Cargo.toml @@ -70,7 +70,6 @@ solana-poh = { workspace = true } solana-poh-config = { workspace = true } solana-program-pack = { workspace = true } solana-pubkey = { workspace = true } -solana-quic-definitions = { workspace = true } solana-rayon-threadlimit = { workspace = true } solana-rpc-client-api = { workspace = true } solana-runtime = { workspace = true } @@ -85,6 +84,7 @@ solana-system-interface = { workspace = true } solana-system-transaction = { workspace = true } solana-sysvar = { workspace = true } solana-time-utils = { workspace = true } +solana-tls-utils = { workspace = true } 
solana-tpu-client = { workspace = true } solana-transaction = { workspace = true } solana-transaction-context = { workspace = true } diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index ba18ef14dd81e4..e2cddfc02cd5a2 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -34,7 +34,6 @@ use { solana_metrics::inc_new_counter_info, solana_perf::thread::renice_this_thread, solana_poh::poh_recorder::PohRecorder, - solana_quic_definitions::NotifyKeyUpdate, solana_runtime::{ bank::Bank, bank_forks::BankForks, commitment::BlockCommitmentCache, non_circulating_supply::calculate_non_circulating_supply, @@ -45,6 +44,7 @@ use { transaction_client::{TpuClientNextClient, TransactionClient}, }, solana_storage_bigtable::CredentialType, + solana_tls_utils::NotifyKeyUpdate, solana_validator_exit::Exit, std::{ net::{SocketAddr, UdpSocket}, diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index e135f27d09fe1f..3bb3117badc678 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -457,7 +457,9 @@ pub struct BankFieldsToDeserialize { pub(crate) epoch_schedule: EpochSchedule, pub(crate) inflation: Inflation, pub(crate) stakes: DeserializableStakes, - pub(crate) versioned_epoch_stakes: HashMap, + /// Transformed into `HashMap` in `serde_snapshot` and passed to + /// `Bank::new_from_snapshot` as separate parameter for performance (conversion is time consuming) + pub(crate) versioned_epoch_stakes: Vec<(Epoch, DeserializableVersionedEpochStakes)>, pub(crate) is_delta: bool, pub(crate) accounts_data_len: u64, pub(crate) accounts_lt_hash: AccountsLtHash, @@ -1816,6 +1818,7 @@ impl Bank { fields: BankFieldsToDeserialize, debug_keys: Option>>, accounts_data_size_initial: u64, + epoch_stakes: HashMap, ) -> Self { let now = Instant::now(); let ancestors = Ancestors::from(&fields.ancestors); @@ -1843,6 +1846,14 @@ impl Bank { snapshot or bugs in cached accounts or accounts-db.", )); info!("Loading Stakes took: {stakes_time}"); + assert!( + 
fields.versioned_epoch_stakes.is_empty(), + "should be already converted and passed in epoch_stakes parameter" + ); + assert!( + !epoch_stakes.is_empty(), + "should be populated (from fields.versioned_epoch_stakes)" + ); let stakes_accounts_load_duration = now.elapsed(); let mut bank = Self { rc: bank_rc, @@ -1879,11 +1890,7 @@ impl Bank { epoch_schedule: fields.epoch_schedule, inflation: Arc::new(RwLock::new(fields.inflation)), stakes_cache: StakesCache::new(stakes), - epoch_stakes: fields - .versioned_epoch_stakes - .into_iter() - .map(|(k, v)| (k, v.into())) - .collect(), + epoch_stakes, is_delta: AtomicBool::new(fields.is_delta), rewards: RwLock::new(vec![]), cluster_type: Some(genesis_config.cluster_type), diff --git a/runtime/src/bank/tests.rs b/runtime/src/bank/tests.rs index 1f68c203f0b351..23bdb943352e56 100644 --- a/runtime/src/bank/tests.rs +++ b/runtime/src/bank/tests.rs @@ -5486,7 +5486,10 @@ fn test_clean_nonrooted() { test_utils::deposit(&bank1, &pubkey0, some_lamports).unwrap(); goto_end_of_slot(bank1.clone()); bank1.freeze(); - bank1.flush_accounts_cache_slot_for_tests(); + bank1 + .accounts() + .accounts_db + .flush_unrooted_slot_cache(bank1.slot()); bank1.print_accounts_stats(); diff --git a/runtime/src/epoch_stakes.rs b/runtime/src/epoch_stakes.rs index 21bffaa482d542..5d8f0e6a61013e 100644 --- a/runtime/src/epoch_stakes.rs +++ b/runtime/src/epoch_stakes.rs @@ -303,7 +303,7 @@ pub(crate) mod tests { iter::repeat_with(|| { let authorized_voter = solana_pubkey::new_rand(); let bls_pubkey_compressed: BLSPubkeyCompressed = - BLSKeypair::new().public.try_into().unwrap(); + BLSKeypair::new().public.into(); let bls_pubkey_compressed_serialized = bincode::serialize(&bls_pubkey_compressed) .unwrap() diff --git a/runtime/src/genesis_utils.rs b/runtime/src/genesis_utils.rs index d96cc472c0a92a..9ff039661e42df 100644 --- a/runtime/src/genesis_utils.rs +++ b/runtime/src/genesis_utils.rs @@ -155,7 +155,7 @@ pub fn 
create_genesis_config_with_vote_accounts_and_cluster_type( BLS_KEYPAIR_DERIVE_SEED, ) .unwrap(); - Some(bls_pubkey_to_compressed_bytes(&bls_keypair.public)) + Some(bls_keypair.public.to_bytes_compressed()) } else { None }; @@ -195,7 +195,7 @@ pub fn create_genesis_config_with_vote_accounts_and_cluster_type( BLS_KEYPAIR_DERIVE_SEED, ) .unwrap(); - Some(bls_pubkey_to_compressed_bytes(&bls_keypair.public)) + Some(bls_keypair.public.to_bytes_compressed()) } else { None }; diff --git a/runtime/src/serde_snapshot.rs b/runtime/src/serde_snapshot.rs index b2ab89a475cf18..d86945f803cb60 100644 --- a/runtime/src/serde_snapshot.rs +++ b/runtime/src/serde_snapshot.rs @@ -53,6 +53,7 @@ use { atomic::{AtomicBool, AtomicUsize, Ordering}, Arc, }, + thread, time::Instant, }, storage::SerializableStorage, @@ -222,7 +223,7 @@ impl From for BankFieldsToDeserialize { inflation: dvb.inflation, stakes: dvb.stakes, is_delta: dvb.is_delta, - versioned_epoch_stakes: HashMap::default(), // populated from ExtraFieldsToDeserialize + versioned_epoch_stakes: vec![], // populated from ExtraFieldsToDeserialize accounts_lt_hash: AccountsLtHash(LT_HASH_CANARY), // populated from ExtraFieldsToDeserialize bank_hash_stats: BankHashStats::default(), // populated from AccountsDbFields } @@ -425,9 +426,19 @@ struct ExtraFieldsToDeserialize { #[serde(deserialize_with = "default_on_eof")] _obsolete_epoch_accounts_hash: Option, #[serde(deserialize_with = "default_on_eof")] - versioned_epoch_stakes: HashMap, + versioned_epoch_stakes: Vec<(u64, DeserializableVersionedEpochStakes)>, #[serde(deserialize_with = "default_on_eof")] accounts_lt_hash: Option, + /// In order to maintain snapshot compatibility between adjacent versions + /// (edge <-> beta, and beta <-> stable), we must be able to deserialize + /// (and ignore) this new field (block id) in adjacent versions *before* + /// we serialize the new field into snapshots. + /// Hence the annotation to allow dead code. 
+ /// This code is not truly dead though, as it enables newer versions to + /// populate this field and have older versions still load the snapshot. + #[allow(dead_code)] + #[serde(deserialize_with = "default_on_eof")] + block_id: Option, } /// Extra fields that are serialized at the end of snapshots. @@ -476,6 +487,7 @@ where _obsolete_epoch_accounts_hash, versioned_epoch_stakes, accounts_lt_hash, + block_id: _, } = extra_fields; bank_fields.fee_rate_governor = bank_fields @@ -799,6 +811,16 @@ where E: SerializableStorage + std::marker::Sync, { let mut bank_fields = bank_fields.collapse_into(); + // Epoch stakes take several seconds to reconstruct, do it in parallel with loading accountsdb + let deserializable_epoch_stakes = std::mem::take(&mut bank_fields.versioned_epoch_stakes); + let epoch_stakes_handle = thread::Builder::new() + .name("solRctEpochStk".into()) + .spawn(|| { + deserializable_epoch_stakes + .into_iter() + .map(|(epoch, stakes)| (epoch, stakes.into())) + .collect() + })?; let (accounts_db, reconstructed_accounts_db_info) = reconstruct_accountsdb_from_fields( snapshot_accounts_db_fields, account_paths, @@ -813,6 +835,7 @@ where let bank_rc = BankRc::new(Accounts::new(Arc::new(accounts_db))); let runtime_config = Arc::new(runtime_config.clone()); + let epoch_stakes = epoch_stakes_handle.join().expect("calculate epoch stakes"); let bank = Bank::new_from_snapshot( bank_rc, @@ -821,6 +844,7 @@ where bank_fields, debug_keys, reconstructed_accounts_db_info.accounts_data_len, + epoch_stakes, ); info!("rent_collector: {:?}", bank.rent_collector()); diff --git a/send-transaction-service/Cargo.toml b/send-transaction-service/Cargo.toml index ec6ae2ba2a3b27..d2062670164383 100644 --- a/send-transaction-service/Cargo.toml +++ b/send-transaction-service/Cargo.toml @@ -30,10 +30,10 @@ solana-metrics = { workspace = true } solana-net-utils = { workspace = true, optional = true } solana-nonce-account = { workspace = true } solana-pubkey = { workspace = true } 
-solana-quic-definitions = { workspace = true } solana-runtime = { workspace = true } solana-signature = { workspace = true } solana-time-utils = { workspace = true } +solana-tls-utils = { workspace = true } solana-tpu-client-next = { workspace = true, features = ["metrics"] } tokio = { workspace = true, features = ["full"] } tokio-util = { workspace = true } diff --git a/send-transaction-service/src/transaction_client.rs b/send-transaction-service/src/transaction_client.rs index 5e36b3378280f4..2862ef64cd4edb 100644 --- a/send-transaction-service/src/transaction_client.rs +++ b/send-transaction-service/src/transaction_client.rs @@ -4,7 +4,7 @@ use { log::warn, solana_keypair::Keypair, solana_measure::measure::Measure, - solana_quic_definitions::NotifyKeyUpdate, + solana_tls_utils::NotifyKeyUpdate, solana_tpu_client_next::{ connection_workers_scheduler::{ BindTarget, ConnectionWorkersSchedulerConfig, Fanout, StakeIdentity, diff --git a/streamer/Cargo.toml b/streamer/Cargo.toml index 376a29b8190ced..e1a72a74ca3f85 100644 --- a/streamer/Cargo.toml +++ b/streamer/Cargo.toml @@ -49,7 +49,6 @@ solana-net-utils = { workspace = true, features = ["agave-unstable-api"] } solana-packet = { workspace = true } solana-perf = { workspace = true } solana-pubkey = { workspace = true } -solana-quic-definitions = { workspace = true } solana-signature = { workspace = true } solana-signer = { workspace = true } solana-time-utils = { workspace = true } diff --git a/streamer/src/nonblocking/quic.rs b/streamer/src/nonblocking/quic.rs index 74a1b1089d587f..32abebd90b0112 100644 --- a/streamer/src/nonblocking/quic.rs +++ b/streamer/src/nonblocking/quic.rs @@ -61,10 +61,6 @@ const CONNECTION_CLOSE_REASON_DROPPED_ENTRY: &[u8] = b"dropped"; pub(crate) const CONNECTION_CLOSE_CODE_DISALLOWED: u32 = 2; pub(crate) const CONNECTION_CLOSE_REASON_DISALLOWED: &[u8] = b"disallowed"; -pub(crate) const CONNECTION_CLOSE_CODE_EXCEED_MAX_STREAM_COUNT: u32 = 3; -pub(crate) const 
CONNECTION_CLOSE_REASON_EXCEED_MAX_STREAM_COUNT: &[u8] = - b"exceed_max_stream_count"; - const CONNECTION_CLOSE_CODE_TOO_MANY: u32 = 4; const CONNECTION_CLOSE_REASON_TOO_MANY: &[u8] = b"too_many"; @@ -412,7 +408,6 @@ pub fn get_connection_stake( #[derive(Debug)] pub(crate) enum ConnectionHandlerError { ConnectionAddError, - MaxStreamError, } pub(crate) fn update_open_connections_stat( diff --git a/streamer/src/nonblocking/stream_throttle.rs b/streamer/src/nonblocking/stream_throttle.rs index 7329e426b962f0..3fd4917d7c4dc8 100644 --- a/streamer/src/nonblocking/stream_throttle.rs +++ b/streamer/src/nonblocking/stream_throttle.rs @@ -5,9 +5,8 @@ use { }, percentage::Percentage, std::{ - cmp, sync::{ - atomic::{AtomicU64, Ordering}, + atomic::{AtomicBool, AtomicU64, Ordering}, Arc, RwLock, }, time::{Duration, Instant}, @@ -24,22 +23,25 @@ pub const STREAM_THROTTLING_INTERVAL_MS: u64 = 100; pub const STREAM_THROTTLING_INTERVAL: Duration = Duration::from_millis(STREAM_THROTTLING_INTERVAL_MS); const STREAM_LOAD_EMA_INTERVAL_MS: u64 = 5; -const STREAM_LOAD_EMA_INTERVAL_COUNT: u64 = 10; -const EMA_WINDOW_MS: u64 = STREAM_LOAD_EMA_INTERVAL_MS * STREAM_LOAD_EMA_INTERVAL_COUNT; +// EMA smoothing window to reduce sensitivity to short-lived load spikes at the start +// of a leader slot. Throttling is only triggered when saturation is sustained. +// The value 40 was chosen based on simulations: at a max target TPS of ~400K, +// it allows the system to absorb a burst of ~50K transactions over ~40 ms +// before throttling activates. +const STREAM_LOAD_EMA_INTERVAL_COUNT: u64 = 40; + +const STAKED_THROTTLING_ON_LOAD_THRESHOLD_PERCENT: u64 = 95; pub(crate) struct StakedStreamLoadEMA { current_load_ema: AtomicU64, load_in_recent_interval: AtomicU64, last_update: RwLock, stats: Arc, - // Maximum number of streams for a staked connection in EMA window - // Note: EMA window can be different than stream throttling window. EMA is being calculated - // specifically for staked connections. 
Unstaked connections have fixed limit on - // stream load, which is tracked by `max_unstaked_load_in_throttling_window` field. - max_staked_load_in_ema_window: u64, - // Maximum number of streams for an unstaked connection in stream throttling window + max_staked_load_in_throttling_window: u64, max_unstaked_load_in_throttling_window: u64, max_streams_per_ms: u64, + staked_throttling_on_load_threshold: u64, // in streams/STREAM_LOAD_EMA_INTERVAL_MS + staked_throttling_enabled: AtomicBool, } impl StakedStreamLoadEMA { @@ -49,28 +51,37 @@ impl StakedStreamLoadEMA { max_streams_per_ms: u64, ) -> Self { let allow_unstaked_streams = max_unstaked_connections > 0; - let max_staked_load_in_ema_window = if allow_unstaked_streams { - (max_streams_per_ms - - Percentage::from(EXPECTED_UNSTAKED_STREAMS_PERCENT).apply_to(max_streams_per_ms)) - * EMA_WINDOW_MS + let max_staked_load_in_ms = if allow_unstaked_streams { + max_streams_per_ms + - Percentage::from(EXPECTED_UNSTAKED_STREAMS_PERCENT).apply_to(max_streams_per_ms) } else { - max_streams_per_ms * EMA_WINDOW_MS + max_streams_per_ms }; + let max_staked_load_in_ema_interval = max_staked_load_in_ms * STREAM_LOAD_EMA_INTERVAL_MS; + let max_staked_load_in_throttling_window = + max_staked_load_in_ms * STREAM_THROTTLING_INTERVAL_MS; + let max_unstaked_load_in_throttling_window = if allow_unstaked_streams { MAX_UNSTAKED_TPS * STREAM_THROTTLING_INTERVAL_MS / 1000 } else { 0 }; + let staked_throttling_on_load_threshold = + Percentage::from(STAKED_THROTTLING_ON_LOAD_THRESHOLD_PERCENT) + .apply_to(max_staked_load_in_ema_interval); + Self { current_load_ema: AtomicU64::default(), load_in_recent_interval: AtomicU64::default(), last_update: RwLock::new(Instant::now()), stats, - max_staked_load_in_ema_window, + max_staked_load_in_throttling_window, max_unstaked_load_in_throttling_window, max_streams_per_ms, + staked_throttling_on_load_threshold, + staked_throttling_enabled: AtomicBool::new(false), } } @@ -105,7 +116,10 @@ impl 
StakedStreamLoadEMA { ); for _ in 0..num_extra_updates { - updated_load_ema = Self::ema_function(updated_load_ema, load_in_recent_interval); + updated_load_ema = Self::ema_function(updated_load_ema, 0); + if updated_load_ema == 0 { + break; + } } let Ok(updated_load_ema) = u64::try_from(updated_load_ema) else { @@ -116,6 +130,13 @@ impl StakedStreamLoadEMA { return; }; + if self.staked_throttling_on_load_threshold > 0 { + self.staked_throttling_enabled.store( + updated_load_ema >= self.staked_throttling_on_load_threshold, + Ordering::Relaxed, + ); + } + self.current_load_ema .store(updated_load_ema, Ordering::Relaxed); self.stats @@ -152,40 +173,17 @@ impl StakedStreamLoadEMA { match peer_type { ConnectionPeerType::Unstaked => self.max_unstaked_load_in_throttling_window, ConnectionPeerType::Staked(stake) => { - // If the current load is low, cap it to 25% of max_load. - let current_load = u128::from(cmp::max( - self.current_load_ema.load(Ordering::Relaxed), - self.max_staked_load_in_ema_window / 4, - )); - - // Formula is (max_load ^ 2 / current_load) * (stake / total_stake) - let capacity_in_ema_window = (u128::from(self.max_staked_load_in_ema_window) - * u128::from(self.max_staked_load_in_ema_window) - * u128::from(stake)) - / (current_load * u128::from(total_stake)); - - let calculated_capacity = capacity_in_ema_window - * u128::from(STREAM_THROTTLING_INTERVAL_MS) - / u128::from(EMA_WINDOW_MS); - let calculated_capacity = u64::try_from(calculated_capacity).unwrap_or_else(|_| { - error!( - "Failed to convert stream capacity {calculated_capacity} to u64. Using \ - minimum load capacity" - ); - self.stats - .stream_load_capacity_overflow - .fetch_add(1, Ordering::Relaxed); - self.max_unstaked_load_in_throttling_window - .saturating_add(1) - }); - - // 1 is added to `max_unstaked_load_in_throttling_window` to guarantee that staked - // clients get at least 1 more number of streams than unstaked connections. 
- cmp::max( - calculated_capacity, - self.max_unstaked_load_in_throttling_window - .saturating_add(1), - ) + if self.staked_throttling_enabled.load(Ordering::Relaxed) { + // 1 is added to `max_unstaked_load_in_throttling_window` to guarantee that staked + // clients get at least 1 more number of streams than unstaked connections. + self.max_staked_load_in_throttling_window + .saturating_mul(stake) + .checked_div(total_stake) + .unwrap_or(self.max_unstaked_load_in_throttling_window + 1) + .max(self.max_unstaked_load_in_throttling_window + 1) + } else { + self.max_staked_load_in_throttling_window + } } } } @@ -277,14 +275,10 @@ pub(crate) async fn throttle_stream( pub mod test { use { super::*, - crate::{ - nonblocking::stream_throttle::STREAM_LOAD_EMA_INTERVAL_MS, - quic::{StreamerStats, DEFAULT_MAX_STREAMS_PER_MS, DEFAULT_MAX_UNSTAKED_CONNECTIONS}, - }, - std::{ - sync::{atomic::Ordering, Arc}, - time::{Duration, Instant}, + crate::quic::{ + StreamerStats, DEFAULT_MAX_STREAMS_PER_MS, DEFAULT_MAX_UNSTAKED_CONNECTIONS, }, + std::sync::{atomic::Ordering, Arc}, }; #[test] @@ -294,7 +288,6 @@ pub mod test { DEFAULT_MAX_UNSTAKED_CONNECTIONS, DEFAULT_MAX_STREAMS_PER_MS, )); - // 50K packets per ms * 20% / 500 max unstaked connections assert_eq!( load_ema.available_load_capacity_in_throttling_duration( ConnectionPeerType::Unstaked, @@ -305,266 +298,121 @@ pub mod test { } #[test] - fn test_max_streams_for_staked_connection() { - let load_ema = Arc::new(StakedStreamLoadEMA::new( + fn test_staked_throttling_on_off() { + let mut load_ema = StakedStreamLoadEMA::new( Arc::new(StreamerStats::default()), DEFAULT_MAX_UNSTAKED_CONNECTIONS, DEFAULT_MAX_STREAMS_PER_MS, - )); - - // EMA load is used for staked connections to calculate max number of allowed streams. 
- // EMA window = 5ms interval * 10 intervals = 50ms - // max streams per window = 500K streams/sec * 80% = 400K/sec = 20K per 50ms - // max_streams in 50ms = ((20K * 20K) / ema_load) * stake / total_stake - // - // Stream throttling window is 100ms. So it'll double the amount of max streams. - // max_streams in 100ms (throttling window) = 2 * ((20K * 20K) / ema_load) * stake / total_stake - - load_ema.current_load_ema.store(20000, Ordering::Relaxed); - // ema_load = 20K, stake = 15, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((20K * 20K) / 20K) * 15 / 10K = 60 - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(15), - 10000, - ), - 60 ); - // ema_load = 20K, stake = 1K, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((20K * 20K) / 20K) * 1K / 10K = 4K - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1000), - 10000, - ), - 4000 - ); + load_ema.staked_throttling_on_load_threshold = 10; - load_ema.current_load_ema.store(5000, Ordering::Relaxed); - // ema_load = 5K, stake = 15, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((20K * 20K) / 5K) * 15 / 10K = 240 - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(15), - 10000, - ), - 240 - ); + load_ema.current_load_ema.store(12, Ordering::Relaxed); + load_ema + .load_in_recent_interval + .store(12, Ordering::Relaxed); + load_ema.update_ema(u128::from(STREAM_LOAD_EMA_INTERVAL_MS)); + assert!(load_ema.staked_throttling_enabled.load(Ordering::Relaxed)); + + load_ema.current_load_ema.store(4, Ordering::Relaxed); + load_ema.load_in_recent_interval.store(0, Ordering::Relaxed); + load_ema.update_ema(u128::from(STREAM_LOAD_EMA_INTERVAL_MS)); + assert!(!load_ema.staked_throttling_enabled.load(Ordering::Relaxed)); + } - // ema_load = 5K, stake = 1K, total_stake = 10K - // max_streams in 100ms (throttling 
window) = 2 * ((20K * 20K) / 5K) * 1K / 10K = 16000 - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1000), - 10000, - ), - 16000 + #[test] + fn test_staked_capacity_shares_when_throttled() { + let mut load_ema = StakedStreamLoadEMA::new( + Arc::new(StreamerStats::default()), + DEFAULT_MAX_UNSTAKED_CONNECTIONS, + DEFAULT_MAX_STREAMS_PER_MS, ); - // At 4000, the load is less than 25% of max_load (20K). - // Test that we cap it to 25%, yielding the same result as if load was 5000. - load_ema.current_load_ema.store(4000, Ordering::Relaxed); - // function = ((20K * 20K) / 25% of 20K) * stake / total_stake - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(15), - 10000, - ), - 240 - ); + load_ema + .staked_throttling_enabled + .store(true, Ordering::Relaxed); + load_ema.max_staked_load_in_throttling_window = 100; + load_ema.max_unstaked_load_in_throttling_window = 20; - // function = ((20K * 20K) / 25% of 20K) * stake / total_stake assert_eq!( load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1000), - 10000, + ConnectionPeerType::Staked(10), + 100 ), - 16000 + load_ema.max_unstaked_load_in_throttling_window + 1 ); - - // At 1/40000 stake weight, and minimum load, it should still allow - // max_unstaked_load_in_throttling_window + 1 streams. 
assert_eq!( load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1), - 40000, + ConnectionPeerType::Staked(50), + 100 ), - load_ema - .max_unstaked_load_in_throttling_window - .saturating_add(1) + 50 ); } #[test] - fn test_max_streams_for_staked_connection_with_no_unstaked_connections() { - let load_ema = Arc::new(StakedStreamLoadEMA::new( + fn test_no_throttle_below_threshold() { + let mut load_ema = StakedStreamLoadEMA::new( Arc::new(StreamerStats::default()), - 0, + DEFAULT_MAX_UNSTAKED_CONNECTIONS, DEFAULT_MAX_STREAMS_PER_MS, - )); - - // EMA load is used for staked connections to calculate max number of allowed streams. - // EMA window = 5ms interval * 10 intervals = 50ms - // max streams per window = 500K streams/sec = 25K per 50ms - // max_streams in 50ms = ((25K * 25K) / ema_load) * stake / total_stake - // - // Stream throttling window is 100ms. So it'll double the amount of max streams. - // max_streams in 100ms (throttling window) = 2 * ((25K * 25K) / ema_load) * stake / total_stake - - load_ema.current_load_ema.store(20000, Ordering::Relaxed); - // ema_load = 20K, stake = 15, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((25K * 25K) / 20K) * 15 / 10K = 93.75 - // Loss of precision occurs here because max streams is computed for 50ms window and then doubled. 
- assert!( - (92u64..=94).contains(&load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(15), - 10000 - )) - ); - - // ema_load = 20K, stake = 1K, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((25K * 25K) / 20K) * 1K / 10K = 6250 - assert!((6249u64..=6250).contains( - &load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1000), - 10000 - ) - )); - - load_ema.current_load_ema.store(10000, Ordering::Relaxed); - // ema_load = 10K, stake = 15, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((25K * 25K) / 10K) * 15 / 10K = 187.5 - // Loss of precision occurs here because max streams is computed for 50ms window and then doubled. - assert!( - (186u64..=188).contains(&load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(15), - 10000 - )) - ); - - // ema_load = 10K, stake = 1K, total_stake = 10K - // max_streams in 100ms (throttling window) = 2 * ((25K * 25K) / 10K) * 1K / 10K = 12500 - assert!((12499u64..=12500).contains( - &load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1000), - 10000 - ) - )); - - // At 4000, the load is less than 25% of max_load (25K). - // Test that we cap it to 25%, yielding the same result as if load was 25K/4. 
- load_ema.current_load_ema.store(4000, Ordering::Relaxed); - // function = ((20K * 20K) / 25% of 25K) * stake / total_stake - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(15), - 10000 - ), - 300 ); - // function = ((25K * 25K) / 25% of 25K) * stake / total_stake - assert_eq!( - load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1000), - 10000 - ), - 20000 - ); + load_ema + .staked_throttling_enabled + .store(false, Ordering::Relaxed); + load_ema.max_staked_load_in_throttling_window = 100; + load_ema.max_unstaked_load_in_throttling_window = 20; - // At 1/400000 stake weight, and minimum load, it should still allow - // max_unstaked_load_in_throttling_window + 1 streams. assert_eq!( load_ema.available_load_capacity_in_throttling_duration( - ConnectionPeerType::Staked(1), - 400000 + ConnectionPeerType::Staked(10), + 100 ), - load_ema - .max_unstaked_load_in_throttling_window - .saturating_add(1) + load_ema.max_staked_load_in_throttling_window ); } #[test] - fn test_update_ema() { - let stream_load_ema = Arc::new(StakedStreamLoadEMA::new( + fn test_ema_decay_handles_missing_intervals() { + let load_ema = StakedStreamLoadEMA::new( Arc::new(StreamerStats::default()), DEFAULT_MAX_UNSTAKED_CONNECTIONS, DEFAULT_MAX_STREAMS_PER_MS, - )); - stream_load_ema - .load_in_recent_interval - .store(2500, Ordering::Relaxed); - stream_load_ema - .current_load_ema - .store(2000, Ordering::Relaxed); - - stream_load_ema.update_ema(5); - - let updated_ema = stream_load_ema.current_load_ema.load(Ordering::Relaxed); - assert_eq!(updated_ema, 2090); - - stream_load_ema - .load_in_recent_interval - .store(2500, Ordering::Relaxed); - - stream_load_ema.update_ema(5); - - let updated_ema = stream_load_ema.current_load_ema.load(Ordering::Relaxed); - assert_eq!(updated_ema, 2164); - } + ); - #[test] - fn test_update_ema_missing_interval() { - let stream_load_ema = Arc::new(StakedStreamLoadEMA::new( - 
Arc::new(StreamerStats::default()), - DEFAULT_MAX_UNSTAKED_CONNECTIONS, - DEFAULT_MAX_STREAMS_PER_MS, - )); - stream_load_ema + load_ema.current_load_ema.store(100, Ordering::Relaxed); + load_ema .load_in_recent_interval - .store(2500, Ordering::Relaxed); - stream_load_ema - .current_load_ema - .store(2000, Ordering::Relaxed); + .store(100, Ordering::Relaxed); - stream_load_ema.update_ema(8); + load_ema.update_ema(u128::from(STREAM_LOAD_EMA_INTERVAL_MS * 3)); - let updated_ema = stream_load_ema.current_load_ema.load(Ordering::Relaxed); - assert_eq!(updated_ema, 2164); + let expected = StakedStreamLoadEMA::ema_function( + StakedStreamLoadEMA::ema_function(StakedStreamLoadEMA::ema_function(100, 100), 0), + 0, + ); + assert_eq!( + load_ema.current_load_ema.load(Ordering::Relaxed), + u64::try_from(expected).unwrap() + ); } #[test] - fn test_update_ema_if_needed() { - let stream_load_ema = Arc::new(StakedStreamLoadEMA::new( + fn test_total_stake_zero_safety() { + let load_ema = StakedStreamLoadEMA::new( Arc::new(StreamerStats::default()), DEFAULT_MAX_UNSTAKED_CONNECTIONS, DEFAULT_MAX_STREAMS_PER_MS, - )); - stream_load_ema - .load_in_recent_interval - .store(2500, Ordering::Relaxed); - stream_load_ema - .current_load_ema - .store(2000, Ordering::Relaxed); - - stream_load_ema.update_ema_if_needed(); - - let updated_ema = stream_load_ema.current_load_ema.load(Ordering::Relaxed); - assert_eq!(updated_ema, 2000); - - let ema_interval = Duration::from_millis(STREAM_LOAD_EMA_INTERVAL_MS); - *stream_load_ema.last_update.write().unwrap() = - Instant::now().checked_sub(ema_interval).unwrap(); - - stream_load_ema.update_ema_if_needed(); - assert!( - Instant::now().duration_since(*stream_load_ema.last_update.read().unwrap()) - < ema_interval ); + load_ema + .staked_throttling_enabled + .store(true, Ordering::Relaxed); - let updated_ema = stream_load_ema.current_load_ema.load(Ordering::Relaxed); - assert_eq!(updated_ema, 2090); + assert_eq!( + load_ema + 
.available_load_capacity_in_throttling_duration(ConnectionPeerType::Staked(10), 0), + load_ema.max_unstaked_load_in_throttling_window + 1 + ); } } diff --git a/streamer/src/nonblocking/swqos.rs b/streamer/src/nonblocking/swqos.rs index 99a8dd8582de1c..6408070062cf2c 100644 --- a/streamer/src/nonblocking/swqos.rs +++ b/streamer/src/nonblocking/swqos.rs @@ -6,8 +6,7 @@ use { get_connection_stake, update_open_connections_stat, ClientConnectionTracker, ConnectionHandlerError, ConnectionPeerType, ConnectionTable, ConnectionTableKey, ConnectionTableType, CONNECTION_CLOSE_CODE_DISALLOWED, - CONNECTION_CLOSE_CODE_EXCEED_MAX_STREAM_COUNT, CONNECTION_CLOSE_REASON_DISALLOWED, - CONNECTION_CLOSE_REASON_EXCEED_MAX_STREAM_COUNT, + CONNECTION_CLOSE_REASON_DISALLOWED, }, stream_throttle::{ throttle_stream, ConnectionStreamCounter, StakedStreamLoadEMA, @@ -23,10 +22,6 @@ use { }, percentage::Percentage, quinn::{Connection, VarInt}, - solana_quic_definitions::{ - QUIC_MAX_STAKED_CONCURRENT_STREAMS, QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS, - QUIC_MIN_STAKED_CONCURRENT_STREAMS, QUIC_TOTAL_STAKED_CONCURRENT_STREAMS, - }, solana_time_utils as timing, std::{ future::Future, @@ -39,6 +34,27 @@ use { tokio_util::sync::CancellationToken, }; +// Empirically found max number of concurrent streams +// that seems to maximize TPS on GCE (higher values don't seem to +// give significant improvement or seem to impact stability) +pub const QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS: usize = 128; +pub const QUIC_MIN_STAKED_CONCURRENT_STREAMS: usize = 128; + +// Set the maximum concurrent stream numbers to avoid excessive streams. +// The value was lowered from 2048 to reduce contention of the limited +// receive_window among the streams which is observed in CI bench-tests with +// forwarded packets from staked nodes. 
+pub const QUIC_MAX_STAKED_CONCURRENT_STREAMS: usize = 512; + +pub const QUIC_TOTAL_STAKED_CONCURRENT_STREAMS: usize = 100_000; +/// Below this RTT, we apply the legacy logic (no BDP scaling) +/// Above this RTT, we increase the RX window and number of streams +/// as RTT increases to preserve reasonable bandwidth. +const REFERENCE_RTT_MS: u64 = 50; + +/// Above this RTT we stop scaling for BDP +const MAX_RTT_MS: u64 = 350; + #[derive(Clone)] pub struct SwQosConfig { pub max_streams_per_ms: u64, @@ -130,8 +146,12 @@ impl SwQos { } } -fn compute_max_allowed_uni_streams(peer_type: ConnectionPeerType, total_stake: u64) -> usize { - match peer_type { +fn compute_max_allowed_uni_streams( + rtt_millis: u64, + peer_type: ConnectionPeerType, + total_stake: u64, +) -> u32 { + let streams = match peer_type { ConnectionPeerType::Staked(peer_stake) => { // No checked math for f64 type. So let's explicitly check for 0 here if total_stake == 0 || peer_stake > total_stake { @@ -154,7 +174,10 @@ fn compute_max_allowed_uni_streams(peer_type: ConnectionPeerType, total_stake: u } } ConnectionPeerType::Unstaked => QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS, - } + }; + let streams = + streams as u64 * rtt_millis.clamp(REFERENCE_RTT_MS, MAX_RTT_MS) / REFERENCE_RTT_MS; + streams.min(u32::MAX as u64) as u32 } impl SwQos { @@ -172,58 +195,51 @@ impl SwQos { ), ConnectionHandlerError, > { - if let Ok(max_uni_streams) = VarInt::from_u64(compute_max_allowed_uni_streams( + // get current RTT and limit it to MAX_RTT_MS + let rtt_millis = connection.rtt().as_millis() as u64; + let max_uni_streams = VarInt::from_u32(compute_max_allowed_uni_streams( + rtt_millis, conn_context.peer_type(), conn_context.total_stake, - ) as u64) - { - let remote_addr = connection.remote_address(); + )); + + let remote_addr = connection.remote_address(); + + debug!( + "Peer type {:?}, total stake {}, max streams {} from peer {}", + conn_context.peer_type(), + conn_context.total_stake, + max_uni_streams.into_inner(), + 
remote_addr, + ); - debug!( - "Peer type {:?}, total stake {}, max streams {} from peer {}", + let max_connections_per_peer = match conn_context.peer_type() { + ConnectionPeerType::Unstaked => self.config.max_connections_per_unstaked_peer, + ConnectionPeerType::Staked(_) => self.config.max_connections_per_staked_peer, + }; + if let Some((last_update, cancel_connection, stream_counter)) = connection_table_l + .try_add_connection( + ConnectionTableKey::new(remote_addr.ip(), conn_context.remote_pubkey), + remote_addr.port(), + client_connection_tracker, + Some(connection.clone()), conn_context.peer_type(), - conn_context.total_stake, - max_uni_streams.into_inner(), - remote_addr, - ); + conn_context.last_update.clone(), + max_connections_per_peer, + || Arc::new(ConnectionStreamCounter::new()), + ) + { + update_open_connections_stat(&self.stats, &connection_table_l); + drop(connection_table_l); - let max_connections_per_peer = match conn_context.peer_type() { - ConnectionPeerType::Unstaked => self.config.max_connections_per_unstaked_peer, - ConnectionPeerType::Staked(_) => self.config.max_connections_per_staked_peer, - }; - if let Some((last_update, cancel_connection, stream_counter)) = connection_table_l - .try_add_connection( - ConnectionTableKey::new(remote_addr.ip(), conn_context.remote_pubkey), - remote_addr.port(), - client_connection_tracker, - Some(connection.clone()), - conn_context.peer_type(), - conn_context.last_update.clone(), - max_connections_per_peer, - || Arc::new(ConnectionStreamCounter::new()), - ) - { - update_open_connections_stat(&self.stats, &connection_table_l); - drop(connection_table_l); - - connection.set_max_concurrent_uni_streams(max_uni_streams); - - Ok((last_update, cancel_connection, stream_counter)) - } else { - self.stats - .connection_add_failed - .fetch_add(1, Ordering::Relaxed); - Err(ConnectionHandlerError::ConnectionAddError) - } + connection.set_max_concurrent_uni_streams(max_uni_streams); + + Ok((last_update, cancel_connection, 
stream_counter)) } else { - connection.close( - CONNECTION_CLOSE_CODE_EXCEED_MAX_STREAM_COUNT.into(), - CONNECTION_CLOSE_REASON_EXCEED_MAX_STREAM_COUNT, - ); self.stats - .connection_add_failed_invalid_stream_count + .connection_add_failed .fetch_add(1, Ordering::Relaxed); - Err(ConnectionHandlerError::MaxStreamError) + Err(ConnectionHandlerError::ConnectionAddError) } } @@ -518,27 +534,35 @@ pub mod test { #[test] fn test_max_allowed_uni_streams() { assert_eq!( - compute_max_allowed_uni_streams(ConnectionPeerType::Unstaked, 0), - QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS + compute_max_allowed_uni_streams(REFERENCE_RTT_MS, ConnectionPeerType::Unstaked, 0), + QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS as u32 ); assert_eq!( - compute_max_allowed_uni_streams(ConnectionPeerType::Staked(10), 0), - QUIC_MIN_STAKED_CONCURRENT_STREAMS + compute_max_allowed_uni_streams(REFERENCE_RTT_MS, ConnectionPeerType::Staked(10), 0), + QUIC_MIN_STAKED_CONCURRENT_STREAMS as u32 ); let delta = (QUIC_TOTAL_STAKED_CONCURRENT_STREAMS - QUIC_MIN_STAKED_CONCURRENT_STREAMS) as f64; assert_eq!( - compute_max_allowed_uni_streams(ConnectionPeerType::Staked(1000), 10000), - QUIC_MAX_STAKED_CONCURRENT_STREAMS, + compute_max_allowed_uni_streams( + REFERENCE_RTT_MS, + ConnectionPeerType::Staked(1000), + 10000 + ), + QUIC_MAX_STAKED_CONCURRENT_STREAMS as u32, ); assert_eq!( - compute_max_allowed_uni_streams(ConnectionPeerType::Staked(100), 10000), + compute_max_allowed_uni_streams( + REFERENCE_RTT_MS, + ConnectionPeerType::Staked(100), + 10000 + ), ((delta / (100_f64)) as usize + QUIC_MIN_STAKED_CONCURRENT_STREAMS) - .min(QUIC_MAX_STAKED_CONCURRENT_STREAMS) + .min(QUIC_MAX_STAKED_CONCURRENT_STREAMS) as u32 ); assert_eq!( - compute_max_allowed_uni_streams(ConnectionPeerType::Unstaked, 10000), - QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS + compute_max_allowed_uni_streams(REFERENCE_RTT_MS, ConnectionPeerType::Unstaked, 10000), + QUIC_MAX_UNSTAKED_CONCURRENT_STREAMS as u32 ); } } diff --git 
a/streamer/src/nonblocking/testing_utilities.rs b/streamer/src/nonblocking/testing_utilities.rs index 36ca9e95fb3ed5..b9f5b7fe37fc37 100644 --- a/streamer/src/nonblocking/testing_utilities.rs +++ b/streamer/src/nonblocking/testing_utilities.rs @@ -6,7 +6,7 @@ use { quic::spawn_server, swqos::{SwQos, SwQosConfig}, }, - quic::{QuicServerError, QuicStreamerConfig, StreamerStats}, + quic::{QuicServerError, QuicStreamerConfig, StreamerStats, QUIC_MAX_TIMEOUT}, streamer::StakedNodes, }, crossbeam_channel::{unbounded, Receiver, Sender}, @@ -20,7 +20,6 @@ use { SocketConfiguration as SocketConfig, }, solana_perf::packet::PacketBatch, - solana_quic_definitions::{QUIC_KEEP_ALIVE, QUIC_MAX_TIMEOUT, QUIC_SEND_FAIRNESS}, solana_tls_utils::{new_dummy_x509_certificate, tls_client_config_builder}, std::{ net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket}, @@ -31,6 +30,11 @@ use { tokio_util::sync::CancellationToken, }; +/// Duration for QUIC keep-alive in tests. Typically tests are running for shorter duration that +/// connection timeout and keep-alive is not strictly necessary. But for longer running tests, it +/// makes sense to have keep-alive enable and set the value to be around half of the connection timeout. +const QUIC_KEEP_ALIVE_FOR_TESTS: Duration = Duration::from_secs(5); + /// Spawn a streamer instance in the current tokio runtime. 
pub fn spawn_stake_weighted_qos_server( name: &'static str, @@ -80,8 +84,8 @@ pub fn get_client_config(keypair: &Keypair) -> ClientConfig { let mut transport_config = TransportConfig::default(); let timeout = IdleTimeout::try_from(QUIC_MAX_TIMEOUT).unwrap(); transport_config.max_idle_timeout(Some(timeout)); - transport_config.keep_alive_interval(Some(QUIC_KEEP_ALIVE)); - transport_config.send_fairness(QUIC_SEND_FAIRNESS); + transport_config.keep_alive_interval(Some(QUIC_KEEP_ALIVE_FOR_TESTS)); + transport_config.send_fairness(false); config.transport_config(Arc::new(transport_config)); config diff --git a/streamer/src/quic.rs b/streamer/src/quic.rs index 78bd9ebff378d7..2616e467d82621 100644 --- a/streamer/src/quic.rs +++ b/streamer/src/quic.rs @@ -18,8 +18,7 @@ use { solana_keypair::Keypair, solana_packet::PACKET_DATA_SIZE, solana_perf::packet::PacketBatch, - solana_quic_definitions::{NotifyKeyUpdate, QUIC_MAX_TIMEOUT}, - solana_tls_utils::{new_dummy_x509_certificate, tls_server_config_builder}, + solana_tls_utils::{new_dummy_x509_certificate, tls_server_config_builder, NotifyKeyUpdate}, std::{ net::UdpSocket, num::NonZeroUsize, @@ -34,6 +33,10 @@ use { tokio_util::sync::CancellationToken, }; +/// QUIC connection idle timeout. The connection will be closed if there are no activities on it +/// within the timeout window. The chosen value is default for quinn. 
+pub const QUIC_MAX_TIMEOUT: Duration = Duration::from_secs(30); + // allow multiple connections for NAT and any open/close overlap pub const DEFAULT_MAX_QUIC_CONNECTIONS_PER_UNSTAKED_PEER: usize = 8; diff --git a/tls-utils/Cargo.toml b/tls-utils/Cargo.toml index 6afd2b351f3a12..559c5f9295cd75 100644 --- a/tls-utils/Cargo.toml +++ b/tls-utils/Cargo.toml @@ -7,7 +7,7 @@ authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } license = { workspace = true } -edition = { workspace = true } +edition = "2024" [features] agave-unstable-api = [] diff --git a/tls-utils/src/config.rs b/tls-utils/src/config.rs index c0f038821547fd..2cc9d29eef3bd0 100644 --- a/tls-utils/src/config.rs +++ b/tls-utils/src/config.rs @@ -1,6 +1,6 @@ use { rustls::{ - client::WantsClientCert, server::WantsServerCert, ClientConfig, ConfigBuilder, ServerConfig, + ClientConfig, ConfigBuilder, ServerConfig, client::WantsClientCert, server::WantsServerCert, }, std::sync::Arc, }; diff --git a/tls-utils/src/crypto_provider.rs b/tls-utils/src/crypto_provider.rs index 1e1d754fda4de8..32dbd57b628392 100644 --- a/tls-utils/src/crypto_provider.rs +++ b/tls-utils/src/crypto_provider.rs @@ -1,4 +1,4 @@ -use rustls::{crypto::CryptoProvider, NamedGroup}; +use rustls::{NamedGroup, crypto::CryptoProvider}; pub fn crypto_provider() -> CryptoProvider { let mut provider = rustls::crypto::ring::default_provider(); diff --git a/tls-utils/src/lib.rs b/tls-utils/src/lib.rs index d61c22e8f8c0e3..9fd1c2440ada8d 100644 --- a/tls-utils/src/lib.rs +++ b/tls-utils/src/lib.rs @@ -27,3 +27,6 @@ pub use skip_server_verification::SkipServerVerification; mod skip_client_verification; pub use skip_client_verification::SkipClientVerification; + +pub mod notify_key_update; +pub use notify_key_update::NotifyKeyUpdate; diff --git a/tls-utils/src/notify_key_update.rs b/tls-utils/src/notify_key_update.rs new file mode 100644 index 00000000000000..978fcb9445e695 --- /dev/null +++ 
b/tls-utils/src/notify_key_update.rs @@ -0,0 +1,9 @@ +use solana_keypair::Keypair; + +/// [`NotifyKeyUpdate`] is a trait used for updating the certificate used for QUIC connections. +/// +/// When validator receives signal to update its identity through the admin_rpc, we need to change +/// the keypair used for QUIC connections. This trait provides an interface for that. +pub trait NotifyKeyUpdate { + fn update_key(&self, key: &Keypair) -> Result<(), Box>; +} diff --git a/tls-utils/src/skip_client_verification.rs b/tls-utils/src/skip_client_verification.rs index 5ecc9bb866c990..b7f8a1b53fdc91 100644 --- a/tls-utils/src/skip_client_verification.rs +++ b/tls-utils/src/skip_client_verification.rs @@ -1,11 +1,11 @@ use { crate::crypto_provider, rustls::{ + DigitallySignedStruct, DistinguishedName, Error, SignatureScheme, client::danger::HandshakeSignatureValid, crypto::CryptoProvider, pki_types::{CertificateDer, UnixTime}, server::danger::{ClientCertVerified, ClientCertVerifier}, - DigitallySignedStruct, DistinguishedName, Error, SignatureScheme, }, std::{fmt::Debug, sync::Arc}, }; diff --git a/tls-utils/src/skip_server_verification.rs b/tls-utils/src/skip_server_verification.rs index 4fdef2c389679a..5e1dab5b5c2d14 100644 --- a/tls-utils/src/skip_server_verification.rs +++ b/tls-utils/src/skip_server_verification.rs @@ -1,10 +1,10 @@ use { crate::crypto_provider, rustls::{ + DigitallySignedStruct, Error, SignatureScheme, client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier}, - crypto::{verify_tls12_signature, verify_tls13_signature, CryptoProvider}, + crypto::{CryptoProvider, verify_tls12_signature, verify_tls13_signature}, pki_types::{CertificateDer, ServerName, UnixTime}, - DigitallySignedStruct, Error, SignatureScheme, }, std::{ fmt::{self, Debug, Formatter}, diff --git a/transaction-status/src/parse_associated_token.rs b/transaction-status/src/parse_associated_token.rs index 96052dff7fd1c8..db66c2e0742924 100644 --- 
a/transaction-status/src/parse_associated_token.rs +++ b/transaction-status/src/parse_associated_token.rs @@ -24,8 +24,11 @@ pub fn parse_associated_token( let ata_instruction = if instruction.data.is_empty() { AssociatedTokenAccountInstruction::Create } else { - AssociatedTokenAccountInstruction::try_from_slice(&instruction.data) - .map_err(|_| ParseInstructionError::InstructionNotParsable(ParsableProgram::SplToken))? + AssociatedTokenAccountInstruction::try_from_slice(&instruction.data).map_err(|_| { + ParseInstructionError::InstructionNotParsable( + ParsableProgram::SplAssociatedTokenAccount, + ) + })? }; match ata_instruction { diff --git a/turbine/Cargo.toml b/turbine/Cargo.toml index 5b9c8ee87c4a12..2a84d45c4770df 100644 --- a/turbine/Cargo.toml +++ b/turbine/Cargo.toml @@ -7,7 +7,7 @@ description = { workspace = true } repository = { workspace = true } homepage = { workspace = true } license = { workspace = true } -edition = { workspace = true } +edition = "2024" [features] agave-unstable-api = [] diff --git a/turbine/benches/cluster_info.rs b/turbine/benches/cluster_info.rs index ed12982bbbad3c..b6714dd581014c 100644 --- a/turbine/benches/cluster_info.rs +++ b/turbine/benches/cluster_info.rs @@ -1,23 +1,23 @@ use { - bencher::{benchmark_group, benchmark_main, Bencher}, - rand::{rng, Rng}, + bencher::{Bencher, benchmark_group, benchmark_main}, + rand::{Rng, rng}, solana_entry::entry::Entry, solana_gossip::{cluster_info::ClusterInfo, contact_info::ContactInfo, node::Node}, solana_hash::Hash, solana_keypair::Keypair, solana_ledger::{ - genesis_utils::{create_genesis_config, GenesisConfigInfo}, + genesis_utils::{GenesisConfigInfo, create_genesis_config}, shred::{ProcessShredsStats, ReedSolomonCache, Shredder}, }, - solana_net_utils::{sockets::bind_to_localhost_unique, SocketAddrSpace}, + solana_net_utils::{SocketAddrSpace, sockets::bind_to_localhost_unique}, solana_pubkey as pubkey, solana_runtime::{bank::Bank, bank_forks::BankForks}, solana_signer::Signer, - 
solana_time_utils::{timestamp, AtomicInterval}, + solana_time_utils::{AtomicInterval, timestamp}, solana_turbine::{ broadcast_stage::{ - broadcast_metrics::TransmitShredsStats, broadcast_shreds, BroadcastSocket, - BroadcastStage, + BroadcastSocket, BroadcastStage, broadcast_metrics::TransmitShredsStats, + broadcast_shreds, }, cluster_nodes::ClusterNodesCache, }, diff --git a/turbine/benches/cluster_nodes.rs b/turbine/benches/cluster_nodes.rs index 9ed4afe582ec77..00b6c09cd7903b 100644 --- a/turbine/benches/cluster_nodes.rs +++ b/turbine/benches/cluster_nodes.rs @@ -1,6 +1,6 @@ use { - bencher::{benchmark_group, benchmark_main, Bencher}, - rand::{prelude::IndexedRandom as _, Rng}, + bencher::{Bencher, benchmark_group, benchmark_main}, + rand::{Rng, prelude::IndexedRandom as _}, solana_clock::Slot, solana_cluster_type::ClusterType, solana_gossip::contact_info::ContactInfo, @@ -10,7 +10,7 @@ use { solana_net_utils::SocketAddrSpace, solana_pubkey::Pubkey, solana_turbine::{ - cluster_nodes::{make_test_cluster, new_cluster_nodes, ClusterNodes}, + cluster_nodes::{ClusterNodes, make_test_cluster, new_cluster_nodes}, retransmit_stage::RetransmitStage, }, }; diff --git a/turbine/src/addr_cache.rs b/turbine/src/addr_cache.rs index b1033f5aae3f0d..52ffdf92d942fc 100644 --- a/turbine/src/addr_cache.rs +++ b/turbine/src/addr_cache.rs @@ -3,11 +3,11 @@ use { itertools::Itertools, solana_clock::Slot, solana_ledger::shred::{ - ShredId, ShredType, MAX_CODE_SHREDS_PER_SLOT, MAX_DATA_SHREDS_PER_SLOT, + MAX_CODE_SHREDS_PER_SLOT, MAX_DATA_SHREDS_PER_SLOT, ShredId, ShredType, }, std::{ cmp::Reverse, - collections::{hash_map::Entry, HashMap, VecDeque}, + collections::{HashMap, VecDeque, hash_map::Entry}, net::SocketAddr, }, }; @@ -397,9 +397,11 @@ mod tests { assert_eq!(entry.index_data, 2); entry.last_shred_in_slot = true; - assert!(entry - .get_shreds(7) - .eq([(ShredType::Code, 3), (ShredType::Data, 2)])); + assert!( + entry + .get_shreds(7) + .eq([(ShredType::Code, 3), 
(ShredType::Data, 2)]) + ); assert_eq!(entry.index_code, 3); assert_eq!(entry.index_data, 2); diff --git a/turbine/src/broadcast_stage.rs b/turbine/src/broadcast_stage.rs index ca72ccd041294d..4c7f1b913610ef 100644 --- a/turbine/src/broadcast_stage.rs +++ b/turbine/src/broadcast_stage.rs @@ -12,7 +12,7 @@ use { cluster_nodes::{ClusterNodes, ClusterNodesCache}, xdp::XdpSender, }, - crossbeam_channel::{unbounded, Receiver, RecvError, RecvTimeoutError, Sender}, + crossbeam_channel::{Receiver, RecvError, RecvTimeoutError, Sender, unbounded}, itertools::Itertools, solana_clock::Slot, solana_gossip::{ @@ -27,14 +27,14 @@ use { solana_poh::poh_recorder::WorkingBankEntry, solana_pubkey::Pubkey, solana_runtime::{bank::MAX_LEADER_SCHEDULE_STAKES, bank_forks::BankForks}, - solana_streamer::sendmmsg::{batch_send, SendPktsError}, - solana_time_utils::{timestamp, AtomicInterval}, + solana_streamer::sendmmsg::{SendPktsError, batch_send}, + solana_time_utils::{AtomicInterval, timestamp}, std::{ collections::{HashMap, HashSet}, net::UdpSocket, sync::{ - atomic::{AtomicBool, Ordering}, Arc, Mutex, RwLock, + atomic::{AtomicBool, Ordering}, }, thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, @@ -389,16 +389,18 @@ impl BroadcastStage { let retransmit_thread = Builder::new() .name("solBroadcastRtx".to_string()) - .spawn(move || loop { - if let Some(res) = Self::handle_error( - Self::check_retransmit_signals( - &blockstore, - &retransmit_slots_receiver, - &socket_sender, - ), - "solana-broadcaster-retransmit-check_retransmit_signals", - ) { - return res; + .spawn(move || { + loop { + if let Some(res) = Self::handle_error( + Self::check_retransmit_signals( + &blockstore, + &retransmit_slots_receiver, + &socket_sender, + ), + "solana-broadcaster-retransmit-check_retransmit_signals", + ) { + return res; + } } }) .unwrap(); @@ -424,9 +426,11 @@ impl BroadcastStage { .get_data_shreds_for_slot(new_retransmit_slot, 0) .expect("My own shreds must be reconstructable"), ); - 
debug_assert!(data_shreds - .iter() - .all(|shred| shred.slot() == new_retransmit_slot)); + debug_assert!( + data_shreds + .iter() + .all(|shred| shred.slot() == new_retransmit_slot) + ); if !data_shreds.is_empty() { socket_sender.send((data_shreds, None))?; } @@ -437,9 +441,11 @@ impl BroadcastStage { .expect("My own shreds must be reconstructable"), ); - debug_assert!(coding_shreds - .iter() - .all(|shred| shred.slot() == new_retransmit_slot)); + debug_assert!( + coding_shreds + .iter() + .all(|shred| shred.slot() == new_retransmit_slot) + ); if !coding_shreds.is_empty() { socket_sender.send((coding_shreds, None))?; } @@ -563,15 +569,15 @@ pub mod test { solana_keypair::Keypair, solana_ledger::{ blockstore::Blockstore, - genesis_utils::{create_genesis_config, GenesisConfigInfo}, + genesis_utils::{GenesisConfigInfo, create_genesis_config}, get_tmp_ledger_path_auto_delete, - shred::{max_ticks_per_n_shreds, ProcessShredsStats, ReedSolomonCache, Shredder}, + shred::{ProcessShredsStats, ReedSolomonCache, Shredder, max_ticks_per_n_shreds}, }, solana_runtime::bank::Bank, solana_signer::Signer, std::{ path::Path, - sync::{atomic::AtomicBool, Arc}, + sync::{Arc, atomic::AtomicBool}, thread::sleep, }, }; diff --git a/turbine/src/broadcast_stage/broadcast_duplicates_run.rs b/turbine/src/broadcast_stage/broadcast_duplicates_run.rs index 140d959b21809f..d5ded506263fc4 100644 --- a/turbine/src/broadcast_stage/broadcast_duplicates_run.rs +++ b/turbine/src/broadcast_stage/broadcast_duplicates_run.rs @@ -275,12 +275,16 @@ impl BroadcastRun for BroadcastDuplicatesRun { // Store the original shreds that this node replayed blockstore_sender.send((original_last_data_shred.clone(), None))?; - assert!(original_last_data_shred - .iter() - .all(|shred| shred.slot() == bank.slot())); - assert!(partition_last_data_shred - .iter() - .all(|shred| shred.slot() == bank.slot())); + assert!( + original_last_data_shred + .iter() + .all(|shred| shred.slot() == bank.slot()) + ); + assert!( + 
partition_last_data_shred + .iter() + .all(|shred| shred.slot() == bank.slot()) + ); if let Some(duplicate_slot_sender) = &self.config.duplicate_slot_sender { let _ = duplicate_slot_sender.send(bank.slot()); diff --git a/turbine/src/broadcast_stage/broadcast_metrics.rs b/turbine/src/broadcast_stage/broadcast_metrics.rs index 85513f0053fa41..57926b400e80b1 100644 --- a/turbine/src/broadcast_stage/broadcast_metrics.rs +++ b/turbine/src/broadcast_stage/broadcast_metrics.rs @@ -166,15 +166,15 @@ impl SlotBroadcastStats { if let Some(num_expected_batches) = batch_info.num_expected_batches { slot_batch_counter.num_expected_batches = Some(num_expected_batches); } - if let Some(num_expected_batches) = slot_batch_counter.num_expected_batches { - if slot_batch_counter.num_batches == num_expected_batches { - slot_batch_counter.broadcast_shred_stats.report_stats( - batch_info.slot, - batch_info.slot_start_ts, - batch_info.was_interrupted, - ); - should_delete = true; - } + if let Some(num_expected_batches) = slot_batch_counter.num_expected_batches + && slot_batch_counter.num_batches == num_expected_batches + { + slot_batch_counter.broadcast_shred_stats.report_stats( + batch_info.slot, + batch_info.slot_start_ts, + batch_info.was_interrupted, + ); + should_delete = true; } } if should_delete { diff --git a/turbine/src/broadcast_stage/broadcast_utils.rs b/turbine/src/broadcast_stage/broadcast_utils.rs index 2b556f372c4cd7..fe242f96ef8f19 100644 --- a/turbine/src/broadcast_stage/broadcast_utils.rs +++ b/turbine/src/broadcast_stage/broadcast_utils.rs @@ -6,7 +6,7 @@ use { solana_hash::Hash, solana_ledger::{ blockstore::Blockstore, - shred::{self, get_data_shred_bytes_per_batch_typical, ProcessShredsStats}, + shred::{self, ProcessShredsStats, get_data_shred_bytes_per_batch_typical}, }, solana_poh::poh_recorder::WorkingBankEntry, solana_runtime::bank::Bank, @@ -198,7 +198,7 @@ mod tests { super::*, crossbeam_channel::unbounded, solana_genesis_config::GenesisConfig, - 
solana_ledger::genesis_utils::{create_genesis_config, GenesisConfigInfo}, + solana_ledger::genesis_utils::{GenesisConfigInfo, create_genesis_config}, solana_pubkey::Pubkey, solana_system_transaction as system_transaction, solana_transaction::Transaction, diff --git a/turbine/src/broadcast_stage/standard_broadcast_run.rs b/turbine/src/broadcast_stage/standard_broadcast_run.rs index b8d953db962777..4f14a61379da76 100644 --- a/turbine/src/broadcast_stage/standard_broadcast_run.rs +++ b/turbine/src/broadcast_stage/standard_broadcast_run.rs @@ -10,8 +10,8 @@ use { solana_hash::Hash, solana_keypair::Keypair, solana_ledger::shred::{ - ProcessShredsStats, ReedSolomonCache, Shred, ShredType, Shredder, MAX_CODE_SHREDS_PER_SLOT, - MAX_DATA_SHREDS_PER_SLOT, + MAX_CODE_SHREDS_PER_SLOT, MAX_DATA_SHREDS_PER_SLOT, ProcessShredsStats, ReedSolomonCache, + Shred, ShredType, Shredder, }, solana_time_utils::AtomicInterval, std::{borrow::Cow, sync::RwLock}, @@ -278,16 +278,16 @@ impl StandardBroadcastRun { // https://github.com/solana-labs/solana/blob/92a0b310c/turbine/src/broadcast_stage/standard_broadcast_run.rs#L132-L142 // By contrast Self::insert skips the 1st data shred with index zero: // https://github.com/solana-labs/solana/blob/92a0b310c/turbine/src/broadcast_stage/standard_broadcast_run.rs#L367-L373 - if let Some(shred) = shreds.iter().find(|shred| shred.is_data()) { - if shred.index() == 0 { - blockstore - .insert_cow_shreds( - [Cow::Borrowed(shred)], - None, // leader_schedule - true, // is_trusted - ) - .expect("Failed to insert shreds in blockstore"); - } + if let Some(shred) = shreds.iter().find(|shred| shred.is_data()) + && shred.index() == 0 + { + blockstore + .insert_cow_shreds( + [Cow::Borrowed(shred)], + None, // leader_schedule + true, // is_trusted + ) + .expect("Failed to insert shreds in blockstore"); } to_shreds_time.stop(); @@ -483,9 +483,9 @@ mod test { blockstore::Blockstore, genesis_utils::create_genesis_config, get_tmp_ledger_path, - 
shred::{max_ticks_per_n_shreds, DATA_SHREDS_PER_FEC_BLOCK}, + shred::{DATA_SHREDS_PER_FEC_BLOCK, max_ticks_per_n_shreds}, }, - solana_net_utils::{sockets::bind_to_localhost_unique, SocketAddrSpace}, + solana_net_utils::{SocketAddrSpace, sockets::bind_to_localhost_unique}, solana_runtime::bank::Bank, solana_signer::Signer, std::{ops::Deref, sync::Arc, time::Duration}, @@ -678,18 +678,22 @@ mod test { ); // Broadcast stats for interrupted slot should be cleared - assert!(standard_broadcast_run - .transmit_shreds_stats - .lock() - .unwrap() - .get(interrupted_slot) - .is_none()); - assert!(standard_broadcast_run - .insert_shreds_stats - .lock() - .unwrap() - .get(interrupted_slot) - .is_none()); + assert!( + standard_broadcast_run + .transmit_shreds_stats + .lock() + .unwrap() + .get(interrupted_slot) + .is_none() + ); + assert!( + standard_broadcast_run + .insert_shreds_stats + .lock() + .unwrap() + .get(interrupted_slot) + .is_none() + ); // Try to fetch the incomplete ticks from blockstore, should succeed assert_eq!(blockstore.get_slot_entries(0, 0).unwrap(), ticks0); diff --git a/turbine/src/cluster_nodes.rs b/turbine/src/cluster_nodes.rs index daffefd1d28866..b34895eb076976 100644 --- a/turbine/src/cluster_nodes.rs +++ b/turbine/src/cluster_nodes.rs @@ -3,7 +3,7 @@ use { agave_feature_set::{self as feature_set}, itertools::Either, lazy_lru::LruCache, - rand::{seq::SliceRandom, Rng, RngCore, SeedableRng}, + rand::{Rng, RngCore, SeedableRng, seq::SliceRandom}, rand_chacha::{ChaCha8Rng, ChaChaRng}, solana_clock::{Epoch, Slot}, solana_cluster_type::ClusterType, diff --git a/turbine/src/retransmit_stage.rs b/turbine/src/retransmit_stage.rs index a49877b0f46db7..a9b2d0f64f6720 100644 --- a/turbine/src/retransmit_stage.rs +++ b/turbine/src/retransmit_stage.rs @@ -4,7 +4,7 @@ use { crate::{ addr_cache::AddrCache, cluster_nodes::{ - ClusterNodes, ClusterNodesCache, Error, DATA_PLANE_FANOUT, MAX_NUM_TURBINE_HOPS, + ClusterNodes, ClusterNodesCache, DATA_PLANE_FANOUT, Error, 
MAX_NUM_TURBINE_HOPS, }, xdp::XdpSender, }, @@ -12,7 +12,7 @@ use { crossbeam_channel::{Receiver, RecvError, Sender, TryRecvError}, lru::LruCache, rand::Rng, - rayon::{prelude::*, ThreadPool, ThreadPoolBuilder}, + rayon::{ThreadPool, ThreadPoolBuilder, prelude::*}, solana_clock::Slot, solana_gossip::cluster_info::ClusterInfo, solana_ledger::{ @@ -32,7 +32,7 @@ use { bank::{Bank, MAX_LEADER_SCHEDULE_STAKES}, bank_forks::BankForks, }, - solana_streamer::sendmmsg::{multi_target_send, SendPktsError}, + solana_streamer::sendmmsg::{SendPktsError, multi_target_send}, solana_time_utils::timestamp, std::{ borrow::Cow, @@ -40,8 +40,8 @@ use { net::{SocketAddr, UdpSocket}, ops::AddAssign, sync::{ - atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, RwLock, + atomic::{AtomicU64, AtomicUsize, Ordering}, }, thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, @@ -481,14 +481,14 @@ fn retransmit_shred( let num_nodes = match socket { RetransmitSocket::Xdp(sender) => { let mut sent = num_addrs; - if num_addrs > 0 { - if let Err(e) = sender.try_send(key.index() as usize, addrs.to_vec(), shred) { - log::warn!("xdp channel full: {e:?}"); - stats - .num_shreds_dropped_xdp_full - .fetch_add(num_addrs, Ordering::Relaxed); - sent = 0; - } + if num_addrs > 0 + && let Err(e) = sender.try_send(key.index() as usize, addrs.to_vec(), shred) + { + log::warn!("xdp channel full: {e:?}"); + stats + .num_shreds_dropped_xdp_full + .fetch_add(num_addrs, Ordering::Relaxed); + sent = 0; } sent } @@ -883,13 +883,13 @@ fn notify_subscribers( .unwrap() .notify_first_shred_received(slot); } - if let Some(votor_event_sender) = votor_event_sender { - if let Err(err) = votor_event_sender.send(VotorEvent::FirstShred(slot)) { - warn!( - "Sending {:?} failed as channel became disconnected. 
Ignoring.", - err.into_inner() - ); - } + if let Some(votor_event_sender) = votor_event_sender + && let Err(err) = votor_event_sender.send(VotorEvent::FirstShred(slot)) + { + warn!( + "Sending {:?} failed as channel became disconnected. Ignoring.", + err.into_inner() + ); } } diff --git a/turbine/src/sigverify_shreds.rs b/turbine/src/sigverify_shreds.rs index 2e731a361e8ece..f6186f04ac074d 100644 --- a/turbine/src/sigverify_shreds.rs +++ b/turbine/src/sigverify_shreds.rs @@ -1,12 +1,12 @@ use { crate::{ - cluster_nodes::{check_feature_activation, ClusterNodesCache, DATA_PLANE_FANOUT}, + cluster_nodes::{ClusterNodesCache, DATA_PLANE_FANOUT, check_feature_activation}, retransmit_stage::RetransmitStage, }, agave_feature_set as feature_set, crossbeam_channel::{Receiver, RecvTimeoutError, SendError, Sender}, itertools::{Either, Itertools}, - rayon::{prelude::*, ThreadPool, ThreadPoolBuilder}, + rayon::{ThreadPool, ThreadPoolBuilder, prelude::*}, solana_clock::Slot, solana_gossip::cluster_info::ClusterInfo, solana_keypair::Keypair, @@ -17,7 +17,7 @@ use { layout::{get_shred, resign_packet}, wire::is_retransmitter_signed_variant, }, - sigverify_shreds::{verify_shreds, LruCache, SlotPubkeys}, + sigverify_shreds::{LruCache, SlotPubkeys, verify_shreds}, }, solana_perf::{ self, @@ -31,8 +31,8 @@ use { std::{ num::NonZeroUsize, sync::{ - atomic::{AtomicUsize, Ordering}, Arc, RwLock, + atomic::{AtomicUsize, Ordering}, }, thread::{Builder, JoinHandle}, time::{Duration, Instant}, @@ -555,7 +555,7 @@ mod tests { use { super::*, rand::Rng, - solana_entry::entry::{create_ticks, Entry}, + solana_entry::entry::{Entry, create_ticks}, solana_gossip::contact_info::ContactInfo, solana_hash::Hash, solana_keypair::Keypair, diff --git a/turbine/src/xdp.rs b/turbine/src/xdp.rs index a5468d899e08f6..9e715829f7d43c 100644 --- a/turbine/src/xdp.rs +++ b/turbine/src/xdp.rs @@ -19,7 +19,7 @@ use { std::{ error::Error, net::{Ipv4Addr, SocketAddr}, - sync::{atomic::AtomicBool, Arc}, + sync::{Arc, 
atomic::AtomicBool}, thread, }, }; diff --git a/validator/src/commands/run/execute.rs b/validator/src/commands/run/execute.rs index 9a238c8b6b0dd3..adaaf6ca21e528 100644 --- a/validator/src/commands/run/execute.rs +++ b/validator/src/commands/run/execute.rs @@ -432,7 +432,7 @@ pub fn execute( let accounts_db_config = AccountsDbConfig { index: Some(accounts_index_config), account_indexes: Some(account_indexes.clone()), - bank_hash_details_dir: Some(ledger_path.clone()), + bank_hash_details_dir: ledger_path.clone(), shrink_paths: account_shrink_run_paths, shrink_ratio, read_cache_limit_bytes, diff --git a/vortexor/Cargo.toml b/vortexor/Cargo.toml index e8a7dd15659b79..24d66fe5bc3ef7 100644 --- a/vortexor/Cargo.toml +++ b/vortexor/Cargo.toml @@ -57,9 +57,9 @@ solana-metrics = { workspace = true } solana-net-utils = { workspace = true } solana-perf = { workspace = true } solana-pubkey = { workspace = true } -solana-quic-definitions = { workspace = true } solana-signer = { workspace = true } solana-streamer = { workspace = true } +solana-tls-utils = { workspace = true } solana-transaction-metrics-tracker = { workspace = true } solana-version = { workspace = true } thiserror = { workspace = true } diff --git a/vortexor/src/vortexor.rs b/vortexor/src/vortexor.rs index 8481b22913d8a7..e66c20ee9ad70a 100644 --- a/vortexor/src/vortexor.rs +++ b/vortexor/src/vortexor.rs @@ -9,7 +9,6 @@ use { multi_bind_in_range_with_config, SocketConfiguration as SocketConfig, }, solana_perf::packet::PacketBatch, - solana_quic_definitions::NotifyKeyUpdate, solana_streamer::{ nonblocking::{quic::DEFAULT_WAIT_FOR_CHUNK_TIMEOUT, swqos::SwQosConfig}, quic::{ @@ -18,6 +17,7 @@ use { }, streamer::StakedNodes, }, + solana_tls_utils::NotifyKeyUpdate, std::{ net::{SocketAddr, UdpSocket}, sync::{Arc, Mutex, RwLock}, diff --git a/votor-messages/Cargo.toml b/votor-messages/Cargo.toml index a3490f300caa74..1cf49b742fb080 100644 --- a/votor-messages/Cargo.toml +++ b/votor-messages/Cargo.toml @@ -22,7 
+22,10 @@ frozen-abi = [ [dependencies] agave-feature-set = { workspace = true } agave-logger = { workspace = true } +bitvec = { workspace = true } +bytemuck = { workspace = true } log = { workspace = true } +num_enum = { workspace = true } serde = { workspace = true } solana-address = { workspace = true, features = ["curve25519"] } solana-bls-signatures = { workspace = true, features = [ @@ -39,5 +42,9 @@ solana-frozen-abi-macro = { workspace = true, optional = true, features = [ solana-hash = { workspace = true, features = ["serde"] } solana-pubkey = { workspace = true } +[dev-dependencies] +agave-votor-messages = { path = ".", features = ["dev-context-only-utils"] } +tempfile = { workspace = true } + [lints] workspace = true diff --git a/votor-messages/src/consensus_message.rs b/votor-messages/src/consensus_message.rs index 4ea229223ace82..6e71991dc80d73 100644 --- a/votor-messages/src/consensus_message.rs +++ b/votor-messages/src/consensus_message.rs @@ -12,14 +12,13 @@ pub const BLS_KEYPAIR_DERIVE_SEED: &[u8; 9] = b"alpenglow"; /// Block, a (slot, hash) tuple pub type Block = (Slot, Hash); - /// A consensus vote. #[cfg_attr( feature = "frozen-abi", derive(AbiExample), frozen_abi(digest = "5eorzdc18a1sNEUDLAKPgrHCqHmA8ssuTwKSGsZLwBqR") )] -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] pub struct VoteMessage { /// The type of the vote. 
pub vote: Vote, @@ -55,23 +54,96 @@ impl CertificateType { /// Get the slot of the certificate pub fn slot(&self) -> Slot { match self { - Self::Finalize(slot) - | Self::FinalizeFast(slot, _) - | Self::Notarize(slot, _) - | Self::NotarizeFallback(slot, _) - | Self::Skip(slot) - | Self::Genesis(slot, _) => *slot, + CertificateType::Finalize(slot) + | CertificateType::FinalizeFast(slot, _) + | CertificateType::Notarize(slot, _) + | CertificateType::NotarizeFallback(slot, _) + | CertificateType::Genesis(slot, _) + | CertificateType::Skip(slot) => *slot, } } + /// Is this a fast finalize certificate? + pub fn is_fast_finalization(&self) -> bool { + matches!(self, Self::FinalizeFast(_, _)) + } + + /// Is this a finalize / fast finalize certificate? + pub fn is_finalization(&self) -> bool { + matches!(self, Self::Finalize(_) | Self::FinalizeFast(_, _)) + } + + /// Is this a notarize fallback certificate? + pub fn is_notarize_fallback(&self) -> bool { + matches!(self, Self::NotarizeFallback(_, _)) + } + + /// Is this a skip certificate? + pub fn is_skip(&self) -> bool { + matches!(self, Self::Skip(_)) + } + + /// Is this a genesis certificate? + pub fn is_genesis(&self) -> bool { + matches!(self, Self::Genesis(_, _)) + } + /// Gets the block associated with this certificate, if present pub fn to_block(self) -> Option { match self { - Self::Finalize(_) | Self::Skip(_) => None, + CertificateType::Finalize(_) | CertificateType::Skip(_) => None, + CertificateType::Notarize(slot, block_id) + | CertificateType::NotarizeFallback(slot, block_id) + | CertificateType::Genesis(slot, block_id) + | CertificateType::FinalizeFast(slot, block_id) => Some((slot, block_id)), + } + } + + /// Reconstructs the single source `Vote` payload for this certificate. + /// + /// This method is used primarily by the signature verifier. 
For + /// certificates formed by aggregating a single type of vote + /// (e.g., a `Notarize` certificate from `Notarize` votes), this function + /// reconstructs the canonical message payload that was signed by validators. + /// + /// For `NotarizeFallback` and `Skip` certificates, this function returns the + /// appropriate payload *only* if the certificate was formed from a single + /// vote type (e.g., exclusively from `Notarize` or `Skip` votes). For + /// certificates formed from a mix of two vote types, use the `to_source_votes` + /// function. + pub fn to_source_vote(self) -> Vote { + match self { Self::Notarize(slot, block_id) - | Self::NotarizeFallback(slot, block_id) | Self::FinalizeFast(slot, block_id) - | Self::Genesis(slot, block_id) => Some((slot, block_id)), + | Self::NotarizeFallback(slot, block_id) => Vote::new_notarization_vote(slot, block_id), + Self::Finalize(slot) => Vote::new_finalization_vote(slot), + Self::Skip(slot) => Vote::new_skip_vote(slot), + Self::Genesis(slot, block_id) => Vote::new_genesis_vote(slot, block_id), + } + } + + /// Reconstructs the two distinct source `Vote` payloads for this certificate. + /// + /// This method is primarily used by the signature verifier for certificates that + /// can be formed by aggregating two different types of votes. For example, a + /// `NotarizeFallback` certificate accepts both `Notarize` and `NotarizeFallback`. + /// + /// It reconstructs both potential message payloads that were signed by validators, which + /// the verifier uses to check the single aggregate signature. 
+ pub fn to_source_votes(self) -> Option<(Vote, Vote)> { + match self { + Self::NotarizeFallback(slot, block_id) => { + let vote1 = Vote::new_notarization_vote(slot, block_id); + let vote2 = Vote::new_notarization_fallback_vote(slot, block_id); + Some((vote1, vote2)) + } + Self::Skip(slot) => { + let vote1 = Vote::new_skip_vote(slot); + let vote2 = Vote::new_skip_fallback_vote(slot); + Some((vote1, vote2)) + } + // Other certificate types do not use Base3 encoding. + _ => None, } } } @@ -118,4 +190,17 @@ impl ConsensusMessage { rank, }) } + + /// Create a new certificate. + pub fn new_certificate( + cert_type: CertificateType, + bitmap: Vec, + signature: BLSSignature, + ) -> Self { + Self::Certificate(Certificate { + cert_type, + signature, + bitmap, + }) + } } diff --git a/votor-messages/src/vote.rs b/votor-messages/src/vote.rs index 205bb1ae8d620d..72c5710ced8dfa 100644 --- a/votor-messages/src/vote.rs +++ b/votor-messages/src/vote.rs @@ -12,7 +12,7 @@ use { derive(AbiExample, AbiEnumVisitor), frozen_abi(digest = "AgKoR2cpjUSVCW7Cpihob5nDiPcFt1PXmoPKWJg3zuSB") )] -#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Vote { /// A notarization vote Notarize(NotarizationVote), @@ -28,6 +28,23 @@ pub enum Vote { Genesis(GenesisVote), } +/// Enum of different types of [`Vote`]s. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum VoteType { + /// Finalize vote. + Finalize, + /// Notarize vote. + Notarize, + /// Notarize fallback vote. + NotarizeFallback, + /// Skip vote + Skip, + /// Skip fallback vote. + SkipFallback, + /// Genesis vote. 
+ Genesis, +} + impl Vote { /// Create a new notarization vote pub fn new_notarization_vote(slot: Slot, block_id: Hash) -> Self { @@ -54,7 +71,7 @@ impl Vote { Self::from(SkipFallbackVote { slot }) } - /// Create a new skip fallback vote + /// Create a new genesis vote pub fn new_genesis_vote(slot: Slot, block_id: Hash) -> Self { Self::from(GenesisVote { slot, block_id }) } @@ -106,14 +123,26 @@ impl Vote { matches!(self, Self::SkipFallback(_)) } + /// Whether the vote is a genesis vote + pub fn is_genesis_vote(&self) -> bool { + matches!(self, Self::Genesis(_)) + } + /// Whether the vote is a notarization or finalization pub fn is_notarization_or_finalization(&self) -> bool { matches!(self, Self::Notarize(_) | Self::Finalize(_)) } - /// Whether the vote is a genesis vote - pub fn is_genesis_vote(&self) -> bool { - matches!(self, Self::Genesis(_)) + /// Returns the [`VoteType`] for the vote. + pub fn get_type(&self) -> VoteType { + match self { + Vote::Notarize(_) => VoteType::Notarize, + Vote::NotarizeFallback(_) => VoteType::NotarizeFallback, + Vote::Skip(_) => VoteType::Skip, + Vote::SkipFallback(_) => VoteType::SkipFallback, + Vote::Finalize(_) => VoteType::Finalize, + Vote::Genesis(_) => VoteType::Genesis, + } } } @@ -159,7 +188,7 @@ impl From for Vote { derive(AbiExample), frozen_abi(digest = "5AdwChAjsj5QUXLdpDnGGK2L2nA8y8EajVXi6jsmTv1m") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct NotarizationVote { /// The slot this vote is cast for. pub slot: Slot, @@ -173,7 +202,7 @@ pub struct NotarizationVote { derive(AbiExample), frozen_abi(digest = "2XQ5N6YLJjF28w7cMFFUQ9SDgKuf9JpJNtAiXSPA8vR2") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct FinalizationVote { /// The slot this vote is cast for. 
pub slot: Slot, @@ -187,7 +216,7 @@ pub struct FinalizationVote { derive(AbiExample), frozen_abi(digest = "G8Nrx3sMYdnLpHsCNark3BGA58BmW2sqNnqjkYhQHtN") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct SkipVote { /// The slot this vote is cast for. pub slot: Slot, @@ -199,7 +228,7 @@ pub struct SkipVote { derive(AbiExample), frozen_abi(digest = "7j5ZPwwyz1FaG3fpyQv5PVnQXicdSmqSk8NvqzkG1Eqz") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct NotarizationFallbackVote { /// The slot this vote is cast for. pub slot: Slot, @@ -213,7 +242,7 @@ pub struct NotarizationFallbackVote { derive(AbiExample), frozen_abi(digest = "WsUNum8V62gjRU1yAnPuBMAQui4YvMwD1RwrzHeYkeF") )] -#[derive(Clone, Copy, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct SkipFallbackVote { /// The slot this vote is cast for. pub slot: Slot, @@ -227,8 +256,8 @@ pub struct SkipFallbackVote { )] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct GenesisVote { - /// The slot this genesis vote is for + /// The slot this vote is cast for. pub slot: Slot, - /// The block hash being voted on + /// The block id this vote is for. 
pub block_id: Hash, } diff --git a/votor/Cargo.toml b/votor/Cargo.toml index a1d46b32d67f10..da9cea1b9096ed 100644 --- a/votor/Cargo.toml +++ b/votor/Cargo.toml @@ -11,7 +11,10 @@ edition = { workspace = true } [features] agave-unstable-api = [] -dev-context-only-utils = ["solana-runtime/dev-context-only-utils"] +dev-context-only-utils = [ + "solana-runtime/dev-context-only-utils", + "agave-votor-messages/dev-context-only-utils", +] frozen-abi = [ "dep:solana-frozen-abi", "dep:solana-frozen-abi-macro", @@ -20,6 +23,8 @@ frozen-abi = [ "solana-ledger/frozen-abi", "solana-runtime/frozen-abi", "solana-signature/frozen-abi", + "solana-vote/frozen-abi", + "solana-vote-program/frozen-abi", "agave-votor-messages/frozen-abi", ] @@ -44,7 +49,7 @@ serde_bytes = { workspace = true } solana-account = { workspace = true } solana-accounts-db = { workspace = true } solana-bloom = { workspace = true } -solana-bls-signatures = { workspace = true } +solana-bls-signatures = { workspace = true, features = ["solana-signer-derive"] } solana-client = { workspace = true } solana-clock = { workspace = true } solana-connection-cache = { workspace = true } @@ -69,6 +74,7 @@ solana-runtime = { workspace = true } solana-signature = { workspace = true } solana-signer = { workspace = true } solana-signer-store = { workspace = true } +solana-streamer = { workspace = true } solana-time-utils = { workspace = true } solana-transaction = { workspace = true } solana-transaction-error = { workspace = true } @@ -77,6 +83,7 @@ solana-vote-program = { workspace = true } thiserror = { workspace = true } [dev-dependencies] +agave-votor = { path = ".", features = ["dev-context-only-utils"] } rand = { workspace = true } solana-net-utils = { workspace = true } solana-perf = { workspace = true, features = ["dev-context-only-utils"] } diff --git a/votor/src/common.rs b/votor/src/common.rs index fcb3a1001d8ffc..bdf58aaad1c895 100644 --- a/votor/src/common.rs +++ b/votor/src/common.rs @@ -1,6 +1,8 @@ use { 
agave_votor_messages::{ - consensus_message::CertificateType, migration::GENESIS_VOTE_THRESHOLD, vote::Vote, + consensus_message::CertificateType, + migration::GENESIS_VOTE_THRESHOLD, + vote::{Vote, VoteType}, }, std::time::Duration, }; @@ -8,70 +10,50 @@ use { // Core consensus types and constants pub type Stake = u64; -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum VoteType { - Finalize, - Notarize, - NotarizeFallback, - Skip, - SkipFallback, - Genesis, -} - -impl VoteType { - pub fn get_type(vote: &Vote) -> VoteType { - match vote { - Vote::Notarize(_) => VoteType::Notarize, - Vote::NotarizeFallback(_) => VoteType::NotarizeFallback, - Vote::Skip(_) => VoteType::Skip, - Vote::SkipFallback(_) => VoteType::SkipFallback, - Vote::Finalize(_) => VoteType::Finalize, - Vote::Genesis(_) => VoteType::Genesis, - } - } - - #[allow(dead_code)] - pub fn is_notarize_type(&self) -> bool { - matches!(self, Self::Notarize | Self::NotarizeFallback) +pub const fn conflicting_types(vote_type: VoteType) -> &'static [VoteType] { + match vote_type { + VoteType::Finalize => &[VoteType::NotarizeFallback, VoteType::Skip], + VoteType::Notarize => &[VoteType::Skip, VoteType::NotarizeFallback], + VoteType::NotarizeFallback => &[VoteType::Finalize, VoteType::Notarize], + VoteType::Skip => &[ + VoteType::Finalize, + VoteType::Notarize, + VoteType::SkipFallback, + ], + VoteType::SkipFallback => &[VoteType::Skip], + VoteType::Genesis => &[ + VoteType::Finalize, + VoteType::Notarize, + VoteType::NotarizeFallback, + VoteType::Skip, + VoteType::SkipFallback, + ], } } -/// For a given [`CertificateType`], returns the fractional stake, the [`Vote`], and the optional fallback [`Vote`] required to construct it. +/// Lookup from `CertificateId` to the `VoteType`s that contribute, +/// as well as the stake fraction required for certificate completion. /// -/// Must be in sync with [`vote_to_certificate_ids`]. 
-pub(crate) fn certificate_limits_and_votes( +/// Must be in sync with `vote_to_cert_types` +pub(crate) const fn certificate_limits_and_vote_types( cert_type: &CertificateType, -) -> (f64, Vote, Option) { +) -> (f64, &'static [VoteType]) { match cert_type { - CertificateType::Notarize(slot, block_id) => { - (0.6, Vote::new_notarization_vote(*slot, *block_id), None) - } - CertificateType::NotarizeFallback(slot, block_id) => ( - 0.6, - Vote::new_notarization_vote(*slot, *block_id), - Some(Vote::new_notarization_fallback_vote(*slot, *block_id)), - ), - CertificateType::FinalizeFast(slot, block_id) => { - (0.8, Vote::new_notarization_vote(*slot, *block_id), None) + CertificateType::Notarize(_, _) => (0.6, &[VoteType::Notarize]), + CertificateType::NotarizeFallback(_, _) => { + (0.6, &[VoteType::Notarize, VoteType::NotarizeFallback]) } - CertificateType::Finalize(slot) => (0.6, Vote::new_finalization_vote(*slot), None), - CertificateType::Skip(slot) => ( - 0.6, - Vote::new_skip_vote(*slot), - Some(Vote::new_skip_fallback_vote(*slot)), - ), - CertificateType::Genesis(slot, block_id) => ( - GENESIS_VOTE_THRESHOLD, - Vote::new_genesis_vote(*slot, *block_id), - None, - ), + CertificateType::FinalizeFast(_, _) => (0.8, &[VoteType::Notarize]), + CertificateType::Finalize(_) => (0.6, &[VoteType::Finalize]), + CertificateType::Skip(_) => (0.6, &[VoteType::Skip, VoteType::SkipFallback]), + CertificateType::Genesis(_, _) => (GENESIS_VOTE_THRESHOLD, &[VoteType::Genesis]), } } /// Lookup from `Vote` to the `CertificateId`s the vote accounts for /// /// Must be in sync with `certificate_limits_and_vote_types` and `VoteType::get_type` -pub fn vote_to_certificate_ids(vote: &Vote) -> Vec { +pub fn vote_to_cert_types(vote: &Vote) -> Vec { match vote { Vote::Notarize(vote) => vec![ CertificateType::Notarize(vote.slot, vote.block_id), diff --git a/votor/src/consensus_metrics.rs b/votor/src/consensus_metrics.rs index cb20e8b5f6ca47..dab12f0ff03d15 100644 --- 
a/votor/src/consensus_metrics.rs +++ b/votor/src/consensus_metrics.rs @@ -18,7 +18,7 @@ use { }, }; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum ConsensusMetricsEvent { /// A vote was received from the node with `id`. Vote { id: Pubkey, vote: Vote }, @@ -86,7 +86,7 @@ impl NodeVoteMetrics { Vote::Skip(_) => self.skip.increment(elapsed), Vote::SkipFallback(_) => self.skip_fallback.increment(elapsed), Vote::Finalize(_) => self.final_.increment(elapsed), - Vote::Genesis(_) => Ok(()), + Vote::Genesis(_) => Ok(()), // Only for migration, tracked elsewhere }; match res { Ok(()) => (), @@ -117,10 +117,13 @@ pub enum RecordBlockHashError { pub struct ConsensusMetrics { /// Used to track this node's view of how the other nodes on the network are voting. node_metrics: BTreeMap, + /// Used to track when this node received blocks from different leaders in the network. leader_metrics: BTreeMap, + /// Counts number of times metrics recording failed. metrics_recording_failed: usize, + /// Tracks when individual slots began. /// /// Relies on [`TimerManager`] to notify of start of slots. 
@@ -152,10 +155,8 @@ impl ConsensusMetrics { Builder::new() .name("solConsMetrics".into()) .spawn(move || { - info!("ConsensusMetricsService has started"); let mut metrics = Self::new(epoch, receiver); metrics.run(exit); - info!("ConsensusMetricsService has stopped"); }) .expect("Failed to start consensus metrics thread") } @@ -163,20 +164,20 @@ impl ConsensusMetrics { fn run(&mut self, exit: Arc) { while !exit.load(Ordering::Relaxed) { match self.receiver.recv_timeout(Duration::from_secs(1)) { - Ok((recorded, events)) => { + Ok((received, events)) => { for event in events { match event { ConsensusMetricsEvent::Vote { id, vote } => { - self.record_vote(id, &vote, recorded); + self.record_vote(id, &vote, received); } ConsensusMetricsEvent::BlockHashSeen { leader, slot } => { - self.record_block_hash_seen(leader, slot, recorded); + self.record_block_hash_seen(leader, slot, received); } ConsensusMetricsEvent::MaybeNewEpoch { epoch } => { self.maybe_new_epoch(epoch); } ConsensusMetricsEvent::StartOfSlot { slot } => { - self.record_start_of_slot(slot, recorded); + self.record_start_of_slot(slot, received); } } } @@ -193,23 +194,23 @@ impl ConsensusMetrics { } /// Records a `vote` from the node with `id`. - fn record_vote(&mut self, id: Pubkey, vote: &Vote, recorded: Instant) { + fn record_vote(&mut self, id: Pubkey, vote: &Vote, received: Instant) { let Some(start) = self.start_of_slot.get(&vote.slot()) else { self.metrics_recording_failed = self.metrics_recording_failed.saturating_add(1); return; }; let node = self.node_metrics.entry(id).or_default(); - let elapsed = recorded.duration_since(*start); + let elapsed = received.duration_since(*start); node.record_vote(vote, elapsed); } /// Records when a block for `slot` was seen and the `leader` is responsible for producing it. 
- fn record_block_hash_seen(&mut self, leader: Pubkey, slot: Slot, recorded: Instant) { + fn record_block_hash_seen(&mut self, leader: Pubkey, slot: Slot, received: Instant) { let Some(start) = self.start_of_slot.get(&slot) else { self.metrics_recording_failed = self.metrics_recording_failed.saturating_add(1); return; }; - let elapsed = recorded.duration_since(*start).as_micros(); + let elapsed = received.duration_since(*start).as_micros(); let elapsed = match elapsed.try_into() { Ok(e) => e, Err(err) => { @@ -236,57 +237,65 @@ impl ConsensusMetrics { } /// Records when a given slot started. - fn record_start_of_slot(&mut self, slot: Slot, recorded: Instant) { - self.start_of_slot.entry(slot).or_insert(recorded); + fn record_start_of_slot(&mut self, slot: Slot, received: Instant) { + self.start_of_slot.entry(slot).or_insert(received); } /// Performs end of epoch reporting and reset all the statistics for the subsequent epoch. - fn end_of_epoch_reporting(&mut self) { + fn end_of_epoch_reporting(&mut self, epoch: Epoch) { for (addr, metrics) in &self.node_metrics { let addr = addr.to_string(); - datapoint_info!("votor_consensus_metrics", + datapoint_info!("consensus_vote_metrics", "address" => addr, ("notar_vote_count", metrics.notar.entries(), i64), - ("notar_vote_mean", metrics.notar.mean().ok(), Option), - ("notar_vote_stddev", metrics.notar.stddev(), Option), - ("notar_vote_maximum", metrics.notar.maximum().ok(), Option), + ("notar_vote_us_mean", metrics.notar.mean().ok(), Option), + ("notar_vote_us_stddev", metrics.notar.stddev(), Option), + ("notar_vote_us_maximum", metrics.notar.maximum().ok(), Option), ("notar_fallback_vote_count", metrics.notar_fallback.entries(), i64), - ("notar_fallback_vote_mean", metrics.notar_fallback.mean().ok(), Option), - ("notar_fallback_vote_stddev", metrics.notar_fallback.stddev(), Option), - ("notar_fallback_vote_maximum", metrics.notar_fallback.maximum().ok(), Option), + ("notar_fallback_vote_us_mean", 
metrics.notar_fallback.mean().ok(), Option), + ("notar_fallback_vote_us_stddev", metrics.notar_fallback.stddev(), Option), + ("notar_fallback_vote_us_maximum", metrics.notar_fallback.maximum().ok(), Option), ("skip_vote_count", metrics.skip.entries(), i64), - ("skip_vote_mean", metrics.skip.mean().ok(), Option), - ("skip_vote_stddev", metrics.skip.stddev(), Option), - ("skip_vote_maximum", metrics.skip.maximum().ok(), Option), + ("skip_vote_us_mean", metrics.skip.mean().ok(), Option), + ("skip_vote_us_stddev", metrics.skip.stddev(), Option), + ("skip_vote_us_maximum", metrics.skip.maximum().ok(), Option), ("skip_fallback_vote_count", metrics.skip_fallback.entries(), i64), - ("skip_fallback_vote_mean", metrics.skip_fallback.mean().ok(), Option), - ("skip_fallback_vote_stddev", metrics.skip_fallback.stddev(), Option), - ("skip_fallback_vote_maximum", metrics.skip_fallback.maximum().ok(), Option), + ("skip_fallback_vote_us_mean", metrics.skip_fallback.mean().ok(), Option), + ("skip_fallback_vote_us_stddev", metrics.skip_fallback.stddev(), Option), + ("skip_fallback_vote_us_maximum", metrics.skip_fallback.maximum().ok(), Option), ("finalize_vote_count", metrics.final_.entries(), i64), - ("finalize_vote_mean", metrics.final_.mean().ok(), Option), - ("finalize_vote_stddev", metrics.final_.stddev(), Option), - ("finalize_vote_maximum", metrics.final_.maximum().ok(), Option), + ("finalize_vote_us_mean", metrics.final_.mean().ok(), Option), + ("finalize_vote_us_stddev", metrics.final_.stddev(), Option), + ("finalize_vote_us_maximum", metrics.final_.maximum().ok(), Option), ); } for (addr, histogram) in &self.leader_metrics { let addr = addr.to_string(); - datapoint_info!("votor_consensus_metrics", + datapoint_info!("consensus_block_hash_seen_metrics", "address" => addr, - ("blocks_seen_vote_count", histogram.entries(), i64), - ("blocks_seen_vote_mean", histogram.mean().ok(), Option), - ("blocks_seen_vote_stddev", histogram.stddev(), Option), - ("blocks_seen_vote_maximum", 
histogram.maximum().ok(), Option), + ("block_hash_seen_count", histogram.entries(), i64), + ("block_hash_seen_us_mean", histogram.mean().ok(), Option), + ("block_hash_seen_us_stddev", histogram.stddev(), Option), + ("block_hash_seen_us_maximum", histogram.maximum().ok(), Option), ); } - self.node_metrics.clear(); - self.leader_metrics.clear(); - self.start_of_slot.clear(); + datapoint_info!( + "consensus_metrics_internals", + ("start_of_slot_count", self.start_of_slot.len(), i64), + ( + "metrics_recording_failed", + self.metrics_recording_failed, + i64 + ), + ); + + *self = Self::new(epoch, self.receiver.clone()); } /// This function can be called if there is a new [`Epoch`] and it will carry out end of epoch reporting. @@ -294,7 +303,7 @@ impl ConsensusMetrics { assert!(epoch >= self.current_epoch); if epoch != self.current_epoch { self.current_epoch = epoch; - self.end_of_epoch_reporting(); + self.end_of_epoch_reporting(epoch); } } } diff --git a/votor/src/consensus_pool.rs b/votor/src/consensus_pool.rs index f4345d2b1f8618..d503a207ebe7f9 100644 --- a/votor/src/consensus_pool.rs +++ b/votor/src/consensus_pool.rs @@ -1,25 +1,29 @@ //! Defines ConsensusPool to store received and generated votes and certificates. 
- use { crate::{ - common::{certificate_limits_and_votes, vote_to_certificate_ids, Stake}, + commitment::CommitmentError, + common::{ + certificate_limits_and_vote_types, conflicting_types, vote_to_cert_types, Stake, + MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE, MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES, + }, consensus_pool::{ - certificate_builder::{BuildError as CertificateBuilderError, CertificateBuilder}, parent_ready_tracker::ParentReadyTracker, slot_stake_counters::SlotStakeCounters, stats::ConsensusPoolStats, - vote_pool::{AddVoteError as VotePoolError, VotePool}, + vote_pool::{DuplicateBlockVotePool, SimpleVotePool, VotePool}, }, event::VotorEvent, }, agave_votor_messages::{ consensus_message::{Block, Certificate, CertificateType, ConsensusMessage, VoteMessage}, - vote::Vote, + migration::MigrationStatus, + vote::{Vote, VoteType}, }, - log::trace, + certificate_builder::{BuildError as CertificateBuildError, CertificateBuilder}, + log::{error, trace}, solana_clock::{Epoch, Slot}, solana_epoch_schedule::EpochSchedule, - solana_gossip::cluster_info::ClusterInfo, + solana_hash::Hash, solana_pubkey::Pubkey, solana_runtime::{bank::Bank, epoch_stakes::VersionedEpochStakes}, std::{ @@ -36,34 +40,37 @@ mod slot_stake_counters; mod stats; mod vote_pool; +pub type PoolId = (Slot, VoteType); + /// Different failure cases from calling `add_vote()`. 
-#[derive(Debug, Error)] -enum AddVoteError { +#[derive(Debug, Error, PartialEq)] +pub(crate) enum AddVoteError { + #[error("Conflicting vote type: {0:?} vs existing {1:?} for slot: {2} pubkey: {3}")] + ConflictingVoteType(VoteType, VoteType, Slot, Pubkey), + #[error("Epoch stakes missing for epoch: {0}")] EpochStakesNotFound(Epoch), + #[error("Unrooted slot")] UnrootedSlot, - #[error("Certificate builder error: {0}")] - CertificateBuilder(#[from] CertificateBuilderError), - #[error("Invalid rank: {0}")] - InvalidRank(u16), - #[error("vote pool returned error: {0}")] - VotePool(#[from] VotePoolError), -} + #[error("Certificate error: {0}")] + Certificate(#[from] CertificateBuildError), -/// Different failure cases from calling `add_certificate()`. -#[derive(Debug, Error)] -enum AddCertError { - #[error("Unrooted slot")] - UnrootedSlot, + #[error("{0} channel disconnected")] + ChannelDisconnected(String), + + #[error("Voting Service queue full")] + VotingServiceQueueFull, + + #[error("Invalid rank: {0}")] + InvalidRank(u16), } -/// Different failure cases from calling `add_message()`. -#[derive(Debug, PartialEq, Eq, Error)] -pub(crate) enum AddMessageError { - #[error("internal failure {0}")] - Internal(String), +impl From for AddVoteError { + fn from(_: CommitmentError) -> Self { + AddVoteError::ChannelDisconnected("CommitmentSender".to_string()) + } } fn get_key_and_stakes( @@ -89,15 +96,13 @@ fn get_key_and_stakes( } Ok((*vote_key, stake, epoch_stakes.total_stake())) } - /// Container to store received votes and certificates. /// /// Based on received votes and certificates, generates new `VotorEvent`s and generates new certificates. pub(crate) struct ConsensusPool { - cluster_info: Arc, - // Storage for per slot votes. - // Adding new votes in the vote uses the prior votes to check for invalid and duplicate votes. - vote_pools: BTreeMap, + my_pubkey: Pubkey, + // Vote pools to do bean counting for votes. 
+ vote_pools: BTreeMap, /// Completed certificates completed_certificates: BTreeMap>, /// Tracks slots which have reached the parent ready condition: @@ -113,74 +118,180 @@ pub(crate) struct ConsensusPool { stats: ConsensusPoolStats, /// Slot stake counters, used to calculate safe_to_notar and safe_to_skip slot_stake_counters_map: BTreeMap, + /// Stores details about the genesis vote during the migration + migration_status: Option>, } impl ConsensusPool { - pub(crate) fn new_from_root_bank(cluster_info: Arc, bank: &Bank) -> Self { + pub(crate) fn new_from_root_bank_pre_migration( + my_pubkey: Pubkey, + bank: &Bank, + migration_status: Arc, + ) -> Self { + let mut pool = Self::new_from_root_bank(my_pubkey, bank); + pool.migration_status = Some(migration_status); + pool + } + + pub fn new_from_root_bank(my_pubkey: Pubkey, bank: &Bank) -> Self { // To account for genesis and snapshots we allow default block id until // block id can be serialized as part of the snapshot let root_block = (bank.slot(), bank.block_id().unwrap_or_default()); - let parent_ready_tracker = ParentReadyTracker::new(cluster_info.clone(), root_block); + let parent_ready_tracker = ParentReadyTracker::new(my_pubkey, root_block); Self { - cluster_info, + my_pubkey, vote_pools: BTreeMap::new(), completed_certificates: BTreeMap::new(), highest_finalized_slot: None, highest_finalized_with_notarize: None, parent_ready_tracker, - stats: ConsensusPoolStats::default(), + stats: ConsensusPoolStats::new(), slot_stake_counters_map: BTreeMap::new(), + migration_status: None, + } + } + + fn new_vote_pool(vote_type: VoteType) -> VotePool { + match vote_type { + VoteType::NotarizeFallback => VotePool::DuplicateBlockVotePool( + DuplicateBlockVotePool::new(MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE), + ), + VoteType::Notarize => VotePool::DuplicateBlockVotePool(DuplicateBlockVotePool::new( + MAX_ENTRIES_PER_PUBKEY_FOR_OTHER_TYPES, + )), + _ => VotePool::SimpleVotePool(SimpleVotePool::default()), + } + } + + fn 
update_vote_pool( + &mut self, + vote: VoteMessage, + validator_vote_key: Pubkey, + validator_stake: Stake, + ) -> Option { + let vote_type = vote.vote.get_type(); + let pool = self + .vote_pools + .entry((vote.vote.slot(), vote_type)) + .or_insert_with(|| Self::new_vote_pool(vote_type)); + match pool { + VotePool::SimpleVotePool(pool) => { + pool.add_vote(validator_vote_key, validator_stake, vote) + } + VotePool::DuplicateBlockVotePool(pool) => { + pool.add_vote(validator_vote_key, validator_stake, vote) + } } } - /// Builds new [`Certificate`]s that depend on votes of type [`Vote`] if enough stake has voted for them. - fn build_certs( + /// For a new vote `slot` , `vote_type` checks if any + /// of the related certificates are newly complete. + /// For each newly constructed certificate + /// - Insert it into `self.certificates` + /// - Potentially update `self.highest_finalized_slot`, + /// - If we have a new highest finalized slot, return it + /// - update any newly created events + fn update_certificates( &mut self, vote: &Vote, + block_id: Option, + events: &mut Vec, total_stake: Stake, ) -> Result>, AddVoteError> { - let Some(vote_pool) = self.vote_pools.get(&vote.slot()) else { - return Ok(vec![]); - }; + let slot = vote.slot(); let mut new_certificates_to_send = Vec::new(); - for cert_type in vote_to_certificate_ids(vote) { + for cert_type in vote_to_cert_types(vote) { // If the certificate is already complete, skip it if self.completed_certificates.contains_key(&cert_type) { continue; } // Otherwise check whether the certificate is complete - let (limit, vote, fallback_vote) = certificate_limits_and_votes(&cert_type); - let accumulated_stake = vote_pool - .get_stake(&vote) - .saturating_add(fallback_vote.map_or(0, |v| vote_pool.get_stake(&v))); - + let (limit, vote_types) = certificate_limits_and_vote_types(&cert_type); + let accumulated_stake = vote_types + .iter() + .filter_map(|vote_type| { + Some(match self.vote_pools.get(&(slot, *vote_type))? 
{ + VotePool::SimpleVotePool(pool) => pool.total_stake(), + VotePool::DuplicateBlockVotePool(pool) => { + pool.total_stake_by_block_id(block_id.as_ref().expect( + "Duplicate block pool for {vote_type:?} expects a block id for \ + certificate {cert_type:?}", + )) + } + }) + }) + .sum::(); if accumulated_stake as f64 / (total_stake as f64) < limit { continue; } let mut cert_builder = CertificateBuilder::new(cert_type); - cert_builder.aggregate(&vote_pool.get_votes(&vote)).unwrap(); - if let Some(v) = fallback_vote { - cert_builder.aggregate(&vote_pool.get_votes(&v)).unwrap(); - } + vote_types.iter().for_each(|vote_type| { + if let Some(vote_pool) = self.vote_pools.get(&(slot, *vote_type)) { + match vote_pool { + VotePool::SimpleVotePool(pool) => { + cert_builder.aggregate(pool.votes()).unwrap(); + } + VotePool::DuplicateBlockVotePool(pool) => { + if let Some(votes) = pool.votes(block_id.as_ref().unwrap()) { + cert_builder.aggregate(votes).unwrap(); + } + } + }; + } + }); let new_cert = Arc::new(cert_builder.build()?); + self.insert_certificate(cert_type, new_cert.clone(), events); self.stats.incr_cert_type(&new_cert.cert_type, true); new_certificates_to_send.push(new_cert); } Ok(new_certificates_to_send) } - /// Inserts a new [`Certificate`]. - /// - /// Based on the type of certificate being inserted, updates [`self.parent_ready_tracker`] and other metadata on self. 
- fn insert_certificate(&mut self, cert: Arc, events: &mut Vec) { - let cert_type = cert.cert_type; - trace!( - "{}: Inserting certificate {:?}", - self.cluster_info.id(), - cert_type - ); - self.completed_certificates.insert(cert_type, cert); + fn has_conflicting_vote( + &self, + slot: Slot, + vote_type: VoteType, + validator_vote_key: &Pubkey, + block_id: &Option, + ) -> Option { + for conflicting_type in conflicting_types(vote_type) { + if let Some(pool) = self.vote_pools.get(&(slot, *conflicting_type)) { + let is_conflicting = match pool { + // In a simple vote pool, just check if the validator previously voted at all. If so, that's a conflict + VotePool::SimpleVotePool(pool) => { + pool.has_prev_validator_vote(validator_vote_key) + } + // In a duplicate block vote pool, because some conflicts between things like Notarize and NotarizeFallback + // for different blocks are allowed, we need a more specific check. + // TODO: This can be made much cleaner/safer if VoteType carried the bank hash, block id so we + // could check which exact VoteType(blockid, bankhash) was the source of the conflict. + VotePool::DuplicateBlockVotePool(pool) => { + if let Some(block_id) = &block_id { + // Reject votes for the same block with a conflicting type, i.e. + // a NotarizeFallback vote for the same block as a Notarize vote. 
+ pool.has_prev_validator_vote_for_block(validator_vote_key, block_id) + } else { + pool.has_prev_validator_vote(validator_vote_key) + } + } + }; + if is_conflicting { + return Some(*conflicting_type); + } + } + } + None + } + + fn insert_certificate( + &mut self, + cert_type: CertificateType, + cert: Arc, + events: &mut Vec, + ) { + trace!("{}: Inserting certificate {:?}", self.my_pubkey, cert_type); + self.completed_certificates.insert(cert_type, cert.clone()); match cert_type { CertificateType::NotarizeFallback(slot, block_id) => { self.parent_ready_tracker @@ -231,7 +342,14 @@ impl ConsensusPool { self.highest_finalized_with_notarize = Some((slot, true)); } } - CertificateType::Genesis(_slot, _block_id) => {} + CertificateType::Genesis(slot, block_id) => { + if let Some(ref migration_status) = self.migration_status { + migration_status.set_genesis_certificate(cert); + } + // The genesis block is automatically certified + self.parent_ready_tracker + .add_new_notar_fallback_or_stronger((slot, block_id), events); + } } } @@ -251,22 +369,18 @@ impl ConsensusPool { my_vote_pubkey: &Pubkey, message: ConsensusMessage, events: &mut Vec, - ) -> Result<(Option, Vec>), AddMessageError> { + ) -> Result<(Option, Vec>), AddVoteError> { let current_highest_finalized_slot = self.highest_finalized_slot; let new_certficates_to_send = match message { - ConsensusMessage::Vote(vote_message) => self - .add_vote( - epoch_schedule, - epoch_stakes_map, - root_slot, - my_vote_pubkey, - vote_message, - events, - ) - .map_err(|e| AddMessageError::Internal(e.to_string()))?, - ConsensusMessage::Certificate(cert) => self - .add_certificate(root_slot, cert, events) - .map_err(|e| AddMessageError::Internal(e.to_string()))?, + ConsensusMessage::Vote(vote_message) => self.add_vote( + epoch_schedule, + epoch_stakes_map, + root_slot, + my_vote_pubkey, + vote_message, + events, + )?, + ConsensusMessage::Certificate(cert) => self.add_certificate(root_slot, cert, events)?, }; // If we have a new 
highest finalized slot, return it let new_finalized_slot = if self.highest_finalized_slot > current_highest_finalized_slot { @@ -303,43 +417,50 @@ impl ConsensusPool { self.stats.out_of_range_votes = self.stats.out_of_range_votes.saturating_add(1); return Err(AddVoteError::UnrootedSlot); } - let vote = vote_message.vote; - match self - .vote_pools - .entry(vote_slot) - .or_insert(VotePool::new(vote_slot)) - .add_vote(validator_vote_key, validator_stake, vote_message) + let block_id = vote.block_id().map(|block_id| { + if !matches!( + vote, + Vote::Notarize(_) | Vote::NotarizeFallback(_) | Vote::Genesis(_) + ) { + panic!("expected Notarize/ NotarizeFallback/ Genesis vote"); + } + *block_id + }); + let vote_type = vote.get_type(); + if let Some(conflicting_type) = + self.has_conflicting_vote(vote_slot, vote_type, &validator_vote_key, &block_id) { - Ok(stake) => { + self.stats.conflicting_votes = self.stats.conflicting_votes.saturating_add(1); + return Err(AddVoteError::ConflictingVoteType( + vote_type, + conflicting_type, + vote_slot, + validator_vote_key, + )); + } + match self.update_vote_pool(vote_message, validator_vote_key, validator_stake) { + None => { + // No new vote pool entry was created, just return empty vec + self.stats.exist_votes = self.stats.exist_votes.saturating_add(1); + return Ok(vec![]); + } + Some(entry_stake) => { let fallback_vote_counters = self .slot_stake_counters_map .entry(vote_slot) .or_insert_with(|| SlotStakeCounters::new(total_stake)); fallback_vote_counters.add_vote( - &vote, - stake, + vote, + entry_stake, my_vote_pubkey == &validator_vote_key, events, &mut self.stats, ); } - Err(e) => match e { - vote_pool::AddVoteError::Duplicate => { - self.stats.exist_votes = self.stats.exist_votes.saturating_add(1); - return Ok(vec![]); - } - vote_pool::AddVoteError::Invalid => { - self.stats.invalid_votes = self.stats.invalid_votes.saturating_add(1); - return Err(e.into()); - } - }, } - self.stats.incr_ingested_vote(&vote); - 
self.build_certs(&vote, total_stake).inspect(|certs| { - for cert in certs { - self.insert_certificate(cert.clone(), events) - } - }) + self.stats.incr_ingested_vote_type(vote_type); + + self.update_certificates(vote, block_id, events, total_stake) } fn add_certificate( @@ -347,20 +468,22 @@ impl ConsensusPool { root_slot: Slot, cert: Certificate, events: &mut Vec, - ) -> Result>, AddCertError> { - let cert_type = &cert.cert_type; + ) -> Result>, AddVoteError> { + let cert_type = cert.cert_type; self.stats.incoming_certs = self.stats.incoming_certs.saturating_add(1); if cert_type.slot() < root_slot { self.stats.out_of_range_certs = self.stats.out_of_range_certs.saturating_add(1); - return Err(AddCertError::UnrootedSlot); + return Err(AddVoteError::UnrootedSlot); } - if self.completed_certificates.contains_key(cert_type) { + if self.completed_certificates.contains_key(&cert_type) { self.stats.exist_certs = self.stats.exist_certs.saturating_add(1); return Ok(vec![]); } - self.stats.incr_cert_type(cert_type, false); let cert = Arc::new(cert); - self.insert_certificate(cert.clone(), events); + self.insert_certificate(cert_type, cert.clone(), events); + + self.stats.incr_cert_type(&cert_type, false); + Ok(vec![cert]) } @@ -438,6 +561,11 @@ impl ConsensusPool { .contains_key(&CertificateType::Skip(slot)) } + #[cfg(test)] + pub(crate) fn my_pubkey(&self) -> Pubkey { + self.my_pubkey + } + #[cfg(test)] fn make_start_leader_decision( &self, @@ -487,14 +615,22 @@ impl ConsensusPool { | CertificateType::FinalizeFast(s, _) | CertificateType::Notarize(s, _) | CertificateType::NotarizeFallback(s, _) - | CertificateType::Skip(s) - | CertificateType::Genesis(s, _) => s >= &root_slot, + | CertificateType::Genesis(s, _) + | CertificateType::Skip(s) => s >= &root_slot, }); - self.vote_pools = self.vote_pools.split_off(&root_slot); + self.vote_pools = self.vote_pools.split_off(&(root_slot, VoteType::Finalize)); self.slot_stake_counters_map = 
self.slot_stake_counters_map.split_off(&root_slot); self.parent_ready_tracker.set_root(root_slot); } + /// Updates the pubkey used for logging purposes only. + /// This avoids the need to recreate the entire certificate pool since it's + /// not distinguished by the pubkey. + pub fn update_pubkey(&mut self, new_pubkey: Pubkey) { + self.my_pubkey = new_pubkey; + self.parent_ready_tracker.update_pubkey(new_pubkey); + } + pub(crate) fn maybe_report(&mut self) { self.stats.maybe_report(); } @@ -525,11 +661,7 @@ impl ConsensusPool { _ => None, }; if cert_to_send.is_some() { - trace!( - "{}: Refreshing certificate {:?}", - self.cluster_info.id(), - cert_type - ); + trace!("{}: Refreshing certificate {:?}", self.my_pubkey, cert_type); } cert_to_send }) @@ -547,10 +679,7 @@ mod tests { VerifiableSignature, }, solana_clock::Slot, - solana_gossip::contact_info::ContactInfo, solana_hash::Hash, - solana_keypair::Keypair, - solana_net_utils::SocketAddrSpace, solana_runtime::{ bank::{Bank, NewBankOptions}, bank_forks::BankForks, @@ -563,16 +692,6 @@ mod tests { test_case::test_case, }; - fn new_cluster_info() -> Arc { - let keypair = Keypair::new(); - let contact_info = ContactInfo::new_localhost(&keypair.pubkey(), 0); - Arc::new(ClusterInfo::new( - contact_info, - Arc::new(keypair), - SocketAddrSpace::Unspecified, - )) - } - fn dummy_vote_message( keypairs: &[ValidatorVoteKeypairs], vote: &Vote, @@ -614,7 +733,7 @@ mod tests { let root_bank = bank_forks.read().unwrap().root_bank(); ( validator_keypairs, - ConsensusPool::new_from_root_bank(new_cluster_info(), &root_bank), + ConsensusPool::new_from_root_bank(Pubkey::new_unique(), &root_bank), bank_forks, ) } @@ -626,32 +745,34 @@ mod tests { vote: Vote, ) { for rank in 0..6 { - pool.add_message( + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(validator_keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); + } + assert!(pool + 
.add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), &Pubkey::new_unique(), - dummy_vote_message(validator_keypairs, &vote, rank), - &mut vec![], + dummy_vote_message(validator_keypairs, &vote, 6), + &mut vec![] ) - .unwrap(); - } - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + .is_ok()); match vote { Vote::Notarize(vote) => assert_eq!(pool.highest_notarized_slot(), vote.slot), Vote::NotarizeFallback(vote) => assert_eq!(pool.highest_notarized_slot(), vote.slot), Vote::Skip(vote) => assert_eq!(pool.highest_skip_slot(), vote.slot), Vote::SkipFallback(vote) => assert_eq!(pool.highest_skip_slot(), vote.slot), Vote::Finalize(vote) => assert_eq!(pool.highest_finalized_slot(), vote.slot), - Vote::Genesis(_) => {} + Vote::Genesis(_genesis_vote) => (), } } @@ -665,15 +786,16 @@ mod tests { ) { for slot in start..=end { let vote = Vote::new_skip_vote(slot); - pool.add_message( - root_bank.epoch_schedule(), - root_bank.epoch_stakes_map(), - root_bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(keypairs, &vote, rank), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); } } @@ -927,40 +1049,12 @@ mod tests { assert!(pool.make_start_leader_decision(my_leader_slot, parent_slot, first_alpenglow_slot)); } - #[test] - fn test_add_vote_and_create_new_certificate_with_types() { - let slot = 5; - let vote = Vote::new_finalization_vote(slot); - let cert_types = vec![CertificateType::Finalize(slot)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 6; - let block_id = Hash::new_unique(); - let vote = Vote::new_notarization_vote(slot, block_id); - let cert_types = vec![ - 
CertificateType::Notarize(slot, block_id), - CertificateType::NotarizeFallback(slot, block_id), - ]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 7; - let block_id = Hash::new_unique(); - let vote = Vote::new_notarization_fallback_vote(slot, block_id); - let cert_types = vec![CertificateType::NotarizeFallback(slot, block_id)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 8; - let vote = Vote::new_skip_vote(slot); - let cert_types = vec![CertificateType::Skip(slot)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - - let slot = 9; - let vote = Vote::new_skip_fallback_vote(slot); - let cert_types = vec![CertificateType::Skip(slot)]; - do_test_add_vote_and_create_new_certificate_with_types(vote, cert_types); - } - - fn do_test_add_vote_and_create_new_certificate_with_types( + #[test_case(Vote::new_finalization_vote(5), vec![CertificateType::Finalize(5)])] + #[test_case(Vote::new_notarization_vote(6, Hash::default()), vec![CertificateType::Notarize(6, Hash::default()), CertificateType::NotarizeFallback(6, Hash::default())])] + #[test_case(Vote::new_notarization_fallback_vote(7, Hash::default()), vec![CertificateType::NotarizeFallback(7, Hash::default())])] + #[test_case(Vote::new_skip_vote(8), vec![CertificateType::Skip(8)])] + #[test_case(Vote::new_skip_fallback_vote(9), vec![CertificateType::Skip(9)])] + fn test_add_vote_and_create_new_certificate_with_types( vote: Vote, expected_cert_types: Vec, ) { @@ -972,41 +1066,44 @@ mod tests { Vote::NotarizeFallback(_) => |pool: &ConsensusPool| pool.highest_notarized_slot(), Vote::Skip(_) => |pool: &ConsensusPool| pool.highest_skip_slot(), Vote::SkipFallback(_) => |pool: &ConsensusPool| pool.highest_skip_slot(), - Vote::Genesis(_) => |_pool: &ConsensusPool| 0, + Vote::Genesis(_genesis_vote) => |_pool: &ConsensusPool| 0, }; let bank = bank_forks.read().unwrap().root_bank(); - pool.add_message( - 
bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), + &mut vec![] + ) + .is_ok()); let slot = vote.slot(); assert!(highest_slot_fn(&pool) < slot); // Same key voting again shouldn't make a certificate - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), - &mut vec![], - ) - .unwrap(); - assert!(highest_slot_fn(&pool) < slot); - for rank in 0..4 { - pool.add_message( + assert!(pool + .add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut vec![], + dummy_vote_message(&validator_keypairs, &vote, my_validator_ix), + &mut vec![] ) - .unwrap(); + .is_ok()); + assert!(highest_slot_fn(&pool) < slot); + for rank in 0..4 { + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut vec![] + ) + .is_ok()); } assert!(highest_slot_fn(&pool) < slot); let new_validator_ix = 6; @@ -1027,9 +1124,9 @@ mod tests { } // Assert certs_to_send contains the expected certificate types for expected_cert_type in expected_cert_types { - assert!(certs_to_send - .iter() - .any(|cert| { cert.cert_type == expected_cert_type })); + assert!(certs_to_send.iter().any(|cert| { + cert.cert_type == expected_cert_type && cert.cert_type.slot() == slot + })); } assert_eq!(highest_slot_fn(&pool), slot); // Now add the same certificate again, this should silently exit. 
@@ -1131,8 +1228,8 @@ mod tests { fn test_add_vote_zero_stake() { let (_, mut pool, bank_forks) = create_initial_state(); let bank = bank_forks.read().unwrap().root_bank(); - let err = pool - .add_message( + assert_eq!( + pool.add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), @@ -1142,12 +1239,9 @@ mod tests { rank: 100, signature: BLSSignature::default(), }), - &mut vec![], - ) - .unwrap_err(); - assert_eq!( - err, - AddMessageError::Internal("Invalid rank: 100".to_string()) + &mut vec![] + ), + Err(AddVoteError::InvalidRank(100)) ); } @@ -1178,15 +1272,16 @@ mod tests { let slot = (i as u64).saturating_add(16); let vote = Vote::new_skip_vote(slot); // These should not extend the skip range - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, i), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, i), + &mut vec![] + ) + .is_ok()); } assert_single_certificate_range(&pool, 15, 15); @@ -1299,44 +1394,47 @@ mod tests { let bank = bank_forks.read().unwrap().root_bank(); // 10% vote for skip 2 let vote = Vote::new_skip_vote(2); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 2); assert_single_certificate_range(&pool, 2, 2); // 10% vote for skip 4 let vote = Vote::new_skip_vote(4); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - 
dummy_vote_message(&validator_keypairs, &vote, 7), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 7), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 4); assert_single_certificate_range(&pool, 2, 2); assert_single_certificate_range(&pool, 4, 4); // 10% vote for skip 3 let vote = Vote::new_skip_vote(3); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 8), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 8), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 4); assert_single_certificate_range(&pool, 2, 4); assert!(pool.skip_certified(3)); @@ -1371,15 +1469,16 @@ mod tests { let bank = bank_forks.read().unwrap().root_bank(); // Range expansion on a singleton vote should be ok let vote = Vote::new_skip_vote(1); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); assert_eq!(pool.highest_skip_slot(), 1); add_skip_vote_range( &mut pool, @@ -1408,15 +1507,16 @@ mod tests { // AlreadyExists, silently fail let vote = Vote::new_skip_vote(20); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + 
bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut vec![] + ) + .is_ok()); } #[test] @@ -1496,39 +1596,38 @@ mod tests { // Add a skip from myself. let vote = Vote::new_skip_vote(2); let mut new_events = vec![]; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &my_vote_key, - dummy_vote_message(&validator_keypairs, &vote, 0), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + dummy_vote_message(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); assert!(new_events.is_empty()); // 40% notarized, should succeed for rank in 1..5 { let vote = Vote::new_notarization_vote(2, block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 1); - match new_events[0] { - VotorEvent::SafeToNotar((event_slot, event_block_id)) => { - assert_eq!(block_id, event_block_id); - assert_eq!(slot, event_slot); - } - _ => { - panic!("Expected SafeToNotar event"); - } + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[0] { + assert_eq!(block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); } new_events.clear(); @@ -1539,60 +1638,59 @@ mod tests { // Add 20% notarize, but no vote from myself, should fail for rank in 1..3 { let vote = Vote::new_notarization_vote(3, block_id); - pool.add_message( + assert!(pool + .add_message( + bank.epoch_schedule(), + 
bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); + } + assert!(new_events.is_empty()); + + // Add a notarize from myself for some other block, but still not enough notar or skip, should fail. + let vote = Vote::new_notarization_vote(3, Hash::new_unique()); + assert!(pool + .add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, + &my_vote_key, + dummy_vote_message(&validator_keypairs, &vote, 0), + &mut new_events ) - .unwrap(); - } - assert!(new_events.is_empty()); - - // Add a notarize from myself for some other block, but still not enough notar or skip, should fail. - let vote = Vote::new_notarization_vote(3, Hash::new_unique()); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &my_vote_key, - dummy_vote_message(&validator_keypairs, &vote, 0), - &mut new_events, - ) - .unwrap(); + .is_ok()); assert!(new_events.is_empty()); // Now add 40% skip, should succeed // Funny thing is in this case we will also get SafeToSkip(3) for rank in 3..7 { let vote = Vote::new_skip_vote(3); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 2); - match new_events[0] { - VotorEvent::SafeToSkip(event_slot) => { - assert_eq!(slot, event_slot); - } - _ => { - panic!("Expected SafeToSkip event"); - } + if let VotorEvent::SafeToSkip(event_slot) = new_events[0] { + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToSkip 
event"); } - match new_events[1] { - VotorEvent::SafeToNotar((event_slot, event_block_id)) => { - assert_eq!(block_id, event_block_id); - assert_eq!(slot, event_slot); - } - _ => panic!("Expected SafeToNotar event"), + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[1] { + assert_eq!(block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); } new_events.clear(); @@ -1601,24 +1699,24 @@ mod tests { let duplicate_block_id = Hash::new_unique(); for rank in 7..9 { let vote = Vote::new_notarization_vote(3, duplicate_block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 1); - match new_events[0] { - VotorEvent::SafeToNotar((event_slot, event_block_id)) => { - assert_eq!(duplicate_block_id, event_block_id); - assert_eq!(slot, event_slot); - } - _ => panic!("Expected SafeToNotar event"), + if let VotorEvent::SafeToNotar((event_slot, event_block_id)) = new_events[0] { + assert_eq!(duplicate_block_id, event_block_id); + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToNotar event"); } } @@ -1634,50 +1732,125 @@ mod tests { // Add a notarize from myself. 
let block_id = Hash::new_unique(); let vote = Vote::new_notarization_vote(2, block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &my_vote_key, - dummy_vote_message(&validator_keypairs, &vote, 0), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &my_vote_key, + dummy_vote_message(&validator_keypairs, &vote, 0), + &mut new_events + ) + .is_ok()); // Should still fail because there are no other votes. assert!(new_events.is_empty()); // Add 50% skip, should succeed for rank in 1..6 { let vote = Vote::new_skip_vote(2); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, rank), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, rank), + &mut new_events + ) + .is_ok()); } assert_eq!(new_events.len(), 1); - match new_events[0] { - VotorEvent::SafeToSkip(event_slot) => assert_eq!(slot, event_slot), - _ => panic!("Expected SafeToSkip event"), + if let VotorEvent::SafeToSkip(event_slot) = new_events[0] { + assert_eq!(slot, event_slot); + } else { + panic!("Expected SafeToSkip event"); } new_events.clear(); // Add 10% more notarize, will not send new SafeToSkip because the event was already sent let vote = Vote::new_notarization_vote(2, block_id); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - dummy_vote_message(&validator_keypairs, &vote, 6), - &mut new_events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(&validator_keypairs, &vote, 6), + &mut new_events + ) + .is_ok()); assert!(new_events.is_empty()); 
} + fn create_new_vote(vote_type: VoteType, slot: Slot) -> Vote { + match vote_type { + VoteType::Notarize => Vote::new_notarization_vote(slot, Hash::default()), + VoteType::NotarizeFallback => { + Vote::new_notarization_fallback_vote(slot, Hash::default()) + } + VoteType::Skip => Vote::new_skip_vote(slot), + VoteType::SkipFallback => Vote::new_skip_fallback_vote(slot), + VoteType::Finalize => Vote::new_finalization_vote(slot), + VoteType::Genesis => Vote::new_genesis_vote(slot, Hash::default()), + } + } + + fn test_reject_conflicting_vote( + pool: &mut ConsensusPool, + bank: &Bank, + validator_keypairs: &[ValidatorVoteKeypairs], + vote_type_1: VoteType, + vote_type_2: VoteType, + slot: Slot, + ) { + let vote_1 = create_new_vote(vote_type_1, slot); + let vote_2 = create_new_vote(vote_type_2, slot); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(validator_keypairs, &vote_1, 0), + &mut vec![] + ) + .is_ok()); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + dummy_vote_message(validator_keypairs, &vote_2, 0), + &mut vec![] + ) + .is_err()); + } + + #[test] + fn test_reject_conflicting_votes_with_type() { + let (validator_keypairs, mut pool, bank_forks) = create_initial_state(); + let mut slot = 2; + for vote_type_1 in [ + VoteType::Finalize, + VoteType::Notarize, + VoteType::NotarizeFallback, + VoteType::Skip, + VoteType::SkipFallback, + ] { + let conflicting_vote_types = conflicting_types(vote_type_1); + for vote_type_2 in conflicting_vote_types { + test_reject_conflicting_vote( + &mut pool, + &bank_forks.read().unwrap().root_bank(), + &validator_keypairs, + vote_type_1, + *vote_type_2, + slot, + ); + } + slot = slot.saturating_add(4); + } + } + #[test] fn test_handle_new_root() { let validator_keypairs = (0..10) @@ -1685,7 +1858,7 @@ mod tests { .collect::>(); let bank_forks = 
create_bank_forks(&validator_keypairs); let mut pool = ConsensusPool::new_from_root_bank( - new_cluster_info(), + Pubkey::new_unique(), &bank_forks.read().unwrap().root_bank(), ); @@ -1696,29 +1869,31 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - root_bank.epoch_schedule(), - root_bank.epoch_stakes_map(), - root_bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_1.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_1.clone()), + &mut vec![] + ) + .is_ok()); let cert_2 = Certificate { cert_type: CertificateType::FinalizeFast(2, Hash::new_unique()), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - root_bank.epoch_schedule(), - root_bank.epoch_stakes_map(), - root_bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_2.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + root_bank.epoch_schedule(), + root_bank.epoch_stakes_map(), + root_bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_2.clone()), + &mut vec![] + ) + .is_ok()); assert!(pool.skip_certified(1)); assert!(pool.is_finalized(2)); @@ -1778,29 +1953,31 @@ mod tests { bitmap: Vec::new(), }; let bank = bank_forks.read().unwrap().root_bank(); - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_3.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_3.clone()), + &mut vec![] + ) + .is_ok()); let cert_4 = Certificate { cert_type: CertificateType::Finalize(4), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - 
bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_4.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_4.clone()), + &mut vec![] + ) + .is_ok()); // Should return both certificates let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 2); @@ -1815,15 +1992,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_5.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_5.clone()), + &mut vec![] + ) + .is_ok()); // Add Finalize cert on 5 let cert_5_finalize = Certificate { @@ -1831,15 +2009,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_5_finalize.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_5_finalize.clone()), + &mut vec![] + ) + .is_ok()); // Add FinalizeFast cert on 5 let cert_5 = Certificate { @@ -1847,15 +2026,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_5.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + 
&Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_5.clone()), + &mut vec![] + ) + .is_ok()); // Should return only FinalizeFast cert on 5 let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 1); @@ -1870,15 +2050,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_6.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_6.clone()), + &mut vec![] + ) + .is_ok()); // Should return certs on 5 and 6 let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 2); @@ -1893,30 +2074,32 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_6_finalize.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_6_finalize.clone()), + &mut vec![] + ) + .is_ok()); // Add a NotarizeFallback cert on 6 let cert_6_notarize_fallback = Certificate { cert_type: CertificateType::NotarizeFallback(6, Hash::new_unique()), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_6_notarize_fallback.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_6_notarize_fallback.clone()), + &mut vec![] + ) + .is_ok()); // This should not be returned 
because 6 is the current highest finalized slot // only Notarize/Finalze/FinalizeFast should be returned let certs = pool.get_certs_for_standstill(); @@ -1932,15 +2115,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_7.clone()), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_7.clone()), + &mut vec![] + ) + .is_ok()); // Should return certs on 6 and 7 let certs = pool.get_certs_for_standstill(); assert_eq!(certs.len(), 3); @@ -1959,29 +2143,31 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_8_finalize), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_8_finalize), + &mut vec![] + ) + .is_ok()); let cert_8_notarize = Certificate { cert_type: CertificateType::Notarize(8, Hash::new_unique()), signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert_8_notarize), - &mut vec![], - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert_8_notarize), + &mut vec![] + ) + .is_ok()); // Should only return certs on 8 now let certs = pool.get_certs_for_standstill(); @@ -2006,15 +2192,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - 
bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert), - &mut events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); } // events should now contain ParentReady for slot 4 error!("Events: {events:?}"); @@ -2033,15 +2220,16 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert), - &mut events, - ) - .unwrap(); + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); } // events should now contain ParentReady for slot 8 error!("Events: {events:?}"); @@ -2060,7 +2248,24 @@ mod tests { signature: BLSSignature::default(), bitmap: Vec::new(), }; - pool.add_message( + assert!(pool + .add_message( + bank.epoch_schedule(), + bank.epoch_stakes_map(), + bank.slot(), + &Pubkey::new_unique(), + ConsensusMessage::Certificate(cert), + &mut events, + ) + .is_ok()); + } + let cert = Certificate { + cert_type: CertificateType::FinalizeFast(11, hash), + signature: BLSSignature::default(), + bitmap: Vec::new(), + }; + assert!(pool + .add_message( bank.epoch_schedule(), bank.epoch_stakes_map(), bank.slot(), @@ -2068,22 +2273,7 @@ mod tests { ConsensusMessage::Certificate(cert), &mut events, ) - .unwrap(); - } - let cert = Certificate { - cert_type: CertificateType::FinalizeFast(11, hash), - signature: BLSSignature::default(), - bitmap: Vec::new(), - }; - pool.add_message( - bank.epoch_schedule(), - bank.epoch_stakes_map(), - bank.slot(), - &Pubkey::new_unique(), - ConsensusMessage::Certificate(cert), - &mut events, - ) - .unwrap(); + .is_ok()); // 
events should now contain ParentReady for slot 12 error!("Events: {events:?}"); assert!(events @@ -2109,13 +2299,28 @@ mod tests { let bls_keypair = BLSKeypair::derive_from_signer(validator_vote_keypair, BLS_KEYPAIR_DERIVE_SEED) .unwrap(); - let bls_pubkey: BLSPubkey = bls_keypair.public; + let bls_pubkey: BLSPubkey = bls_keypair.public.into(); let signed_message = bincode::serialize(&vote).unwrap(); - vote_message - .signature - .verify(&bls_pubkey, &signed_message) - .expect("BLS signature verification failed for VoteMessage"); + assert!( + vote_message + .signature + .verify(&bls_pubkey, &signed_message) + .is_ok(), + "BLS signature verification failed for VoteMessage" + ); + } + + #[test] + fn test_update_pubkey() { + let new_pubkey = Pubkey::new_unique(); + let (_, mut pool, _) = create_initial_state(); + let old_pubkey = pool.my_pubkey(); + assert_eq!(pool.parent_ready_tracker.my_pubkey(), old_pubkey); + assert_ne!(old_pubkey, new_pubkey); + pool.update_pubkey(new_pubkey); + assert_eq!(pool.my_pubkey(), new_pubkey); + assert_eq!(pool.parent_ready_tracker.my_pubkey(), new_pubkey); } } diff --git a/votor/src/consensus_pool/certificate_builder.rs b/votor/src/consensus_pool/certificate_builder.rs index c3ee633f15382e..ea099308636fd8 100644 --- a/votor/src/consensus_pool/certificate_builder.rs +++ b/votor/src/consensus_pool/certificate_builder.rs @@ -1,5 +1,5 @@ use { - crate::common::certificate_limits_and_votes, + crate::common::certificate_limits_and_vote_types, agave_votor_messages::consensus_message::{Certificate, CertificateType, VoteMessage}, bitvec::prelude::*, solana_bls_signatures::{BlsError, SignatureProjective}, @@ -7,7 +7,7 @@ use { thiserror::Error, }; -/// Maximum number of validators in a certificate +/// Maximum number of validators in a certificate. /// /// There are around 1500 validators currently. For a clean power-of-two /// implementation, we should choose either 2048 or 4096. 
Choose a more @@ -17,7 +17,7 @@ use { const MAXIMUM_VALIDATORS: usize = 4096; /// Different types of errors that can be returned from the [`CertificateBuilder::aggregate()`] function. -#[derive(Debug, PartialEq, Eq, Error)] +#[derive(Debug, Error, PartialEq, Eq)] pub(super) enum AggregateError { #[error("BLS error: {0}")] Bls(#[from] BlsError), @@ -25,19 +25,24 @@ pub(super) enum AggregateError { InvalidRank(u16), #[error("Validator already included")] ValidatorAlreadyIncluded, - #[error("assumption for vote_types array broken")] - InvalidVoteTypes, } /// Different types of errors that can be returned from the [`CertificateBuilder::build()`] function. -#[derive(Debug, PartialEq, Eq, Error)] +#[derive(Debug, Error, PartialEq)] pub(crate) enum BuildError { + #[error("Encoding failed: {0:?}")] + Encode(EncodeError), #[error("BLS error: {0}")] Bls(#[from] BlsError), +} + +/// Different types of errors that can be returned from the [`CertificateBuilder::build_for_rewards()`] function. +#[derive(Debug, Error, PartialEq)] +pub enum BuildForRewardsError { #[error("Encoding failed: {0:?}")] Encode(EncodeError), - #[error("Invalid rank: {0}")] - InvalidRank(usize), + #[error("rewards certs of these types are not needed")] + InvalidCertType, } fn default_bitvec() -> BitVec { @@ -49,15 +54,10 @@ fn build_cert_from_bitmap( cert_type: CertificateType, signature: SignatureProjective, mut bitmap: BitVec, -) -> Result { +) -> Result { let new_len = bitmap.last_one().map_or(0, |i| i.saturating_add(1)); - // checks in `aggregate()` guarantee that this assertion is valid - debug_assert!(new_len <= MAXIMUM_VALIDATORS); - if new_len > MAXIMUM_VALIDATORS { - return Err(BuildError::InvalidRank(new_len)); - } bitmap.resize(new_len, false); - let bitmap = encode_base2(&bitmap).map_err(BuildError::Encode)?; + let bitmap = encode_base2(&bitmap)?; Ok(Certificate { cert_type, signature: signature.into(), @@ -71,18 +71,13 @@ fn build_cert_from_bitmaps( signature: SignatureProjective, mut 
bitmap0: BitVec, mut bitmap1: BitVec, -) -> Result { +) -> Result { let last_one_0 = bitmap0.last_one().map_or(0, |i| i.saturating_add(1)); let last_one_1 = bitmap1.last_one().map_or(0, |i| i.saturating_add(1)); - let new_len = last_one_0.max(last_one_1); - // checks in `aggregate()` guarantee that this assertion is valid - debug_assert!(new_len <= MAXIMUM_VALIDATORS); - if new_len > MAXIMUM_VALIDATORS { - return Err(BuildError::InvalidRank(new_len)); - } - bitmap0.resize(new_len, false); - bitmap1.resize(new_len, false); - let bitmap = encode_base3(&bitmap0, &bitmap1).map_err(BuildError::Encode)?; + let new_length = last_one_0.max(last_one_1); + bitmap0.resize(new_length, false); + bitmap1.resize(new_length, false); + let bitmap = encode_base3(&bitmap0, &bitmap1)?; Ok(Certificate { cert_type, signature: signature.into(), @@ -110,10 +105,20 @@ enum BuilderType { signature: SignatureProjective, bitmap: BitVec, }, - /// A [`Certificate`] of type NotarFallback or Skip will be produced. + /// A [`Certificate`] of type Skip will be produced. + /// + /// It can require two types of [`VoteMessage`]s. + /// In order to be able to produce certificates for reward purposes, signature aggregates for the two types are tracked separately. + Skip { + signature0: SignatureProjective, + bitmap0: BitVec, + sig_and_bitmap1: Option<(SignatureProjective, BitVec)>, + }, + /// A [`Certificate`] of type NotarFallback will be produced. /// /// It can require two types of [`VoteMessage`]s. - DoubleVote { + /// This certificate is not used for rewards so its signature can be aggregated in a single container. + NotarFallback { signature: SignatureProjective, bitmap0: BitVec, bitmap1: Option>, @@ -124,17 +129,17 @@ impl BuilderType { /// Creates a new instance of [`BuilderType`]. 
fn new(cert_type: &CertificateType) -> Self { match cert_type { - CertificateType::NotarizeFallback(_, _) | CertificateType::Skip(_) => { - Self::DoubleVote { - signature: SignatureProjective::identity(), - bitmap0: default_bitvec(), - bitmap1: None, - } - } - CertificateType::Finalize(_) - | CertificateType::FinalizeFast(_, _) - | CertificateType::Notarize(_, _) - | CertificateType::Genesis(_, _) => Self::SingleVote { + CertificateType::Skip(_) => Self::Skip { + signature0: SignatureProjective::identity(), + bitmap0: default_bitvec(), + sig_and_bitmap1: None, + }, + CertificateType::NotarizeFallback(_, _) => Self::NotarFallback { + signature: SignatureProjective::identity(), + bitmap0: default_bitvec(), + bitmap1: None, + }, + _ => Self::SingleVote { signature: SignatureProjective::identity(), bitmap: default_bitvec(), }, @@ -147,48 +152,62 @@ impl BuilderType { cert_type: &CertificateType, msgs: &[VoteMessage], ) -> Result<(), AggregateError> { - let (_, vote, fallback_vote) = certificate_limits_and_votes(cert_type); + let vote_types = certificate_limits_and_vote_types(cert_type).1; match self { - Self::DoubleVote { + Self::Skip { + signature0, + bitmap0, + sig_and_bitmap1, + } => { + assert_eq!(vote_types.len(), 2); + for msg in msgs { + let vote_type = msg.vote.get_type(); + if vote_type == vote_types[0] { + try_set_bitmap(bitmap0, msg.rank)?; + } else { + assert_eq!(vote_type, vote_types[1]); + let (_, bitmap) = sig_and_bitmap1 + .get_or_insert((SignatureProjective::identity(), default_bitvec())); + try_set_bitmap(bitmap, msg.rank)?; + } + } + signature0.aggregate_with(msgs.iter().filter_map(|msg| { + (msg.vote.get_type() == vote_types[0]).then_some(&msg.signature) + }))?; + sig_and_bitmap1 + .as_mut() + .map(|(signature, _)| { + signature.aggregate_with(msgs.iter().filter_map(|msg| { + (msg.vote.get_type() == vote_types[1]).then_some(&msg.signature) + })) + }) + .unwrap_or(Ok(()))?; + Ok(()) + } + + Self::NotarFallback { signature, bitmap0, bitmap1, } => { - 
debug_assert!(fallback_vote.is_some()); - let Some(fallback_vote) = fallback_vote else { - return Err(AggregateError::InvalidVoteTypes); - }; + assert_eq!(vote_types.len(), 2); for msg in msgs { - if msg.vote == vote { + let vote_type = msg.vote.get_type(); + if vote_type == vote_types[0] { try_set_bitmap(bitmap0, msg.rank)?; } else { - debug_assert_eq!(msg.vote, fallback_vote); - if msg.vote != fallback_vote { - return Err(AggregateError::InvalidVoteTypes); - } - match bitmap1 { - Some(bitmap) => try_set_bitmap(bitmap, msg.rank)?, - None => { - let mut bitmap = default_bitvec(); - try_set_bitmap(&mut bitmap, msg.rank)?; - *bitmap1 = Some(bitmap); - } - } + assert_eq!(vote_type, vote_types[1]); + let bitmap = bitmap1.get_or_insert(default_bitvec()); + try_set_bitmap(bitmap, msg.rank)?; } } Ok(signature.aggregate_with(msgs.iter().map(|m| &m.signature))?) } Self::SingleVote { signature, bitmap } => { - debug_assert!(fallback_vote.is_none()); - if fallback_vote.is_some() { - return Err(AggregateError::InvalidVoteTypes); - } + assert_eq!(vote_types.len(), 1); for msg in msgs { - debug_assert_eq!(msg.vote, vote); - if msg.vote != vote { - return Err(AggregateError::InvalidVoteTypes); - } + assert_eq!(msg.vote.get_type(), vote_types[0]); try_set_bitmap(bitmap, msg.rank)?; } Ok(signature.aggregate_with(msgs.iter().map(|m| &m.signature))?) 
@@ -200,16 +219,54 @@ impl BuilderType { fn build(self, cert_type: CertificateType) -> Result { match self { Self::SingleVote { signature, bitmap } => { - build_cert_from_bitmap(cert_type, signature, bitmap) + build_cert_from_bitmap(cert_type, signature, bitmap).map_err(BuildError::Encode) } - Self::DoubleVote { + Self::Skip { + mut signature0, + bitmap0, + sig_and_bitmap1, + } => match sig_and_bitmap1 { + None => build_cert_from_bitmap(cert_type, signature0, bitmap0) + .map_err(BuildError::Encode), + Some((signature1, bitmap1)) => { + signature0.aggregate_with([signature1].iter())?; + build_cert_from_bitmaps(cert_type, signature0, bitmap0, bitmap1) + .map_err(BuildError::Encode) + } + }, + Self::NotarFallback { signature, bitmap0, bitmap1, } => match bitmap1 { - None => build_cert_from_bitmap(cert_type, signature, bitmap0), - Some(bitmap1) => build_cert_from_bitmaps(cert_type, signature, bitmap0, bitmap1), + None => build_cert_from_bitmap(cert_type, signature, bitmap0) + .map_err(BuildError::Encode), + Some(bitmap1) => build_cert_from_bitmaps(cert_type, signature, bitmap0, bitmap1) + .map_err(BuildError::Encode), + }, + } + } + + /// Builds a [`Certificate`] for rewards purposes from the builder. + fn build_for_rewards( + self, + cert_type: CertificateType, + ) -> Result { + match self { + Self::Skip { + signature0, + bitmap0, + sig_and_bitmap1: _, + } => build_cert_from_bitmap(cert_type, signature0, bitmap0) + .map_err(BuildForRewardsError::Encode), + Self::SingleVote { signature, bitmap } => match cert_type { + CertificateType::Notarize(_, _) => { + build_cert_from_bitmap(cert_type, signature, bitmap) + .map_err(BuildForRewardsError::Encode) + } + _ => Err(BuildForRewardsError::InvalidCertType), }, + Self::NotarFallback { .. 
} => Err(BuildForRewardsError::InvalidCertType), } } } @@ -239,6 +296,12 @@ impl CertificateBuilder { pub(super) fn build(self) -> Result { self.builder_type.build(self.cert_type) } + + /// Builds a [`Certificate`] for rewards purposes from the builder. + #[allow(dead_code)] + pub(super) fn build_for_rewards(self) -> Result { + self.builder_type.build_for_rewards(self.cert_type) + } } #[cfg(test)] @@ -300,9 +363,11 @@ mod tests { .aggregate(&messages_2) .expect("Failed to aggregate notarization fallback votes"); - let cert = builder.build().expect("Failed to build certificate"); - assert_eq!(cert.cert_type, cert_type); - match decode(&cert.bitmap, MAXIMUM_VALIDATORS).expect("Failed to decode bitmap") { + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.cert_type, cert_type); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { Decoded::Base3(bitmap1, bitmap2) => { assert_eq!(bitmap1.len(), 8); assert_eq!(bitmap2.len(), 8); @@ -323,9 +388,11 @@ mod tests { builder .aggregate(&messages_1) .expect("Failed to aggregate notarization votes"); - let cert = builder.build().expect("Failed to build certificate"); - assert_eq!(cert.cert_type, cert_type); - match decode(&cert.bitmap, MAXIMUM_VALIDATORS).expect("Failed to decode bitmap") { + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.cert_type, cert_type); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { Decoded::Base2(bitmap1) => { assert_eq!(bitmap1.len(), 7); for i in rank_1 { @@ -342,9 +409,11 @@ mod tests { builder .aggregate(&messages_2) .expect("Failed to aggregate notarization fallback votes"); - let cert = builder.build().expect("Failed to build certificate"); - assert_eq!(cert.cert_type, cert_type); - match decode(&cert.bitmap, MAXIMUM_VALIDATORS).expect("Failed to decode 
bitmap") { + let certificate_message = builder.build().expect("Failed to build certificate"); + assert_eq!(certificate_message.cert_type, cert_type); + match decode(&certificate_message.bitmap, MAXIMUM_VALIDATORS) + .expect("Failed to decode bitmap") + { Decoded::Base3(bitmap1, bitmap2) => { assert_eq!(bitmap1.count_ones(), 0); assert_eq!(bitmap2.len(), 8); @@ -453,13 +522,9 @@ mod tests { let aggregate_pubkey = BLSPubkeyProjective::aggregate(keypairs.iter().map(|kp| &kp.public)) .expect("Failed to aggregate public keys"); - let verification_result = - aggregate_pubkey.verify_signature(&certificate_message.signature, &serialized_vote); - - assert!( - verification_result.unwrap_or(false), - "BLS aggregate signature verification failed for base2 encoded certificate" - ); + aggregate_pubkey + .verify_signature(&certificate_message.signature, &serialized_vote) + .expect("BLS aggregate signature verification failed for base2 encoded certificate"); } #[test] diff --git a/votor/src/consensus_pool/parent_ready_tracker.rs b/votor/src/consensus_pool/parent_ready_tracker.rs index 73ecb1e4021d58..5a4f654e5bd387 100644 --- a/votor/src/consensus_pool/parent_ready_tracker.rs +++ b/votor/src/consensus_pool/parent_ready_tracker.rs @@ -16,8 +16,8 @@ use { crate::{common::MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE, event::VotorEvent}, agave_votor_messages::consensus_message::Block, solana_clock::{Slot, NUM_CONSECUTIVE_LEADER_SLOTS}, - solana_gossip::cluster_info::ClusterInfo, - std::{collections::HashMap, sync::Arc}, + solana_pubkey::Pubkey, + std::collections::HashMap, }; #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -27,8 +27,10 @@ pub(crate) enum BlockProductionParent { Parent(Block), } +#[derive(Clone, Debug, Default)] pub(crate) struct ParentReadyTracker { - cluster_info: Arc, + /// Our pubkey for logging + my_pubkey: Pubkey, /// Parent ready status for each slot slot_statuses: HashMap, @@ -56,7 +58,7 @@ struct ParentReadyStatus { impl ParentReadyTracker { /// Creates a new 
tracker with the root bank as implicitely notarized fallback - pub(super) fn new(cluster_info: Arc, root_block @ (root_slot, _): Block) -> Self { + pub(super) fn new(my_pubkey: Pubkey, root_block @ (root_slot, _): Block) -> Self { let mut slot_statuses = HashMap::new(); slot_statuses.insert( root_slot, @@ -75,7 +77,7 @@ impl ParentReadyTracker { }, ); Self { - cluster_info, + my_pubkey, slot_statuses, root: root_slot, highest_with_parent_ready: root_slot.saturating_add(1), @@ -98,7 +100,7 @@ impl ParentReadyTracker { } trace!( "{}: Adding new notar fallback for {block:?}", - self.cluster_info.id() + self.my_pubkey ); status.notar_fallbacks.push(block); assert!(status.notar_fallbacks.len() <= MAX_ENTRIES_PER_PUBKEY_FOR_NOTARIZE_LITE); @@ -107,7 +109,7 @@ impl ParentReadyTracker { for s in slot.saturating_add(1).. { trace!( "{}: Adding new parent ready for {s} parent {block:?}", - self.cluster_info.id() + self.my_pubkey ); let status = self.slot_statuses.entry(s).or_default(); if !status.parents_ready.contains(&block) { @@ -136,7 +138,7 @@ impl ParentReadyTracker { return; } - trace!("{}: Adding new skip for {slot:?}", self.cluster_info.id()); + trace!("{}: Adding new skip for {slot:?}", self.my_pubkey); let status = self.slot_statuses.entry(slot).or_default(); status.skip = true; @@ -175,7 +177,7 @@ impl ParentReadyTracker { for s in future_slots { trace!( "{}: Adding new parent ready for {s} parents {potential_parents:?}", - self.cluster_info.id(), + self.my_pubkey, ); let status = self.slot_statuses.entry(s).or_default(); for &block in &potential_parents { @@ -231,30 +233,29 @@ impl ParentReadyTracker { self.root = root; self.slot_statuses.retain(|&s, _| s >= root); } + + /// Updates the pubkey. Note that the pubkey is used for logging purposes only. 
+ pub fn update_pubkey(&mut self, new_pubkey: Pubkey) { + self.my_pubkey = new_pubkey; + } + + #[cfg(test)] + pub fn my_pubkey(&self) -> Pubkey { + self.my_pubkey + } } #[cfg(test)] mod tests { use { super::*, itertools::Itertools, solana_clock::NUM_CONSECUTIVE_LEADER_SLOTS, - solana_gossip::contact_info::ContactInfo, solana_hash::Hash, solana_keypair::Keypair, - solana_net_utils::SocketAddrSpace, solana_signer::Signer, + solana_hash::Hash, solana_pubkey::Pubkey, }; - fn new_cluster_info() -> Arc { - let keypair = Keypair::new(); - let contact_info = ContactInfo::new_localhost(&keypair.pubkey(), 0); - Arc::new(ClusterInfo::new( - contact_info, - Arc::new(keypair), - SocketAddrSpace::Unspecified, - )) - } - #[test] fn basic() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; for i in 1..2 * NUM_CONSECUTIVE_LEADER_SLOTS { @@ -268,7 +269,7 @@ mod tests { #[test] fn skips() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; let block = (1, Hash::new_unique()); @@ -285,7 +286,7 @@ mod tests { #[test] fn out_of_order() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; let block = (1, Hash::new_unique()); @@ -305,7 +306,7 @@ mod tests { fn snapshot_wfsm() { let root_slot = 2147; let root_block = (root_slot, Hash::new_unique()); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), root_block); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), root_block); let mut events = vec![]; assert!(tracker.parent_ready(root_slot + 1, root_block)); @@ -332,7 +333,7 @@ mod tests { #[test] fn 
highest_parent_ready_out_of_order() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; assert_eq!(tracker.highest_parent_ready(), 1); @@ -354,7 +355,7 @@ mod tests { #[test] fn missed_window() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; assert_eq!(tracker.highest_parent_ready(), 1); assert_eq!( @@ -384,7 +385,7 @@ mod tests { #[test] fn pick_more_skips() { let genesis = Block::default(); - let mut tracker = ParentReadyTracker::new(new_cluster_info(), genesis); + let mut tracker = ParentReadyTracker::new(Pubkey::default(), genesis); let mut events = vec![]; for i in 1..=10 { diff --git a/votor/src/consensus_pool/slot_stake_counters.rs b/votor/src/consensus_pool/slot_stake_counters.rs index 60e6e4ed2e9ce8..8c0a343753f722 100644 --- a/votor/src/consensus_pool/slot_stake_counters.rs +++ b/votor/src/consensus_pool/slot_stake_counters.rs @@ -1,6 +1,3 @@ -#![allow(dead_code)] -// TODO(wen): remove allow(dead_code) when consensus_pool is fully integrated - use { crate::{ common::{ diff --git a/votor/src/consensus_pool/stats.rs b/votor/src/consensus_pool/stats.rs index 4011f5a7bf434b..b89666e1d1e97c 100644 --- a/votor/src/consensus_pool/stats.rs +++ b/votor/src/consensus_pool/stats.rs @@ -1,12 +1,11 @@ use { - agave_votor_messages::{consensus_message::CertificateType, vote::Vote}, + agave_votor_messages::{consensus_message::CertificateType, vote::VoteType}, solana_metrics::datapoint_info, std::time::{Duration, Instant}, }; const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); -/// Struct to hold stats for different certificate types. 
#[derive(Default)] struct CertificateStats { finalize: u64, @@ -19,8 +18,8 @@ struct CertificateStats { impl CertificateStats { /// Increments the stats associated with the certificate type by one. - fn increment(&mut self, cert_type: &CertificateType) { - match cert_type { + fn increment(&mut self, certificate: &CertificateType) { + match certificate { CertificateType::Finalize(_) => self.finalize = self.finalize.saturating_add(1), CertificateType::FinalizeFast(_, _) => { self.finalize_fast = self.finalize_fast.saturating_add(1) @@ -34,78 +33,20 @@ impl CertificateStats { } } - /// Reports the certificate related statistics. - fn report(&self, header: &'static str) { - let Self { - finalize, - finalize_fast, - notarize, - notarize_fallback, - skip, - genesis, - } = *self; + fn record(&self, header: &'static str) { datapoint_info!( header, - ("finalize", finalize, i64), - ("finalize_fast", finalize_fast, i64), - ("notarize", notarize, i64), - ("notarize_fallback", notarize_fallback, i64), - ("skip", skip, i64), - ("genesis", genesis, i64), - ) - } -} - -/// Struct to hold stats for different vote types. -#[derive(Default)] -struct VoteStats { - notarize: u64, - finalize: u64, - skip: u64, - notarize_fallback: u64, - skip_fallback: u64, - genesis: u64, -} - -impl VoteStats { - /// Increments the stats associated with the votes by one. - fn increment(&mut self, vote: &Vote) { - match vote { - Vote::Notarize(_) => self.notarize = self.notarize.saturating_add(1), - Vote::NotarizeFallback(_) => { - self.notarize_fallback = self.notarize_fallback.saturating_add(1) - } - Vote::Skip(_) => self.skip = self.skip.saturating_add(1), - Vote::SkipFallback(_) => self.skip_fallback = self.skip_fallback.saturating_add(1), - Vote::Finalize(_) => self.finalize = self.finalize.saturating_add(1), - Vote::Genesis(_) => self.genesis = self.genesis.saturating_add(1), - } - } - - /// Reports the vote related statistics. 
- fn report(&self) { - let Self { - finalize, - notarize, - notarize_fallback, - skip, - skip_fallback, - genesis, - } = *self; - datapoint_info!( - "consensus_ingested_votes", - ("finalize", finalize, i64), - ("notarize", notarize, i64), - ("notarize_fallback", notarize_fallback, i64), - ("skip", skip, i64), - ("skip_fallback", skip_fallback, i64), - ("genesis", genesis, i64), + ("finalize", self.finalize, i64), + ("finalize_fast", self.finalize_fast, i64), + ("notarize", self.notarize, i64), + ("notarize_fallback", self.notarize_fallback, i64), + ("skip", self.skip, i64), ) } } pub(crate) struct ConsensusPoolStats { - pub(crate) invalid_votes: u32, + pub(crate) conflicting_votes: u32, pub(crate) event_safe_to_notarize: u32, pub(crate) event_safe_to_skip: u32, pub(crate) exist_certs: u32, @@ -114,16 +55,25 @@ pub(crate) struct ConsensusPoolStats { pub(crate) incoming_votes: u32, pub(crate) out_of_range_certs: u32, pub(crate) out_of_range_votes: u32, + new_certs_generated: CertificateStats, new_certs_ingested: CertificateStats, - ingested_votes: VoteStats, + pub(crate) ingested_votes: Vec, + pub(crate) last_request_time: Instant, } impl Default for ConsensusPoolStats { fn default() -> Self { + Self::new() + } +} + +impl ConsensusPoolStats { + pub fn new() -> Self { + let num_vote_types = (VoteType::Genesis as usize).saturating_add(1); Self { - invalid_votes: 0, + conflicting_votes: 0, event_safe_to_notarize: 0, event_safe_to_skip: 0, exist_certs: 0, @@ -132,69 +82,99 @@ impl Default for ConsensusPoolStats { incoming_votes: 0, out_of_range_certs: 0, out_of_range_votes: 0, + new_certs_ingested: CertificateStats::default(), new_certs_generated: CertificateStats::default(), - ingested_votes: VoteStats::default(), + ingested_votes: vec![0; num_vote_types], + last_request_time: Instant::now(), } } -} -impl ConsensusPoolStats { - pub fn incr_ingested_vote(&mut self, vote: &Vote) { - self.ingested_votes.increment(vote); + pub fn incr_ingested_vote_type(&mut self, 
vote_type: VoteType) { + let index = vote_type as usize; + + self.ingested_votes[index] = self.ingested_votes[index].saturating_add(1); } - pub fn incr_cert_type(&mut self, cert_type: &CertificateType, is_generated: bool) { + pub fn incr_cert_type(&mut self, certificate: &CertificateType, is_generated: bool) { if is_generated { - self.new_certs_generated.increment(cert_type); + self.new_certs_generated.increment(certificate); } else { - self.new_certs_ingested.increment(cert_type); - }; + self.new_certs_ingested.increment(certificate); + } } + /// Reports the certificate related statistics. fn report(&self) { - let Self { - invalid_votes, - event_safe_to_skip, - event_safe_to_notarize, - exist_votes, - exist_certs, - incoming_votes, - incoming_certs, - out_of_range_votes, - out_of_range_certs, - ingested_votes, - new_certs_generated, - new_certs_ingested, - last_request_time: _, - } = self; datapoint_info!( "consensus_pool_stats", - ("vote_pool_invalid_votes", *invalid_votes as i64, i64), - ("event_safe_to_skip", *event_safe_to_skip as i64, i64), + ("conflicting_votes", self.conflicting_votes as i64, i64), + ("event_safe_to_skip", self.event_safe_to_skip as i64, i64), ( "event_safe_to_notarize", - *event_safe_to_notarize as i64, + self.event_safe_to_notarize as i64, + i64 + ), + ("exist_votes", self.exist_votes as i64, i64), + ("exist_certs", self.exist_certs as i64, i64), + ("incoming_votes", self.incoming_votes as i64, i64), + ("incoming_certs", self.incoming_certs as i64, i64), + ("out_of_range_votes", self.out_of_range_votes as i64, i64), + ("out_of_range_certs", self.out_of_range_certs as i64, i64), + ); + + datapoint_info!( + "consensus_ingested_votes", + ( + "finalize", + *self + .ingested_votes + .get(VoteType::Finalize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize", + *self + .ingested_votes + .get(VoteType::Notarize as usize) + .unwrap() as i64, + i64 + ), + ( + "notarize_fallback", + *self + .ingested_votes + .get(VoteType::NotarizeFallback as 
usize) + .unwrap() as i64, + i64 + ), + ( + "skip", + *self.ingested_votes.get(VoteType::Skip as usize).unwrap() as i64, + i64 + ), + ( + "skip_fallback", + *self + .ingested_votes + .get(VoteType::SkipFallback as usize) + .unwrap() as i64, i64 ), - ("exist_votes", *exist_votes as i64, i64), - ("exist_certs", *exist_certs as i64, i64), - ("incoming_votes", *incoming_votes as i64, i64), - ("incoming_certs", *incoming_certs as i64, i64), - ("out_of_range_votes", *out_of_range_votes as i64, i64), - ("out_of_range_certs", *out_of_range_certs as i64, i64), ); - ingested_votes.report(); - new_certs_generated.report("consensus_pool_generated_certs"); - new_certs_ingested.report("consensus_pool_ingested_certs"); + self.new_certs_ingested + .record("consensus_pool_ingested_certs"); + self.new_certs_generated + .record("consensus_pool_generated_certs"); } pub fn maybe_report(&mut self) { if self.last_request_time.elapsed() >= STATS_REPORT_INTERVAL { self.report(); - *self = Self::default(); + *self = Self::new(); } } } diff --git a/votor/src/consensus_pool/vote_pool.rs b/votor/src/consensus_pool/vote_pool.rs index 3aa7e2ee493711..1b0829bb1fb090 100644 --- a/votor/src/consensus_pool/vote_pool.rs +++ b/votor/src/consensus_pool/vote_pool.rs @@ -1,334 +1,129 @@ -//! Container to store received votes and associated stakes. -//! -//! Implements various checks for invalid votes as defined by the Alpenglow paper e.g. lemma 20 and 22. -//! Further detects duplicate votes which are defined as identical vote from the same sender received multiple times. 
- use { crate::common::Stake, - agave_votor_messages::{consensus_message::VoteMessage, vote::Vote}, - solana_clock::Slot, + agave_votor_messages::consensus_message::VoteMessage, solana_hash::Hash, solana_pubkey::Pubkey, - std::collections::{btree_map::Entry, BTreeMap}, - thiserror::Error, + std::collections::{BTreeMap, BTreeSet}, }; -/// As per the Alpenglow paper, a validator is allowed to vote notar fallback on at most 3 different block id for a given slot. -const MAX_NOTAR_FALLBACK_PER_VALIDATOR: usize = 3; +/// There are two types of vote pools: +/// - SimpleVotePool: Tracks all votes of a specfic vote type made by validators for some slot N, but only one vote per block. +/// - DuplicateBlockVotePool: Tracks all votes of a specfic vote type made by validators for some slot N, +/// but allows votes for different blocks by the same validator. Only relevant for VotePool's that are of type +/// Notarization or NotarizationFallback +pub(super) enum VotePool { + SimpleVotePool(SimpleVotePool), + DuplicateBlockVotePool(DuplicateBlockVotePool), +} -#[derive(Debug, PartialEq, Eq, Error)] -pub(crate) enum AddVoteError { - #[error("duplicate vote")] - Duplicate, - /// These are invalid votes as defined in the Alpenglow paper e.g. lemma 20 and 22. - #[error("invalid votes")] - Invalid, +#[derive(Default)] +pub(super) struct SimpleVotePool { + votes: Vec, + total_stake: Stake, + prev_voted_validators: BTreeSet, } -/// Helper function to reduce some code duplication. 
-fn insert_vote( - map: &mut BTreeMap, - voter: Pubkey, - vote: VoteMessage, -) -> Result<(), AddVoteError> { - match map.entry(voter) { - Entry::Occupied(_) => Err(AddVoteError::Duplicate), - Entry::Vacant(e) => { - e.insert(vote); - Ok(()) +impl SimpleVotePool { + pub(super) fn add_vote( + &mut self, + validator_vote_key: Pubkey, + validator_stake: Stake, + vote: VoteMessage, + ) -> Option { + if !self.prev_voted_validators.insert(validator_vote_key) { + return None; } + self.votes.push(vote); + self.total_stake = self.total_stake.saturating_add(validator_stake); + Some(self.total_stake) } -} - -/// Container to store per slot votes. -struct InternalVotePool { - /// The slot this instance of Votes is responsible for. - slot: Slot, - /// Skip votes are stored in map indexed by validator. - skip: BTreeMap, - /// Skip fallback votes are stored in map indexed by validator. - skip_fallback: BTreeMap, - /// Finalize votes are stored in map indexed by validator. - finalize: BTreeMap, - /// Notar votes are stored in map indexed by validator. - notar: BTreeMap, - /// A validator can vote notar fallback on upto 3 blocks. - /// - /// Per validator, we store a map of which block ids the validator has voted notar fallback on. - notar_fallback: BTreeMap>, - /// Genesis votes are stored in map indexed by validator. - genesis: BTreeMap, -} -impl InternalVotePool { - fn new(slot: Slot) -> Self { - Self { - slot, - skip: BTreeMap::default(), - skip_fallback: BTreeMap::default(), - finalize: BTreeMap::default(), - notar: BTreeMap::default(), - notar_fallback: BTreeMap::default(), - genesis: BTreeMap::default(), - } + pub(super) fn votes(&self) -> &[VoteMessage] { + &self.votes } - /// Adds votes. - /// - /// Checks for different types of invalid and duplicate votes returning appropriate errors. 
- fn add_vote(&mut self, voter: Pubkey, vote: VoteMessage) -> Result<(), AddVoteError> { - debug_assert_eq!(self.slot, vote.vote.slot()); - match vote.vote { - Vote::Notarize(notar) => { - if self.skip.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - match self.notar.entry(voter) { - Entry::Occupied(e) => { - // unwrap should be safe as we should only store notar type votes here - if e.get().vote.block_id().unwrap() == ¬ar.block_id { - Err(AddVoteError::Duplicate) - } else { - Err(AddVoteError::Invalid) - } - } - Entry::Vacant(e) => { - e.insert(vote); - Ok(()) - } - } - } - Vote::NotarizeFallback(notar_fallback) => { - if self.finalize.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - match self.notar_fallback.entry(voter) { - Entry::Vacant(e) => { - e.insert(BTreeMap::from([(notar_fallback.block_id, vote)])); - Ok(()) - } - Entry::Occupied(mut e) => { - let map = e.get_mut(); - let map_len = map.len(); - match map.entry(notar_fallback.block_id) { - Entry::Vacant(map_e) => { - if map_len == MAX_NOTAR_FALLBACK_PER_VALIDATOR { - Err(AddVoteError::Invalid) - } else { - map_e.insert(vote); - Ok(()) - } - } - Entry::Occupied(_) => Err(AddVoteError::Duplicate), - } - } - } - } - Vote::Skip(_) => { - if self.notar.contains_key(&voter) || self.finalize.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - insert_vote(&mut self.skip, voter, vote) - } - Vote::SkipFallback(_) => { - if self.finalize.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - insert_vote(&mut self.skip_fallback, voter, vote) - } - Vote::Finalize(_) => { - if self.skip.contains_key(&voter) || self.skip_fallback.contains_key(&voter) { - return Err(AddVoteError::Invalid); - } - if let Some(map) = self.notar_fallback.get(&voter) { - debug_assert!(!map.is_empty()); - return Err(AddVoteError::Invalid); - } - insert_vote(&mut self.finalize, voter, vote) - } - Vote::Genesis(genesis) => { - match self.genesis.entry(voter) { - Entry::Occupied(e) => { - 
// unwrap should be safe as we should only store genesis type votes here - if e.get().vote.block_id().unwrap() == &genesis.block_id { - Err(AddVoteError::Duplicate) - } else { - Err(AddVoteError::Invalid) - } - } - Entry::Vacant(e) => { - e.insert(vote); - Ok(()) - } - } - } - } + pub(super) fn total_stake(&self) -> Stake { + self.total_stake } - /// Get [`VoteMessage`]s for the corresponding [`Vote`]. - /// - // TODO: figure out how to return an iterator here instead which would require `CertificateBuilder::aggregate()` to accept an iterator. - fn get_votes(&self, vote: &Vote) -> Vec { - match vote { - Vote::Finalize(_) => self.finalize.values().cloned().collect(), - Vote::Notarize(notar) => self - .notar - .values() - .filter(|vote| { - // unwrap should be safe as we should only store notar votes here - vote.vote.block_id().unwrap() == ¬ar.block_id - }) - .cloned() - .collect(), - Vote::NotarizeFallback(nf) => self - .notar_fallback - .values() - .filter_map(|map| map.get(&nf.block_id)) - .cloned() - .collect(), - Vote::Skip(_) => self.skip.values().cloned().collect(), - Vote::SkipFallback(_) => self.skip_fallback.values().cloned().collect(), - Vote::Genesis(genesis) => self - .genesis - .values() - .filter(|vote| { - // unwrap should be safe as we should only store genesis votes here - vote.vote.block_id().unwrap() == &genesis.block_id - }) - .cloned() - .collect(), - } + pub(super) fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool { + self.prev_voted_validators.contains(validator_vote_key) } } -/// Container to store the total stakes for different types of votes. -struct Stakes { - slot: Slot, - /// Total stake that has voted skip. - skip: Stake, - /// Total stake that has voted skil fallback. - skip_fallback: Stake, - /// Total stake that has voted finalize. - finalize: Stake, - /// Stake that has voted notar. - /// - /// Different validators may vote notar for different blocks, so this tracks stake per block id. 
- notar: BTreeMap, - /// Stake that has voted notar fallback. - /// - /// A single validator may vote for upto 3 blocks and different validators can vote for different blocks. - /// Hence, this tracks stake per block id. - notar_fallback: BTreeMap, - /// Stake that has voted genesis. - genesis: BTreeMap, +#[derive(Default)] +struct VoteEntry { + votes: Vec, + total_stake_by_key: Stake, } -impl Stakes { - fn new(slot: Slot) -> Self { +pub(super) struct DuplicateBlockVotePool { + max_entries_per_pubkey: usize, + vote_entries: BTreeMap, + prev_voted_block_ids: BTreeMap>, +} + +impl DuplicateBlockVotePool { + pub(super) fn new(max_entries_per_pubkey: usize) -> Self { Self { - slot, - skip: 0, - skip_fallback: 0, - finalize: 0, - notar: BTreeMap::default(), - notar_fallback: BTreeMap::default(), - genesis: BTreeMap::default(), + max_entries_per_pubkey, + vote_entries: BTreeMap::new(), + prev_voted_block_ids: BTreeMap::new(), } } - /// Updates the corresponding stake after a vote has been successfully added to the pool. - /// - /// Returns the total stake of the corresponding type (and block id in case of notar or notar-fallback) after the update. 
- fn add_stake(&mut self, voter_stake: Stake, vote: &Vote) -> Stake { - debug_assert_eq!(self.slot, vote.slot()); - match vote { - Vote::Notarize(notar) => { - let stake = self.notar.entry(notar.block_id).or_default(); - *stake = (*stake).saturating_add(voter_stake); - *stake - } - Vote::NotarizeFallback(nf) => { - let stake = self.notar_fallback.entry(nf.block_id).or_default(); - *stake = (*stake).saturating_add(voter_stake); - *stake - } - Vote::Skip(_) => { - self.skip = self.skip.saturating_add(voter_stake); - self.skip - } - Vote::SkipFallback(_) => { - self.skip_fallback = self.skip_fallback.saturating_add(voter_stake); - self.skip_fallback - } - Vote::Finalize(_) => { - self.finalize = self.finalize.saturating_add(voter_stake); - self.finalize - } - Vote::Genesis(genesis) => { - let stake = self.genesis.entry(genesis.block_id).or_default(); - *stake = (*stake).saturating_add(voter_stake); - *stake - } + pub(super) fn add_vote( + &mut self, + validator_vote_key: Pubkey, + validator_stake: Stake, + vote: VoteMessage, + ) -> Option { + let block_id = *vote.vote.block_id().unwrap(); + // Check whether the validator_vote_key already used the same voted_block_id or exceeded max_entries_per_pubkey + // If so, return false, otherwise add the voted_block_id to the prev_votes + let prev_voted_block_ids = self + .prev_voted_block_ids + .entry(validator_vote_key) + .or_default(); + if prev_voted_block_ids.contains(&block_id) + || prev_voted_block_ids.len() >= self.max_entries_per_pubkey + { + return None; } - } + prev_voted_block_ids.insert(block_id); - /// Get the stake corresponding to the [`Vote`]. 
- fn get_stake(&self, vote: &Vote) -> Stake { - match vote { - Vote::Notarize(notar) => *self.notar.get(¬ar.block_id).unwrap_or(&0), - Vote::NotarizeFallback(nf) => *self.notar_fallback.get(&nf.block_id).unwrap_or(&0), - Vote::Skip(_) => self.skip, - Vote::SkipFallback(_) => self.skip_fallback, - Vote::Finalize(_) => self.finalize, - Vote::Genesis(genesis) => *self.genesis.get(&genesis.block_id).unwrap_or(&0), - } + let vote_entry = self.vote_entries.entry(block_id).or_default(); + vote_entry.votes.push(vote); + vote_entry.total_stake_by_key = vote_entry + .total_stake_by_key + .saturating_add(validator_stake); + Some(vote_entry.total_stake_by_key) } -} - -/// Container to store per slot votes and associated stake. -/// -/// When adding new votes, various checks for invalid and duplicate votes is performed. -pub(super) struct VotePool { - /// The slot this instance of the pool is responsible for. - slot: Slot, - /// Stores seen votes. - votes: InternalVotePool, - /// Stores total stake that voted. - stakes: Stakes, -} -impl VotePool { - pub(super) fn new(slot: Slot) -> Self { - Self { - slot, - votes: InternalVotePool::new(slot), - stakes: Stakes::new(slot), - } + pub(super) fn total_stake_by_block_id(&self, block_id: &Hash) -> Stake { + self.vote_entries + .get(block_id) + .map_or(0, |vote_entries| vote_entries.total_stake_by_key) } - /// Adds a vote to the pool. - /// - /// On success, returns the total stake of the corresponding vote type. - pub(super) fn add_vote( - &mut self, - voter: Pubkey, - voter_stake: Stake, - msg: VoteMessage, - ) -> Result { - debug_assert_eq!(self.slot, msg.vote.slot()); - let vote = msg.vote; - self.votes.add_vote(voter, msg)?; - Ok(self.stakes.add_stake(voter_stake, &vote)) + pub(super) fn votes(&self, block_id: &Hash) -> Option<&[VoteMessage]> { + self.vote_entries + .get(block_id) + .map(|entry| entry.votes.as_slice()) } - /// Returns the [`Stake`] corresponding to the specific [`Vote`]. 
- pub(super) fn get_stake(&self, vote: &Vote) -> Stake { - self.stakes.get_stake(vote) + pub(super) fn has_prev_validator_vote_for_block( + &self, + validator_vote_key: &Pubkey, + block_id: &Hash, + ) -> bool { + self.prev_voted_block_ids + .get(validator_vote_key) + .is_some_and(|vs| vs.contains(block_id)) } - /// Returns a list of votes corresponding to the specific [`Vote`]. - pub(super) fn get_votes(&self, vote: &Vote) -> Vec { - self.votes.get_votes(vote) + pub(super) fn has_prev_validator_vote(&self, validator_vote_key: &Pubkey) -> bool { + self.prev_voted_block_ids.contains_key(validator_vote_key) } } @@ -341,328 +136,105 @@ mod test { }; #[test] - fn test_notar_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, skip).unwrap(); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, notar), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, notar).unwrap(); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, notar), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, notar.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, notar), - Err(AddVoteError::Duplicate) - )); - } - - #[test] - fn test_notar_fallback_failures() { - let voter = Pubkey::new_unique(); - let 
signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize).unwrap(); - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::default()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, nf), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - for _ in 0..3 { - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, nf).unwrap(); - } - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::new_unique()), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, nf), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let nf = VoteMessage { - vote: Vote::new_notarization_fallback_vote(slot, Hash::new_unique()), - signature, - rank, + fn test_skip_vote_pool() { + let mut vote_pool = SimpleVotePool::default(); + let vote = Vote::new_skip_vote(5); + let vote_message = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, }; - votes.add_vote(voter, nf.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, nf), - Err(AddVoteError::Duplicate) - )); - } - - #[test] - fn test_skip_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; + let my_pubkey = Pubkey::new_unique(); - let mut votes = InternalVotePool::new(slot); - let notar = VoteMessage { - vote: Vote::new_notarization_vote(slot, Hash::new_unique()), - signature, - rank, - }; - votes.add_vote(voter, notar).unwrap(); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, skip), - Err(AddVoteError::Invalid) - )); + 
assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote_message), Some(10)); + assert_eq!(vote_pool.total_stake(), 10); - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize).unwrap(); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, skip), - Err(AddVoteError::Invalid) - )); + // Adding the same key again should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote_message), None); + assert_eq!(vote_pool.total_stake(), 10); - let mut votes = InternalVotePool::new(slot); - let skip = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, skip.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, skip), - Err(AddVoteError::Duplicate) - )); + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + assert_eq!(vote_pool.add_vote(new_pubkey, 60, vote_message), Some(70)); + assert_eq!(vote_pool.total_stake(), 70); } #[test] - fn test_skip_fallback_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize).unwrap(); - let sf = VoteMessage { - vote: Vote::new_skip_fallback_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, sf), - Err(AddVoteError::Invalid) - )); - - let mut votes = InternalVotePool::new(slot); - let sf = VoteMessage { - vote: Vote::new_skip_fallback_vote(slot), - signature, - rank, + fn test_notarization_pool() { + let mut vote_pool = DuplicateBlockVotePool::new(1); + let my_pubkey = Pubkey::new_unique(); + let block_id = Hash::new_unique(); + let vote = Vote::new_notarization_vote(3, 
block_id); + let vote = VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, }; - votes.add_vote(voter, sf.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, sf), - Err(AddVoteError::Duplicate) - )); - } + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), Some(10)); + assert_eq!(vote_pool.total_stake_by_block_id(&block_id), 10); - #[test] - fn test_finalize_failures() { - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let slot = 1; - - let mut votes = InternalVotePool::new(slot); - let skip = VoteMessage { - vote: Vote::new_skip_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, skip).unwrap(); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, finalize), - Err(AddVoteError::Invalid) - )); + // Adding the same key again should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), None); - let mut votes = InternalVotePool::new(slot); - let sf = VoteMessage { - vote: Vote::new_skip_fallback_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, sf).unwrap(); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - assert!(matches!( - votes.add_vote(voter, finalize), - Err(AddVoteError::Invalid) - )); + // Adding a different bankhash should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), None); - let mut votes = InternalVotePool::new(slot); - let finalize = VoteMessage { - vote: Vote::new_finalization_vote(slot), - signature, - rank, - }; - votes.add_vote(voter, finalize.clone()).unwrap(); - assert!(matches!( - votes.add_vote(voter, finalize), - Err(AddVoteError::Duplicate) - )); + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + assert_eq!(vote_pool.add_vote(new_pubkey, 60, vote), Some(70)); + assert_eq!(vote_pool.total_stake_by_block_id(&block_id), 70); } #[test] - fn test_stakes() { 
- let slot = 123; - let stake = 54321; - let mut stakes = Stakes::new(slot); - let vote = Vote::new_skip_vote(slot); - assert_eq!(stakes.add_stake(stake, &vote), stake); - assert_eq!(stakes.get_stake(&vote), stake); - - let mut stakes = Stakes::new(slot); - let vote = Vote::new_skip_fallback_vote(slot); - assert_eq!(stakes.add_stake(stake, &vote), stake); - assert_eq!(stakes.get_stake(&vote), stake); - - let mut stakes = Stakes::new(slot); - let vote = Vote::new_finalization_vote(slot); - assert_eq!(stakes.add_stake(stake, &vote), stake); - assert_eq!(stakes.get_stake(&vote), stake); - - let mut stakes = Stakes::new(slot); - let stake0 = 10; - let stake1 = 20; - let hash0 = Hash::new_unique(); - let hash1 = Hash::new_unique(); - let vote0 = Vote::new_notarization_vote(slot, hash0); - let vote1 = Vote::new_notarization_vote(slot, hash1); - assert_eq!(stakes.add_stake(stake0, &vote0), stake0); - assert_eq!(stakes.add_stake(stake1, &vote1), stake1); - assert_eq!(stakes.get_stake(&vote0), stake0); - assert_eq!(stakes.get_stake(&vote1), stake1); - - let mut stakes = Stakes::new(slot); - let stake0 = 10; - let stake1 = 20; - let hash0 = Hash::new_unique(); - let hash1 = Hash::new_unique(); - let vote0 = Vote::new_notarization_fallback_vote(slot, hash0); - let vote1 = Vote::new_notarization_fallback_vote(slot, hash1); - assert_eq!(stakes.add_stake(stake0, &vote0), stake0); - assert_eq!(stakes.add_stake(stake1, &vote1), stake1); - assert_eq!(stakes.get_stake(&vote0), stake0); - assert_eq!(stakes.get_stake(&vote1), stake1); - } + fn test_notarization_fallback_pool() { + agave_logger::setup(); + let mut vote_pool = DuplicateBlockVotePool::new(3); + let my_pubkey = Pubkey::new_unique(); + + let votes = (0..4) + .map(|_| { + let vote = Vote::new_notarization_fallback_vote(7, Hash::new_unique()); + VoteMessage { + vote, + signature: BLSSignature::default(), + rank: 1, + } + }) + .collect::>(); + + // Adding the first 3 votes should succeed, but total_stake should remain at 10 + 
for vote in votes.iter().take(3).cloned() { + assert_eq!(vote_pool.add_vote(my_pubkey, 10, vote), Some(10)); + assert_eq!( + vote_pool.total_stake_by_block_id(vote.vote.block_id().unwrap()), + 10 + ); + } + // Adding the 4th vote should fail + assert_eq!(vote_pool.add_vote(my_pubkey, 10, votes[3]), None); + assert_eq!( + vote_pool.total_stake_by_block_id(votes[3].vote.block_id().unwrap()), + 0 + ); - #[test] - fn test_vote_pool() { - let slot = 1; - let mut vote_pool = VotePool::new(slot); + // Adding a different key should succeed + let new_pubkey = Pubkey::new_unique(); + for vote in votes.iter().skip(1).take(2).cloned() { + assert_eq!(vote_pool.add_vote(new_pubkey, 60, vote), Some(70)); + assert_eq!( + vote_pool.total_stake_by_block_id(vote.vote.block_id().unwrap()), + 70 + ); + } - let voter = Pubkey::new_unique(); - let signature = BLSSignature::default(); - let rank = 1; - let vote = Vote::new_finalization_vote(slot); - let vote_message = VoteMessage { - vote, - signature, - rank, - }; - let stake = 12345; + // The new key only added 2 votes, so adding block_ids[3] should succeed + assert_eq!(vote_pool.add_vote(new_pubkey, 60, votes[3]), Some(60)); assert_eq!( - vote_pool - .add_vote(voter, stake, vote_message.clone()) - .unwrap(), - stake + vote_pool.total_stake_by_block_id(votes[3].vote.block_id().unwrap()), + 60 ); - assert_eq!(vote_pool.get_stake(&vote), stake); - let returned_votes = vote_pool.get_votes(&vote); - assert_eq!(returned_votes.len(), 1); - assert_eq!(returned_votes[0], vote_message); + + // Now if adding the same key again, it should fail + assert_eq!(vote_pool.add_vote(new_pubkey, 60, votes[0]), None); } } diff --git a/votor/src/consensus_pool_service.rs b/votor/src/consensus_pool_service.rs index 3193214626cb3b..5ed6a570ce8872 100644 --- a/votor/src/consensus_pool_service.rs +++ b/votor/src/consensus_pool_service.rs @@ -1,19 +1,23 @@ //! Service in charge of ingesting new messages into the certificate pool //! 
and notifying votor of new events that occur + +mod stats; + use { crate::{ - commitment::{ - update_commitment_cache, CommitmentAggregationData, CommitmentError, CommitmentType, - }, + commitment::{update_commitment_cache, CommitmentAggregationData, CommitmentType}, + common::DELTA_STANDSTILL, consensus_pool::{ - parent_ready_tracker::BlockProductionParent, AddMessageError, ConsensusPool, + parent_ready_tracker::BlockProductionParent, AddVoteError, ConsensusPool, }, event::{LeaderWindowInfo, VotorEvent, VotorEventSender}, voting_service::BLSOp, - votor::Votor, }, - agave_votor_messages::consensus_message::{Certificate, ConsensusMessage}, - crossbeam_channel::{Receiver, RecvTimeoutError, Sender, TrySendError}, + agave_votor_messages::{ + consensus_message::{Certificate, ConsensusMessage}, + migration::MigrationStatus, + }, + crossbeam_channel::{select, Receiver, Sender, TrySendError}, solana_clock::Slot, solana_gossip::cluster_info::ClusterInfo, solana_ledger::{ @@ -22,24 +26,21 @@ use { }, solana_pubkey::Pubkey, solana_runtime::{bank::Bank, bank_forks::SharableBanks}, - stats::Stats, + stats::ConsensusPoolServiceStats, std::{ sync::{ atomic::{AtomicBool, Ordering}, - Arc, Condvar, Mutex, + Arc, }, thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, }, - thiserror::Error, }; -mod stats; - /// Inputs for the certificate pool thread pub(crate) struct ConsensusPoolContext { pub(crate) exit: Arc, - pub(crate) start: Arc<(Mutex, Condvar)>, + pub(crate) migration_status: Arc, pub(crate) cluster_info: Arc, pub(crate) my_vote_pubkey: Pubkey, @@ -50,43 +51,26 @@ pub(crate) struct ConsensusPoolContext { // TODO: for now we ingest our own votes into the certificate pool // just like regular votes. However do we need to convert // Vote -> BLSMessage -> Vote? 
- // consider adding a separate pathway in consensus_pool.add_transaction for ingesting own votes + // consider adding a separate pathway in consensus_pool.add_message() for ingesting own votes pub(crate) consensus_message_receiver: Receiver, pub(crate) bls_sender: Sender, pub(crate) event_sender: VotorEventSender, pub(crate) commitment_sender: Sender, - - pub(crate) delta_standstill: Duration, } pub(crate) struct ConsensusPoolService { t_ingest: JoinHandle<()>, } -#[derive(Debug, Error)] -enum ServiceError { - #[error("Failed to add message into the consensus pool: {0}")] - AddMessage(#[from] AddMessageError), - #[error("Channel {0} disconnected")] - ChannelDisconnected(String), - #[error("Channel is full")] - ChannelFull, - #[error("Failed to add block event: {0}")] - FailedToAddBlockEvent(String), -} - impl ConsensusPoolService { - pub(crate) fn new(mut ctx: ConsensusPoolContext) -> Self { + pub(crate) fn new(ctx: ConsensusPoolContext) -> Self { let t_ingest = Builder::new() - .name("solCnsPoolIngst".to_string()) + .name("solCertPoolIngest".to_string()) .spawn(move || { - info!("ConsensusPoolService has started"); - if let Err(e) = Self::consensus_pool_ingest_loop(&mut ctx) { - ctx.exit.store(true, Ordering::Relaxed); - error!("ConsensusPoolService exited with error: {e}"); + if let Err(e) = Self::consensus_pool_ingest_loop(ctx) { + info!("Certificate pool service exited: {e:?}. 
Shutting down"); } - info!("ConsensusPoolService has stopped"); }) .unwrap(); @@ -100,16 +84,16 @@ impl ConsensusPoolService { new_finalized_slot: Option, new_certificates_to_send: Vec>, standstill_timer: &mut Instant, - stats: &mut Stats, - ) -> Result<(), ServiceError> { + stats: &mut ConsensusPoolServiceStats, + ) -> Result<(), AddVoteError> { // If we have a new finalized slot, update the root and send new certificates if new_finalized_slot.is_some() { // Reset standstill timer *standstill_timer = Instant::now(); stats.new_finalized_slot += 1; } - let root_bank = sharable_banks.root(); - consensus_pool.prune_old_state(root_bank.slot()); + let bank = sharable_banks.root(); + consensus_pool.prune_old_state(bank.slot()); stats.prune_old_state_called += 1; // Send new certificates to peers Self::send_certificates(bls_sender, new_certificates_to_send, stats) @@ -117,25 +101,26 @@ impl ConsensusPoolService { fn send_certificates( bls_sender: &Sender, - certs: Vec>, - stats: &mut Stats, - ) -> Result<(), ServiceError> { - let certs_len = certs.len(); - for (i, certificate) in certs.into_iter().enumerate() { - // The BLS cert channel is expected to be large enough, so we don't - // handle certificate re-send here. - match bls_sender.try_send(BLSOp::PushCertificate { certificate }) { - Ok(()) => { + certificates_to_send: Vec>, + stats: &mut ConsensusPoolServiceStats, + ) -> Result<(), AddVoteError> { + for (i, certificate) in certificates_to_send.iter().enumerate() { + // The buffer should normally be large enough, so we don't handle + // certificate re-send here. 
+ match bls_sender.try_send(BLSOp::PushCertificate { + certificate: certificate.clone(), + }) { + Ok(_) => { stats.certificates_sent += 1; } Err(TrySendError::Disconnected(_)) => { - return Err(ServiceError::ChannelDisconnected( + return Err(AddVoteError::ChannelDisconnected( "VotingService".to_string(), )); } Err(TrySendError::Full(_)) => { - stats.certificates_dropped += certs_len.saturating_sub(i); - return Err(ServiceError::ChannelFull); + stats.certificates_dropped += certificates_to_send.len().saturating_sub(i); + return Err(AddVoteError::VotingServiceQueueFull); } } } @@ -149,8 +134,8 @@ impl ConsensusPoolService { consensus_pool: &mut ConsensusPool, events: &mut Vec, standstill_timer: &mut Instant, - stats: &mut Stats, - ) -> Result<(), ServiceError> { + stats: &mut ConsensusPoolServiceStats, + ) -> Result<(), AddVoteError> { match message { ConsensusMessage::Certificate(_) => { stats.received_certificates += 1; @@ -181,55 +166,97 @@ impl ConsensusPoolService { ) } - // Main loop for the consensus pool service. Only exits when signalled or if - // any channel is disconnected. - fn consensus_pool_ingest_loop(ctx: &mut ConsensusPoolContext) -> Result<(), ServiceError> { + fn handle_channel_disconnected( + ctx: &mut ConsensusPoolContext, + channel_name: &str, + ) -> Result<(), ()> { + info!( + "{}: {} disconnected. 
Exiting", + ctx.cluster_info.id(), + channel_name + ); + ctx.exit.store(true, Ordering::Relaxed); + Err(()) + } + + // Main loop for the certificate pool service, it only exits when any channel is disconnected + fn consensus_pool_ingest_loop(mut ctx: ConsensusPoolContext) -> Result<(), ()> { let mut events = vec![]; let mut my_pubkey = ctx.cluster_info.id(); let root_bank = ctx.sharable_banks.root(); - let mut consensus_pool = - ConsensusPool::new_from_root_bank(ctx.cluster_info.clone(), &root_bank); - // Wait until migration has completed - info!("{my_pubkey}: Consensus pool loop initialized, waiting for Alpenglow migration"); - Votor::wait_for_migration_or_exit(&ctx.exit, &ctx.start); - info!("{my_pubkey}: Consensus pool loop starting"); + // Unlike the other votor threads, consensus pool starts even before alpenglow is enabled + // As it is required to track the Genesis Vote. + let mut consensus_pool = if ctx.migration_status.is_alpenglow_enabled() { + ConsensusPool::new_from_root_bank(my_pubkey, &root_bank) + } else { + ConsensusPool::new_from_root_bank_pre_migration( + my_pubkey, + &root_bank, + ctx.migration_status.clone(), + ) + }; - let mut stats = Stats::default(); + info!("{}: Certificate pool loop starting", &my_pubkey); + let mut stats = ConsensusPoolServiceStats::new(); + let mut highest_parent_ready = root_bank.slot(); // Standstill tracking let mut standstill_timer = Instant::now(); // Kick off parent ready - let root_block = (root_bank.slot(), root_bank.block_id().unwrap_or_default()); - let mut highest_parent_ready = root_bank.slot(); - events.push(VotorEvent::ParentReady { - slot: root_bank.slot().checked_add(1).unwrap(), - parent_block: root_block, - }); + let mut kick_off_parent_ready = false; - // Ingest votes into consensus pool and notify voting loop of new events + // Ingest votes into certificate pool and notify voting loop of new events while !ctx.exit.load(Ordering::Relaxed) { // Update the current pubkey if it has changed let new_pubkey 
= ctx.cluster_info.id(); if my_pubkey != new_pubkey { my_pubkey = new_pubkey; - info!("Consensus pool pubkey updated to {my_pubkey}"); + consensus_pool.update_pubkey(my_pubkey); + warn!("Certificate pool pubkey updated to {my_pubkey}"); + } + + // Kick off parent ready event, this either happens: + // - When we first migrate to alpenglow from TowerBFT - kick off with genesis block + // - If we startup post alpenglow migration - kick off with root block + if !kick_off_parent_ready && ctx.migration_status.is_alpenglow_enabled() { + let genesis_block = ctx + .migration_status + .genesis_block() + .expect("Alpenglow is enabled"); + let root_bank = ctx.sharable_banks.root(); + // can expect once we have block id in snapshots (SIMD-0333) + let root_block = (root_bank.slot(), root_bank.block_id().unwrap_or_default()); + let kick_off_block @ (kick_off_slot, _) = genesis_block.max(root_block); + let start_slot = kick_off_slot.checked_add(1).unwrap(); + + events.push(VotorEvent::ParentReady { + slot: start_slot, + parent_block: kick_off_block, + }); + highest_parent_ready = start_slot; + kick_off_parent_ready = true; } Self::add_produce_block_event( &mut highest_parent_ready, &consensus_pool, &my_pubkey, - ctx, + &mut ctx, &mut events, &mut stats, - )?; + ); - if standstill_timer.elapsed() > ctx.delta_standstill { - events.push(VotorEvent::Standstill( - consensus_pool.highest_finalized_slot(), - )); + if standstill_timer.elapsed() > DELTA_STANDSTILL { + // No reason to pollute channel with Standstill before the + // migration is complete. We still need standstill to refresh the + // Genesis cert though. 
+ if kick_off_parent_ready { + events.push(VotorEvent::Standstill( + consensus_pool.highest_finalized_slot(), + )); + } stats.standstill = true; standstill_timer = Instant::now(); match Self::send_certificates( @@ -238,8 +265,8 @@ impl ConsensusPoolService { &mut stats, ) { Ok(()) => (), - Err(ServiceError::ChannelDisconnected(channel_name)) => { - return Err(ServiceError::ChannelDisconnected(channel_name)); + Err(AddVoteError::ChannelDisconnected(channel_name)) => { + return Self::handle_channel_disconnected(&mut ctx, channel_name.as_str()); } Err(e) => { trace!("{my_pubkey}: unable to push standstill certificates into pool {e}"); @@ -247,29 +274,27 @@ impl ConsensusPoolService { } } - events + if events .drain(..) .try_for_each(|event| ctx.event_sender.send(event)) - .map_err(|_| { - ServiceError::ChannelDisconnected("Votor event receiver".to_string()) - })?; - - let consensus_message_receiver = ctx.consensus_message_receiver.clone(); - let messages = match consensus_message_receiver.recv_timeout(Duration::from_secs(1)) { - Ok(first_message) => { - std::iter::once(first_message).chain(consensus_message_receiver.try_iter()) - } - Err(RecvTimeoutError::Timeout) => continue, - Err(RecvTimeoutError::Disconnected) => { - return Err(ServiceError::ChannelDisconnected( - "BLS receiver".to_string(), - )); - } + .is_err() + { + return Self::handle_channel_disconnected(&mut ctx, "Votor event receiver"); + } + + let messages: Vec = select! 
{ + recv(ctx.consensus_message_receiver) -> msg => { + let Ok(first) = msg else { + return Self::handle_channel_disconnected(&mut ctx, "BLS receiver"); + }; + std::iter::once(first).chain(ctx.consensus_message_receiver.try_iter()).collect() + }, + default(Duration::from_secs(1)) => continue }; for message in messages { match Self::process_consensus_message( - ctx, + &mut ctx, &my_pubkey, message, &mut consensus_pool, @@ -278,11 +303,12 @@ impl ConsensusPoolService { &mut stats, ) { Ok(()) => {} - Err(ServiceError::ChannelDisconnected(n)) => { - return Err(ServiceError::ChannelDisconnected(n)); + Err(AddVoteError::ChannelDisconnected(channel_name)) => { + return Self::handle_channel_disconnected(&mut ctx, channel_name.as_str()) } Err(e) => { - warn!("{my_pubkey}: process_consensus_message() failed with {e}"); + // This is a non critical error, a duplicate vote for example + trace!("{}: unable to push vote into pool {}", &my_pubkey, e); stats.add_message_failed += 1; } } @@ -293,10 +319,9 @@ impl ConsensusPoolService { Ok(()) } - /// Adds a message to the consensus pool and updates the commitment cache if necessary + /// Adds a vote to the certificate pool and updates the commitment cache if necessary /// - /// Returns any newly finalized slot as well as any new certificates to broadcast out. - /// Returns error if consensus message could not be added to the pool. 
+ /// If a new finalization slot was recognized, returns the slot fn add_message_and_maybe_update_commitment( root_bank: &Bank, my_pubkey: &Pubkey, @@ -305,7 +330,7 @@ impl ConsensusPoolService { consensus_pool: &mut ConsensusPool, votor_events: &mut Vec, commitment_sender: &Sender, - ) -> Result<(Option, Vec>), ServiceError> { + ) -> Result<(Option, Vec>), AddVoteError> { let (new_finalized_slot, new_certificates_to_send) = consensus_pool.add_message( root_bank.epoch_schedule(), root_bank.epoch_stakes_map(), @@ -322,12 +347,7 @@ impl ConsensusPoolService { CommitmentType::Finalized, new_finalized_slot, commitment_sender, - ) - .map_err(|e| match e { - CommitmentError::ChannelDisconnected => { - ServiceError::ChannelDisconnected("CommitmentSender".to_string()) - } - })?; + )?; Ok((Some(new_finalized_slot), new_certificates_to_send)) } @@ -337,8 +357,8 @@ impl ConsensusPoolService { my_pubkey: &Pubkey, ctx: &mut ConsensusPoolContext, events: &mut Vec, - stats: &mut Stats, - ) -> Result<(), ServiceError> { + stats: &mut ConsensusPoolServiceStats, + ) { let Some(new_highest_parent_ready) = events .iter() .filter_map(|event| match event { @@ -348,11 +368,11 @@ impl ConsensusPoolService { .max() .copied() else { - return Ok(()); + return; }; if new_highest_parent_ready <= *highest_parent_ready { - return Ok(()); + return; } *highest_parent_ready = new_highest_parent_ready; @@ -361,14 +381,16 @@ impl ConsensusPoolService { .leader_schedule_cache .slot_leader_at(*highest_parent_ready, Some(&root_bank)) else { - return Err(ServiceError::FailedToAddBlockEvent(format!( + error!( "Unable to compute the leader at slot {highest_parent_ready}. 
Something is wrong, \ exiting" - ))); + ); + ctx.exit.store(true, Ordering::Relaxed); + return; }; if &leader_pubkey != my_pubkey { - return Ok(()); + return; } let start_slot = *highest_parent_ready; @@ -379,7 +401,7 @@ impl ConsensusPoolService { "{my_pubkey}: We have already produced shreds in the window \ {start_slot}-{end_slot}, skipping production of our leader window" ); - return Ok(()); + return; } match consensus_pool @@ -395,22 +417,23 @@ impl ConsensusPoolService { } BlockProductionParent::ParentNotReady => { // This can't happen, place holder depending on how we hook up optimistic - return Err(ServiceError::FailedToAddBlockEvent( - "Must have a block production parent".to_string(), - )); + ctx.exit.store(true, Ordering::Relaxed); + panic!( + "Must have a block production parent: {:#?}", + consensus_pool.parent_ready_tracker + ); } BlockProductionParent::Parent(parent_block) => { events.push(VotorEvent::ProduceWindow(LeaderWindowInfo { start_slot, end_slot, parent_block, + // TODO: we can just remove this skip_timer: Instant::now(), })); stats.parent_ready_produce_window += 1; } } - - Ok(()) } pub(crate) fn join(self) -> thread::Result<()> { @@ -422,7 +445,6 @@ impl ConsensusPoolService { mod tests { use { super::*, - crate::common::DELTA_STANDSTILL, agave_votor_messages::{ consensus_message::{CertificateType, VoteMessage, BLS_KEYPAIR_DERIVE_SEED}, vote::Vote, @@ -442,7 +464,7 @@ mod tests { }, }, solana_signer::Signer, - std::sync::{Arc, Mutex}, + std::sync::Arc, test_case::test_case, }; @@ -458,7 +480,7 @@ mod tests { sharable_banks: SharableBanks, } - fn setup(delta_standstill: Option) -> ConsensusPoolServiceTestComponents { + fn setup() -> ConsensusPoolServiceTestComponents { let (consensus_message_sender, consensus_message_receiver) = crossbeam_channel::unbounded(); let (bls_sender, bls_receiver) = crossbeam_channel::unbounded(); let (event_sender, event_receiver) = crossbeam_channel::unbounded(); @@ -495,7 +517,7 @@ mod tests { 
Arc::new(LeaderScheduleCache::new_from_bank(&sharable_banks.root())); let ctx = ConsensusPoolContext { exit: exit.clone(), - start: Arc::new((Mutex::new(true), Condvar::new())), + migration_status: Arc::new(MigrationStatus::post_migration_status()), cluster_info: Arc::new(cluster_info), my_vote_pubkey: Pubkey::new_unique(), blockstore: Arc::new(blockstore), @@ -505,7 +527,6 @@ mod tests { bls_sender, event_sender, commitment_sender, - delta_standstill: delta_standstill.unwrap_or(DELTA_STANDSTILL), }; ConsensusPoolServiceTestComponents { consensus_pool_service: ConsensusPoolService::new(ctx), @@ -524,8 +545,8 @@ mod tests { let start = Instant::now(); let mut event_received = false; while start.elapsed() < Duration::from_secs(5) { - let res = receiver.recv_timeout(Duration::from_millis(500)); - if let Ok(event) = res { + let recv = receiver.recv_timeout(Duration::from_millis(500)); + if let Ok(event) = recv { if condition(&event) { event_received = true; break; @@ -550,7 +571,7 @@ mod tests { #[test] fn test_receive_and_send_consensus_message() { agave_logger::setup(); - let setup_result = setup(None); + let setup_result = setup(); // validator 0 to 7 send Notarize on slot 2 let block_id = Hash::new_unique(); @@ -576,13 +597,14 @@ mod tests { |event| { if let BLSOp::PushCertificate { certificate } = event { assert_eq!(certificate.cert_type.slot(), target_slot); - let certificate_type = certificate.cert_type; - assert!(matches!( - certificate_type, - CertificateType::Notarize(_, _) - | CertificateType::FinalizeFast(_, _) - | CertificateType::NotarizeFallback(_, _) - )); + assert!( + matches!(certificate.cert_type, CertificateType::Notarize(_, _)) + || matches!(certificate.cert_type, CertificateType::FinalizeFast(_, _)) + || matches!( + certificate.cert_type, + CertificateType::NotarizeFallback(_, _) + ) + ); true } else { false @@ -639,7 +661,7 @@ mod tests { #[test] fn test_send_produce_block_event() { - let setup_result = setup(None); + let setup_result = setup(); 
// Find when is the next leader slot for me (validator 0) let my_pubkey = setup_result.validator_keypairs[0].node_keypair.pubkey(); let next_leader_slot = setup_result @@ -682,9 +704,9 @@ mod tests { #[test] fn test_send_standstill() { - let delta_standstill_for_test = Duration::from_millis(100); - let setup_result = setup(Some(delta_standstill_for_test)); - thread::sleep(delta_standstill_for_test); + let setup_result = setup(); + // Do nothing for a little more than DELTA_STANDSTILL + thread::sleep(DELTA_STANDSTILL + Duration::from_millis(100)); // Verify that we received a standstill event wait_for_event( &setup_result.event_receiver, @@ -699,7 +721,7 @@ mod tests { #[test_case("votor_event_receiver")] #[test_case("commitment_receiver")] fn test_channel_disconnection(channel_name: &str) { - let setup_result = setup(None); + let setup_result = setup(); // A lot of the receiver needs a finalize certificate to trigger an exit if channel_name != "consensus_message_receiver" { let finalize_certificate = Certificate { diff --git a/votor/src/consensus_pool_service/stats.rs b/votor/src/consensus_pool_service/stats.rs index cc46229767a0ec..0f9948330c131d 100644 --- a/votor/src/consensus_pool_service/stats.rs +++ b/votor/src/consensus_pool_service/stats.rs @@ -6,9 +6,10 @@ use { }, }; -const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(1); +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); -pub(super) struct Stats { +#[derive(Debug)] +pub(super) struct ConsensusPoolServiceStats { pub(super) add_message_failed: Saturating, pub(super) certificates_sent: Saturating, pub(super) certificates_dropped: Saturating, @@ -22,8 +23,8 @@ pub(super) struct Stats { last_request_time: Instant, } -impl Default for Stats { - fn default() -> Self { +impl ConsensusPoolServiceStats { + pub fn new() -> Self { Self { add_message_failed: Saturating(0), certificates_sent: Saturating(0), @@ -38,9 +39,7 @@ impl Default for Stats { last_request_time: Instant::now(), } } -} 
-impl Stats { fn report(&self) { let &Self { add_message_failed: Saturating(add_message_failed), @@ -53,7 +52,7 @@ impl Stats { received_certificates: Saturating(received_certificates), standstill, prune_old_state_called: Saturating(prune_old_state_called), - last_request_time: _, + .. } = self; datapoint_info!( "consensus_pool_service", @@ -73,7 +72,7 @@ impl Stats { ), ("received_votes", received_votes, i64), ("received_certificates", received_certificates, i64), - ("entered_standstill_bool", standstill, bool), + ("in_standstill_bool", standstill, bool), ("prune_old_state_called", prune_old_state_called, i64), ); } @@ -81,7 +80,7 @@ impl Stats { pub(super) fn maybe_report(&mut self) { if self.last_request_time.elapsed() >= STATS_REPORT_INTERVAL { self.report(); - *self = Self::default(); + *self = Self::new(); } } } diff --git a/votor/src/event_handler.rs b/votor/src/event_handler.rs index 8166fcfea30e5e..7f28ce11eecaac 100644 --- a/votor/src/event_handler.rs +++ b/votor/src/event_handler.rs @@ -1,20 +1,21 @@ //! Handles incoming VotorEvents to take action or //! 
notify block creation loop + use { crate::{ commitment::{update_commitment_cache, CommitmentType}, - consensus_metrics::{ConsensusMetricsEvent, ConsensusMetricsEventSender}, + consensus_metrics::ConsensusMetricsEvent, event::{CompletedBlock, VotorEvent, VotorEventReceiver}, event_handler::stats::EventHandlerStats, - root_utils::{self, RootContext, SetRootError}, + root_utils::{self, RootContext}, timer_manager::TimerManager, vote_history::{VoteHistory, VoteHistoryError}, voting_service::BLSOp, voting_utils::{generate_vote_message, VoteError, VotingContext}, - votor::{SharedContext, Votor}, + votor::SharedContext, }, - agave_votor_messages::{consensus_message::Block, vote::Vote}, - crossbeam_channel::{RecvTimeoutError, TrySendError}, + agave_votor_messages::{consensus_message::Block, migration::MigrationStatus, vote::Vote}, + crossbeam_channel::{select, RecvError, SendError}, parking_lot::RwLock, solana_clock::Slot, solana_hash::Hash, @@ -29,7 +30,7 @@ use { collections::{BTreeMap, BTreeSet}, sync::{ atomic::{AtomicBool, Ordering}, - Arc, Condvar, Mutex, + Arc, }, thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, @@ -46,7 +47,7 @@ pub(crate) type PendingBlocks = BTreeMap>; /// Inputs for the event handler thread pub(crate) struct EventHandlerContext { pub(crate) exit: Arc, - pub(crate) start: Arc<(Mutex, Condvar)>, + pub(crate) migration_status: Arc, pub(crate) event_receiver: VotorEventReceiver, pub(crate) timer_manager: Arc>, @@ -60,19 +61,16 @@ pub(crate) struct EventHandlerContext { #[derive(Debug, Error)] enum EventLoopError { #[error("Receiver is disconnected")] - ReceiverDisconnected(#[from] RecvTimeoutError), + ReceiverDisconnected(#[from] RecvError), #[error("Sender is disconnected")] - SenderDisconnected, + SenderDisconnected(#[from] SendError<()>), #[error("Error generating and inserting vote")] VotingError(#[from] VoteError), #[error("Set identity error")] SetIdentityError(#[from] VoteHistoryError), - - #[error("Set root error: {0}")] - 
SetRoot(#[from] SetRootError), } pub(crate) struct EventHandler { @@ -93,12 +91,10 @@ impl EventHandler { let t_event_handler = Builder::new() .name("solVotorEvLoop".to_string()) .spawn(move || { - info!("EventHandler has started"); if let Err(e) = Self::event_loop(ctx) { + info!("Event loop exited: {e:?}. Shutting down"); exit.store(true, Ordering::Relaxed); - error!("EventHandler exited with error: {e}"); } - info!("EventHandler has stopped"); }) .unwrap(); @@ -108,7 +104,7 @@ impl EventHandler { fn event_loop(context: EventHandlerContext) -> Result<(), EventLoopError> { let EventHandlerContext { exit, - start, + migration_status, event_receiver, timer_manager, shared_context: ctx, @@ -125,21 +121,41 @@ impl EventHandler { // Wait until migration has completed info!("{}: Event loop initialized", local_context.my_pubkey); - Votor::wait_for_migration_or_exit(&exit, &start); - info!("{}: Event loop starting", local_context.my_pubkey); + let Some(genesis_block) = migration_status.wait_for_migration_or_exit(&exit) else { + // Exited during migration + return Ok(()); + }; + let root_slot = vctx.sharable_banks.root().slot(); + info!( + "{}: Event loop starting genesis {genesis_block:?} root {root_slot}", + local_context.my_pubkey + ); + + // Check for set identity + if let Err(e) = Self::handle_set_identity(&mut local_context.my_pubkey, &ctx, &mut vctx) { + error!( + "Unable to load new vote history when attempting to change identity at startup \ + from {} to {} on voting loop startup, Exiting: {}", + vctx.vote_history.node_pubkey, + ctx.cluster_info.id(), + e + ); + return Err(EventLoopError::SetIdentityError(e)); + } while !exit.load(Ordering::Relaxed) { let mut receive_event_time = Measure::start("receive_event"); - let event = match event_receiver.recv_timeout(Duration::from_secs(1)) { - Ok(event) => event, - Err(RecvTimeoutError::Timeout) => continue, - Err(e) => return Err(EventLoopError::ReceiverDisconnected(e)), + let event = select! 
{ + recv(event_receiver) -> msg => { + msg? + }, + default(Duration::from_secs(1)) => continue }; receive_event_time.stop(); local_context.stats.receive_event_time_us = local_context .stats .receive_event_time_us - .saturating_add(receive_event_time.as_us()); + .saturating_add(receive_event_time.as_us() as u32); let root_bank = vctx.sharable_banks.root(); if event.should_ignore(root_bank.slot()) { @@ -165,15 +181,13 @@ impl EventHandler { let mut send_votes_batch_time = Measure::start("send_votes_batch"); for vote in votes { local_context.stats.incr_vote(&vote); - vctx.bls_sender - .send(vote) - .map_err(|_| EventLoopError::SenderDisconnected)?; + vctx.bls_sender.send(vote).map_err(|_| SendError(()))?; } send_votes_batch_time.stop(); local_context.stats.send_votes_batch_time_us = local_context .stats .send_votes_batch_time_us - .saturating_add(send_votes_batch_time.as_us()); + .saturating_add(send_votes_batch_time.as_us() as u32); local_context.stats.maybe_report(); } @@ -197,12 +211,8 @@ impl EventHandler { timer_manager.write().set_timeouts(slot); local_context.stats.timeout_set = local_context.stats.timeout_set.saturating_add(1); } - let mut highest_parent_ready = ctx - .leader_window_notifier - .highest_parent_ready - .write() - .unwrap(); + let mut highest_parent_ready = ctx.highest_parent_ready.write().unwrap(); let (current_slot, _) = *highest_parent_ready; if slot > current_slot { @@ -211,24 +221,6 @@ impl EventHandler { Ok(()) } - fn send_to_metrics( - consensus_metrics_sender: &ConsensusMetricsEventSender, - consensus_metrics_events: Vec, - stats: &mut EventHandlerStats, - ) -> Result<(), EventLoopError> { - // Do not kill or block event handler threads just because metrics - // send failed (maybe because the queue is full). 
- match consensus_metrics_sender.try_send((Instant::now(), consensus_metrics_events)) { - Ok(()) => Ok(()), - Err(TrySendError::Disconnected(_)) => Err(EventLoopError::SenderDisconnected), - Err(TrySendError::Full(_)) => { - warn!("send_to_metrics failed: queue is full"); - stats.metrics_queue_became_full = true; - Ok(()) - } - } - } - fn handle_event( event: VotorEvent, timer_manager: &RwLock, @@ -238,22 +230,22 @@ impl EventHandler { local_context: &mut LocalContext, ) -> Result, EventLoopError> { let mut votes = vec![]; - let LocalContext { - my_pubkey, - pending_blocks, - finalized_blocks, - received_shred, - stats, + let &mut LocalContext { + ref mut my_pubkey, + ref mut pending_blocks, + ref mut finalized_blocks, + ref mut received_shred, + ref mut stats, } = local_context; match event { // Block has completed replay VotorEvent::Block(CompletedBlock { slot, bank }) => { debug_assert!(bank.is_frozen()); + let now = Instant::now(); let mut consensus_metrics_events = vec![ConsensusMetricsEvent::StartOfSlot { slot }]; if slot == first_of_consecutive_leader_slots(slot) { - // all slots except the first in the window would typically start when - // the block is seen so the recording would essentially record 0. + // all slots except the first in the window would typically start when the block is seen so the recording would essentially record 0. // hence we skip it. 
consensus_metrics_events.push(ConsensusMetricsEvent::BlockHashSeen { leader: *bank.leader_id(), @@ -263,11 +255,9 @@ impl EventHandler { consensus_metrics_events.push(ConsensusMetricsEvent::MaybeNewEpoch { epoch: bank.epoch(), }); - Self::send_to_metrics( - &vctx.consensus_metrics_sender, - consensus_metrics_events, - stats, - )?; + vctx.consensus_metrics_sender + .send((now, consensus_metrics_events)) + .map_err(|_| SendError(()))?; let (block, parent_block) = Self::get_block_parent_block(&bank); info!("{my_pubkey}: Block {block:?} parent {parent_block:?}"); if Self::try_notar( @@ -294,12 +284,12 @@ impl EventHandler { finalized_blocks, received_shred, stats, - )?; - if let Some((ready_slot, parent_block)) = + ); + if let Some(parent_block) = Self::add_missing_parent_ready(block, ctx, vctx, local_context) { Self::handle_parent_ready_event( - ready_slot, + slot, parent_block, vctx, ctx, @@ -324,11 +314,12 @@ impl EventHandler { // Received a parent ready notification for `slot` VotorEvent::ParentReady { slot, parent_block } => { - Self::send_to_metrics( - &vctx.consensus_metrics_sender, - vec![ConsensusMetricsEvent::StartOfSlot { slot }], - stats, - )?; + vctx.consensus_metrics_sender + .send(( + Instant::now(), + vec![ConsensusMetricsEvent::StartOfSlot { slot }], + )) + .map_err(|_| SendError(()))?; Self::handle_parent_ready_event( slot, parent_block, @@ -352,13 +343,14 @@ impl EventHandler { VotorEvent::Timeout(slot) => { info!("{my_pubkey}: Timeout {slot}"); if slot != last_of_consecutive_leader_slots(slot) { - Self::send_to_metrics( - &vctx.consensus_metrics_sender, - vec![ConsensusMetricsEvent::StartOfSlot { - slot: slot.saturating_add(1), - }], - stats, - )?; + vctx.consensus_metrics_sender + .send(( + Instant::now(), + vec![ConsensusMetricsEvent::StartOfSlot { + slot: slot.saturating_add(1), + }], + )) + .map_err(|_| SendError(()))?; } if vctx.vote_history.voted(slot) { return Ok(votes); @@ -404,21 +396,7 @@ impl EventHandler { // It is time to produce our 
leader window VotorEvent::ProduceWindow(window_info) => { info!("{my_pubkey}: ProduceWindow {window_info:?}"); - let mut l_window_info = ctx.leader_window_notifier.window_info.lock().unwrap(); - if let Some(old_window_info) = l_window_info.as_ref() { - stats.leader_window_replaced = stats.leader_window_replaced.saturating_add(1); - error!( - "{my_pubkey}: Attempting to start leader window for {}-{}, however there \ - is already a pending window to produce {}-{}. Our production is lagging, \ - discarding in favor of the newer window", - window_info.start_slot, - window_info.end_slot, - old_window_info.start_slot, - old_window_info.end_slot, - ); - } - *l_window_info = Some(window_info); - ctx.leader_window_notifier.window_notification.notify_one(); + ctx.leader_window_info_sender.send(window_info).unwrap(); } // We have finalized this block consider it for rooting @@ -434,13 +412,13 @@ impl EventHandler { finalized_blocks, received_shred, stats, - )?; - if let Some((slot, block)) = + ); + if let Some(parent_block) = Self::add_missing_parent_ready(block, ctx, vctx, local_context) { Self::handle_parent_ready_event( - slot, - block, + block.0, + parent_block, vctx, ctx, local_context, @@ -495,12 +473,14 @@ impl EventHandler { /// all later slots. So B and C together can keep finalizing the blocks and unstuck the /// cluster. If we get a finalization cert for later slots of the window and we have the /// block replayed, trace back to the first slot of the window and emit parent ready. + /// + /// Returns [`Some(Block)`] of the parent if the parent ready for the `finalized_block` should be added. 
fn add_missing_parent_ready( finalized_block: Block, ctx: &SharedContext, vctx: &mut VotingContext, local_context: &mut LocalContext, - ) -> Option<(Slot, Block)> { + ) -> Option { let (slot, block_id) = finalized_block; let first_slot_of_window = first_of_consecutive_leader_slots(slot); if first_slot_of_window == slot || first_slot_of_window == 0 { @@ -537,7 +517,7 @@ impl EventHandler { {parent_block_id}", local_context.my_pubkey ); - Some((slot, (parent_slot, parent_block_id))) + Some((parent_slot, parent_block_id)) } fn handle_set_identity( @@ -639,7 +619,7 @@ impl EventHandler { } /// Checks the pending blocks that have completed replay to see if they - are eligible to be voted on now + are eligible to be voted on now fn check_pending_blocks( my_pubkey: &Pubkey, pending_blocks: &mut PendingBlocks, @@ -750,16 +730,15 @@ impl EventHandler { Ok(()) } - /// Checks if we can set root on a new block. The block must: - /// - Be present in bank forks + /// Checks if we can set root on a new block + /// The block must be: + /// - Present in bank forks /// - Newer than the current root - /// - Already been voted on (bank.slot()) - /// - Have its Bank frozen - /// - Finished shredding - /// - Have a finalization certificate (determined by presence in - /// `finalized_blocks`) + /// - We must have already voted on bank.slot() + /// - Bank is frozen and finished shredding + /// - Block has a finalization certificate /// - /// If so, set root on the highest block that fits these conditions.
+ /// If so set root on the highest block that fits these conditions fn check_rootable_blocks( my_pubkey: &Pubkey, ctx: &SharedContext, @@ -769,7 +748,7 @@ impl EventHandler { finalized_blocks: &mut BTreeSet, received_shred: &mut BTreeSet, stats: &mut EventHandlerStats, - ) -> Result<(), EventLoopError> { + ) { let bank_forks_r = ctx.bank_forks.read().unwrap(); let old_root = bank_forks_r.root(); let Some(new_root) = finalized_blocks @@ -785,10 +764,10 @@ impl EventHandler { .max() else { // No rootable banks - return Ok(()); + return; }; drop(bank_forks_r); - let set_root_result = root_utils::set_root( + root_utils::set_root( my_pubkey, new_root, ctx, @@ -797,14 +776,8 @@ impl EventHandler { pending_blocks, finalized_blocks, received_shred, - ) - .map_err(EventLoopError::SetRoot); - - if set_root_result.is_ok() { - stats.set_root(new_root) - } - - set_root_result + ); + stats.set_root(new_root); } pub(crate) fn join(self) -> thread::Result<()> { @@ -825,13 +798,12 @@ mod tests { VoteHistoryStorage, }, voting_service::BLSOp, - votor::LeaderWindowNotifier, }, agave_votor_messages::{ consensus_message::{ConsensusMessage, VoteMessage, BLS_KEYPAIR_DERIVE_SEED}, vote::Vote, }, - crossbeam_channel::{bounded, Receiver, TryRecvError}, + crossbeam_channel::{unbounded, Receiver, TryRecvError}, parking_lot::RwLock as PlRwLock, solana_bls_signatures::{ keypair::Keypair as BLSKeypair, signature::Signature as BLSSignature, @@ -861,14 +833,14 @@ mod tests { }; struct EventHandlerTestContext { - exit: Arc, bls_receiver: Receiver, commitment_receiver: Receiver, own_vote_receiver: Receiver, bank_forks: Arc>, my_bls_keypair: BLSKeypair, timer_manager: Arc>, - leader_window_notifier: Arc, + leader_window_info_receiver: Receiver, + highest_parent_ready: Arc>, drop_bank_receiver: Receiver>, cluster_info: Arc, consensus_metrics_receiver: ConsensusMetricsEventReceiver, @@ -879,118 +851,120 @@ mod tests { bls_ops: Vec, } - impl EventHandlerTestContext { - fn setup() -> 
EventHandlerTestContext { - // For tests, we just make each queue bounded at 100, should be enough. - let (bls_sender, bls_receiver) = bounded(100); - let (commitment_sender, commitment_receiver) = bounded(100); - let (own_vote_sender, own_vote_receiver) = bounded(100); - let (drop_bank_sender, drop_bank_receiver) = bounded(100); - let exit = Arc::new(AtomicBool::new(false)); - let (event_sender, _event_receiver) = bounded(100); - let (consensus_metrics_sender, consensus_metrics_receiver) = bounded(100); - let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( - event_sender.clone(), - exit.clone(), - ))); - - // Create 10 node validatorvotekeypairs vec - let validator_keypairs = (0..10) - .map(|_| ValidatorVoteKeypairs::new(Keypair::new(), Keypair::new(), Keypair::new())) - .collect::>(); - let stakes = (0..validator_keypairs.len()) - .rev() - .map(|i| 100_u64.saturating_add(i as u64)) - .collect::>(); - let genesis = create_genesis_config_with_alpenglow_vote_accounts( - 1_000_000_000, - &validator_keypairs, - stakes, - ); - let my_index = 0; - let my_node_keypair = validator_keypairs[my_index].node_keypair.insecure_clone(); - let my_vote_keypair = validator_keypairs[my_index].vote_keypair.insecure_clone(); - let my_bls_keypair = - BLSKeypair::derive_from_signer(&my_vote_keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap(); - let bank0 = Bank::new_for_tests(&genesis.genesis_config); - let bank_forks = BankForks::new_rw_arc(bank0); - let contact_info = ContactInfo::new_localhost(&my_node_keypair.pubkey(), 0); - let cluster_info = Arc::new(ClusterInfo::new( - contact_info, - Arc::new(my_node_keypair.insecure_clone()), - SocketAddrSpace::Unspecified, - )); - let blockstore = Arc::new( - Blockstore::open_with_options( - &get_tmp_ledger_path!(), - BlockstoreOptions::default_for_tests(), - ) - .unwrap(), - ); - - let leader_window_notifier = Arc::new(LeaderWindowNotifier::default()); - let shared_context = SharedContext { - cluster_info: cluster_info.clone(), - bank_forks: 
bank_forks.clone(), - vote_history_storage: Arc::new(FileVoteHistoryStorage::default()), - leader_window_notifier: leader_window_notifier.clone(), - blockstore, - rpc_subscriptions: None, - }; + fn setup() -> EventHandlerTestContext { + let (bls_sender, bls_receiver) = unbounded(); + let (commitment_sender, commitment_receiver) = unbounded(); + let (own_vote_sender, own_vote_receiver) = unbounded(); + let (drop_bank_sender, drop_bank_receiver) = unbounded(); + let exit = Arc::new(AtomicBool::new(false)); + let (event_sender, _event_receiver) = unbounded(); + let (consensus_metrics_sender, consensus_metrics_receiver) = unbounded(); + let (leader_window_info_sender, leader_window_info_receiver) = unbounded(); + let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( + event_sender.clone(), + exit.clone(), + Arc::new(MigrationStatus::default()), + ))); + + // Create 10 node validatorvotekeypairs vec + let validator_keypairs = (0..10) + .map(|_| ValidatorVoteKeypairs::new(Keypair::new(), Keypair::new(), Keypair::new())) + .collect::>(); + let stakes = (0..validator_keypairs.len()) + .rev() + .map(|i| 100_u64.saturating_add(i as u64)) + .collect::>(); + let genesis = create_genesis_config_with_alpenglow_vote_accounts( + 1_000_000_000, + &validator_keypairs, + stakes, + ); + let my_index = 0; + let my_node_keypair = validator_keypairs[my_index].node_keypair.insecure_clone(); + let my_vote_keypair = validator_keypairs[my_index].vote_keypair.insecure_clone(); + let my_bls_keypair = + BLSKeypair::derive_from_signer(&my_vote_keypair, BLS_KEYPAIR_DERIVE_SEED).unwrap(); + let bank0 = Bank::new_for_tests(&genesis.genesis_config); + let bank_forks = BankForks::new_rw_arc(bank0); + let contact_info = ContactInfo::new_localhost(&my_node_keypair.pubkey(), 0); + let cluster_info = Arc::new(ClusterInfo::new( + contact_info, + Arc::new(my_node_keypair.insecure_clone()), + SocketAddrSpace::Unspecified, + )); + let blockstore = Arc::new( + Blockstore::open_with_options( + 
&get_tmp_ledger_path!(), + BlockstoreOptions::default_for_tests(), + ) + .unwrap(), + ); + let highest_parent_ready = Arc::new(RwLock::default()); + + let shared_context = SharedContext { + cluster_info: cluster_info.clone(), + bank_forks: bank_forks.clone(), + vote_history_storage: Arc::new(FileVoteHistoryStorage::default()), + leader_window_info_sender, + blockstore, + rpc_subscriptions: None, + highest_parent_ready: highest_parent_ready.clone(), + }; - let vote_history = VoteHistory::new(my_node_keypair.pubkey(), 0); - let voting_context = VotingContext { - identity_keypair: Arc::new(my_node_keypair.insecure_clone()), - sharable_banks: bank_forks.read().unwrap().sharable_banks(), - vote_history, - bls_sender, - commitment_sender, - vote_account_pubkey: my_vote_keypair.pubkey(), - wait_to_vote_slot: None, - authorized_voter_keypairs: Arc::new(RwLock::new(vec![Arc::new(my_vote_keypair)])), - derived_bls_keypairs: HashMap::new(), - has_new_vote_been_rooted: false, - own_vote_sender, - consensus_metrics_sender, - }; + let vote_history = VoteHistory::new(my_node_keypair.pubkey(), 0); + let voting_context = VotingContext { + identity_keypair: Arc::new(my_node_keypair.insecure_clone()), + sharable_banks: bank_forks.read().unwrap().sharable_banks(), + vote_history, + bls_sender, + commitment_sender, + vote_account_pubkey: my_vote_keypair.pubkey(), + wait_to_vote_slot: None, + authorized_voter_keypairs: Arc::new(RwLock::new(vec![Arc::new(my_vote_keypair)])), + derived_bls_keypairs: HashMap::new(), + has_new_vote_been_rooted: false, + own_vote_sender, + consensus_metrics_sender, + }; - let root_context = RootContext { - leader_schedule_cache: Arc::new(LeaderScheduleCache::new_from_bank( - &bank_forks.read().unwrap().root_bank(), - )), - snapshot_controller: None, - bank_notification_sender: None, - drop_bank_sender, - }; + let root_context = RootContext { + leader_schedule_cache: Arc::new(LeaderScheduleCache::new_from_bank( + &bank_forks.read().unwrap().root_bank(), + )), 
+ snapshot_controller: None, + bank_notification_sender: None, + drop_bank_sender, + }; - let local_context = LocalContext { - my_pubkey: my_node_keypair.pubkey(), - pending_blocks: BTreeMap::new(), - finalized_blocks: BTreeSet::new(), - received_shred: BTreeSet::new(), - stats: EventHandlerStats::default(), - }; + let local_context = LocalContext { + my_pubkey: my_node_keypair.pubkey(), + pending_blocks: BTreeMap::new(), + finalized_blocks: BTreeSet::new(), + received_shred: BTreeSet::new(), + stats: EventHandlerStats::default(), + }; - EventHandlerTestContext { - exit, - bls_receiver, - commitment_receiver, - own_vote_receiver, - bank_forks, - my_bls_keypair, - timer_manager, - leader_window_notifier, - drop_bank_receiver, - cluster_info, - consensus_metrics_receiver, - shared_context, - voting_context, - root_context, - local_context, - bls_ops: vec![], - } + EventHandlerTestContext { + bls_receiver, + commitment_receiver, + own_vote_receiver, + bank_forks, + my_bls_keypair, + timer_manager, + leader_window_info_receiver, + drop_bank_receiver, + cluster_info, + consensus_metrics_receiver, + highest_parent_ready, + shared_context, + voting_context, + root_context, + local_context, + bls_ops: vec![], } + } + impl EventHandlerTestContext { fn send_parent_ready_event(&mut self, slot: Slot, parent_block: Block) { let mut new_ops = EventHandler::handle_event( VotorEvent::ParentReady { slot, parent_block }, @@ -1225,7 +1199,7 @@ mod tests { assert_eq!(commitment.slot, expected_slot); } - fn check_no_vote_or_commitment(&self) { + fn check_no_vote_or_commitment(&mut self) { assert_eq!( self.bls_receiver.try_recv().err(), Some(TryRecvError::Empty) @@ -1236,24 +1210,17 @@ mod tests { ); } - fn check_parent_ready_slot(&mut self, expected: (Slot, Block)) { - assert_eq!( - *self - .leader_window_notifier - .highest_parent_ready - .read() - .unwrap(), - expected - ); + fn check_parent_ready_slot(&self, expected: (Slot, Block)) { + 
assert_eq!(*self.highest_parent_ready.read().unwrap(), expected); let slot = expected.0; self.check_timeout_set(slot); } - fn check_timeout_set(&mut self, expected_slot: Slot) { + fn check_timeout_set(&self, expected_slot: Slot) { assert!(self.timer_manager.read().is_timeout_set(expected_slot)); } - fn check_for_metrics_event(&mut self, expected: ConsensusMetricsEvent) { + fn check_for_metrics_event(&self, expected: ConsensusMetricsEvent) { let event = self .consensus_metrics_receiver .try_recv() @@ -1261,7 +1228,7 @@ mod tests { assert!(event.1.contains(&expected)); } - fn create_vote_history_storage_and_switch_identity( + fn crate_vote_history_storage_and_switch_identity( &mut self, new_identity: &Keypair, ) -> PathBuf { @@ -1274,6 +1241,7 @@ mod tests { .is_ok()); self.cluster_info .set_keypair(Arc::new(new_identity.insecure_clone())); + self.send_set_identity_event(); file_vote_history_storage.filename(&new_identity.pubkey()) } @@ -1283,7 +1251,7 @@ mod tests { fn test_received_block_event_and_parent_ready_event() { // Test different orders of received block event and parent ready event // some will send Notarize immediately, some will wait for parent ready - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Received block event which says block has completed replay // If there is a parent ready for block 1 Notarization is sent out. 
@@ -1305,7 +1273,6 @@ mod tests { // We should receive Notarize Vote for block 1 test_context.check_for_vote(&Vote::new_notarization_vote(slot, block_id_1)); test_context.check_for_commitment(CommitmentType::Notarize, slot); - // Add block event for 1 again will not trigger another Notarize or commitment test_context.send_block_event(1, bank1.clone()); test_context.check_no_vote_or_commitment(); @@ -1317,7 +1284,6 @@ mod tests { // Because 2 is middle of window, we should see Notarize vote for block 2 even without parentready test_context.check_for_vote(&Vote::new_notarization_vote(slot, block_id_2)); test_context.check_for_commitment(CommitmentType::Notarize, slot); - // Slot 3 somehow links to block 1, should not trigger Notarize vote because it has a wrong parent (not 2) let _ = test_context.create_block_and_send_block_event(3, bank1.clone()); test_context.check_no_vote_or_commitment(); @@ -1326,7 +1292,6 @@ mod tests { let slot = 4; let bank4 = test_context.create_block_and_send_block_event(slot, bank2.clone()); let block_id_4 = bank4.block_id().unwrap(); - test_context.check_no_vote_or_commitment(); // Send parent ready for slot 4 should trigger Notarize vote for slot 4 test_context.send_parent_ready_event(slot, (2, block_id_2)); @@ -1339,7 +1304,7 @@ mod tests { fn test_received_block_notarized_and_timeout() { // Test block notarized event will trigger Finalize vote when all conditions are met // But it will not trigger Finalize if any of the conditions are not met - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); let root_bank = test_context .bank_forks @@ -1355,7 +1320,6 @@ mod tests { test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); test_context.check_for_commitment(CommitmentType::Notarize, 1); - // Send block notarized event should trigger Finalize vote test_context.send_block_notarized_event((1, block_id_1)); 
test_context.check_for_vote(&Vote::new_finalization_vote(1)); @@ -1409,13 +1373,11 @@ mod tests { let bank5 = test_context.create_block_only(slot, bank4.clone()); test_context.send_block_event(slot, bank5.clone()); test_context.check_no_vote_or_commitment(); - - test_context.exit.store(true, Ordering::Relaxed); } #[test] fn test_received_timeout_crashed_leader_and_first_shred() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Simulate a crashed leader for slot 4 test_context.send_timeout_crashed_leader_event(4); @@ -1435,7 +1397,7 @@ mod tests { #[test] fn test_received_safe_to_notar() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // We can theoretically not vote skip here and test will pass, but in real world // safe_to_notar event only fires after we voted skip for the whole window @@ -1448,6 +1410,7 @@ mod tests { let bank_1 = test_context.create_block_and_send_block_event(1, root_bank); let block_id_1_old = bank_1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1_old)); test_context.check_for_commitment(CommitmentType::Notarize, 1); @@ -1478,7 +1441,7 @@ mod tests { #[test] fn test_received_safe_to_skip() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // The safe_to_skip event only fires after we voted notarize for the slot let root_bank = test_context @@ -1490,10 +1453,10 @@ mod tests { let bank_1 = test_context.create_block_and_send_block_event(1, root_bank); let block_id_1 = bank_1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); 
test_context.check_for_commitment(CommitmentType::Notarize, 1); - // Now we got safe_to_skip event for slot 1 test_context.send_safe_to_skip_event(1); // We should see rest of the window skipped @@ -1509,46 +1472,35 @@ mod tests { #[test] fn test_received_produce_window() { - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Produce a full window of blocks // Assume the leader for 1-3 is us, send produce window event test_context.send_produce_window_event(1, 3, (0, Hash::default())); - // Check that leader_window_notifier is updated - let mut guard = test_context - .leader_window_notifier - .window_info - .lock() - .unwrap(); - let received_leader_window_info = guard.take().unwrap(); + // Check that leader_window_info is sent via channel + let received_leader_window_info = + test_context.leader_window_info_receiver.try_recv().unwrap(); assert_eq!(received_leader_window_info.start_slot, 1); assert_eq!(received_leader_window_info.end_slot, 3); assert_eq!( received_leader_window_info.parent_block, (0, Hash::default()) ); - drop(guard); // Suddenly I found out I produced block 1 already, send new produce window event let block_id_1 = Hash::new_unique(); test_context.send_produce_window_event(2, 3, (1, block_id_1)); - let mut guard = test_context - .leader_window_notifier - .window_info - .lock() - .unwrap(); - let received_leader_window_info = guard.take().unwrap(); + let received_leader_window_info = + test_context.leader_window_info_receiver.try_recv().unwrap(); assert_eq!(received_leader_window_info.start_slot, 2); assert_eq!(received_leader_window_info.end_slot, 3); assert_eq!(received_leader_window_info.parent_block, (1, block_id_1)); - drop(guard); } #[test] fn test_received_finalized() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); let root_bank = test_context .bank_forks @@ -1560,10 +1512,10 @@ mod tests { let block_id_1 = 
bank1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_parent_ready_slot((1, (0, Hash::default()))); test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); test_context.check_for_commitment(CommitmentType::Notarize, 1); - // Now we got finalized event for slot 1 test_context.send_finalized_event((1, block_id_1), true); // Listen on drop bank receiver, it should get bank 0 @@ -1576,8 +1528,7 @@ mod tests { #[test] fn test_parent_ready_in_middle_of_window() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // We just woke up and received finalize for slot 5 let root_bank = test_context @@ -1593,6 +1544,7 @@ mod tests { let block_id_5 = bank5.block_id().unwrap(); test_context.send_finalized_event((5, block_id_5), true); + // We should now have parent ready for slot 5 test_context.check_parent_ready_slot((5, (4, block_id_4))); @@ -1601,6 +1553,7 @@ mod tests { let bank9 = test_context.create_block_only(9, bank5.clone()); let block_id_9 = bank9.block_id().unwrap(); test_context.send_finalized_event((9, block_id_9), true); + test_context.send_block_event(9, bank9.clone()); // We should now have parent ready for slot 9 @@ -1609,8 +1562,7 @@ mod tests { #[test] fn test_received_standstill() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); // Send notarize vote for slot 1 then skip rest of the window let root_bank = test_context @@ -1622,13 +1574,16 @@ mod tests { let bank1 = test_context.create_block_and_send_block_event(1, root_bank); let block_id_1 = bank1.block_id().unwrap(); test_context.send_parent_ready_event(1, (0, Hash::default())); + test_context.check_for_vote(&Vote::new_notarization_vote(1, block_id_1)); + test_context.send_timeout_event(2); test_context.check_for_vote(&Vote::new_skip_vote(2)); test_context.check_for_vote(&Vote::new_skip_vote(3)); // 
Send a standstill event with highest parent ready at 0, we should refresh all the votes test_context.send_standstill_event(0); + test_context.check_for_votes(&[ Vote::new_notarization_vote(1, block_id_1), Vote::new_skip_vote(2), @@ -1636,22 +1591,21 @@ mod tests { ]); // Send another standstill event with highest parent ready at 1, we should refresh votes for 2 and 3 only - test_context.bls_ops.clear(); test_context.send_standstill_event(1); + test_context.check_for_votes(&[Vote::new_skip_vote(2), Vote::new_skip_vote(3)]); } #[test] fn test_received_set_identity() { - agave_logger::setup(); - let mut test_context = EventHandlerTestContext::setup(); + let mut test_context = setup(); let old_identity = test_context.cluster_info.keypair().insecure_clone(); let new_identity = Keypair::new(); let mut files_to_remove = vec![]; // Before set identity we need to manually create the vote history storage file for new identity files_to_remove - .push(test_context.create_vote_history_storage_and_switch_identity(&new_identity)); + .push(test_context.crate_vote_history_storage_and_switch_identity(&new_identity)); // Should not send any votes because we set to a different identity let root_bank = test_context @@ -1662,16 +1616,17 @@ mod tests { .root(); let _ = test_context.create_block_and_send_block_event(1, root_bank.clone()); test_context.send_parent_ready_event(1, (0, Hash::default())); + // There should be no votes but we should see commitments for hot spares assert_eq!( test_context.bls_receiver.try_recv().err(), - Some(crossbeam_channel::TryRecvError::Empty) + Some(TryRecvError::Empty) ); test_context.check_for_commitment(CommitmentType::Notarize, 1); // Now set back to original identity files_to_remove - .push(test_context.create_vote_history_storage_and_switch_identity(&old_identity)); + .push(test_context.crate_vote_history_storage_and_switch_identity(&old_identity)); // We should now be able to vote again let slot = 4; diff --git a/votor/src/event_handler/stats.rs 
b/votor/src/event_handler/stats.rs index 76dfd7b3bd3f1e..739f35e12fbe1c 100644 --- a/votor/src/event_handler/stats.rs +++ b/votor/src/event_handler/stats.rs @@ -1,36 +1,31 @@ use { - crate::{common::VoteType, event::VotorEvent, voting_service::BLSOp}, - agave_votor_messages::consensus_message::ConsensusMessage, + crate::{event::VotorEvent, voting_service::BLSOp}, + agave_votor_messages::{consensus_message::ConsensusMessage, vote::VoteType}, solana_clock::Slot, solana_metrics::datapoint_info, std::{ - collections::BTreeMap, + collections::{BTreeMap, HashMap}, time::{Duration, Instant}, }, }; -const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(1); +const STATS_REPORT_INTERVAL: Duration = Duration::from_secs(10); #[derive(Debug, Clone)] struct SlotTracking { /// The time when the slot tracking started start: Instant, - /// Duration in microseconds from start to when the first shred for this - /// slot was received - first_shred: Option, - /// Duration in microseconds from start to when the parent block for this - /// slot was ready - parent_ready: Option, - /// Duration in microseconds from start to when the notarization vote for - /// this slot was sent - vote_notarize: Option, - /// Duration in microseconds from start to when the skip vote for this slot - /// was sent - vote_skip: Option, - /// If the slot was finalized, this is the duration in microseconds from - /// start to when it was finalized, and the bool indicates if it was fast - /// finalized - finalized: Option<(i64, bool)>, + /// The time when the first shred for this slot was received + first_shred: Option, + /// The time when the parent block for this slot was ready + parent_ready: Option, + /// The time when the notarization vote for this slot was sent + vote_notarize: Option, + /// The time when the skip vote for this slot was sent + vote_skip: Option, + /// If the slot was finalized, this is the time when it was finalized, + /// the bool indicates if it was fast finalized + finalized: 
Option<(Instant, bool)>, } impl Default for SlotTracking { @@ -46,289 +41,45 @@ impl Default for SlotTracking { } } -impl SlotTracking { - fn finalized_elapsed_micros(&self) -> Option { - self.finalized.map(|(t, _fast_finalize)| t) - } - - fn is_fast_finalized(&self) -> Option { - self.finalized.map(|(_t, fast_finalize)| fast_finalize) - } - - fn report(&self, slot: Slot) { - datapoint_info!( - "event_handler_slot_tracking", - ("slot", slot as i64, i64), - ( - "first_shred", - self.first_shred, - Option - ), - ( - "parent_ready", - self.parent_ready, - Option - ), - ( - "vote_notarize", - self.vote_notarize, - Option - ), - ( - "vote_skip", - self.vote_skip, - Option - ), - ( - "finalized", - self.finalized_elapsed_micros(), - Option - ), - ("is_fast_finalization", self.is_fast_finalized(), Option) - ); - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum StatsEvent { - Block, - BlockNotarized, - FirstShred, - ParentReady, - TimeoutCrashedLeader, - Timeout, - SafeToNotar, - SafeToSkip, - ProduceWindow, - Finalized, - Standstill, - SetIdentity, -} - #[derive(Debug, Default)] struct EventCountAndTime { - count: usize, - time_us: u64, -} - -#[derive(Debug, Default)] -struct ReceivedEventsStats { - block: EventCountAndTime, - block_notarized: EventCountAndTime, - first_shred: EventCountAndTime, - parent_ready: EventCountAndTime, - timeout_crashed_leader: EventCountAndTime, - timeout: EventCountAndTime, - safe_to_notar: EventCountAndTime, - safe_to_skip: EventCountAndTime, - produce_window: EventCountAndTime, - finalized: EventCountAndTime, - standstill: EventCountAndTime, - set_identity: EventCountAndTime, -} - -impl ReceivedEventsStats { - fn incr_event_with_timing(&mut self, stats_event: StatsEvent, time_us: u64) { - match stats_event { - StatsEvent::Block => { - self.block.count = self.block.count.saturating_add(1); - self.block.time_us = self.block.time_us.saturating_add(time_us); - } - StatsEvent::BlockNotarized => { - self.block_notarized.count 
= self.block_notarized.count.saturating_add(1); - self.block_notarized.time_us = self.block_notarized.time_us.saturating_add(time_us); - } - StatsEvent::FirstShred => { - self.first_shred.count = self.first_shred.count.saturating_add(1); - self.first_shred.time_us = self.first_shred.time_us.saturating_add(time_us); - } - StatsEvent::ParentReady => { - self.parent_ready.count = self.parent_ready.count.saturating_add(1); - self.parent_ready.time_us = self.parent_ready.time_us.saturating_add(time_us); - } - StatsEvent::TimeoutCrashedLeader => { - self.timeout_crashed_leader.count = - self.timeout_crashed_leader.count.saturating_add(1); - self.timeout_crashed_leader.time_us = - self.timeout_crashed_leader.time_us.saturating_add(time_us); - } - StatsEvent::Timeout => { - self.timeout.count = self.timeout.count.saturating_add(1); - self.timeout.time_us = self.timeout.time_us.saturating_add(time_us); - } - StatsEvent::SafeToNotar => { - self.safe_to_notar.count = self.safe_to_notar.count.saturating_add(1); - self.safe_to_notar.time_us = self.safe_to_notar.time_us.saturating_add(time_us); - } - StatsEvent::SafeToSkip => { - self.safe_to_skip.count = self.safe_to_skip.count.saturating_add(1); - self.safe_to_skip.time_us = self.safe_to_skip.time_us.saturating_add(time_us); - } - StatsEvent::ProduceWindow => { - self.produce_window.count = self.produce_window.count.saturating_add(1); - self.produce_window.time_us = self.produce_window.time_us.saturating_add(time_us); - } - StatsEvent::Finalized => { - self.finalized.count = self.finalized.count.saturating_add(1); - self.finalized.time_us = self.finalized.time_us.saturating_add(time_us); - } - StatsEvent::Standstill => { - self.standstill.count = self.standstill.count.saturating_add(1); - self.standstill.time_us = self.standstill.time_us.saturating_add(time_us); - } - StatsEvent::SetIdentity => { - self.set_identity.count = self.set_identity.count.saturating_add(1); - self.set_identity.time_us = 
self.set_identity.time_us.saturating_add(time_us); - } - } - } - - fn report(&self) { - datapoint_info!( - "event_handler_received_event_count_and_timing", - ("block_count", self.block.count as i64, i64), - ("block_elapsed_us", self.block.time_us as i64, i64), - ( - "block_notarized_count", - self.block_notarized.count as i64, - i64 - ), - ( - "block_notarized_elapsed_us", - self.block_notarized.time_us as i64, - i64 - ), - ("first_shred_count", self.first_shred.count as i64, i64), - ( - "first_shred_elapsed_us", - self.first_shred.time_us as i64, - i64 - ), - ("parent_ready_count", self.parent_ready.count as i64, i64), - ( - "parent_ready_elapsed_us", - self.parent_ready.time_us as i64, - i64 - ), - ( - "timeout_crashed_leader_count", - self.timeout_crashed_leader.count as i64, - i64 - ), - ( - "timeout_crashed_leader_elapsed_us", - self.timeout_crashed_leader.time_us as i64, - i64 - ), - ("timeout_count", self.timeout.count as i64, i64), - ("timeout_elapsed_us", self.timeout.time_us as i64, i64), - ("safe_to_notar_count", self.safe_to_notar.count as i64, i64), - ( - "safe_to_notar_elapsed_us", - self.safe_to_notar.time_us as i64, - i64 - ), - ("safe_to_skip_count", self.safe_to_skip.count as i64, i64), - ( - "safe_to_skip_elapsed_us", - self.safe_to_skip.time_us as i64, - i64 - ), - ( - "produce_window_count", - self.produce_window.count as i64, - i64 - ), - ( - "produce_window_elapsed_us", - self.produce_window.time_us as i64, - i64 - ), - ("finalized_count", self.finalized.count as i64, i64), - ("finalized_elapsed_us", self.finalized.time_us as i64, i64), - ("standstill_count", self.standstill.count as i64, i64), - ("standstill_elapsed_us", self.standstill.time_us as i64, i64), - ("set_identity_count", self.set_identity.count as i64, i64), - ( - "set_identity_elapsed_us", - self.set_identity.time_us as i64, - i64 - ), - ); - } -} - -#[derive(Debug, Default)] -struct SentVoteStats { - finalize: usize, - notarize: usize, - notarize_fallback: usize, - skip: usize, 
- skip_fallback: usize, -} - -impl SentVoteStats { - fn incr_vote(&mut self, vote_type: VoteType) { - match vote_type { - VoteType::Finalize => self.finalize = self.finalize.saturating_add(1), - VoteType::Notarize => self.notarize = self.notarize.saturating_add(1), - VoteType::NotarizeFallback => { - self.notarize_fallback = self.notarize_fallback.saturating_add(1) - } - VoteType::Skip => self.skip = self.skip.saturating_add(1), - VoteType::SkipFallback => self.skip_fallback = self.skip_fallback.saturating_add(1), - VoteType::Genesis => (), - } - } - - fn report(&self) { - datapoint_info!( - "event_handler_sent_vote_count", - ("finalize", self.finalize as i64, i64), - ("notarize", self.notarize as i64, i64), - ("notarize_fallback", self.notarize_fallback as i64, i64), - ("skip", self.skip as i64, i64), - ("skip_fallback", self.skip_fallback as i64, i64), - ); - } + count: u16, + time_us: u32, } #[derive(Debug)] pub(crate) struct EventHandlerStats { - /// Number of events that were ignored. This includes events that were - /// received but not processed due to various reasons (e.g., outdated, - /// irrelevant). - pub(crate) ignored: usize, + // Number of events that were ignored. This includes events that were + // received but not processed due to various reasons (e.g., outdated, + // irrelevant). + pub(crate) ignored: u16, - /// Number of times where we are attempting to start a leader window but - /// there is already a pending window to produce. The older window is - /// discarded in favor of the newer one. - pub(crate) leader_window_replaced: usize, + // Number of times where we are attempting to start a leader window but + // there is already a pending window to produce. The older window is + // discarded in favor of the newer one. + pub(crate) leader_window_replaced: u16, - /// Number of times we updated the root. - pub(crate) set_root_count: usize, + // Number of times we updated the root. 
+ pub(crate) set_root_count: u16, - /// Number of times we setup timeouts for a new leader window. - pub(crate) timeout_set: usize, + // Number of times we setup timeouts for a new leader window. + pub(crate) timeout_set: u16, - /// Amount of time spent receiving events. Includes waiting for events. - pub(crate) receive_event_time_us: u64, + // Amount of time spent receiving events. Includes waiting for events. + pub(crate) receive_event_time_us: u32, - /// Amount of time spent sending votes. - pub(crate) send_votes_batch_time_us: u64, + // Amount of time spent sending votes. + pub(crate) send_votes_batch_time_us: u32, - /// Number of times we saw each event and time spent processing the event. - received_events_stats: ReceivedEventsStats, + // Number of times we saw each event and time spent processing the event. + received_events_count_and_timing: HashMap, - /// Number of votes sent for each vote type. - sent_votes: SentVoteStats, + // Number of votes sent for each vote type. + sent_votes: HashMap, - /// Timing information for major events for each slot. + // Timing information for major events for each slot. slot_tracking_map: BTreeMap, - /// Whether the send metrics queue has been full. 
- pub(super) metrics_queue_became_full: bool, - root_slot: Slot, last_report_time: Instant, } @@ -339,6 +90,22 @@ impl Default for EventHandlerStats { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum StatsEvent { + Block, + BlockNotarized, + FirstShred, + ParentReady, + TimeoutCrashedLeader, + Timeout, + SafeToNotar, + SafeToSkip, + ProduceWindow, + Finalized, + Standstill, + SetIdentity, +} + impl StatsEvent { pub fn new(event: &VotorEvent) -> Self { match event { @@ -367,48 +134,28 @@ impl EventHandlerStats { timeout_set: 0, receive_event_time_us: 0, send_votes_batch_time_us: 0, - received_events_stats: ReceivedEventsStats::default(), - sent_votes: SentVoteStats::default(), + received_events_count_and_timing: HashMap::new(), + sent_votes: HashMap::new(), slot_tracking_map: BTreeMap::new(), root_slot: 0, last_report_time: Instant::now(), - metrics_queue_became_full: false, } } - fn reset(&mut self, root_slot: Slot, slot_tracking_map: BTreeMap) { - *self = EventHandlerStats::new(); - self.root_slot = root_slot; - self.slot_tracking_map = slot_tracking_map; - } - pub fn handle_event_arrival(&mut self, event: &VotorEvent) -> StatsEvent { match event { VotorEvent::FirstShred(slot) => { let entry = self.slot_tracking_map.entry(*slot).or_default(); - entry.first_shred = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); + entry.first_shred = Some(Instant::now()); } VotorEvent::ParentReady { slot, .. 
} => { let entry = self.slot_tracking_map.entry(*slot).or_default(); - entry.parent_ready = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); + entry.parent_ready = Some(Instant::now()); } VotorEvent::Finalized((slot, _), is_fast_finalization) => { let entry = self.slot_tracking_map.entry(*slot).or_default(); if entry.finalized.is_none() { - entry.finalized = Some(( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - *is_fast_finalization, - )); + entry.finalized = Some((Instant::now(), *is_fast_finalization)); } else if *is_fast_finalization { // We can accept Notarize and FastFinalization, never set the flag from true to false if let Some((instant, false)) = entry.finalized { @@ -427,39 +174,32 @@ impl EventHandlerStats { } pub fn incr_event_with_timing(&mut self, stats_event: StatsEvent, time_us: u64) { - self.received_events_stats - .incr_event_with_timing(stats_event, time_us); + let entry = self + .received_events_count_and_timing + .entry(stats_event) + .or_default(); + entry.count = entry.count.saturating_add(1); + entry.time_us = entry.time_us.saturating_add(time_us as u32); } pub fn incr_vote(&mut self, bls_op: &BLSOp) { - let BLSOp::PushVote { message, .. } = bls_op else { + if let BLSOp::PushVote { message, .. 
} = bls_op { + let ConsensusMessage::Vote(vote) = **message else { + warn!("Unexpected BLS message type: {message:?}"); + return; + }; + let vote_type = vote.vote.get_type(); + let entry = self.sent_votes.entry(vote_type).or_insert(0); + *entry = entry.saturating_add(1); + if vote_type == VoteType::Notarize { + let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); + entry.vote_notarize = Some(Instant::now()); + } else if vote_type == VoteType::Skip { + let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); + entry.vote_skip = Some(Instant::now()); + } + } else { warn!("Unexpected BLS operation: {bls_op:?}"); - return; - }; - let ConsensusMessage::Vote(ref vote) = **message else { - warn!("Unexpected BLS message type: {message:?}"); - return; - }; - - // Increment vote type counters - let vote_type = VoteType::get_type(&vote.vote); - self.sent_votes.incr_vote(vote_type); - - // Increment slot based counters - if vote_type == VoteType::Notarize { - let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); - entry.vote_notarize = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); - } else if vote_type == VoteType::Skip { - let entry = self.slot_tracking_map.entry(vote.vote.slot()).or_default(); - entry.vote_skip = Some( - Instant::now() - .saturating_duration_since(entry.start) - .as_micros() as i64, - ); } } @@ -478,12 +218,16 @@ impl EventHandlerStats { ), ("set_root_count", self.set_root_count as i64, i64), ("timeout_set", self.timeout_set as i64, i64), - ( - "metrics_queue_became_full", - self.metrics_queue_became_full, - bool - ) ); + for (event, EventCountAndTime { count, time_us }) in &self.received_events_count_and_timing + { + datapoint_info!( + "event_handler_received_event_count_and_timing", + ("event", format!("{:?}", event), String), + ("count", *count as i64, i64), + ("elapsed_us", *time_us as i64, i64) + ); + } datapoint_info!( "event_handler_timing", ( @@ 
-497,16 +241,72 @@ impl EventHandlerStats { i64 ), ); - - self.received_events_stats.report(); - self.sent_votes.report(); - - // Only report slots lower than the `root_slot` + for (vote_type, count) in &self.sent_votes { + datapoint_info!( + "event_handler_sent_vote_count", + ("vote", format!("{:?}", vote_type), String), + ("count", *count as i64, i64) + ); + } + // Only report if the slot is lower than root_slot let split_off_map = self.slot_tracking_map.split_off(&self.root_slot); for (slot, tracking) in &self.slot_tracking_map { - tracking.report(*slot); + let start = tracking.start; + datapoint_info!( + "event_handler_slot_tracking", + ("slot", *slot as i64, i64), + ( + "first_shred", + tracking.first_shred.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "parent_ready", + tracking.parent_ready.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "vote_notarize", + tracking.vote_notarize.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "vote_skip", + tracking.vote_skip.map(|t| { + t.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ( + "finalized", + tracking.finalized.map(|t| { + t.0.saturating_duration_since(start) + .as_micros() + .min(i64::MAX as u128) as i64 + }), + Option + ), + ("is_fast_finalization", tracking.finalized.map(|t| t.1), Option) + ); } - - self.reset(self.root_slot, split_off_map); + self.last_report_time = now; + let root_slot = self.root_slot; + *self = EventHandlerStats::new(); + self.root_slot = root_slot; + self.slot_tracking_map = split_off_map; } } diff --git a/votor/src/root_utils.rs b/votor/src/root_utils.rs index 316fa24dbb56f9..1adb45d28cda5d 100644 --- a/votor/src/root_utils.rs +++ b/votor/src/root_utils.rs @@ -1,13 +1,10 @@ use { crate::{event_handler::PendingBlocks, 
voting_utils::VotingContext, votor::SharedContext}, agave_votor_messages::consensus_message::Block, - crossbeam_channel::{SendError, Sender}, - log::{info, warn}, + crossbeam_channel::Sender, solana_clock::Slot, - solana_ledger::{ - blockstore::{Blockstore, BlockstoreError}, - leader_schedule_cache::LeaderScheduleCache, - }, + solana_hash::Hash, + solana_ledger::{blockstore::Blockstore, leader_schedule_cache::LeaderScheduleCache}, solana_pubkey::Pubkey, solana_rpc::{ optimistically_confirmed_bank_tracker::{BankNotification, BankNotificationSenderConfig}, @@ -22,10 +19,8 @@ use { collections::BTreeSet, sync::{Arc, RwLock}, }, - thiserror::Error, }; -#[allow(dead_code)] /// Structures that are not used in the event loop, but need to be updated /// or notified when setting root pub(crate) struct RootContext { @@ -35,15 +30,6 @@ pub(crate) struct RootContext { pub(crate) drop_bank_sender: Sender>, } -#[derive(Debug, Error)] -pub enum SetRootError { - #[error("Failed to record slot in blockstore: {0}")] - Blockstore(#[from] BlockstoreError), - - #[error("Error sending bank nofification: {0}")] - SendNotification(#[from] SendError<()>), -} - /// Sets the root for the votor event handling loop. 
Handles rooting all things /// except the certificate pool pub(crate) fn set_root( @@ -55,12 +41,12 @@ pub(crate) fn set_root( pending_blocks: &mut PendingBlocks, finalized_blocks: &mut BTreeSet, received_shred: &mut BTreeSet, -) -> Result<(), SetRootError> { +) { info!("{my_pubkey}: setting root {new_root}"); vctx.vote_history.set_root(new_root); - pending_blocks.retain(|pending_block, _| *pending_block >= new_root); - finalized_blocks.retain(|(slot, _)| *slot >= new_root); - received_shred.retain(|slot| *slot >= new_root); + *pending_blocks = pending_blocks.split_off(&new_root); + *finalized_blocks = finalized_blocks.split_off(&(new_root, Hash::default())); + *received_shred = received_shred.split_off(&new_root); check_and_handle_new_root( new_root, @@ -75,12 +61,19 @@ pub(crate) fn set_root( ctx.rpc_subscriptions.as_deref(), my_pubkey, |_| {}, - )?; + ); // Distinguish between duplicate versions of same slot let hash = ctx.bank_forks.read().unwrap().bank_hash(new_root).unwrap(); - ctx.blockstore - .insert_optimistic_slot(new_root, &hash, timestamp().try_into().unwrap())?; + if let Err(e) = + ctx.blockstore + .insert_optimistic_slot(new_root, &hash, timestamp().try_into().unwrap()) + { + error!( + "failed to record optimistic slot in blockstore: slot={}: {:?}", + new_root, &e + ); + } // It is critical to send the OC notification in order to keep compatibility with // the RPC API. 
Additionally the PrioritizationFeeCache relies on this notification @@ -91,20 +84,16 @@ pub(crate) fn set_root( .dependency_tracker .as_ref() .map(|s| s.get_current_declared_work()); - config - .sender - .send(( - BankNotification::OptimisticallyConfirmed(new_root), - dependency_work, - )) - .map_err(|_| SendError(()))?; + // TODO: propagate error + let _ = config.sender.send(( + BankNotification::OptimisticallyConfirmed(new_root), + dependency_work, + )); } - - Ok(()) } /// Sets the new root, additionally performs the callback after setting the bank forks root -/// During this transition period where both replay stage and votor can root depending on the feature flag we +/// During this transition period where both replay stage and voting loop can root depending on the feature flag we /// have a callback that cleans up progress map and other tower bft structures. Then the callgraph is /// /// ReplayStage::check_and_handle_new_root -> root_utils::check_and_handle_new_root(callback) @@ -127,8 +116,7 @@ pub fn check_and_handle_new_root( rpc_subscriptions: Option<&RpcSubscriptions>, my_pubkey: &Pubkey, callback: CB, -) -> Result<(), SetRootError> -where +) where CB: FnOnce(&BankForks), { // get the root bank before squash @@ -156,8 +144,9 @@ where // get shreds for repair on gossip before we update leader schedule, otherwise they may // get dropped. 
leader_schedule_cache.set_root(rooted_banks.last().unwrap()); - blockstore.set_roots(rooted_slots.iter())?; - + blockstore + .set_roots(rooted_slots.iter()) + .expect("Ledger set roots failed"); set_bank_forks_root( new_root, bank_forks, @@ -192,8 +181,6 @@ where } } info!("{my_pubkey}: new root {new_root}"); - - Ok(()) } /// Sets the bank forks root: diff --git a/votor/src/staked_validators_cache.rs b/votor/src/staked_validators_cache.rs index 1540e2d81bc992..6633acd4140f53 100644 --- a/votor/src/staked_validators_cache.rs +++ b/votor/src/staked_validators_cache.rs @@ -4,7 +4,6 @@ use { crate::voting_service::AlpenglowPortOverride, lru::LruCache, solana_clock::{Epoch, Slot}, - solana_epoch_schedule::EpochSchedule, solana_gossip::cluster_info::ClusterInfo, solana_pubkey::Pubkey, solana_runtime::bank_forks::BankForks, @@ -41,9 +40,6 @@ pub struct StakedValidatorsCache { /// Bank forks bank_forks: Arc>, - // Cache Epoch schedule since it never changes - epoch_schedule: EpochSchedule, - /// Whether to include the running validator's socket address in cache entries include_self: bool, @@ -62,17 +58,10 @@ impl StakedValidatorsCache { include_self: bool, alpenglow_port_override: Option, ) -> Self { - let epoch_schedule = bank_forks - .read() - .unwrap() - .working_bank() - .epoch_schedule() - .clone(); Self { cache: LruCache::new(max_cache_size), ttl, bank_forks, - epoch_schedule, include_self, alpenglow_port_override, alpenglow_port_override_last_modified: Instant::now(), @@ -81,7 +70,12 @@ impl StakedValidatorsCache { #[inline] fn cur_epoch(&self, slot: Slot) -> Epoch { - self.epoch_schedule.get_epoch(slot) + self.bank_forks + .read() + .unwrap() + .working_bank() + .epoch_schedule() + .get_epoch(slot) } fn refresh_cache_entry( @@ -167,7 +161,6 @@ impl StakedValidatorsCache { cluster_info: &ClusterInfo, access_time: Instant, ) -> (&[SocketAddr], bool) { - let epoch = self.cur_epoch(slot); // Check if self.alpenglow_port_override has a different last_modified. 
// Immediately refresh the cache if it does. if let Some(alpenglow_port_override) = &self.alpenglow_port_override { @@ -176,14 +169,15 @@ impl StakedValidatorsCache { self.alpenglow_port_override_last_modified = alpenglow_port_override.last_modified(); trace!( - "refreshing cache entry for epoch {epoch} due to alpenglow port override \ - last_modified change" + "refreshing cache entry for epoch {} due to alpenglow port override \ + last_modified change", + self.cur_epoch(slot) ); - self.refresh_cache_entry(epoch, cluster_info, access_time); + self.refresh_cache_entry(self.cur_epoch(slot), cluster_info, access_time); } } - self.get_staked_validators_by_epoch(epoch, cluster_info, access_time) + self.get_staked_validators_by_epoch(self.cur_epoch(slot), cluster_info, access_time) } fn get_staked_validators_by_epoch( @@ -273,12 +267,12 @@ mod tests { .map(|(node_ix, pubkey)| { let mut contact_info = ContactInfo::new(*pubkey, 0_u64, 0_u16); - contact_info + assert!(contact_info .set_alpenglow(( Ipv4Addr::LOCALHOST, - 8080_u16.saturating_add(node_ix as u16), + 8080_u16.saturating_add(node_ix as u16) )) - .unwrap(); + .is_ok()); contact_info }); diff --git a/votor/src/timer_manager.rs b/votor/src/timer_manager.rs index 4fcd30d8c09b06..387b03c53e4de2 100644 --- a/votor/src/timer_manager.rs +++ b/votor/src/timer_manager.rs @@ -1,13 +1,15 @@ //! Controls the queueing and firing of skip timer events for use //! in the event loop. -// TODO: Make this mockable in event_handler for tests + mod stats; mod timers; + use { crate::{ common::{DELTA_BLOCK, DELTA_TIMEOUT}, event::VotorEvent, }, + agave_votor_messages::migration::MigrationStatus, crossbeam_channel::Sender, parking_lot::RwLock as PlRwLock, solana_clock::Slot, @@ -21,6 +23,7 @@ use { }, timers::Timers, }; + /// A manager of timer states. Uses a background thread to trigger next ready /// timers and send events. 
pub(crate) struct TimerManager { @@ -29,7 +32,11 @@ pub(crate) struct TimerManager { } impl TimerManager { - pub(crate) fn new(event_sender: Sender, exit: Arc) -> Self { + pub(crate) fn new( + event_sender: Sender, + exit: Arc, + migration_status: Arc, + ) -> Self { let timers = Arc::new(PlRwLock::new(Timers::new( DELTA_TIMEOUT, DELTA_BLOCK, @@ -38,6 +45,7 @@ impl TimerManager { let handle = { let timers = Arc::clone(&timers); thread::spawn(move || { + let _ = migration_status.wait_for_migration_or_exit(exit.as_ref()); while !exit.load(Ordering::Relaxed) { let duration = match timers.write().progress(Instant::now()) { None => { @@ -52,11 +60,14 @@ impl TimerManager { } }) }; + Self { timers, handle } } + pub(crate) fn set_timeouts(&self, slot: Slot) { self.timers.write().set_timeouts(slot, Instant::now()); } + pub(crate) fn join(self) { self.handle.join().unwrap(); } @@ -70,11 +81,16 @@ impl TimerManager { #[cfg(test)] mod tests { use {super::*, crate::event::VotorEvent, crossbeam_channel::unbounded, std::time::Duration}; + #[test] fn test_timer_manager() { let (event_sender, event_receiver) = unbounded(); let exit = Arc::new(AtomicBool::new(false)); - let timer_manager = TimerManager::new(event_sender, exit.clone()); + let timer_manager = TimerManager::new( + event_sender, + exit.clone(), + Arc::new(MigrationStatus::post_migration_status()), + ); let slot = 52; let start = Instant::now(); timer_manager.set_timeouts(slot); @@ -82,8 +98,8 @@ mod tests { let mut timeouts_received = 0; while timeouts_received < 2 && Instant::now().duration_since(start) < Duration::from_secs(2) { - let res = event_receiver.recv_timeout(Duration::from_millis(200)); - if let Ok(event) = res { + let recv = event_receiver.recv_timeout(Duration::from_millis(200)); + if let Ok(event) = recv { match event { VotorEvent::Timeout(s) => { assert_eq!(s, slot); diff --git a/votor/src/timer_manager/timers.rs b/votor/src/timer_manager/timers.rs index 1ad08b90a0b617..f62d504c0104e0 100644 --- 
a/votor/src/timer_manager/timers.rs +++ b/votor/src/timer_manager/timers.rs @@ -9,6 +9,7 @@ use { time::{Duration, Instant}, }, }; + /// Encodes a basic state machine of the different stages involved in handling /// timeouts for a window of slots. enum TimerState { @@ -29,6 +30,7 @@ enum TimerState { /// The state machine is done. Done, } + impl TimerState { /// Creates a new instance of the state machine. /// @@ -39,6 +41,7 @@ impl TimerState { let timeout = now.checked_add(delta_timeout).unwrap(); (Self::WaitDeltaTimeout { window, timeout }, timeout) } + /// Call to make progress on the state machine. /// /// Returns a potentially empty list of events that should be sent. @@ -75,6 +78,7 @@ impl TimerState { Self::Done => None, } } + /// When would this state machine next be able to make progress. fn next_fire(&self) -> Option { match self { @@ -84,6 +88,7 @@ impl TimerState { } } } + /// Maintains all active timer states for windows of slots. pub(super) struct Timers { delta_timeout: Duration, @@ -97,6 +102,7 @@ pub(super) struct Timers { /// Stats for the timer manager. stats: TimerManagerStats, } + impl Timers { pub(super) fn new( delta_timeout: Duration, @@ -112,6 +118,7 @@ impl Timers { stats: TimerManagerStats::new(), } } + /// Call to set timeouts for a new window of slots. pub(super) fn set_timeouts(&mut self, slot: Slot, now: Instant) { assert_eq!(self.heap.len(), self.timers.len()); @@ -127,6 +134,7 @@ impl Timers { self.stats .incr_timeout_count_with_heap_size(self.heap.len(), new_timer_inserted); } + /// Call to make progress on the timer states. If there are still active /// timer states, returns when the earliest one might become ready. 
pub(super) fn progress(&mut self, now: Instant) -> Option { @@ -159,6 +167,7 @@ impl Timers { } ret_timeout } + #[cfg(test)] pub(super) fn stats(&self) -> TimerManagerStats { self.stats.clone() @@ -230,6 +239,7 @@ mod tests { now = now.checked_add(one_micro).unwrap(); } let mut events = receiver.try_iter().collect::>(); + assert!(matches!( events.remove(0), VotorEvent::TimeoutCrashedLeader(0) diff --git a/votor/src/vote_history.rs b/votor/src/vote_history.rs index c9d5a4ea617eb7..2b32e980cfb28a 100644 --- a/votor/src/vote_history.rs +++ b/votor/src/vote_history.rs @@ -12,6 +12,8 @@ use { thiserror::Error, }; +pub const VOTE_THRESHOLD_SIZE: f64 = 2f64 / 3f64; + #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] pub enum VoteHistoryVersions { Current(VoteHistory), @@ -128,7 +130,7 @@ impl VoteHistory { pub fn votes_cast_since(&self, slot: Slot) -> Vec { self.votes_cast .iter() - .filter(|(s, _)| s > &&slot) + .filter(|&(&s, _)| s > slot) .flat_map(|(_, votes)| votes.iter()) .cloned() .collect() @@ -193,7 +195,11 @@ impl VoteHistory { self.skipped.insert(vote.slot); self.voted_skip_fallback.insert(vote.slot); } - Vote::Genesis(_vote) => {} + Vote::Genesis(_vote) => { + // Genesis votes are only used during migration. + // Since these votes are tracked and sent outside of + // votor, we do not need to insert anything here. 
+ } } self.votes_cast.entry(vote.slot()).or_default().push(vote); } diff --git a/votor/src/voting_service.rs b/votor/src/voting_service.rs index 72b18ca242ce49..395060762a85b5 100644 --- a/votor/src/voting_service.rs +++ b/votor/src/voting_service.rs @@ -23,20 +23,11 @@ use { thread::{self, Builder, JoinHandle}, time::{Duration, Instant}, }, - thiserror::Error, }; const STAKED_VALIDATORS_CACHE_TTL_S: u64 = 5; const STAKED_VALIDATORS_CACHE_NUM_EPOCH_CAP: usize = 5; -#[derive(Debug, Error)] -enum SendVoteError { - #[error(transparent)] - BincodeError(#[from] bincode::Error), - #[error(transparent)] - TransportError(#[from] TransportError), -} - #[derive(Debug)] pub enum BLSOp { PushVote { @@ -269,7 +260,7 @@ mod tests { solana_bls_signatures::Signature as BLSSignature, solana_gossip::{cluster_info::ClusterInfo, contact_info::ContactInfo}, solana_keypair::Keypair, - solana_net_utils::{sockets::bind_to_localhost_unique, SocketAddrSpace}, + solana_net_utils::SocketAddrSpace, solana_runtime::{ bank::Bank, bank_forks::BankForks, @@ -283,7 +274,10 @@ mod tests { quic::{spawn_stake_wighted_qos_server, QuicStreamerConfig, SpawnServerResult}, streamer::StakedNodes, }, - std::{net::SocketAddr, sync::Arc}, + std::{ + net::SocketAddr, + sync::{Arc, RwLock}, + }, test_case::test_case, tokio_util::sync::CancellationToken, }; @@ -360,14 +354,14 @@ mod tests { // Create listener thread on a random port we allocated and return SocketAddr to create VotingService // Bind to a random UDP port - let socket = bind_to_localhost_unique().unwrap(); + let socket = solana_net_utils::bind_to_localhost().unwrap(); let listener_addr = socket.local_addr().unwrap(); // Create VotingService with the listener address let (_, validator_keypairs) = create_voting_service(bls_receiver, listener_addr); // Send a BLS message via the VotingService - bls_sender.send(bls_op).unwrap(); + assert!(bls_sender.send(bls_op).is_ok()); // Start a quick streamer to handle quick control packets let (sender, receiver) = 
crossbeam_channel::unbounded(); @@ -379,22 +373,24 @@ mod tests { Arc::new(stakes), HashMap::::default(), // overrides ))); - let cancel_token = CancellationToken::new(); + let cancel = CancellationToken::new(); let SpawnServerResult { + endpoints: _, thread: quic_server_thread, - .. + key_updater: _, } = spawn_stake_wighted_qos_server( "AlpenglowLocalClusterTest", - "quic_streamer_test", + "voting_service_test", [socket], &Keypair::new(), sender, staked_nodes, QuicStreamerConfig::default_for_tests(), SwQosConfig::default(), - cancel_token.clone(), + cancel.clone(), ) .unwrap(); + let packets = receiver.recv().unwrap(); let packet = packets.first().expect("No packets received"); let received_message = packet @@ -407,7 +403,7 @@ mod tests { ) }); assert_eq!(received_message, expected_message); - cancel_token.cancel(); + cancel.cancel(); quic_server_thread.join().unwrap(); } } diff --git a/votor/src/voting_utils.rs b/votor/src/voting_utils.rs index 867ba73d3834b5..b93336f7d0f7f7 100644 --- a/votor/src/voting_utils.rs +++ b/votor/src/voting_utils.rs @@ -128,12 +128,12 @@ pub struct VotingContext { pub consensus_metrics_sender: ConsensusMetricsEventSender, } -fn get_bls_keypair( - context: &mut VotingContext, +fn get_or_insert_bls_keypair( + derived_bls_keypairs: &mut HashMap>, authorized_voter_keypair: &Arc, ) -> Result, BlsError> { let pubkey = authorized_voter_keypair.pubkey(); - if let Some(existing) = context.derived_bls_keypairs.get(&pubkey) { + if let Some(existing) = derived_bls_keypairs.get(&pubkey) { return Ok(existing.clone()); } @@ -142,23 +142,28 @@ fn get_bls_keypair( BLS_KEYPAIR_DERIVE_SEED, )?); - context - .derived_bls_keypairs - .insert(pubkey, bls_keypair.clone()); + derived_bls_keypairs.insert(pubkey, bls_keypair.clone()); Ok(bls_keypair) } -fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> GenerateVoteTxResult { - let vote_account_pubkey = context.vote_account_pubkey; +fn generate_vote_tx( + vote: &Vote, + bank: &Bank, + 
vote_account_pubkey: Pubkey, + identity_keypair: &Arc, + authorized_voter_keypairs: &Arc>>>, + wait_to_vote_slot: Option, + derived_bls_keypairs: &mut HashMap>, +) -> GenerateVoteTxResult { let authorized_voter_keypair; let bls_pubkey_in_vote_account; { - let authorized_voter_keypairs = context.authorized_voter_keypairs.read().unwrap(); + let authorized_voter_keypairs = authorized_voter_keypairs.read().unwrap(); if authorized_voter_keypairs.is_empty() { return GenerateVoteTxResult::NonVoting; } - if let Some(slot) = context.wait_to_vote_slot { + if let Some(slot) = wait_to_vote_slot { if vote.slot() < slot { return GenerateVoteTxResult::WaitToVoteSlot(slot); } @@ -167,18 +172,18 @@ fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> Ge return GenerateVoteTxResult::VoteAccountNotFound(vote_account_pubkey); }; let vote_state_view = vote_account.vote_state_view(); - if vote_state_view.node_pubkey() != &context.identity_keypair.pubkey() { + if vote_state_view.node_pubkey() != &identity_keypair.pubkey() { info!( "Vote account node_pubkey mismatch: {} (expected: {}). 
Unable to vote", vote_state_view.node_pubkey(), - context.identity_keypair.pubkey() + identity_keypair.pubkey() ); return GenerateVoteTxResult::HotSpare; } let Some(bls_pubkey_serialized) = vote_state_view.bls_pubkey_compressed() else { panic!( "No BLS pubkey in vote account {}", - context.identity_keypair.pubkey() + identity_keypair.pubkey() ); }; bls_pubkey_in_vote_account = @@ -187,7 +192,7 @@ fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> Ge .unwrap_or_else(|_| { panic!( "Failed to decompress BLS pubkey in vote account {}", - context.identity_keypair.pubkey() + identity_keypair.pubkey() ); }); let Some(authorized_voter_pubkey) = vote_state_view.get_authorized_voter(bank.epoch()) @@ -209,9 +214,9 @@ fn generate_vote_tx(vote: &Vote, bank: &Bank, context: &mut VotingContext) -> Ge authorized_voter_keypair = keypair.clone(); } - let bls_keypair = get_bls_keypair(context, &authorized_voter_keypair) + let bls_keypair = get_or_insert_bls_keypair(derived_bls_keypairs, &authorized_voter_keypair) .unwrap_or_else(|e| panic!("Failed to derive my own BLS keypair: {e:?}")); - let my_bls_pubkey: BLSPubkey = bls_keypair.public; + let my_bls_pubkey: BLSPubkey = bls_keypair.public.into(); if my_bls_pubkey != bls_pubkey_in_vote_account { panic!( "Vote account bls_pubkey mismatch: {bls_pubkey_in_vote_account:?} (expected: \ @@ -263,7 +268,15 @@ fn insert_vote_and_create_bls_message( } let bank = context.sharable_banks.root(); - let message = match generate_vote_tx(&vote, &bank, context) { + let message = match generate_vote_tx( + &vote, + &bank, + context.vote_account_pubkey, + &context.identity_keypair, + &context.authorized_voter_keypairs, + context.wait_to_vote_slot, + &mut context.derived_bls_keypairs, + ) { GenerateVoteTxResult::ConsensusMessage(m) => m, e => { if e.is_transient_error() { @@ -360,9 +373,9 @@ mod tests { let my_keys = &validator_keypairs[my_index]; let sharable_banks = bank_forks.read().unwrap().sharable_banks(); - let 
(bls_sender, _bls_receiver) = unbounded(); - let (commitment_sender, _commitment_receiver) = unbounded(); - let (consensus_metrics_sender, _consensus_metrics_receiver) = unbounded(); + let bls_sender = unbounded().0; + let commitment_sender = unbounded().0; + let consensus_metrics_sender = unbounded().0; VotingContext { vote_history: VoteHistory::new(my_keys.node_keypair.pubkey(), 0), vote_account_pubkey: my_keys.vote_keypair.pubkey(), diff --git a/votor/src/votor.rs b/votor/src/votor.rs index 0bdb2680ff2649..382dc451f655eb 100644 --- a/votor/src/votor.rs +++ b/votor/src/votor.rs @@ -1,6 +1,6 @@ -//! ```text //! The entrypoint into votor the module responsible for voting, rooting, and notifying //! the core to create a new block. +//! ```text //! //! Votor //! ┌────────────────────────────────────────────────────────────────────────────┐ @@ -20,7 +20,7 @@ //! │ │ │ │ │ │ //! │ ┌────┼─────────┼───────────────┐ │ │ │ //! │ │ │ │ │ Block │ ┌────────────────────┐ -//! │ │ Consensus Pool Service │ │ │ ┌─────────────────────│─┼ Replay / Broadcast │ +//! │ │ Consensus Pool Service │ │ │ ┌─────────────────────│─┼ Replay / Broadcast │ //! │ │ │ │ │ │ │ └────────────────────┘ //! │ │ ┌──────────────────────────┐ │ │ │ │ │ //! 
│ │ │ │ │ │ │ │ │ @@ -44,7 +44,6 @@ use { crate::{ commitment::CommitmentAggregationData, - common::DELTA_STANDSTILL, consensus_metrics::{ ConsensusMetrics, ConsensusMetricsEventReceiver, ConsensusMetricsEventSender, }, @@ -58,7 +57,7 @@ use { voting_service::BLSOp, voting_utils::VotingContext, }, - agave_votor_messages::consensus_message::ConsensusMessage, + agave_votor_messages::{consensus_message::ConsensusMessage, migration::MigrationStatus}, crossbeam_channel::{Receiver, Sender}, parking_lot::RwLock as PlRwLock, solana_clock::Slot, @@ -77,23 +76,12 @@ use { }, std::{ collections::HashMap, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Condvar, Mutex, RwLock, - }, + sync::{atomic::AtomicBool, Arc, RwLock}, thread::{self, JoinHandle}, time::Duration, }, }; -/// Communication with the block creation loop to notify leader window -#[derive(Default)] -pub struct LeaderWindowNotifier { - pub window_info: Mutex>, - pub window_notification: Condvar, - pub highest_parent_ready: RwLock<(Slot, (Slot, Hash))>, -} - /// Inputs to Votor pub struct VotorConfig { pub exit: Arc, @@ -111,6 +99,8 @@ pub struct VotorConfig { pub cluster_info: Arc, pub leader_schedule_cache: Arc, pub rpc_subscriptions: Option>, + pub consensus_metrics_sender: ConsensusMetricsEventSender, + pub migration_status: Arc, // Senders / Notifiers pub snapshot_controller: Option>, @@ -118,10 +108,10 @@ pub struct VotorConfig { pub commitment_sender: Sender, pub drop_bank_sender: Sender>, pub bank_notification_sender: Option, - pub leader_window_notifier: Arc, + pub leader_window_info_sender: Sender, + pub highest_parent_ready: Arc>, pub event_sender: VotorEventSender, pub own_vote_sender: Sender, - pub consensus_metrics_sender: ConsensusMetricsEventSender, // Receivers pub event_receiver: VotorEventReceiver, @@ -135,19 +125,16 @@ pub(crate) struct SharedContext { pub(crate) bank_forks: Arc>, pub(crate) cluster_info: Arc, pub(crate) rpc_subscriptions: Option>, - pub(crate) leader_window_notifier: Arc, + 
pub(crate) leader_window_info_sender: Sender, + pub(crate) highest_parent_ready: Arc>, pub(crate) vote_history_storage: Arc, } pub struct Votor { - // TODO: Just a placeholder for how migration could look like, - // will fix once we finish the strategy - start: Arc<(Mutex, Condvar)>, - event_handler: EventHandler, consensus_pool_service: ConsensusPoolService, timer_manager: Arc>, - consensus_metrics_handle: JoinHandle<()>, + metrics: JoinHandle<()>, } impl Votor { @@ -165,22 +152,22 @@ impl Votor { cluster_info, leader_schedule_cache, rpc_subscriptions, + migration_status, snapshot_controller, bls_sender, commitment_sender, drop_bank_sender, bank_notification_sender, - leader_window_notifier, + leader_window_info_sender, + highest_parent_ready, event_sender, - event_receiver, own_vote_sender, - consensus_message_receiver, + event_receiver, + consensus_message_receiver: bls_receiver, consensus_metrics_sender, consensus_metrics_receiver, } = config; - let start = Arc::new((Mutex::new(false), Condvar::new())); - let identity_keypair = cluster_info.keypair().clone(); let has_new_vote_been_rooted = !wait_for_vote_to_start_leader; @@ -192,7 +179,8 @@ impl Votor { bank_forks: bank_forks.clone(), cluster_info: cluster_info.clone(), rpc_subscriptions, - leader_window_notifier, + highest_parent_ready, + leader_window_info_sender, vote_history_storage, }; @@ -221,11 +209,12 @@ impl Votor { let timer_manager = Arc::new(PlRwLock::new(TimerManager::new( event_sender.clone(), exit.clone(), + migration_status.clone(), ))); let event_handler_context = EventHandlerContext { exit: exit.clone(), - start: start.clone(), + migration_status: migration_status.clone(), event_receiver, timer_manager: Arc::clone(&timer_manager), shared_context, @@ -237,20 +226,19 @@ impl Votor { let consensus_pool_context = ConsensusPoolContext { exit: exit.clone(), - start: start.clone(), + migration_status, cluster_info: cluster_info.clone(), my_vote_pubkey: vote_account, blockstore, sharable_banks, 
leader_schedule_cache, - consensus_message_receiver, + consensus_message_receiver: bls_receiver, bls_sender, event_sender, commitment_sender, - delta_standstill: DELTA_STANDSTILL, }; - let consensus_metrics_handle = ConsensusMetrics::start_metrics_loop( + let metrics = ConsensusMetrics::start_metrics_loop( root_epoch, consensus_metrics_receiver, exit.clone(), @@ -259,35 +247,10 @@ impl Votor { let consensus_pool_service = ConsensusPoolService::new(consensus_pool_context); Self { - start, event_handler, consensus_pool_service, timer_manager, - consensus_metrics_handle, - } - } - - pub fn start_migration(&self) { - // TODO: evaluate once we have actual migration logic - let (lock, cvar) = &*self.start; - let mut started = lock.lock().unwrap(); - *started = true; - cvar.notify_all(); - } - - pub(crate) fn wait_for_migration_or_exit( - exit: &AtomicBool, - (lock, cvar): &(Mutex, Condvar), - ) { - let mut started = lock.lock().unwrap(); - while !*started { - if exit.load(Ordering::Relaxed) { - return; - } - // Add timeout to check for exit flag. Check infrequent enough to - // not hit performance while frequent enough that validator exit - // isn't delayed a lot. - (started, _) = cvar.wait_timeout(started, Duration::from_secs(1)).unwrap(); + metrics, } } @@ -308,7 +271,7 @@ impl Votor { } } } - self.event_handler.join()?; - self.consensus_metrics_handle.join() + self.metrics.join()?; + self.event_handler.join() } }