From 6b2c2c779a0db128d45f5cbdc014ff8aef73ed04 Mon Sep 17 00:00:00 2001 From: Cathy Zhao Date: Wed, 13 Aug 2025 22:29:25 +0000 Subject: [PATCH 01/12] add gracefulswitch and tests rename mock picker and remove spaces make test picker private --- .../client/load_balancing/child_manager.rs | 36 +- .../client/load_balancing/graceful_switch.rs | 1203 +++++++++++++++++ grpc/src/client/load_balancing/mod.rs | 2 + grpc/src/client/load_balancing/test_utils.rs | 64 +- 4 files changed, 1276 insertions(+), 29 deletions(-) create mode 100644 grpc/src/client/load_balancing/graceful_switch.rs diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index 9501bccb1..d49b2dd1b 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -363,8 +363,8 @@ mod test { Child, ChildManager, ChildUpdate, ChildWorkScheduler, ResolverUpdateSharder, }; use crate::client::load_balancing::test_utils::{ - self, StubPolicy, StubPolicyFuncs, TestChannelController, TestEvent, TestSubchannel, - TestWorkScheduler, + self, StubPolicy, StubPolicyData, StubPolicyFuncs, TestChannelController, TestEvent, + TestSubchannel, TestWorkScheduler, }; use crate::client::load_balancing::{ ChannelController, LbPolicy, LbPolicyBuilder, LbPolicyOptions, LbState, ParsedJsonLbConfig, @@ -444,7 +444,7 @@ mod test { let (tx_events, rx_events) = mpsc::unbounded_channel::(); let tcc = Box::new(TestChannelController { tx_events }); let builder: Arc = GLOBAL_LB_REGISTRY.get_policy(test_name).unwrap(); - let endpoint_sharder = EndpointSharder { builder: builder }; + let endpoint_sharder = EndpointSharder { builder }; let child_manager = ChildManager::new(Box::new(endpoint_sharder), default_runtime()); (rx_events, Box::new(child_manager), tcc) } @@ -517,25 +517,29 @@ mod test { // Defines the functions resolver_update and subchannel_update to test // aggregate_states. 
fn create_verifying_funcs_for_aggregate_tests() -> StubPolicyFuncs { + let data = StubPolicyData::new(); StubPolicyFuncs { // Closure for resolver_update. resolver_update should only receive // one endpoint and create one subchannel for the endpoint it // receives. - resolver_update: Some(move |update: ResolverUpdate, _, controller| { - assert_eq!(update.endpoints.iter().len(), 1); - let endpoint = update.endpoints.unwrap().pop().unwrap(); - let subchannel = controller.new_subchannel(&endpoint.addresses[0]); - Ok(()) - }), + resolver_update: Some(Arc::new( + move |data, update: ResolverUpdate, _, controller| { + assert_eq!(update.endpoints.iter().len(), 1); + let endpoint = update.endpoints.unwrap().pop().unwrap(); + let subchannel = controller.new_subchannel(&endpoint.addresses[0]); + Ok(()) + }, + )), // Closure for subchannel_update. Sends a picker of the same state // that was passed to it. - subchannel_update: Some(move |updated_subchannel, state, controller| { - controller.update_picker(LbState { - connectivity_state: state.connectivity_state, - picker: Arc::new(QueuingPicker {}), - }); - }), - ..Default::default() + subchannel_update: Some(Arc::new( + move |data, updated_subchannel, state, controller| { + controller.update_picker(LbState { + connectivity_state: state.connectivity_state, + picker: Arc::new(QueuingPicker {}), + }); + }, + )), } } diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs new file mode 100644 index 000000000..def614ce9 --- /dev/null +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -0,0 +1,1203 @@ +use crate::client::channel::{InternalChannelController, WorkQueueItem, WorkQueueTx}; +use crate::client::load_balancing::{ + ChannelController, ExternalSubchannel, Failing, LbConfig, LbPolicy, LbPolicyBuilder, + LbPolicyOptions, LbState, ParsedJsonLbConfig, Pick, PickResult, Picker, QueuingPicker, + Subchannel, SubchannelState, WeakSubchannel, WorkScheduler, 
GLOBAL_LB_REGISTRY, +}; +use crate::client::name_resolution::{Address, Endpoint, ResolverUpdate}; +use crate::client::transport::{Transport, GLOBAL_TRANSPORT_REGISTRY}; +use crate::client::ConnectivityState; +use crate::rt::{default_runtime, Runtime}; + +use std::collections::{HashMap, HashSet}; +use std::error::Error; +use std::hash::Hash; +use std::mem; +use std::ops::Add; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; + +use crate::service::{Message, Request, Response, Service}; +use core::panic; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use tokio::sync::{mpsc, Notify}; +use tonic::{async_trait, metadata::MetadataMap}; + +#[derive(Deserialize)] +struct GracefulSwitchConfig { + children_policies: Vec>, +} + +struct GracefulSwitchLbConfig { + child_builder: Arc, + child_config: Option, +} + +impl GracefulSwitchLbConfig { + fn new(child_builder: Arc, child_config: Option) -> Self { + GracefulSwitchLbConfig { + child_builder, + child_config, + } + } +} + +/** +Struct for Graceful Switch. +*/ +pub struct GracefulSwitchPolicy { + subchannel_to_policy: HashMap, + managing_policy: Mutex, + work_scheduler: Arc, + runtime: Arc, +} + +impl LbPolicy for GracefulSwitchPolicy { + fn resolver_update( + &mut self, + update: ResolverUpdate, + config: Option<&LbConfig>, + channel_controller: &mut dyn ChannelController, + ) -> Result<(), Box> { + if update.service_config.as_ref().is_ok_and(|sc| sc.is_some()) { + return Err("can't do service configs yet".into()); + } + let cfg: Arc = + match config.unwrap().convert_to::>() { + Ok(cfg) => (*cfg).clone(), + Err(e) => panic!("convert_to failed: {e}"), + }; + let mut wrapped_channel_controller = WrappedController::new(channel_controller); + let mut target_child_kind = ChildKind::Pending; + + // Determine if we can switch the new policy in. If there is no children + // yet or the new policy isn't the same as the latest policy, then + // we can swap. 
+ let needs_switch = { + let mut managing_policy = self.managing_policy.lock().unwrap(); + managing_policy.no_policy() + || managing_policy.latest_policy() != cfg.child_builder.name() + }; + + if needs_switch { + target_child_kind = self.switch_to(config); + } + { + let mut managing_policy = self.managing_policy.lock().unwrap(); + if let Some(child_policy) = managing_policy.get_child_policy(&target_child_kind) { + child_policy.policy.resolver_update( + update, + cfg.child_config.as_ref(), + &mut wrapped_channel_controller, + )?; + } + } + self.resolve_child_controller(&mut wrapped_channel_controller, target_child_kind); + Ok(()) + } + + fn subchannel_update( + &mut self, + subchannel: Arc, + state: &SubchannelState, + channel_controller: &mut dyn ChannelController, + ) { + let mut wrapped_channel_controller = WrappedController::new(channel_controller); + let which_child = self + .subchannel_to_policy + .get(&WeakSubchannel::new(&subchannel)) + .unwrap_or_else(|| { + panic!("Subchannel not found in graceful switch: {}", subchannel); + }); + { + let mut managing_policy = self.managing_policy.lock().unwrap(); + if let Some(child_policy) = managing_policy.get_child_policy(which_child) { + child_policy.policy.subchannel_update( + subchannel, + state, + &mut wrapped_channel_controller, + ); + } + } + self.resolve_child_controller(&mut wrapped_channel_controller, which_child.clone()); + } + + fn work(&mut self, channel_controller: &mut dyn ChannelController) { + let mut wrapped_channel_controller = WrappedController::new(channel_controller); + let mut child_kind = ChildKind::Pending; + { + let mut managing_policy = self.managing_policy.lock().unwrap(); + if let Some(ref mut pending_child) = managing_policy.pending_child { + pending_child.policy.work(&mut wrapped_channel_controller); + } else if let Some(ref mut current_child) = managing_policy.current_child { + current_child.policy.work(&mut wrapped_channel_controller); + child_kind = ChildKind::Current; + } + } + 
self.resolve_child_controller(&mut wrapped_channel_controller, child_kind); + } + + fn exit_idle(&mut self, channel_controller: &mut dyn ChannelController) { + let mut wrapped_channel_controller = WrappedController::new(channel_controller); + let mut child_kind = ChildKind::Pending; + { + let mut managing_policy = self.managing_policy.lock().unwrap(); + if let Some(ref mut pending_child) = managing_policy.pending_child { + pending_child + .policy + .exit_idle(&mut wrapped_channel_controller); + } else if let Some(ref mut current_child) = managing_policy.current_child { + current_child + .policy + .exit_idle(&mut wrapped_channel_controller); + child_kind = ChildKind::Current; + } + } + self.resolve_child_controller(&mut wrapped_channel_controller, child_kind); + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +enum ChildKind { + Current, + Pending, +} + +impl GracefulSwitchPolicy { + /// Create a new Graceful Switch policy. + pub fn new(work_scheduler: Arc, runtime: Arc) -> Self { + GracefulSwitchPolicy { + subchannel_to_policy: HashMap::default(), + managing_policy: Mutex::new(ChildPolicyManager::new()), + work_scheduler, + runtime, + } + } + + fn resolve_child_controller( + &mut self, + channel_controller: &mut WrappedController, + child_kind: ChildKind, + ) { + let mut should_swap = false; + let mut final_child_kind = child_kind.clone(); + { + let mut managing_policy = self.managing_policy.lock().unwrap(); + + match child_kind { + ChildKind::Pending => { + if let Some(ref mut pending_policy) = managing_policy.pending_child { + if let Some(picker) = channel_controller.picker_update.take() { + pending_policy.policy_state = picker.connectivity_state; + pending_policy.policy_picker_update = Some(picker); + } + } + } + + ChildKind::Current => { + if let Some(ref mut current_policy) = managing_policy.current_child { + if let Some(picker) = channel_controller.picker_update.take() { + current_policy.policy_state = picker.connectivity_state; + 
channel_controller.channel_controller.update_picker(picker); + } + } + } + } + + let current_child = managing_policy.current_child.as_ref(); + let pending_child = managing_policy.pending_child.as_ref(); + + // If the current child is in any state but Ready and the pending + // child is in any state but connecting, then the policies should + // swap. + if let (Some(current_child), Some(pending_child)) = (current_child, pending_child) { + if current_child.policy_state != ConnectivityState::Ready + || pending_child.policy_state != ConnectivityState::Connecting + { + println!("Condition met, should swap."); + should_swap = true; + } + } + } + + if should_swap { + self.swap(channel_controller); + final_child_kind = ChildKind::Current; + } + + // Any created subchannels are mapped to the appropriate child. + for csc in &channel_controller.created_subchannels { + println!("Printing csc: {:?}", csc); + let key = WeakSubchannel::new(csc); + self.subchannel_to_policy + .entry(key) + .or_insert_with(|| final_child_kind.clone()); + } + } + + fn swap(&mut self, channel_controller: &mut WrappedController) { + let mut managing_policy = self.managing_policy.lock().unwrap(); + managing_policy.current_child = managing_policy.pending_child.take(); + self.subchannel_to_policy + .retain(|_, v| *v == ChildKind::Pending); + + // Remap all the subchannels mapped to Pending to Current. + for (_, child_kind) in self.subchannel_to_policy.iter_mut() { + if *child_kind == ChildKind::Pending { + *child_kind = ChildKind::Current; + } + } + + // Send the pending child's cached picker update. 
+ if let Some(current) = &mut managing_policy.current_child { + if let Some(picker) = current.policy_picker_update.take() { + channel_controller.channel_controller.update_picker(picker); + } + } + } + + fn parse_config(config: &ParsedJsonLbConfig) -> Result> { + let cfg: GracefulSwitchConfig = match config.convert_to() { + Ok(c) => c, + Err(e) => { + return Err(format!("failed to parse JSON config: {}", e).into()); + } + }; + for c in &cfg.children_policies { + assert!( + c.len() == 1, + "Each children_policies entry must contain exactly one policy, found {}", + c.len() + ); + if let Some((policy_name, policy_config)) = c.iter().next() { + if let Some(child) = GLOBAL_LB_REGISTRY.get_policy(policy_name.as_str()) { + if policy_name == "round_robin" { + println!("is round robin"); + let graceful_switch_lb_config = GracefulSwitchLbConfig::new(child, None); + return Ok(LbConfig::new(Arc::new(graceful_switch_lb_config))); + } + let parsed_config = ParsedJsonLbConfig { + value: policy_config.clone(), + }; + let config_result = child.parse_config(&parsed_config); + let config = match config_result { + Ok(Some(cfg)) => cfg, + Ok(None) => { + return Err("child policy config returned None".into()); + } + Err(e) => { + println!("returning error in parse_config"); + return Err( + format!("failed to parse child policy config: {}", e).into() + ); + } + }; + let graceful_switch_lb_config = + GracefulSwitchLbConfig::new(child, Some(config)); + return Ok(LbConfig::new(Arc::new(graceful_switch_lb_config))); + } else { + continue; + } + } else { + continue; + } + } + Err("no supported policies found in config".into()) + } + + fn switch_to(&mut self, config: Option<&LbConfig>) -> ChildKind { + let cfg: Arc = + match config.unwrap().convert_to::>() { + Ok(cfg) => (*cfg).clone(), + Err(e) => panic!("convert_to failed: {e}"), + }; + let options = LbPolicyOptions { + work_scheduler: self.work_scheduler.clone(), + runtime: self.runtime.clone(), + }; + let new_policy = 
cfg.child_builder.build(options); + let mut managing_policy = self.managing_policy.lock().unwrap(); + + let new_child = ChildPolicy::new( + cfg.child_builder.clone(), + new_policy, + ConnectivityState::Connecting, + ); + if managing_policy.current_child.is_none() { + managing_policy.current_child = Some(new_child); + ChildKind::Current + } else { + managing_policy.pending_child = Some(new_child); + ChildKind::Pending + } + } +} + +// Struct to wrap a channel controller around. The purpose is to +// store a picker update to check connectivity state of a child. +// This helps to decide whether to swap or not in subchannel_update. +// Also tracks created_subchannels, which then is then used to map subchannels to +// children policies. +struct WrappedController<'a> { + channel_controller: &'a mut dyn ChannelController, + created_subchannels: Vec>, + picker_update: Option, +} + +impl<'a> WrappedController<'a> { + fn new(channel_controller: &'a mut dyn ChannelController) -> Self { + Self { + channel_controller, + created_subchannels: vec![], + picker_update: None, + } + } +} + +impl ChannelController for WrappedController<'_> { + //call into the real channel controller + fn new_subchannel(&mut self, address: &Address) -> Arc { + let subchannel = self.channel_controller.new_subchannel(address); + self.created_subchannels.push(subchannel.clone()); + subchannel + } + + fn update_picker(&mut self, update: LbState) { + self.picker_update = Some(update); + } + + fn request_resolution(&mut self) { + self.channel_controller.request_resolution(); + } +} + +// ChildPolicy represents a child policy. 
+struct ChildPolicy { + policy_builder: Arc, + policy: Box, + policy_state: ConnectivityState, + policy_picker_update: Option, +} + +impl ChildPolicy { + fn new( + policy_builder: Arc, + policy: Box, + policy_state: ConnectivityState, + ) -> Self { + ChildPolicy { + policy_builder, + policy, + policy_state, + policy_picker_update: None, + } + } +} + +// This ChildPolicyManager keeps track of the current and pending children. It +// keeps track of the latest policy and retrieves it's child policy based on an +// enum. +struct ChildPolicyManager { + current_child: Option, + pending_child: Option, +} + +impl ChildPolicyManager { + fn new() -> Self { + ChildPolicyManager { + current_child: None, + pending_child: None, + } + } + + fn latest_policy(&mut self) -> String { + if let Some(pending_child) = &self.pending_child { + pending_child.policy_builder.name().to_string() + } else if let Some(current_child) = &self.current_child { + current_child.policy_builder.name().to_string() + } else { + "".to_string() + } + } + + fn no_policy(&self) -> bool { + if self.pending_child.is_none() && self.current_child.is_none() { + return true; + } + false + } + + fn get_child_policy(&mut self, kind: &ChildKind) -> Option<&mut ChildPolicy> { + match kind { + ChildKind::Current => self.current_child.as_mut(), + ChildKind::Pending => self.pending_child.as_mut(), + } + } +} + +#[cfg(test)] +mod test { + use crate::client::channel::WorkQueueItem; + use crate::client::load_balancing::graceful_switch::{self, GracefulSwitchPolicy}; + use crate::client::load_balancing::test_utils::{ + self, reg_stub_policy, StubPolicyBuilder, StubPolicyData, StubPolicyFuncs, + TestChannelController, TestEvent, TestSubchannel, TestWorkScheduler, + }; + use crate::client::load_balancing::{pick_first, LbState, Pick}; + use crate::client::load_balancing::{ + ChannelController, LbPolicy, LbPolicyBuilder, LbPolicyOptions, ParsedJsonLbConfig, + PickResult, Picker, Subchannel, SubchannelState, GLOBAL_LB_REGISTRY, + }; 
+ use crate::client::name_resolution::{Address, Endpoint, ResolverUpdate}; + use crate::client::service_config::ServiceConfig; + use crate::client::ConnectivityState; + use crate::rt::{default_runtime, Runtime}; + use crate::service::Request; + use std::collections::HashMap; + use std::thread::current; + use std::{panic, sync::Arc}; + use tokio::sync::mpsc; + use tonic::metadata::MetadataMap; + + struct TestSubchannelList { + subchannels: Vec>, + } + + impl TestSubchannelList { + fn new(addresses: &Vec
, channel_controller: &mut dyn ChannelController) -> Self { + let mut scl = TestSubchannelList { + subchannels: Vec::new(), + }; + for address in addresses { + let sc = channel_controller.new_subchannel(address); + scl.subchannels.push(sc.clone()); + } + scl + } + + fn contains(&self, sc: &Arc) -> bool { + self.subchannels.contains(sc) + } + } + + struct TestPicker { + name: &'static str, + } + + impl TestPicker { + fn new(name: &'static str) -> Self { + Self { name } + } + } + impl Picker for TestPicker { + fn pick(&self, _req: &Request) -> PickResult { + PickResult::Pick(Pick { + subchannel: Arc::new(TestSubchannel::new( + Address { + address: self.name.to_string().into(), + ..Default::default() + }, + mpsc::unbounded_channel().0, + )), + on_complete: None, + metadata: MetadataMap::new(), + }) + } + } + + struct TestState { + subchannel_list: TestSubchannelList, + } + + // Defines the functions resolver_update and subchannel_update to test graceful switch + fn create_funcs_for_gracefulswitch_tests(name: &'static str) -> StubPolicyFuncs { + StubPolicyFuncs { + // Closure for resolver_update. It creates a subchannel for the + // endpoint it receives and stores which endpoint it received and + // which subchannel this child created in the data field. + resolver_update: Some(Arc::new( + move |data: &mut StubPolicyData, update: ResolverUpdate, _, channel_controller| { + if let Ok(ref endpoints) = update.endpoints { + let addresses: Vec<_> = endpoints + .iter() + .flat_map(|ep| ep.addresses.clone()) + .collect(); + let scl = TestSubchannelList::new(&addresses, channel_controller); + let child_state = TestState { + subchannel_list: scl, + }; + data.test_data = Some(Box::new(child_state)); + Ok(()) + } else { + data.test_data = None; + Ok(()) + } + }, + )), + // Closure for subchannel_update. Verify that the subchannel that + // being updated now is the same one that this child policy created + // in resolver_update. 
It then sends a picker of the same state that + // was passed to it. + subchannel_update: Some(Arc::new( + move |data: &mut StubPolicyData, updated_subchannel, state, channel_controller| { + // Retrieve the specific TestState from the generic test_data field. + // This downcasts the `Any` trait object + if let Some(test_data) = data.test_data.as_mut() { + if let Some(test_state) = test_data.downcast_mut::() { + let scl = &mut test_state.subchannel_list; + assert!( + scl.contains(&updated_subchannel), + "subchannel_update received an update for a subchannel it does not own." + ); + channel_controller.update_picker(LbState { + connectivity_state: state.connectivity_state, + picker: Arc::new(TestPicker { name }), + }); + } + } + }, + )), + } + } + + // Sets up the test environment. + // + // Performs the following: + // 1. Creates a work scheduler. + // 2. Creates a fake channel that acts as a channel controller. + // 3. Creates an StubPolicyBuilder with StubFuncs that each test will define + // and name of the test. + // 5. Creates a GracefulSwitch. + // + // Returns the following: + // 1. A receiver for events initiated by the LB policy (like creating a new + // subchannel, sending a new picker etc). + // 2. The GracefulSwitch to send resolver and subchannel updates from the + // test. + // 3. The controller to pass to the LB policy as part of the updates. 
+ fn setup() -> ( + mpsc::UnboundedReceiver, + Box, + Box, + ) { + let (tx_events, rx_events) = mpsc::unbounded_channel::(); + let work_scheduler = Arc::new(TestWorkScheduler { + tx_events: tx_events.clone(), + }); + + let tcc = Box::new(TestChannelController { tx_events }); + + let graceful_switch = GracefulSwitchPolicy::new(work_scheduler, default_runtime()); + (rx_events, Box::new(graceful_switch), tcc) + } + + fn create_endpoint_with_one_address(addr: String) -> Endpoint { + Endpoint { + addresses: vec![Address { + address: addr.into(), + ..Default::default() + }], + ..Default::default() + } + } + + // Verifies that the expected number of subchannels is created. Returns the + // subchannels created. + async fn verify_subchannel_creation_from_policy( + rx_events: &mut mpsc::UnboundedReceiver, + ) -> Vec> { + let mut subchannels = Vec::new(); + match rx_events.recv().await.unwrap() { + TestEvent::NewSubchannel(sc) => { + subchannels.push(sc); + } + other => panic!("unexpected event {:?}", other), + }; + subchannels + } + + // Verifies that the channel moves to READY state with a picker that returns the + // given subchannel. + // + // Returns the picker for tests to make more picks, if required. + async fn verify_correct_picker_from_policy( + rx_events: &mut mpsc::UnboundedReceiver, + name: &str, + ) { + println!("verify ready picker"); + match rx_events.recv().await.unwrap() { + TestEvent::UpdatePicker(update) => { + let req = test_utils::new_request(); + println!("{:?}", update.connectivity_state); + + match update.picker.pick(&req) { + PickResult::Pick(pick) => { + let received_address = &pick.subchannel.address().address.to_string(); + // It's good practice to create the expected value once. + let expected_address = name.to_string(); + + // Check for inequality and panic with a detailed message if they don't match. + if received_address != &expected_address { + panic!( + "Picker address mismatch. 
Expected: '{}', but got: '{}'", + expected_address, received_address + ); + } + } + other => panic!("unexpected pick result"), + } + } + other => panic!("unexpected event {:?}", other), + } + } + + fn move_subchannel_to_state( + lb_policy: &mut dyn LbPolicy, + subchannel: Arc, + tcc: &mut dyn ChannelController, + state: ConnectivityState, + ) { + lb_policy.subchannel_update( + subchannel, + &SubchannelState { + connectivity_state: state, + ..Default::default() + }, + tcc, + ); + } + + // Tests that the gracefulswitch policy correctly sets a child and sends + // updates to that child when it receives its first config. + #[tokio::test] + async fn gracefulswitch_successful_first_update() { + reg_stub_policy( + "stub-gracefulswitch_successful_first_update-one", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_successful_first_update-one", + ), + ); + reg_stub_policy( + "stub-gracefulswitch_successful_first_update-two", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_successful_first_update-two", + ), + ); + + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_successful_first_update-one": serde_json::json!({}) }, + { "stub-gracefulswitch_successful_first_update-two": serde_json::json!({}) } + ] + }); + + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .unwrap(); + + let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let mut subchannels = subchannels.into_iter(); + move_subchannel_to_state( + &mut *graceful_switch, + subchannels.next().unwrap(), + tcc.as_mut(), + 
ConnectivityState::Ready, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_successful_first_update-one", + ) + .await; + } + + // Tests that the gracefulswitch policy correctly sets a pending child and + // sends subchannel updates to that child when it receives a new config. + #[tokio::test] + async fn gracefulswitch_switching_to_resolver_update() { + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + reg_stub_policy( + "stub-gracefulswitch_switching_to_resolver_update-one", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_switching_to_resolver_update-one", + ), + ); + reg_stub_policy( + "stub-gracefulswitch_switching_to_resolver_update-two", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_switching_to_resolver_update-two", + ), + ); + + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_switching_to_resolver_update-one": serde_json::json!({}) } + ] + }); + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .unwrap(); + + // Subchannel creation and ready + let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let mut subchannels = subchannels.into_iter(); + move_subchannel_to_state( + &mut *graceful_switch, + subchannels.next().unwrap(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + + // Assert picker is TestPickerOne by checking subchannel address + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_switching_to_resolver_update-one", + ) + .await; + + // 2. 
Switch to mock_policy_two as pending + let new_service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_switching_to_resolver_update-two": serde_json::json!({}) } + ] + }); + let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: new_service_config, + }) + .unwrap(); + graceful_switch + .resolver_update(update.clone(), Some(&new_parsed_config), &mut *tcc) + .unwrap(); + + // Simulate subchannel creation and ready for pending + let subchannels_two = verify_subchannel_creation_from_policy(&mut rx_events).await; + let mut subchannels_two = subchannels_two.into_iter(); + move_subchannel_to_state( + &mut *graceful_switch, + subchannels_two.next().unwrap(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + + // Assert picker is TestPickerTwo by checking subchannel address + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_switching_to_resolver_update-two", + ) + .await; + } + + // Tests that the gracefulswitch policy should do nothing when a receives a + // new config of the same policy that it received before. 
+ #[tokio::test] + async fn gracefulswitch_two_policies_same_type() { + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + reg_stub_policy( + "stub-gracefulswitch_two_policies_same_type-one", + create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_two_policies_same_type-one"), + ); + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_two_policies_same_type-one": serde_json::json!({}) } + ] + }); + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .unwrap(); + let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let mut subchannels = subchannels.into_iter(); + move_subchannel_to_state( + &mut *graceful_switch, + subchannels.next().unwrap(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + let service_config2 = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_two_policies_same_type-one": serde_json::json!({}) } + ] + }); + let parsed_config2 = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config2, + }) + .unwrap(); + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config2), &mut *tcc) + .unwrap(); + } + + // Tests that the gracefulswitch policy should replace the current child + // with the pending child if the current child isn't ready. 
+ #[tokio::test] + async fn gracefulswitch_current_not_ready_pending_update() { + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + reg_stub_policy( + "stub-gracefulswitch_current_not_ready_pending_update-one", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_current_not_ready_pending_update-one", + ), + ); + reg_stub_policy( + "stub-gracefulswitch_current_not_ready_pending_update-two", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_current_not_ready_pending_update-two", + ), + ); + + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_current_not_ready_pending_update-one": serde_json::json!({}) } + ] + }); + + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + let second_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + + // Switch to first one (current) + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .unwrap(); + + let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let new_service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_current_not_ready_pending_update-two": serde_json::json!({ "shuffleAddressList": false }) }, + ] + }); + let second_update = ResolverUpdate { + endpoints: Ok(vec![second_endpoint.clone()]), + ..Default::default() + }; + let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: new_service_config, + }) + .unwrap(); + graceful_switch + .resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) + .unwrap(); + + let second_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let mut second_subchannels = 
second_subchannels.into_iter(); + move_subchannel_to_state( + &mut *graceful_switch, + second_subchannels.next().unwrap(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_not_ready_pending_update-two", + ) + .await; + } + + // Tests that the gracefulswitch policy should replace the current child + // with the pending child if the current child was ready but then leaves ready. + #[tokio::test] + async fn gracefulswitch_current_leaving_ready() { + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + reg_stub_policy( + "stub-gracefulswitch_current_leaving_ready-one", + create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_current_leaving_ready-one"), + ); + reg_stub_policy( + "stub-gracefulswitch_current_leaving_ready-two", + create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_current_leaving_ready-two"), + ); + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_current_leaving_ready-one": serde_json::json!({}) } + ] + }); + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + // let pickfirst_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + + // Switch to first one (current) + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .unwrap(); + + let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + move_subchannel_to_state( + &mut *graceful_switch, + current_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_leaving_ready-one", + ) + .await; + let new_service_config = 
serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_current_leaving_ready-two": serde_json::json!({}) }, + + ] + }); + let new_update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: new_service_config, + }) + .unwrap(); + graceful_switch + .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) + .unwrap(); + + let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + + move_subchannel_to_state( + &mut *graceful_switch, + current_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Connecting, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_leaving_ready-one", + ) + .await; + move_subchannel_to_state( + &mut *graceful_switch, + pending_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Connecting, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_leaving_ready-two", + ) + .await; + } + + // Tests that the gracefulswitch policy should replace the current child + // with the pending child if the pending child leaves connecting. 
+ #[tokio::test] + async fn gracefulswitch_pending_leaving_connecting() { + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + reg_stub_policy( + "stub-gracefulswitch_current_leaving_ready-one", + create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_current_leaving_ready-one"), + ); + reg_stub_policy( + "stub-gracefulswitch_current_leaving_ready-two", + create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_current_leaving_ready-two"), + ); + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_current_leaving_ready-one": serde_json::json!({}) } + ] + }); + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + // let pickfirst_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + + // Switch to first one (current) + graceful_switch + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .unwrap(); + + let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + move_subchannel_to_state( + &mut *graceful_switch, + current_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_leaving_ready-one", + ) + .await; + let new_service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_current_leaving_ready-two": serde_json::json!({}) }, + ] + }); + let new_update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: new_service_config, + }) + .unwrap(); + + graceful_switch + .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) + 
.unwrap(); + + let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + + move_subchannel_to_state( + &mut *graceful_switch, + pending_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::TransientFailure, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_leaving_ready-two", + ) + .await; + move_subchannel_to_state( + &mut *graceful_switch, + pending_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Connecting, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_current_leaving_ready-two", + ) + .await; + } + + // Tests that the gracefulswitch policy should remove the current child's + // subchannels after swapping. + #[tokio::test] + #[should_panic( + expected = "Subchannel not found in graceful switch: Subchannel: :127.0.0.1:1234" + )] + async fn gracefulswitch_subchannels_removed_after_current_child_swapped() { + let (mut rx_events, mut graceful_switch, mut tcc) = setup(); + reg_stub_policy( + "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-one", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-one", + ), + ); + reg_stub_policy( + "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two", + create_funcs_for_gracefulswitch_tests( + "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two", + ), + ); + let service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-one": serde_json::json!({}) } + ] + }); + let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: service_config, + }) + .unwrap(); + let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); + let update = ResolverUpdate { + endpoints: Ok(vec![endpoint.clone()]), + ..Default::default() + }; + graceful_switch + .resolver_update(update.clone(), 
Some(&parsed_config), &mut *tcc) + .unwrap(); + + let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + move_subchannel_to_state( + &mut *graceful_switch, + current_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Ready, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-one", + ) + .await; + let new_service_config = serde_json::json!({ + "children_policies": [ + { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two": serde_json::json!({ "shuffleAddressList": false }) }, + ] + }); + let second_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + let second_update = ResolverUpdate { + endpoints: Ok(vec![second_endpoint.clone()]), + ..Default::default() + }; + let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { + value: new_service_config, + }) + .unwrap(); + graceful_switch + .resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) + .unwrap(); + let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let mut pending_subchannels = pending_subchannels.into_iter(); + println!("moving subchannel to idle"); + move_subchannel_to_state( + &mut *graceful_switch, + pending_subchannels.next().unwrap(), + tcc.as_mut(), + ConnectivityState::Idle, + ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two", + ) + .await; + move_subchannel_to_state( + &mut *graceful_switch, + current_subchannels[0].clone(), + tcc.as_mut(), + ConnectivityState::Connecting, + ); + } +} diff --git a/grpc/src/client/load_balancing/mod.rs b/grpc/src/client/load_balancing/mod.rs index 168da55e3..5a0106ecb 100644 --- a/grpc/src/client/load_balancing/mod.rs +++ b/grpc/src/client/load_balancing/mod.rs @@ -53,7 +53,9 @@ use crate::client::{ }; pub mod child_manager; +pub mod 
graceful_switch; pub mod pick_first; + #[cfg(test)] pub mod test_utils; diff --git a/grpc/src/client/load_balancing/test_utils.rs b/grpc/src/client/load_balancing/test_utils.rs index a7fb623fb..4ecbedb51 100644 --- a/grpc/src/client/load_balancing/test_utils.rs +++ b/grpc/src/client/load_balancing/test_utils.rs @@ -30,6 +30,7 @@ use crate::client::load_balancing::{ use crate::client::name_resolution::{Address, ResolverUpdate}; use crate::client::service_config::LbConfig; use crate::client::ConnectivityState; +use crate::rt::{Runtime, Sleep}; use crate::service::{Message, Request, Response, Service}; use serde::{Deserialize, Serialize}; use std::any::Any; @@ -37,7 +38,8 @@ use std::collections::HashMap; use std::error::Error; use std::hash::{Hash, Hasher}; use std::sync::Mutex; -use std::{fmt::Debug, ops::Add, sync::Arc}; +use std::time::Duration; +use std::{fmt::Debug, future::Future, ops::Add, sync::Arc}; use tokio::sync::mpsc::Sender; use tokio::sync::{mpsc, Notify}; use tokio::task::AbortHandle; @@ -158,26 +160,48 @@ impl WorkScheduler for TestWorkScheduler { } // The callback to invoke when resolver_update is invoked on the stub policy. -type ResolverUpdateFn = fn( - ResolverUpdate, - Option<&LbConfig>, - &mut dyn ChannelController, -) -> Result<(), Box>; +type ResolverUpdateFn = Arc< + dyn Fn( + &mut StubPolicyData, + ResolverUpdate, + Option<&LbConfig>, + &mut dyn ChannelController, + ) -> Result<(), Box> + + Send + + Sync, +>; // The callback to invoke when subchannel_update is invoked on the stub policy. -type SubchannelUpdateFn = fn(Arc, &SubchannelState, &mut dyn ChannelController); +type SubchannelUpdateFn = Arc< + dyn Fn(&mut StubPolicyData, Arc, &SubchannelState, &mut dyn ChannelController) + + Send + + Sync, +>; /// This struct holds `LbPolicy` trait stub functions that tests are expected to /// implement. 
-#[derive(Clone, Default)] +#[derive(Clone)] pub struct StubPolicyFuncs { pub resolver_update: Option, pub subchannel_update: Option, } +/// Data holds test data that will be passed all to functions in PolicyFuncs +pub struct StubPolicyData { + pub test_data: Option>, +} + +impl StubPolicyData { + /// Creates an instance of StubPolicyData. + pub fn new() -> Self { + Self { test_data: None } + } +} + /// The stub `LbPolicy` that calls the provided functions. pub struct StubPolicy { funcs: StubPolicyFuncs, + data: StubPolicyData, } impl LbPolicy for StubPolicy { @@ -187,8 +211,8 @@ impl LbPolicy for StubPolicy { config: Option<&LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { - if let Some(f) = &self.funcs.resolver_update { - return f(update, config, channel_controller); + if let Some(f) = &mut self.funcs.resolver_update { + return f(&mut self.data, update, config, channel_controller); } Ok(()) } @@ -200,7 +224,7 @@ impl LbPolicy for StubPolicy { channel_controller: &mut dyn ChannelController, ) { if let Some(f) = &self.funcs.subchannel_update { - f(subchannel, state, channel_controller); + f(&mut self.data, subchannel, state, channel_controller); } } @@ -219,10 +243,18 @@ pub struct StubPolicyBuilder { funcs: StubPolicyFuncs, } +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub(super) struct MockConfig { + shuffle_address_list: Option, +} + impl LbPolicyBuilder for StubPolicyBuilder { fn build(&self, options: LbPolicyOptions) -> Box { + let data = StubPolicyData::new(); Box::new(StubPolicy { funcs: self.funcs.clone(), + data, }) } @@ -232,9 +264,15 @@ impl LbPolicyBuilder for StubPolicyBuilder { fn parse_config( &self, - _config: &ParsedJsonLbConfig, + config: &ParsedJsonLbConfig, ) -> Result, Box> { - todo!("Implement parse_config in StubPolicyBuilder") + let cfg: MockConfig = match config.convert_to() { + Ok(c) => c, + Err(e) => { + return Err(format!("failed to parse JSON config: {}", e).into()); + 
} + } + }; + Ok(Some(LbConfig::new(cfg))) + } } From 49a8bd094346909d26ebe89a2332c076a1534f53 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 5 Nov 2025 10:28:51 -0800 Subject: [PATCH 02/12] Implement graceful switch balancer and other LB-related changes. General: - Add Debug to many traits and derive/impl in structs. - Pass LB config to LB policies via `Option` instead of `Option<&LbConfig>`. It should be rare that policies want to store a config except for the leaf policy. Child manager: The original assumption was that all children would be the same type/configuration, but several policies (including gracefulswitch) will not have that property. So, several changes are made: - Children are considered unique by both their identifier and their LbPolicyBuilder's name(). - Make it so the sharder also can shard LbConfig and provide it via the ChildUpdate.child_update field in addition to the ResolverUpdate. - Make ResolverUpdateSharder a generic instead of Box. - Add booleans so users of child manager can easily tell whether any child policies updated themselves, and which ones did. - Pass &mut self for sharder so that it can maintain and update its state if needed. - Change the sharder's output ChildUpdate.child_update field to an Option; if None then the child will not be called during the resolver update, but will remain in the child manager. - Change child_states into children and provide the whole Child struct, exposing the fields it contains. - Provide mutable access to the sharder. - Change the LB config to be a flat JSON array to facilitate use within another LB policy that should not need a struct to contain the children. - Minor test cleanups Graceful switch: The previous implementation in #2399 contained a lot of logic to manage child policy delegation. It was intended that only ChildManager should need to have this kind of logic. 
- Uses a Sharder that simply emits the active policy with no update alongside any new policy in the new LbConfig. - maybe_swap is called after every call into the ChildManager to determine if child updates necessitate a swap. - This logic is simple: if the active policy is not Ready, or if there is a new policy and it is not Connecting, then set the new policy as the active policy and call resolver_update on the ChildManager. The sharder will see that no LbConfig is provided and just emit the active policy with no config, causing the ChildManager to drop the previously active policy. If no swap is needed, update the picker of the active policy if it had an update. - Minor test cleanups/fixes vs. #2399. --- grpc/src/client/channel.rs | 3 +- .../client/load_balancing/child_manager.rs | 195 +++-- .../client/load_balancing/graceful_switch.rs | 681 ++++++------------ grpc/src/client/load_balancing/mod.rs | 26 +- grpc/src/client/load_balancing/pick_first.rs | 8 +- grpc/src/client/load_balancing/test_utils.rs | 14 +- grpc/src/client/name_resolution/dns/test.rs | 3 +- grpc/src/client/service_config.rs | 13 +- grpc/src/rt/mod.rs | 5 +- grpc/src/rt/tokio/mod.rs | 1 + 10 files changed, 408 insertions(+), 541 deletions(-) diff --git a/grpc/src/client/channel.rs b/grpc/src/client/channel.rs index edbd55131..8c16dbf5b 100644 --- a/grpc/src/client/channel.rs +++ b/grpc/src/client/channel.rs @@ -453,6 +453,7 @@ impl load_balancing::ChannelController for InternalChannelController { } // A channel that is not idle (connecting, ready, or erroring). +#[derive(Debug)] pub(super) struct GracefulSwitchBalancer { pub(super) policy: Mutex>>, policy_builder: Mutex>>, @@ -529,7 +530,7 @@ impl GracefulSwitchBalancer { p.as_mut() .unwrap() - .resolver_update(update, config.as_ref(), controller) + .resolver_update(update, config, controller) // TODO: close old LB policy gracefully vs. drop? 
} diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index d49b2dd1b..ea38a0eae 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -30,6 +30,7 @@ // production. Also, support for the work scheduler is missing. use std::collections::HashSet; +use std::fmt::Debug; use std::sync::Mutex; use std::{collections::HashMap, error::Error, hash::Hash, mem, sync::Arc}; @@ -44,18 +45,24 @@ use crate::rt::Runtime; use super::{Subchannel, SubchannelState}; // An LbPolicy implementation that manages multiple children. -pub struct ChildManager { +#[derive(Debug)] +pub struct ChildManager> { subchannel_child_map: HashMap, children: Vec>, - update_sharder: Box>, + update_sharder: S, pending_work: Arc>>, runtime: Arc, + updated: bool, // Set when any child updates its picker; cleared when accessed. } -struct Child { - identifier: T, - policy: Box, - state: LbState, +#[non_exhaustive] +#[derive(Debug)] +pub struct Child { + pub identifier: T, + pub policy: Box, + pub builder: Arc, + pub state: LbState, + pub updated: bool, // Set when the child updates its picker; cleared in child_states is called. work_scheduler: Arc, } @@ -64,44 +71,48 @@ pub struct ChildUpdate { /// The identifier the ChildManager should use for this child. pub child_identifier: T, /// The builder the ChildManager should use to create this child if it does - /// not exist. + /// not exist. The child_policy_builder's name is effectively a part of the + /// child_identifier. If two identifiers are identical but have different + /// builder names, they are treated as different children. pub child_policy_builder: Arc, - /// The relevant ResolverUpdate to send to this child. - pub child_update: ResolverUpdate, + /// The relevant ResolverUpdate and LbConfig to send to this child. If + /// None, then resolver_update will not be called on the child. 
Should + /// generally be Some for any new children, otherwise they will not be + /// called. + pub child_update: Option<(ResolverUpdate, Option)>, } pub trait ResolverUpdateSharder: Send { - /// Performs the operation of sharding an aggregate ResolverUpdate into one - /// or more ChildUpdates. Called automatically by the ChildManager when its - /// resolver_update method is called. The key in the returned map is the - /// identifier the ChildManager should use for this child. + /// Performs the operation of sharding an aggregate ResolverUpdate/LbConfig + /// into one or more ChildUpdates. Called automatically by the ChildManager + /// when its resolver_update method is called. fn shard_update( - &self, + &mut self, resolver_update: ResolverUpdate, - ) -> Result>>, Box>; + update: Option, + ) -> Result>, Box>; } -impl ChildManager { +impl ChildManager +where + S: ResolverUpdateSharder, +{ /// Creates a new ChildManager LB policy. shard_update is called whenever a /// resolver_update operation occurs. - pub fn new( - update_sharder: Box>, - runtime: Arc, - ) -> Self { + pub fn new(update_sharder: S, runtime: Arc) -> Self { Self { update_sharder, subchannel_child_map: Default::default(), children: Default::default(), pending_work: Default::default(), runtime, + updated: false, } } /// Returns data for all current children. - pub fn child_states(&mut self) -> impl Iterator { - self.children - .iter() - .map(|child| (&child.identifier, &child.state)) + pub fn children(&mut self) -> impl Iterator> { + self.children.iter() } /// Aggregates states from child policies. @@ -158,19 +169,37 @@ impl ChildManager { // Update the tracked state if the child produced an update. 
if let Some(state) = channel_controller.picker_update { self.children[child_idx].state = state; + self.children[child_idx].updated = true; + self.updated = true; }; } + + /// Returns a mutable reference to the update sharder so operations may be + /// performed on it for instances in which it needs to retain state. + pub fn update_sharder(&mut self) -> &mut S { + &mut self.update_sharder + } + + /// Returns true if any child has updated its picker since the last call to + /// child_updated. + pub fn child_updated(&mut self) -> bool { + mem::take(&mut self.updated) + } } -impl LbPolicy for ChildManager { +impl LbPolicy for ChildManager +where + T: PartialEq + Hash + Eq + Send + Sync + 'static, + S: ResolverUpdateSharder, +{ fn resolver_update( &mut self, resolver_update: ResolverUpdate, - config: Option<&LbConfig>, + config: Option, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { // First determine if the incoming update is valid. - let child_updates = self.update_sharder.shard_update(resolver_update)?; + let child_updates = self.update_sharder.shard_update(resolver_update, config)?; // Hold the lock to prevent new work requests during this operation and // rewrite the indices. @@ -197,14 +226,16 @@ impl LbPolicy for ChildManager } // Build a map of the old children from their IDs for efficient lookups. - let old_children = old_children - .into_iter() - .enumerate() - .map(|(old_idx, e)| (e.identifier, (e.policy, e.state, old_idx, e.work_scheduler))); - let mut old_children: HashMap = old_children.collect(); + let old_children = old_children.into_iter().enumerate().map(|(old_idx, e)| { + ( + (e.builder.name(), e.identifier), + (e.policy, e.state, old_idx, e.work_scheduler, e.updated), + ) + }); + let mut old_children: HashMap<(&'static str, T), _> = old_children.collect(); // Split the child updates into the IDs and builders, and the - // ResolverUpdates. + // ResolverUpdates/LbConfigs. 
let (ids_builders, updates): (Vec<_>, Vec<_>) = child_updates .map(|e| ((e.child_identifier, e.child_policy_builder), e.child_update)) .unzip(); @@ -213,7 +244,8 @@ impl LbPolicy for ChildManager // update, and create new children. Add entries back into the // subchannel map. for (new_idx, (identifier, builder)) in ids_builders.into_iter().enumerate() { - if let Some((policy, state, old_idx, work_scheduler)) = old_children.remove(&identifier) + let k = (builder.name(), identifier); + if let Some((policy, state, old_idx, work_scheduler, updated)) = old_children.remove(&k) { for subchannel in old_child_subchannels_map .remove(&old_idx) @@ -227,10 +259,12 @@ impl LbPolicy for ChildManager } *work_scheduler.idx.lock().unwrap() = Some(new_idx); self.children.push(Child { - identifier, + builder, + identifier: k.1, state, policy, work_scheduler, + updated, }); } else { let work_scheduler = Arc::new(ChildWorkScheduler { @@ -241,18 +275,19 @@ impl LbPolicy for ChildManager work_scheduler: work_scheduler.clone(), runtime: self.runtime.clone(), }); - let state = LbState::initial(); self.children.push(Child { - identifier, - state, + builder, + identifier: k.1, + state: LbState::initial(), policy, work_scheduler, + updated: false, }); }; } // Invalidate all deleted children's work_schedulers. 
- for (_, (_, _, _, work_scheduler)) in old_children { + for (_, (_, _, _, work_scheduler, _)) in old_children { *work_scheduler.idx.lock().unwrap() = None; } @@ -267,15 +302,20 @@ impl LbPolicy for ChildManager for child_idx in 0..self.children.len() { let child = &mut self.children[child_idx]; let child_update = updates.next().unwrap(); + let Some((resolver_update, config)) = child_update else { + continue; + }; let mut channel_controller = WrappedController::new(channel_controller); let _ = child .policy - .resolver_update(child_update, config, &mut channel_controller); + .resolver_update(resolver_update, config, &mut channel_controller); self.resolve_child_controller(channel_controller, child_idx); } Ok(()) } + // Forwards the subchannel_update to the child that created the subchannel + // being updated. fn subchannel_update( &mut self, subchannel: Arc, @@ -295,6 +335,7 @@ impl LbPolicy for ChildManager self.resolve_child_controller(channel_controller, child_idx); } + // Calls work on any children that scheduled work via our work scheduler. fn work(&mut self, channel_controller: &mut dyn ChannelController) { let child_idxes = mem::take(&mut *self.pending_work.lock().unwrap()); for child_idx in child_idxes { @@ -306,8 +347,14 @@ impl LbPolicy for ChildManager } } - fn exit_idle(&mut self, _channel_controller: &mut dyn ChannelController) { - todo!("implement exit_idle") + // Simply calls exit_idle on all children. 
+ fn exit_idle(&mut self, channel_controller: &mut dyn ChannelController) { + for child_idx in 0..self.children.len() { + let child = &mut self.children[child_idx]; + let mut channel_controller = WrappedController::new(channel_controller); + child.policy.exit_idle(&mut channel_controller); + self.resolve_child_controller(channel_controller, child_idx); + } } } @@ -343,6 +390,7 @@ impl ChannelController for WrappedController<'_> { } } +#[derive(Debug)] struct ChildWorkScheduler { pending_work: Arc>>, // Must be taken first for correctness idx: Mutex>, // None if the child is deleted. @@ -387,31 +435,36 @@ mod test { // TODO: This needs to be moved to a common place that can be shared between // round_robin and this test. This EndpointSharder maps endpoints to // children policies. + #[derive(Debug)] struct EndpointSharder { builder: Arc, } impl ResolverUpdateSharder for EndpointSharder { fn shard_update( - &self, + &mut self, resolver_update: ResolverUpdate, - ) -> Result>>, Box> + config: Option, + ) -> Result>, Box> { let mut sharded_endpoints = Vec::new(); - for endpoint in resolver_update.endpoints.unwrap().iter() { + for endpoint in resolver_update.endpoints.unwrap().into_iter() { let child_update = ChildUpdate { child_identifier: endpoint.clone(), child_policy_builder: self.builder.clone(), - child_update: ResolverUpdate { - attributes: resolver_update.attributes.clone(), - endpoints: Ok(vec![endpoint.clone()]), - service_config: resolver_update.service_config.clone(), - resolution_note: resolver_update.resolution_note.clone(), - }, + child_update: Some(( + ResolverUpdate { + attributes: resolver_update.attributes.clone(), + endpoints: Ok(vec![endpoint]), + service_config: resolver_update.service_config.clone(), + resolution_note: resolver_update.resolution_note.clone(), + }, + config.clone(), + )), }; sharded_endpoints.push(child_update); } - Ok(Box::new(sharded_endpoints.into_iter())) + Ok(sharded_endpoints.into_iter()) } } @@ -437,7 +490,7 @@ mod test { 
test_name: &'static str, ) -> ( mpsc::UnboundedReceiver, - Box>, + ChildManager, Box, ) { test_utils::reg_stub_policy(test_name, funcs); @@ -445,8 +498,8 @@ mod test { let tcc = Box::new(TestChannelController { tx_events }); let builder: Arc = GLOBAL_LB_REGISTRY.get_policy(test_name).unwrap(); let endpoint_sharder = EndpointSharder { builder }; - let child_manager = ChildManager::new(Box::new(endpoint_sharder), default_runtime()); - (rx_events, Box::new(child_manager), tcc) + let child_manager = ChildManager::new(endpoint_sharder, default_runtime()); + (rx_events, child_manager, tcc) } fn create_n_endpoints_with_k_addresses(n: usize, k: usize) -> Vec { @@ -481,7 +534,7 @@ mod test { } fn move_subchannel_to_state( - lb_policy: &mut dyn LbPolicy, + lb_policy: &mut impl LbPolicy, subchannel: Arc, tcc: &mut dyn ChannelController, state: ConnectivityState, @@ -553,7 +606,7 @@ mod test { "stub-childmanager_aggregate_state_is_ready_if_any_child_is_ready", ); let endpoints = create_n_endpoints_with_k_addresses(4, 1); - send_resolver_update_to_policy(child_manager.as_mut(), endpoints.clone(), tcc.as_mut()); + send_resolver_update_to_policy(&mut child_manager, endpoints.clone(), tcc.as_mut()); let mut subchannels = vec![]; for endpoint in endpoints { subchannels.push( @@ -565,25 +618,25 @@ mod test { let mut subchannels = subchannels.into_iter(); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::TransientFailure, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::Idle, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::Connecting, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::Ready, @@ -601,7 +654,7 @@ mod test { 
"stub-childmanager_aggregate_state_is_connecting_if_no_child_is_ready", ); let endpoints = create_n_endpoints_with_k_addresses(3, 1); - send_resolver_update_to_policy(child_manager.as_mut(), endpoints.clone(), tcc.as_mut()); + send_resolver_update_to_policy(&mut child_manager, endpoints.clone(), tcc.as_mut()); let mut subchannels = vec![]; for endpoint in endpoints { subchannels.push( @@ -612,19 +665,19 @@ mod test { } let mut subchannels = subchannels.into_iter(); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::TransientFailure, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::Idle, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::Connecting, @@ -647,7 +700,7 @@ mod test { ); let endpoints = create_n_endpoints_with_k_addresses(2, 1); - send_resolver_update_to_policy(child_manager.as_mut(), endpoints.clone(), tcc.as_mut()); + send_resolver_update_to_policy(&mut child_manager, endpoints.clone(), tcc.as_mut()); let mut subchannels = vec![]; for endpoint in endpoints { subchannels.push( @@ -658,13 +711,13 @@ mod test { } let mut subchannels = subchannels.into_iter(); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::TransientFailure, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::Idle, @@ -682,7 +735,7 @@ mod test { "stub-childmanager_aggregate_state_is_transient_failure_if_all_children_are", ); let endpoints = create_n_endpoints_with_k_addresses(2, 1); - send_resolver_update_to_policy(child_manager.as_mut(), endpoints.clone(), tcc.as_mut()); + send_resolver_update_to_policy(&mut child_manager, endpoints.clone(), tcc.as_mut()); 
let mut subchannels = vec![]; for endpoint in endpoints { subchannels.push( @@ -693,13 +746,13 @@ mod test { } let mut subchannels = subchannels.into_iter(); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::TransientFailure, ); move_subchannel_to_state( - child_manager.as_mut(), + &mut child_manager, subchannels.next().unwrap(), tcc.as_mut(), ConnectivityState::TransientFailure, diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs index def614ce9..93d6147d8 100644 --- a/grpc/src/client/load_balancing/graceful_switch.rs +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -1,33 +1,19 @@ -use crate::client::channel::{InternalChannelController, WorkQueueItem, WorkQueueTx}; +#![warn(unused_imports)] + +use crate::client::load_balancing::child_manager::{ + self, ChildManager, ChildUpdate, ResolverUpdateSharder, +}; use crate::client::load_balancing::{ - ChannelController, ExternalSubchannel, Failing, LbConfig, LbPolicy, LbPolicyBuilder, - LbPolicyOptions, LbState, ParsedJsonLbConfig, Pick, PickResult, Picker, QueuingPicker, - Subchannel, SubchannelState, WeakSubchannel, WorkScheduler, GLOBAL_LB_REGISTRY, + ChannelController, LbConfig, LbPolicy, LbPolicyBuilder, ParsedJsonLbConfig, Subchannel, + SubchannelState, GLOBAL_LB_REGISTRY, }; -use crate::client::name_resolution::{Address, Endpoint, ResolverUpdate}; -use crate::client::transport::{Transport, GLOBAL_TRANSPORT_REGISTRY}; +use crate::client::name_resolution::ResolverUpdate; use crate::client::ConnectivityState; -use crate::rt::{default_runtime, Runtime}; +use crate::rt::Runtime; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::error::Error; -use std::hash::Hash; -use std::mem; -use std::ops::Add; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::{Arc, Mutex}; - -use crate::service::{Message, Request, Response, 
Service}; -use core::panic; -use serde::{Deserialize, Serialize}; -use serde_json::json; -use tokio::sync::{mpsc, Notify}; -use tonic::{async_trait, metadata::MetadataMap}; - -#[derive(Deserialize)] -struct GracefulSwitchConfig { - children_policies: Vec>, -} +use std::sync::Arc; struct GracefulSwitchLbConfig { child_builder: Arc, @@ -43,58 +29,94 @@ impl GracefulSwitchLbConfig { } } -/** -Struct for Graceful Switch. -*/ +#[derive(Debug)] +struct UpdateSharder { + active_child_builder: Option>, +} + +impl ResolverUpdateSharder<()> for UpdateSharder { + fn shard_update( + &mut self, + resolver_update: ResolverUpdate, + config: Option, // The config is always produced based on the state stored in the sharder. + ) -> Result>, Box> + { + if config.is_none() { + // A config should always be passed. When one is not passed, we + // delete any children besides active. This is how the graceful + // switch balancer performs a swap of active policies. + return Ok(vec![ChildUpdate { + child_policy_builder: self.active_child_builder.clone().unwrap(), + child_identifier: (), + child_update: None, + }] + .into_iter()); + } + + let gsb_config: Arc = + config.unwrap().convert_to().expect("invalid config"); + + if self.active_child_builder.is_none() { + // When there are no children yet, the current update immediately + // becomes the active child. + self.active_child_builder = Some(gsb_config.child_builder.clone()); + } + let active_child_builder = self.active_child_builder.as_ref().unwrap(); + + // Always include the incoming update + let mut resp = Vec::with_capacity(2); + + resp.push(ChildUpdate { + child_policy_builder: gsb_config.child_builder.clone(), + child_identifier: (), + child_update: Some((resolver_update, gsb_config.child_config.clone())), + }); + + // Always include the active child so that the child manager will not + // delete it.
+ if gsb_config.child_builder.name() != active_child_builder.name() { + resp.push(ChildUpdate { + child_policy_builder: active_child_builder.clone(), + child_identifier: (), + child_update: None, + }); + } + + Ok(resp.into_iter()) + } +} + +impl UpdateSharder { + fn new() -> Self { + Self { + active_child_builder: None, + } + } +} + +/// A graceful switching load balancing policy. In graceful switch, there is +/// always either one or two child policies. When there is one policy, all +/// operations are delegated to it. When the child policy type needs to change, +/// graceful switch creates a "pending" child policy alongside the "active" +/// policy. When the pending policy leaves the CONNECTING state, or when the +/// active policy is not READY, graceful switch will promote the pending policy +/// to active and tear down the previously active policy. +#[derive(Debug)] pub struct GracefulSwitchPolicy { - subchannel_to_policy: HashMap, - managing_policy: Mutex, - work_scheduler: Arc, - runtime: Arc, + child_manager: ChildManager<(), UpdateSharder>, // Child ID is the name of the child policy. } impl LbPolicy for GracefulSwitchPolicy { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option<&LbConfig>, + config: Option, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { - if update.service_config.as_ref().is_ok_and(|sc| sc.is_some()) { - return Err("can't do service configs yet".into()); - } - let cfg: Arc = - match config.unwrap().convert_to::>() { - Ok(cfg) => (*cfg).clone(), - Err(e) => panic!("convert_to failed: {e}"), - }; - let mut wrapped_channel_controller = WrappedController::new(channel_controller); - let mut target_child_kind = ChildKind::Pending; - - // Determine if we can switch the new policy in. If there is no children - // yet or the new policy isn't the same as the latest policy, then - // we can swap.
- let needs_switch = { - let mut managing_policy = self.managing_policy.lock().unwrap(); - managing_policy.no_policy() - || managing_policy.latest_policy() != cfg.child_builder.name() - }; - - if needs_switch { - target_child_kind = self.switch_to(config); - } - { - let mut managing_policy = self.managing_policy.lock().unwrap(); - if let Some(child_policy) = managing_policy.get_child_policy(&target_child_kind) { - child_policy.policy.resolver_update( - update, - cfg.child_config.as_ref(), - &mut wrapped_channel_controller, - )?; - } - } - self.resolve_child_controller(&mut wrapped_channel_controller, target_child_kind); - Ok(()) + let res = self + .child_manager + .resolver_update(update, config, channel_controller)?; + self.maybe_swap(channel_controller) } fn subchannel_update( @@ -103,58 +125,19 @@ impl LbPolicy for GracefulSwitchPolicy { state: &SubchannelState, channel_controller: &mut dyn ChannelController, ) { - let mut wrapped_channel_controller = WrappedController::new(channel_controller); - let which_child = self - .subchannel_to_policy - .get(&WeakSubchannel::new(&subchannel)) - .unwrap_or_else(|| { - panic!("Subchannel not found in graceful switch: {}", subchannel); - }); - { - let mut managing_policy = self.managing_policy.lock().unwrap(); - if let Some(child_policy) = managing_policy.get_child_policy(which_child) { - child_policy.policy.subchannel_update( - subchannel, - state, - &mut wrapped_channel_controller, - ); - } - } - self.resolve_child_controller(&mut wrapped_channel_controller, which_child.clone()); + self.child_manager + .subchannel_update(subchannel, state, channel_controller); + let _ = self.maybe_swap(channel_controller); } fn work(&mut self, channel_controller: &mut dyn ChannelController) { - let mut wrapped_channel_controller = WrappedController::new(channel_controller); - let mut child_kind = ChildKind::Pending; - { - let mut managing_policy = self.managing_policy.lock().unwrap(); - if let Some(ref mut pending_child) = 
managing_policy.pending_child { - pending_child.policy.work(&mut wrapped_channel_controller); - } else if let Some(ref mut current_child) = managing_policy.current_child { - current_child.policy.work(&mut wrapped_channel_controller); - child_kind = ChildKind::Current; - } - } - self.resolve_child_controller(&mut wrapped_channel_controller, child_kind); + self.child_manager.work(channel_controller); + let _ = self.maybe_swap(channel_controller); } fn exit_idle(&mut self, channel_controller: &mut dyn ChannelController) { - let mut wrapped_channel_controller = WrappedController::new(channel_controller); - let mut child_kind = ChildKind::Pending; - { - let mut managing_policy = self.managing_policy.lock().unwrap(); - if let Some(ref mut pending_child) = managing_policy.pending_child { - pending_child - .policy - .exit_idle(&mut wrapped_channel_controller); - } else if let Some(ref mut current_child) = managing_policy.current_child { - current_child - .policy - .exit_idle(&mut wrapped_channel_controller); - child_kind = ChildKind::Current; - } - } - self.resolve_child_controller(&mut wrapped_channel_controller, child_kind); + self.child_manager.exit_idle(channel_controller); + let _ = self.maybe_swap(channel_controller); } } @@ -165,297 +148,112 @@ enum ChildKind { } impl GracefulSwitchPolicy { - /// Create a new Graceful Switch policy. - pub fn new(work_scheduler: Arc, runtime: Arc) -> Self { + /// Creates a new Graceful Switch policy. 
+ pub fn new(runtime: Arc) -> Self { GracefulSwitchPolicy { - subchannel_to_policy: HashMap::default(), - managing_policy: Mutex::new(ChildPolicyManager::new()), - work_scheduler, - runtime, - } - } - - fn resolve_child_controller( - &mut self, - channel_controller: &mut WrappedController, - child_kind: ChildKind, - ) { - let mut should_swap = false; - let mut final_child_kind = child_kind.clone(); - { - let mut managing_policy = self.managing_policy.lock().unwrap(); - - match child_kind { - ChildKind::Pending => { - if let Some(ref mut pending_policy) = managing_policy.pending_child { - if let Some(picker) = channel_controller.picker_update.take() { - pending_policy.policy_state = picker.connectivity_state; - pending_policy.policy_picker_update = Some(picker); - } - } - } - - ChildKind::Current => { - if let Some(ref mut current_policy) = managing_policy.current_child { - if let Some(picker) = channel_controller.picker_update.take() { - current_policy.policy_state = picker.connectivity_state; - channel_controller.channel_controller.update_picker(picker); - } - } - } - } - - let current_child = managing_policy.current_child.as_ref(); - let pending_child = managing_policy.pending_child.as_ref(); - - // If the current child is in any state but Ready and the pending - // child is in any state but connecting, then the policies should - // swap. - if let (Some(current_child), Some(pending_child)) = (current_child, pending_child) { - if current_child.policy_state != ConnectivityState::Ready - || pending_child.policy_state != ConnectivityState::Connecting - { - println!("Condition met, should swap."); - should_swap = true; - } - } - } - - if should_swap { - self.swap(channel_controller); - final_child_kind = ChildKind::Current; - } - - // Any created subchannels are mapped to the appropriate child. 
- for csc in &channel_controller.created_subchannels { - println!("Printing csc: {:?}", csc); - let key = WeakSubchannel::new(csc); - self.subchannel_to_policy - .entry(key) - .or_insert_with(|| final_child_kind.clone()); + child_manager: ChildManager::new(UpdateSharder::new(), runtime), } } - fn swap(&mut self, channel_controller: &mut WrappedController) { - let mut managing_policy = self.managing_policy.lock().unwrap(); - managing_policy.current_child = managing_policy.pending_child.take(); - self.subchannel_to_policy - .retain(|_, v| *v == ChildKind::Pending); - - // Remap all the subchannels mapped to Pending to Current. - for (_, child_kind) in self.subchannel_to_policy.iter_mut() { - if *child_kind == ChildKind::Pending { - *child_kind = ChildKind::Current; - } - } - - // Send the pending child's cached picker update. - if let Some(current) = &mut managing_policy.current_child { - if let Some(picker) = current.policy_picker_update.take() { - channel_controller.channel_controller.update_picker(picker); - } - } - } - - fn parse_config(config: &ParsedJsonLbConfig) -> Result> { - let cfg: GracefulSwitchConfig = match config.convert_to() { + /// Parses a child config list and returns a LB config for the + /// GracefulSwitchPolicy. Config is expected to contain a JSON array of LB + /// policy names + configs matching the format of the "loadBalancingConfig" + /// field in the gRPC ServiceConfig. It returns a type that should be passed + /// to resolver_update in the LbConfig.config field. 
+ pub fn parse_config( + config: &ParsedJsonLbConfig, + ) -> Result> { + let cfg: Vec> = match config.convert_to() { Ok(c) => c, Err(e) => { return Err(format!("failed to parse JSON config: {}", e).into()); } }; - for c in &cfg.children_policies { - assert!( - c.len() == 1, - "Each children_policies entry must contain exactly one policy, found {}", - c.len() - ); - if let Some((policy_name, policy_config)) = c.iter().next() { - if let Some(child) = GLOBAL_LB_REGISTRY.get_policy(policy_name.as_str()) { - if policy_name == "round_robin" { - println!("is round robin"); - let graceful_switch_lb_config = GracefulSwitchLbConfig::new(child, None); - return Ok(LbConfig::new(Arc::new(graceful_switch_lb_config))); - } - let parsed_config = ParsedJsonLbConfig { - value: policy_config.clone(), - }; - let config_result = child.parse_config(&parsed_config); - let config = match config_result { - Ok(Some(cfg)) => cfg, - Ok(None) => { - return Err("child policy config returned None".into()); - } - Err(e) => { - println!("returning error in parse_config"); - return Err( - format!("failed to parse child policy config: {}", e).into() - ); - } - }; - let graceful_switch_lb_config = - GracefulSwitchLbConfig::new(child, Some(config)); - return Ok(LbConfig::new(Arc::new(graceful_switch_lb_config))); - } else { - continue; - } - } else { - continue; + for c in cfg { + if c.len() != 1 { + return Err(format!( + "Each element in array must contain exactly one policy name/config; found {:?}", + c.keys() + ) + .into()); } - } - Err("no supported policies found in config".into()) - } - - fn switch_to(&mut self, config: Option<&LbConfig>) -> ChildKind { - let cfg: Arc = - match config.unwrap().convert_to::>() { - Ok(cfg) => (*cfg).clone(), - Err(e) => panic!("convert_to failed: {e}"), + let (policy_name, policy_config) = c.into_iter().next().unwrap(); + let Some(child) = GLOBAL_LB_REGISTRY.get_policy(policy_name.as_str()) else { + continue; }; - let options = LbPolicyOptions { - work_scheduler: 
self.work_scheduler.clone(), - runtime: self.runtime.clone(), - }; - let new_policy = cfg.child_builder.build(options); - let mut managing_policy = self.managing_policy.lock().unwrap(); - - let new_child = ChildPolicy::new( - cfg.child_builder.clone(), - new_policy, - ConnectivityState::Connecting, - ); - if managing_policy.current_child.is_none() { - managing_policy.current_child = Some(new_child); - ChildKind::Current - } else { - managing_policy.pending_child = Some(new_child); - ChildKind::Pending - } - } -} - -// Struct to wrap a channel controller around. The purpose is to -// store a picker update to check connectivity state of a child. -// This helps to decide whether to swap or not in subchannel_update. -// Also tracks created_subchannels, which then is then used to map subchannels to -// children policies. -struct WrappedController<'a> { - channel_controller: &'a mut dyn ChannelController, - created_subchannels: Vec>, - picker_update: Option, -} - -impl<'a> WrappedController<'a> { - fn new(channel_controller: &'a mut dyn ChannelController) -> Self { - Self { - channel_controller, - created_subchannels: vec![], - picker_update: None, - } - } -} - -impl ChannelController for WrappedController<'_> { - //call into the real channel controller - fn new_subchannel(&mut self, address: &Address) -> Arc { - let subchannel = self.channel_controller.new_subchannel(address); - self.created_subchannels.push(subchannel.clone()); - subchannel - } - - fn update_picker(&mut self, update: LbState) { - self.picker_update = Some(update); - } - - fn request_resolution(&mut self) { - self.channel_controller.request_resolution(); - } -} - -// ChildPolicy represents a child policy. 
-struct ChildPolicy { - policy_builder: Arc, - policy: Box, - policy_state: ConnectivityState, - policy_picker_update: Option, -} - -impl ChildPolicy { - fn new( - policy_builder: Arc, - policy: Box, - policy_state: ConnectivityState, - ) -> Self { - ChildPolicy { - policy_builder, - policy, - policy_state, - policy_picker_update: None, - } - } -} - -// This ChildPolicyManager keeps track of the current and pending children. It -// keeps track of the latest policy and retrieves it's child policy based on an -// enum. -struct ChildPolicyManager { - current_child: Option, - pending_child: Option, -} - -impl ChildPolicyManager { - fn new() -> Self { - ChildPolicyManager { - current_child: None, - pending_child: None, + let parsed_config = ParsedJsonLbConfig { + value: policy_config, + }; + let config = child.parse_config(&parsed_config)?; + let graceful_switch_lb_config = GracefulSwitchLbConfig::new(child, config); + return Ok(LbConfig::new(graceful_switch_lb_config)); } + Err("no supported policies found in config".into()) } - fn latest_policy(&mut self) -> String { - if let Some(pending_child) = &self.pending_child { - pending_child.policy_builder.name().to_string() - } else if let Some(current_child) = &self.current_child { - current_child.policy_builder.name().to_string() - } else { - "".to_string() + fn maybe_swap( + &mut self, + channel_controller: &mut dyn ChannelController, + ) -> Result<(), Box> { + if !self.child_manager.child_updated() { + return Ok(()); } - } - fn no_policy(&self) -> bool { - if self.pending_child.is_none() && self.current_child.is_none() { - return true; + let active_name = self + .child_manager + .update_sharder() + .active_child_builder + .as_ref() + .unwrap() + .name(); + let mut could_switch = false; + let mut active_state_if_updated = None; + let mut pending_builder = None; + let mut pending_state = None; + for child in self.child_manager.children() { + if child.builder.name() == active_name { + could_switch |= 
child.state.connectivity_state != ConnectivityState::Ready; + if child.updated { + active_state_if_updated = Some(child.state.clone()); + } + } else { + pending_builder = Some(child.builder.clone()); + pending_state = Some(child.state.clone()); + could_switch |= child.state.connectivity_state != ConnectivityState::Connecting; + } } - false - } - - fn get_child_policy(&mut self, kind: &ChildKind) -> Option<&mut ChildPolicy> { - match kind { - ChildKind::Current => self.current_child.as_mut(), - ChildKind::Pending => self.pending_child.as_mut(), + if could_switch && pending_builder.is_some() { + let sharder = self.child_manager.update_sharder(); + sharder.active_child_builder = pending_builder; + self.child_manager.resolver_update( + ResolverUpdate::default(), + None, + channel_controller, + )?; + channel_controller.update_picker(pending_state.unwrap().clone()); + } else if active_state_if_updated.is_some() { + channel_controller.update_picker(active_state_if_updated.unwrap()); } + return Ok(()); } } #[cfg(test)] mod test { - use crate::client::channel::WorkQueueItem; - use crate::client::load_balancing::graceful_switch::{self, GracefulSwitchPolicy}; + use crate::client::load_balancing::graceful_switch::GracefulSwitchPolicy; use crate::client::load_balancing::test_utils::{ - self, reg_stub_policy, StubPolicyBuilder, StubPolicyData, StubPolicyFuncs, - TestChannelController, TestEvent, TestSubchannel, TestWorkScheduler, + self, reg_stub_policy, StubPolicyData, StubPolicyFuncs, TestChannelController, TestEvent, + TestSubchannel, TestWorkScheduler, }; - use crate::client::load_balancing::{pick_first, LbState, Pick}; use crate::client::load_balancing::{ - ChannelController, LbPolicy, LbPolicyBuilder, LbPolicyOptions, ParsedJsonLbConfig, - PickResult, Picker, Subchannel, SubchannelState, GLOBAL_LB_REGISTRY, + ChannelController, LbPolicy, ParsedJsonLbConfig, PickResult, Picker, Subchannel, + SubchannelState, }; + use crate::client::load_balancing::{LbState, Pick}; use 
crate::client::name_resolution::{Address, Endpoint, ResolverUpdate}; - use crate::client::service_config::ServiceConfig; use crate::client::ConnectivityState; - use crate::rt::{default_runtime, Runtime}; + use crate::rt::default_runtime; use crate::service::Request; - use std::collections::HashMap; - use std::thread::current; use std::{panic, sync::Arc}; use tokio::sync::mpsc; use tonic::metadata::MetadataMap; @@ -481,6 +279,7 @@ mod test { } } + #[derive(Debug)] struct TestPicker { name: &'static str, } @@ -500,8 +299,9 @@ mod test { }, mpsc::unbounded_channel().0, )), - on_complete: None, + // on_complete: None, metadata: MetadataMap::new(), + on_complete: None, }) } } @@ -588,7 +388,7 @@ mod test { let tcc = Box::new(TestChannelController { tx_events }); - let graceful_switch = GracefulSwitchPolicy::new(work_scheduler, default_runtime()); + let graceful_switch = GracefulSwitchPolicy::new(default_runtime()); (rx_events, Box::new(graceful_switch), tcc) } @@ -631,7 +431,8 @@ mod test { let req = test_utils::new_request(); println!("{:?}", update.connectivity_state); - match update.picker.pick(&req) { + let pick = update.picker.pick(&req); + match &pick { PickResult::Pick(pick) => { let received_address = &pick.subchannel.address().address.to_string(); // It's good practice to create the expected value once. 
@@ -645,7 +446,7 @@ mod test { ); } } - other => panic!("unexpected pick result"), + other => panic!("unexpected pick result: {:?}", pick), } } other => panic!("unexpected event {:?}", other), @@ -686,12 +487,11 @@ mod test { ); let (mut rx_events, mut graceful_switch, mut tcc) = setup(); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!([ { "stub-gracefulswitch_successful_first_update-one": serde_json::json!({}) }, { "stub-gracefulswitch_successful_first_update-two": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, @@ -704,7 +504,7 @@ mod test { ..Default::default() }; graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -740,11 +540,10 @@ mod test { ), ); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!([ { "stub-gracefulswitch_switching_to_resolver_update-one": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, }) @@ -757,7 +556,7 @@ mod test { }; graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); // Subchannel creation and ready @@ -778,17 +577,16 @@ mod test { .await; // 2. 
Switch to mock_policy_two as pending - let new_service_config = serde_json::json!({ - "children_policies": [ + let new_service_config = serde_json::json!([ { "stub-gracefulswitch_switching_to_resolver_update-two": serde_json::json!({}) } ] - }); + ); let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: new_service_config, }) .unwrap(); graceful_switch - .resolver_update(update.clone(), Some(&new_parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(new_parsed_config), &mut *tcc) .unwrap(); // Simulate subchannel creation and ready for pending @@ -818,11 +616,11 @@ mod test { "stub-gracefulswitch_two_policies_same_type-one", create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_two_policies_same_type-one"), ); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!( + [ { "stub-gracefulswitch_two_policies_same_type-one": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, }) @@ -833,7 +631,7 @@ mod test { ..Default::default() }; graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; let mut subchannels = subchannels.into_iter(); @@ -843,17 +641,17 @@ mod test { tcc.as_mut(), ConnectivityState::Ready, ); - let service_config2 = serde_json::json!({ - "children_policies": [ + let service_config2 = serde_json::json!( + [ { "stub-gracefulswitch_two_policies_same_type-one": serde_json::json!({}) } ] - }); + ); let parsed_config2 = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config2, }) .unwrap(); graceful_switch - .resolver_update(update.clone(), Some(&parsed_config2), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config2), &mut *tcc) .unwrap(); } @@ -875,11 
+673,10 @@ mod test { ), ); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!([ { "stub-gracefulswitch_current_not_ready_pending_update-one": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, @@ -895,15 +692,14 @@ mod test { // Switch to first one (current) graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; - let new_service_config = serde_json::json!({ - "children_policies": [ + let new_service_config = serde_json::json!([ { "stub-gracefulswitch_current_not_ready_pending_update-two": serde_json::json!({ "shuffleAddressList": false }) }, ] - }); + ); let second_update = ResolverUpdate { endpoints: Ok(vec![second_endpoint.clone()]), ..Default::default() @@ -913,7 +709,7 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) + .resolver_update(second_update.clone(), Some(new_parsed_config), &mut *tcc) .unwrap(); let second_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -944,18 +740,17 @@ mod test { "stub-gracefulswitch_current_leaving_ready-two", create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_current_leaving_ready-two"), ); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!([ { "stub-gracefulswitch_current_leaving_ready-one": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, }) .unwrap(); let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); - // let pickfirst_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + let endpoint2 = 
create_endpoint_with_one_address("127.0.0.1:1235".to_string()); let update = ResolverUpdate { endpoints: Ok(vec![endpoint.clone()]), ..Default::default() @@ -963,7 +758,7 @@ mod test { // Switch to first one (current) graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -978,14 +773,14 @@ mod test { "stub-gracefulswitch_current_leaving_ready-one", ) .await; - let new_service_config = serde_json::json!({ - "children_policies": [ + let new_service_config = serde_json::json!( + [ { "stub-gracefulswitch_current_leaving_ready-two": serde_json::json!({}) }, ] - }); + ); let new_update = ResolverUpdate { - endpoints: Ok(vec![endpoint.clone()]), + endpoints: Ok(vec![endpoint2.clone()]), ..Default::default() }; let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { @@ -993,14 +788,14 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) + .resolver_update(new_update.clone(), Some(new_parsed_config), &mut *tcc) .unwrap(); let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - current_subchannels[0].clone(), + pending_subchannels[0].clone(), tcc.as_mut(), ConnectivityState::Connecting, ); @@ -1011,7 +806,7 @@ mod test { .await; move_subchannel_to_state( &mut *graceful_switch, - pending_subchannels[0].clone(), + current_subchannels[0].clone(), tcc.as_mut(), ConnectivityState::Connecting, ); @@ -1035,17 +830,17 @@ mod test { "stub-gracefulswitch_current_leaving_ready-two", create_funcs_for_gracefulswitch_tests("stub-gracefulswitch_current_leaving_ready-two"), ); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!( + [ { 
"stub-gracefulswitch_current_leaving_ready-one": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, }) .unwrap(); let endpoint = create_endpoint_with_one_address("127.0.0.1:1234".to_string()); - // let pickfirst_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + let endpoint2 = create_endpoint_with_one_address("127.0.0.1:1235".to_string()); let update = ResolverUpdate { endpoints: Ok(vec![endpoint.clone()]), ..Default::default() @@ -1053,7 +848,7 @@ mod test { // Switch to first one (current) graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -1068,13 +863,13 @@ mod test { "stub-gracefulswitch_current_leaving_ready-one", ) .await; - let new_service_config = serde_json::json!({ - "children_policies": [ + let new_service_config = serde_json::json!( + [ { "stub-gracefulswitch_current_leaving_ready-two": serde_json::json!({}) }, ] - }); + ); let new_update = ResolverUpdate { - endpoints: Ok(vec![endpoint.clone()]), + endpoints: Ok(vec![endpoint2.clone()]), ..Default::default() }; let new_parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { @@ -1083,7 +878,7 @@ mod test { .unwrap(); graceful_switch - .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) + .resolver_update(new_update.clone(), Some(new_parsed_config), &mut *tcc) .unwrap(); let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -1115,9 +910,6 @@ mod test { // Tests that the gracefulswitch policy should remove the current child's // subchannels after swapping. 
#[tokio::test] - #[should_panic( - expected = "Subchannel not found in graceful switch: Subchannel: :127.0.0.1:1234" - )] async fn gracefulswitch_subchannels_removed_after_current_child_swapped() { let (mut rx_events, mut graceful_switch, mut tcc) = setup(); reg_stub_policy( @@ -1132,11 +924,11 @@ mod test { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two", ), ); - let service_config = serde_json::json!({ - "children_policies": [ + let service_config = serde_json::json!( + [ { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-one": serde_json::json!({}) } ] - }); + ); let parsed_config = GracefulSwitchPolicy::parse_config(&ParsedJsonLbConfig { value: service_config, }) @@ -1147,7 +939,7 @@ mod test { ..Default::default() }; graceful_switch - .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -1162,12 +954,12 @@ mod test { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-one", ) .await; - let new_service_config = serde_json::json!({ - "children_policies": [ + let new_service_config = serde_json::json!( + [ { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two": serde_json::json!({ "shuffleAddressList": false }) }, ] - }); - let second_endpoint = create_endpoint_with_one_address("0.0.0.0.0".to_string()); + ); + let second_endpoint = create_endpoint_with_one_address("127.0.0.1:1235".to_string()); let second_update = ResolverUpdate { endpoints: Ok(vec![second_endpoint.clone()]), ..Default::default() @@ -1177,7 +969,7 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) + .resolver_update(second_update.clone(), Some(new_parsed_config), &mut *tcc) .unwrap(); let pending_subchannels = verify_subchannel_creation_from_policy(&mut 
rx_events).await; let mut pending_subchannels = pending_subchannels.into_iter(); @@ -1193,11 +985,6 @@ mod test { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two", ) .await; - move_subchannel_to_state( - &mut *graceful_switch, - current_subchannels[0].clone(), - tcc.as_mut(), - ConnectivityState::Connecting, - ); + assert!(Arc::strong_count(¤t_subchannels[0]) == 1); } } diff --git a/grpc/src/client/load_balancing/mod.rs b/grpc/src/client/load_balancing/mod.rs index 5a0106ecb..a6a8426a0 100644 --- a/grpc/src/client/load_balancing/mod.rs +++ b/grpc/src/client/load_balancing/mod.rs @@ -75,7 +75,7 @@ pub struct LbPolicyOptions { /// Used to asynchronously request a call into the LbPolicy's work method if /// the LbPolicy needs to provide an update without waiting for an update /// from the channel first. -pub trait WorkScheduler: Send + Sync { +pub trait WorkScheduler: Send + Sync + Debug { // Schedules a call into the LbPolicy's work method. If there is already a // pending work call that has not yet started, this may not schedule another // call. @@ -123,7 +123,7 @@ impl ParsedJsonLbConfig { /// An LB policy factory that produces LbPolicy instances used by the channel /// to manage connections and pick connections for RPCs. -pub(crate) trait LbPolicyBuilder: Send + Sync { +pub(crate) trait LbPolicyBuilder: Send + Sync + Debug { /// Builds and returns a new LB policy instance. /// /// Note that build must not fail. Any optional configuration is delivered @@ -153,13 +153,13 @@ pub(crate) trait LbPolicyBuilder: Send + Sync { /// LB policies are responsible for creating connections (modeled as /// Subchannels) and producing Picker instances for picking connections for /// RPCs. -pub trait LbPolicy: Send { +pub trait LbPolicy: Send + Debug { /// Called by the channel when the name resolver produces a new set of /// resolved addresses or a new service config. 
fn resolver_update( &mut self, update: ResolverUpdate, - config: Option<&LbConfig>, + config: Option, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box>; @@ -246,7 +246,7 @@ impl Display for SubchannelState { /// /// If the ConnectivityState is TransientFailure, the Picker should return an /// Err with an error that describes why connections are failing. -pub trait Picker: Send + Sync { +pub trait Picker: Send + Sync + Debug { /// Picks a connection to use for the request. /// /// This function should not block. If the Picker needs to do blocking or @@ -256,6 +256,7 @@ pub trait Picker: Send + Sync { fn pick(&self, request: &Request) -> PickResult; } +#[derive(Debug)] pub enum PickResult { /// Indicates the Subchannel in the Pick should be used for the request. Pick(Pick), @@ -317,7 +318,7 @@ impl Display for PickResult { } } /// Data provided by the LB policy. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct LbState { pub connectivity_state: super::ConnectivityState, pub picker: Arc, @@ -347,6 +348,16 @@ pub struct Pick { pub on_complete: Option, } +impl Debug for Pick { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Pick") + .field("subchannel", &self.subchannel) + .field("metadata", &self.metadata) + .field("on_complete", &format_args!("{:p}", &self.on_complete)) + .finish() + } +} + pub trait DynHash { #[allow(clippy::redundant_allocation)] fn dyn_hash(&self, state: &mut Box<&mut dyn Hasher>); @@ -438,6 +449,7 @@ impl Display for dyn Subchannel { } } +#[derive(Debug)] struct WeakSubchannel(Weak); impl From> for WeakSubchannel { @@ -580,6 +592,7 @@ impl private::Sealed for T {} /// QueuingPicker always returns Queue. LB policies that are not actively /// Connecting should not use this picker. 
+#[derive(Debug)] pub struct QueuingPicker {} impl Picker for QueuingPicker { @@ -588,6 +601,7 @@ impl Picker for QueuingPicker { } } +#[derive(Debug)] pub struct Failing { pub error: String, } diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 6ac901709..e93feffd5 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -23,6 +23,7 @@ use super::{ pub static POLICY_NAME: &str = "pick_first"; +#[derive(Debug)] struct Builder {} impl LbPolicyBuilder for Builder { @@ -44,6 +45,7 @@ pub fn reg() { super::GLOBAL_LB_REGISTRY.add_builder(Builder {}) } +#[derive(Debug)] struct PickFirstPolicy { work_scheduler: Arc, subchannel: Option>, @@ -55,7 +57,7 @@ impl LbPolicy for PickFirstPolicy { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option<&LbConfig>, + config: Option, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { let mut addresses = update @@ -108,6 +110,7 @@ impl LbPolicy for PickFirstPolicy { } } +#[derive(Debug)] struct OneSubchannelPicker { sc: Arc, } @@ -116,8 +119,9 @@ impl Picker for OneSubchannelPicker { fn pick(&self, request: &Request) -> PickResult { PickResult::Pick(Pick { subchannel: self.sc.clone(), - on_complete: None, + // on_complete: None, metadata: MetadataMap::new(), + on_complete: None, }) } } diff --git a/grpc/src/client/load_balancing/test_utils.rs b/grpc/src/client/load_balancing/test_utils.rs index 4ecbedb51..161971008 100644 --- a/grpc/src/client/load_balancing/test_utils.rs +++ b/grpc/src/client/load_balancing/test_utils.rs @@ -149,6 +149,7 @@ impl ChannelController for TestChannelController { } } +#[derive(Debug)] pub(crate) struct TestWorkScheduler { pub(crate) tx_events: mpsc::UnboundedSender, } @@ -164,7 +165,7 @@ type ResolverUpdateFn = Arc< dyn Fn( &mut StubPolicyData, ResolverUpdate, - Option<&LbConfig>, + Option, &mut dyn ChannelController, ) -> Result<(), Box> + Send @@ -186,7 
+187,14 @@ pub struct StubPolicyFuncs { pub subchannel_update: Option, } +impl Debug for StubPolicyFuncs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "stub funcs") + } +} + /// Data holds test data that will be passed all to functions in PolicyFuncs +#[derive(Debug)] pub struct StubPolicyData { pub test_data: Option>, } @@ -199,6 +207,7 @@ impl StubPolicyData { } /// The stub `LbPolicy` that calls the provided functions. +#[derive(Debug)] pub struct StubPolicy { funcs: StubPolicyFuncs, data: StubPolicyData, @@ -208,7 +217,7 @@ impl LbPolicy for StubPolicy { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option<&LbConfig>, + config: Option, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { if let Some(f) = &mut self.funcs.resolver_update { @@ -238,6 +247,7 @@ impl LbPolicy for StubPolicy { } /// StubPolicyBuilder builds a StubLbPolicy. +#[derive(Debug)] pub struct StubPolicyBuilder { name: &'static str, funcs: StubPolicyFuncs, diff --git a/grpc/src/client/name_resolution/dns/test.rs b/grpc/src/client/name_resolution/dns/test.rs index 135e8ccfa..ee896a03c 100644 --- a/grpc/src/client/name_resolution/dns/test.rs +++ b/grpc/src/client/name_resolution/dns/test.rs @@ -252,7 +252,7 @@ pub async fn invalid_target() { .contains(&target.to_string())); } -#[derive(Clone)] +#[derive(Clone, Debug)] struct FakeDns { latency: Duration, lookup_result: Result, String>, @@ -270,6 +270,7 @@ impl rt::DnsResolver for FakeDns { } } +#[derive(Debug)] struct FakeRuntime { inner: TokioRuntime, dns: FakeDns, diff --git a/grpc/src/client/service_config.rs b/grpc/src/client/service_config.rs index da268ca33..d4920da0c 100644 --- a/grpc/src/client/service_config.rs +++ b/grpc/src/client/service_config.rs @@ -29,26 +29,21 @@ use std::{any::Any, error::Error, sync::Arc}; pub(crate) struct ServiceConfig; /// A convenience wrapper for an LB policy's configuration object. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct LbConfig { config: Arc, } impl LbConfig { /// Create a new LbConfig wrapper containing the provided config. - pub fn new(config: T) -> Self { + pub fn new(config: impl Any + Send + Sync) -> Self { LbConfig { config: Arc::new(config), } } /// Convenience method to extract the LB policy's configuration object. - pub fn convert_to( - &self, - ) -> Result, Box> { - match self.config.clone().downcast::() { - Ok(c) => Ok(c), - Err(e) => Err("failed to downcast to config type".into()), - } + pub fn convert_to(&self) -> Option> { + self.config.clone().downcast::().ok() } } diff --git a/grpc/src/rt/mod.rs b/grpc/src/rt/mod.rs index 81d22ff7c..4186fefff 100644 --- a/grpc/src/rt/mod.rs +++ b/grpc/src/rt/mod.rs @@ -23,6 +23,7 @@ */ use ::tokio::io::{AsyncRead, AsyncWrite}; +use std::fmt::Debug; use std::{future::Future, net::SocketAddr, pin::Pin, sync::Arc, time::Duration}; pub(crate) mod hyper_wrapper; @@ -39,7 +40,7 @@ pub(crate) type BoxedTaskHandle = Box; /// time-based operations such as sleeping. It provides a uniform interface /// that can be implemented for various async runtimes, enabling pluggable /// and testable infrastructure. -pub(super) trait Runtime: Send + Sync { +pub(super) trait Runtime: Send + Sync + Debug { /// Spawns the given asynchronous task to run in the background. fn spawn(&self, task: Pin + Send + 'static>>) -> BoxedTaskHandle; @@ -98,7 +99,7 @@ pub(crate) trait TcpStream: AsyncRead + AsyncWrite + Send + Unpin {} /// # Panics /// /// Panics if any of its functions are called. 
-#[derive(Default)] +#[derive(Default, Debug)] pub(crate) struct NoOpRuntime {} impl Runtime for NoOpRuntime { diff --git a/grpc/src/rt/tokio/mod.rs b/grpc/src/rt/tokio/mod.rs index 8caec4cf3..860f0617a 100644 --- a/grpc/src/rt/tokio/mod.rs +++ b/grpc/src/rt/tokio/mod.rs @@ -64,6 +64,7 @@ impl DnsResolver for TokioDefaultDnsResolver { } } +#[derive(Debug)] pub(crate) struct TokioRuntime {} impl TaskHandle for JoinHandle<()> { From 60419a5421815287d42a46b163434cea49cd5525 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 7 Nov 2025 08:06:45 -0800 Subject: [PATCH 03/12] no mut receiver; rename sharder params --- grpc/src/client/load_balancing/child_manager.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index ea38a0eae..39c8a0513 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -88,8 +88,8 @@ pub trait ResolverUpdateSharder: Send { /// when its resolver_update method is called. fn shard_update( &mut self, - resolver_update: ResolverUpdate, - update: Option, + update: ResolverUpdate, + config: Option, ) -> Result>, Box>; } @@ -111,7 +111,7 @@ where } /// Returns data for all current children. - pub fn children(&mut self) -> impl Iterator> { + pub fn children(&self) -> impl Iterator> { self.children.iter() } @@ -121,7 +121,7 @@ where /// Otherwise, if any child is CONNECTING, then report CONNECTING. /// Otherwise, if any child is IDLE, then report IDLE. /// Report TRANSIENT FAILURE if no conditions above apply. 
- pub fn aggregate_states(&mut self) -> ConnectivityState { + pub fn aggregate_states(&self) -> ConnectivityState { let mut is_connecting = false; let mut is_idle = false; From f933d877c87e97a6c18cb14a86a861f9f7efe6a6 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 7 Nov 2025 08:17:27 -0800 Subject: [PATCH 04/12] switch LbConfig back to a reference --- grpc/src/client/channel.rs | 2 +- .../client/load_balancing/child_manager.rs | 16 +++++----- .../client/load_balancing/graceful_switch.rs | 30 +++++++++---------- grpc/src/client/load_balancing/mod.rs | 2 +- grpc/src/client/load_balancing/pick_first.rs | 2 +- grpc/src/client/load_balancing/test_utils.rs | 4 +-- 6 files changed, 29 insertions(+), 27 deletions(-) diff --git a/grpc/src/client/channel.rs b/grpc/src/client/channel.rs index 8c16dbf5b..3a54b7f2d 100644 --- a/grpc/src/client/channel.rs +++ b/grpc/src/client/channel.rs @@ -530,7 +530,7 @@ impl GracefulSwitchBalancer { p.as_mut() .unwrap() - .resolver_update(update, config, controller) + .resolver_update(update, config.as_ref(), controller) // TODO: close old LB policy gracefully vs. drop? } diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index 39c8a0513..f13444338 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -89,7 +89,7 @@ pub trait ResolverUpdateSharder: Send { fn shard_update( &mut self, update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, ) -> Result>, Box>; } @@ -195,7 +195,7 @@ where fn resolver_update( &mut self, resolver_update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { // First determine if the incoming update is valid. 
@@ -306,9 +306,11 @@ where continue; }; let mut channel_controller = WrappedController::new(channel_controller); - let _ = child - .policy - .resolver_update(resolver_update, config, &mut channel_controller); + let _ = child.policy.resolver_update( + resolver_update, + config.as_ref(), + &mut channel_controller, + ); self.resolve_child_controller(channel_controller, child_idx); } Ok(()) @@ -444,7 +446,7 @@ mod test { fn shard_update( &mut self, resolver_update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, ) -> Result>, Box> { let mut sharded_endpoints = Vec::new(); @@ -459,7 +461,7 @@ mod test { service_config: resolver_update.service_config.clone(), resolution_note: resolver_update.resolution_note.clone(), }, - config.clone(), + config.cloned(), )), }; sharded_endpoints.push(child_update); diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs index 93d6147d8..7eec5d90a 100644 --- a/grpc/src/client/load_balancing/graceful_switch.rs +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -38,7 +38,7 @@ impl ResolverUpdateSharder<()> for UpdateSharder { fn shard_update( &mut self, resolver_update: ResolverUpdate, - config: Option, // The config is always produced based on the state stored in the sharder. + config: Option<&LbConfig>, // The config is always produced based on the state stored in the sharder. 
) -> Result>, Box> { if config.is_none() { @@ -110,7 +110,7 @@ impl LbPolicy for GracefulSwitchPolicy { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { let res = self @@ -504,7 +504,7 @@ mod test { ..Default::default() }; graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -556,7 +556,7 @@ mod test { }; graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); // Subchannel creation and ready @@ -586,7 +586,7 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(update.clone(), Some(new_parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); // Simulate subchannel creation and ready for pending @@ -631,7 +631,7 @@ mod test { ..Default::default() }; graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; let mut subchannels = subchannels.into_iter(); @@ -651,7 +651,7 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(update.clone(), Some(parsed_config2), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config2), &mut *tcc) .unwrap(); } @@ -692,7 +692,7 @@ mod test { // Switch to first one (current) graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -709,7 +709,7 @@ mod test { }) 
.unwrap(); graceful_switch - .resolver_update(second_update.clone(), Some(new_parsed_config), &mut *tcc) + .resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); let second_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -758,7 +758,7 @@ mod test { // Switch to first one (current) graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -788,7 +788,7 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(new_update.clone(), Some(new_parsed_config), &mut *tcc) + .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -848,7 +848,7 @@ mod test { // Switch to first one (current) graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -878,7 +878,7 @@ mod test { .unwrap(); graceful_switch - .resolver_update(new_update.clone(), Some(new_parsed_config), &mut *tcc) + .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -939,7 +939,7 @@ mod test { ..Default::default() }; graceful_switch - .resolver_update(update.clone(), Some(parsed_config), &mut *tcc) + .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; @@ -969,7 +969,7 @@ mod test { }) .unwrap(); graceful_switch - .resolver_update(second_update.clone(), Some(new_parsed_config), &mut *tcc) + 
.resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; let mut pending_subchannels = pending_subchannels.into_iter(); diff --git a/grpc/src/client/load_balancing/mod.rs b/grpc/src/client/load_balancing/mod.rs index a6a8426a0..797224553 100644 --- a/grpc/src/client/load_balancing/mod.rs +++ b/grpc/src/client/load_balancing/mod.rs @@ -159,7 +159,7 @@ pub trait LbPolicy: Send + Debug { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box>; diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index e93feffd5..3a8b13689 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -57,7 +57,7 @@ impl LbPolicy for PickFirstPolicy { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { let mut addresses = update diff --git a/grpc/src/client/load_balancing/test_utils.rs b/grpc/src/client/load_balancing/test_utils.rs index 161971008..30d075f3b 100644 --- a/grpc/src/client/load_balancing/test_utils.rs +++ b/grpc/src/client/load_balancing/test_utils.rs @@ -165,7 +165,7 @@ type ResolverUpdateFn = Arc< dyn Fn( &mut StubPolicyData, ResolverUpdate, - Option, + Option<&LbConfig>, &mut dyn ChannelController, ) -> Result<(), Box> + Send @@ -217,7 +217,7 @@ impl LbPolicy for StubPolicy { fn resolver_update( &mut self, update: ResolverUpdate, - config: Option, + config: Option<&LbConfig>, channel_controller: &mut dyn ChannelController, ) -> Result<(), Box> { if let Some(f) = &mut self.funcs.resolver_update { From 4230a4f4c3688e394c0f7561cbfb370179d2c702 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 10 Nov 2025 14:51:39 -0800 
Subject: [PATCH 05/12] review comments --- .../client/load_balancing/child_manager.rs | 8 +- .../client/load_balancing/graceful_switch.rs | 264 +++++++++--------- grpc/src/client/load_balancing/mod.rs | 44 +-- grpc/src/client/load_balancing/pick_first.rs | 4 +- grpc/src/client/load_balancing/registry.rs | 4 +- grpc/src/client/load_balancing/test_utils.rs | 10 +- grpc/src/client/mod.rs | 7 +- grpc/src/client/name_resolution/backoff.rs | 6 +- grpc/src/client/name_resolution/dns/mod.rs | 6 +- grpc/src/client/name_resolution/dns/test.rs | 16 +- grpc/src/client/name_resolution/mod.rs | 22 +- grpc/src/client/name_resolution/registry.rs | 4 +- grpc/src/client/transport/registry.rs | 2 +- grpc/src/client/transport/tonic/mod.rs | 2 +- grpc/src/client/transport/tonic/test.rs | 4 +- 15 files changed, 206 insertions(+), 197 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index f13444338..683ce5768 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -46,7 +46,7 @@ use super::{Subchannel, SubchannelState}; // An LbPolicy implementation that manages multiple children. #[derive(Debug)] -pub struct ChildManager> { +pub(crate) struct ChildManager> { subchannel_child_map: HashMap, children: Vec>, update_sharder: S, @@ -57,7 +57,7 @@ pub struct ChildManager> { #[non_exhaustive] #[derive(Debug)] -pub struct Child { +pub(crate) struct Child { pub identifier: T, pub policy: Box, pub builder: Arc, @@ -67,7 +67,7 @@ pub struct Child { } /// A collection of data sent to a child of the ChildManager. -pub struct ChildUpdate { +pub(crate) struct ChildUpdate { /// The identifier the ChildManager should use for this child. 
pub child_identifier: T, /// The builder the ChildManager should use to create this child if it does @@ -82,7 +82,7 @@ pub struct ChildUpdate { pub child_update: Option<(ResolverUpdate, Option)>, } -pub trait ResolverUpdateSharder: Send { +pub(crate) trait ResolverUpdateSharder: Send { /// Performs the operation of sharding an aggregate ResolverUpdate/LbConfig /// into one or more ChildUpdates. Called automatically by the ChildManager /// when its resolver_update method is called. diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs index 7eec5d90a..813a0e4c4 100644 --- a/grpc/src/client/load_balancing/graceful_switch.rs +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -1,5 +1,3 @@ -#![warn(unused_imports)] - use crate::client::load_balancing::child_manager::{ self, ChildManager, ChildUpdate, ResolverUpdateSharder, }; @@ -15,18 +13,9 @@ use std::collections::HashMap; use std::error::Error; use std::sync::Arc; -struct GracefulSwitchLbConfig { - child_builder: Arc, - child_config: Option, -} - -impl GracefulSwitchLbConfig { - fn new(child_builder: Arc, child_config: Option) -> Self { - GracefulSwitchLbConfig { - child_builder, - child_config, - } - } +enum GracefulSwitchLbConfig { + Update(Arc, Option), + Swap(Arc), } #[derive(Debug)] @@ -41,40 +30,50 @@ impl ResolverUpdateSharder<()> for UpdateSharder { config: Option<&LbConfig>, // The config is always produced based on the state stored in the sharder. ) -> Result>, Box> { - if config.is_none() { - // A config should always be passed. When one is not passed, we - // delete any children besides active. This is how the graceful - // switch balaner performs a swap of active policies. 
- return Ok(vec![ChildUpdate { - child_policy_builder: self.active_child_builder.clone().unwrap(), - child_identifier: (), - child_update: None, - }] - .into_iter()); + let config = config.expect("graceful switch should always get an LbConfig"); + + let gsb_config: Arc = config.convert_to().expect("invalid config"); + + let child_config; + let child_builder; + match &*gsb_config { + GracefulSwitchLbConfig::Swap(child_builder) => { + // When swapping we update the active_child_builder to the one + // we are swapping to and send an empty update that only + // includes that child, which removes the other child. + self.active_child_builder = Some(child_builder.clone()); + return Ok(vec![ChildUpdate { + child_policy_builder: child_builder.clone(), + child_identifier: (), + child_update: None, + }] + .into_iter()); + } + GracefulSwitchLbConfig::Update(cb, cc) => { + child_builder = cb; + child_config = cc; + } } - let gsb_config: Arc = - config.unwrap().convert_to().expect("invalid config"); - if self.active_child_builder.is_none() { // When there are no children yet, the current update immediately // becomes the active child. - self.active_child_builder = Some(gsb_config.child_builder.clone()); + self.active_child_builder = Some(child_builder.clone()); } let active_child_builder = self.active_child_builder.as_ref().unwrap(); - // Always include the incoming update let mut resp = Vec::with_capacity(2); + // Always include the incoming update. resp.push(ChildUpdate { - child_policy_builder: gsb_config.child_builder.clone(), + child_policy_builder: child_builder.clone(), child_identifier: (), - child_update: Some((resolver_update, gsb_config.child_config.clone())), + child_update: Some((resolver_update, child_config.clone())), }); - // Always include the active child so that the child manager will not - // delete it. 
- if gsb_config.child_builder.name() != active_child_builder.name() { + // Include the active child if it does not match the updated child so + // that the child manager will not delete it. + if child_builder.name() != active_child_builder.name() { resp.push(ChildUpdate { child_policy_builder: active_child_builder.clone(), child_identifier: (), @@ -102,7 +101,7 @@ impl UpdateSharder { /// active policy is not READY, graceful switch will promote the pending policy /// to to active and tear down the previously active policy. #[derive(Debug)] -pub struct GracefulSwitchPolicy { +pub(crate) struct GracefulSwitchPolicy { child_manager: ChildManager<(), UpdateSharder>, // Child ID is the name of the child policy. } @@ -178,15 +177,15 @@ impl GracefulSwitchPolicy { .into()); } let (policy_name, policy_config) = c.into_iter().next().unwrap(); - let Some(child) = GLOBAL_LB_REGISTRY.get_policy(policy_name.as_str()) else { + let Some(child_builder) = GLOBAL_LB_REGISTRY.get_policy(policy_name.as_str()) else { continue; }; let parsed_config = ParsedJsonLbConfig { value: policy_config, }; - let config = child.parse_config(&parsed_config)?; - let graceful_switch_lb_config = GracefulSwitchLbConfig::new(child, config); - return Ok(LbConfig::new(graceful_switch_lb_config)); + let child_config = child_builder.parse_config(&parsed_config)?; + let gsb_config = GracefulSwitchLbConfig::Update(child_builder, child_config); + return Ok(LbConfig::new(gsb_config)); } Err("no supported policies found in config".into()) } @@ -223,13 +222,15 @@ impl GracefulSwitchPolicy { } } if could_switch && pending_builder.is_some() { - let sharder = self.child_manager.update_sharder(); - sharder.active_child_builder = pending_builder; - self.child_manager.resolver_update( - ResolverUpdate::default(), - None, - channel_controller, - )?; + self.child_manager + .resolver_update( + ResolverUpdate::default(), + Some(&LbConfig::new(GracefulSwitchLbConfig::Swap( + pending_builder.unwrap(), + ))), + 
channel_controller, + ) + .expect("resolver_update with an empty update should not fail"); channel_controller.update_picker(pending_state.unwrap().clone()); } else if active_state_if_updated.is_some() { channel_controller.update_picker(active_state_if_updated.unwrap()); @@ -254,10 +255,14 @@ mod test { use crate::client::ConnectivityState; use crate::rt::default_runtime; use crate::service::Request; + use std::time::Duration; use std::{panic, sync::Arc}; - use tokio::sync::mpsc; + use tokio::select; + use tokio::sync::mpsc::{self, UnboundedReceiver}; use tonic::metadata::MetadataMap; + const DEFAULT_TEST_SHORT_TIMEOUT: Duration = Duration::from_millis(10); + struct TestSubchannelList { subchannels: Vec>, } @@ -299,7 +304,6 @@ mod test { }, mpsc::unbounded_channel().0, )), - // on_complete: None, metadata: MetadataMap::new(), on_complete: None, }) @@ -328,11 +332,10 @@ mod test { subchannel_list: scl, }; data.test_data = Some(Box::new(child_state)); - Ok(()) } else { data.test_data = None; - Ok(()) } + Ok(()) }, )), // Closure for subchannel_update. Verify that the subchannel that @@ -342,20 +345,18 @@ mod test { subchannel_update: Some(Arc::new( move |data: &mut StubPolicyData, updated_subchannel, state, channel_controller| { // Retrieve the specific TestState from the generic test_data field. - // This downcasts the `Any` trait object - if let Some(test_data) = data.test_data.as_mut() { - if let Some(test_state) = test_data.downcast_mut::() { - let scl = &mut test_state.subchannel_list; - assert!( - scl.contains(&updated_subchannel), - "subchannel_update received an update for a subchannel it does not own." - ); - channel_controller.update_picker(LbState { - connectivity_state: state.connectivity_state, - picker: Arc::new(TestPicker { name }), - }); - } - } + // This downcasts the `Any` trait object. 
+ let test_data = data.test_data.as_mut().unwrap(); + let test_state = test_data.downcast_mut::().unwrap(); + let scl = &mut test_state.subchannel_list; + assert!( + scl.contains(&updated_subchannel), + "subchannel_update received an update for a subchannel it does not own." + ); + channel_controller.update_picker(LbState { + connectivity_state: state.connectivity_state, + picker: Arc::new(TestPicker { name }), + }); }, )), } @@ -402,19 +403,17 @@ mod test { } } - // Verifies that the expected number of subchannels is created. Returns the - // subchannels created. + // Verifies that the next event on rx_events channel is NewSubchannel. + // Returns the subchannel created. async fn verify_subchannel_creation_from_policy( rx_events: &mut mpsc::UnboundedReceiver, - ) -> Vec> { - let mut subchannels = Vec::new(); + ) -> Arc { match rx_events.recv().await.unwrap() { TestEvent::NewSubchannel(sc) => { - subchannels.push(sc); + return sc; } other => panic!("unexpected event {:?}", other), }; - subchannels } // Verifies that the channel moves to READY state with a picker that returns the @@ -426,31 +425,23 @@ mod test { name: &str, ) { println!("verify ready picker"); - match rx_events.recv().await.unwrap() { - TestEvent::UpdatePicker(update) => { - let req = test_utils::new_request(); - println!("{:?}", update.connectivity_state); - - let pick = update.picker.pick(&req); - match &pick { - PickResult::Pick(pick) => { - let received_address = &pick.subchannel.address().address.to_string(); - // It's good practice to create the expected value once. - let expected_address = name.to_string(); - - // Check for inequality and panic with a detailed message if they don't match. - if received_address != &expected_address { - panic!( - "Picker address mismatch. 
Expected: '{}', but got: '{}'", - expected_address, received_address - ); - } - } - other => panic!("unexpected pick result: {:?}", pick), - } - } - other => panic!("unexpected event {:?}", other), - } + let event = rx_events.recv().await.unwrap(); + let TestEvent::UpdatePicker(update) = event else { + panic!("unexpected event {:?}", event); + }; + let req = test_utils::new_request(); + println!("{:?}", update.connectivity_state); + + let pick = update.picker.pick(&req); + let PickResult::Pick(pick) = pick else { + panic!("unexpected pick result: {:?}", pick); + }; + let received_address = &pick.subchannel.address().address.to_string(); + // It's good practice to create the expected value once. + let expected_address = name.to_string(); + + // Check for inequality and panic with a detailed message if they don't match. + assert_eq!(received_address, &expected_address); } fn move_subchannel_to_state( @@ -507,11 +498,10 @@ mod test { .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); - let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; - let mut subchannels = subchannels.into_iter(); + let subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - subchannels.next().unwrap(), + subchannel, tcc.as_mut(), ConnectivityState::Ready, ); @@ -560,11 +550,10 @@ mod test { .unwrap(); // Subchannel creation and ready - let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; - let mut subchannels = subchannels.into_iter(); + let subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - subchannels.next().unwrap(), + subchannel, tcc.as_mut(), ConnectivityState::Ready, ); @@ -590,24 +579,32 @@ mod test { .unwrap(); // Simulate subchannel creation and ready for pending - let subchannels_two = verify_subchannel_creation_from_policy(&mut rx_events).await; - let mut 
subchannels_two = subchannels_two.into_iter(); + let subchannel_two = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - subchannels_two.next().unwrap(), + subchannel_two, tcc.as_mut(), ConnectivityState::Ready, ); - // Assert picker is TestPickerTwo by checking subchannel address verify_correct_picker_from_policy( &mut rx_events, "stub-gracefulswitch_switching_to_resolver_update-two", ) .await; + assert_channel_empty(&mut rx_events).await; } - // Tests that the gracefulswitch policy should do nothing when a receives a + async fn assert_channel_empty(rx_events: &mut UnboundedReceiver) { + select! { + event = rx_events.recv() => { + panic!("Received unexpected event from policy: {event:?}"); + } + _ = tokio::time::sleep(DEFAULT_TEST_SHORT_TIMEOUT) => {} + }; + } + + // Tests that the gracefulswitch policy should do nothing when it receives a // new config of the same policy that it received before. #[tokio::test] async fn gracefulswitch_two_policies_same_type() { @@ -633,14 +630,19 @@ mod test { graceful_switch .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); - let subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; - let mut subchannels = subchannels.into_iter(); + let subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - subchannels.next().unwrap(), + subchannel, tcc.as_mut(), ConnectivityState::Ready, ); + verify_correct_picker_from_policy( + &mut rx_events, + "stub-gracefulswitch_two_policies_same_type-one", + ) + .await; + let service_config2 = serde_json::json!( [ { "stub-gracefulswitch_two_policies_same_type-one": serde_json::json!({}) } @@ -653,6 +655,9 @@ mod test { graceful_switch .resolver_update(update.clone(), Some(&parsed_config2), &mut *tcc) .unwrap(); + let subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; + 
assert_eq!(&*subchannel.address().address, "127.0.0.1:1234"); + assert_channel_empty(&mut rx_events).await; } // Tests that the gracefulswitch policy should replace the current child @@ -696,6 +701,8 @@ mod test { .unwrap(); let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + assert_channel_empty(&mut rx_events).await; + let new_service_config = serde_json::json!([ { "stub-gracefulswitch_current_not_ready_pending_update-two": serde_json::json!({ "shuffleAddressList": false }) }, ] @@ -712,11 +719,12 @@ mod test { .resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); - let second_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; - let mut second_subchannels = second_subchannels.into_iter(); + let second_subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; + assert_channel_empty(&mut rx_events).await; + move_subchannel_to_state( &mut *graceful_switch, - second_subchannels.next().unwrap(), + second_subchannel, tcc.as_mut(), ConnectivityState::Ready, ); @@ -725,6 +733,7 @@ mod test { "stub-gracefulswitch_current_not_ready_pending_update-two", ) .await; + assert_channel_empty(&mut rx_events).await; } // Tests that the gracefulswitch policy should replace the current child @@ -761,10 +770,10 @@ mod test { .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); - let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let current_subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - current_subchannels[0].clone(), + current_subchannel.clone(), tcc.as_mut(), ConnectivityState::Ready, ); @@ -791,11 +800,11 @@ mod test { .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); - let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let pending_subchannel = 
verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - pending_subchannels[0].clone(), + pending_subchannel, tcc.as_mut(), ConnectivityState::Connecting, ); @@ -806,7 +815,7 @@ mod test { .await; move_subchannel_to_state( &mut *graceful_switch, - current_subchannels[0].clone(), + current_subchannel, tcc.as_mut(), ConnectivityState::Connecting, ); @@ -851,10 +860,10 @@ mod test { .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); - let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let current_subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - current_subchannels[0].clone(), + current_subchannel, tcc.as_mut(), ConnectivityState::Ready, ); @@ -881,11 +890,11 @@ mod test { .resolver_update(new_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); - let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let pending_subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - pending_subchannels[0].clone(), + pending_subchannel.clone(), tcc.as_mut(), ConnectivityState::TransientFailure, ); @@ -896,7 +905,7 @@ mod test { .await; move_subchannel_to_state( &mut *graceful_switch, - pending_subchannels[0].clone(), + pending_subchannel, tcc.as_mut(), ConnectivityState::Connecting, ); @@ -942,10 +951,10 @@ mod test { .resolver_update(update.clone(), Some(&parsed_config), &mut *tcc) .unwrap(); - let current_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; + let current_subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, - current_subchannels[0].clone(), + current_subchannel.clone(), tcc.as_mut(), ConnectivityState::Ready, ); @@ -971,12 +980,11 @@ mod test { graceful_switch 
.resolver_update(second_update.clone(), Some(&new_parsed_config), &mut *tcc) .unwrap(); - let pending_subchannels = verify_subchannel_creation_from_policy(&mut rx_events).await; - let mut pending_subchannels = pending_subchannels.into_iter(); + let pending_subchannel = verify_subchannel_creation_from_policy(&mut rx_events).await; println!("moving subchannel to idle"); move_subchannel_to_state( &mut *graceful_switch, - pending_subchannels.next().unwrap(), + pending_subchannel, tcc.as_mut(), ConnectivityState::Idle, ); @@ -985,6 +993,6 @@ mod test { "stub-gracefulswitch_subchannels_removed_after_current_child_swapped-two", ) .await; - assert!(Arc::strong_count(¤t_subchannels[0]) == 1); + assert!(Arc::strong_count(¤t_subchannel) == 1); } } diff --git a/grpc/src/client/load_balancing/mod.rs b/grpc/src/client/load_balancing/mod.rs index 797224553..ec6e0105b 100644 --- a/grpc/src/client/load_balancing/mod.rs +++ b/grpc/src/client/load_balancing/mod.rs @@ -52,12 +52,12 @@ use crate::client::{ ConnectivityState, }; -pub mod child_manager; -pub mod graceful_switch; -pub mod pick_first; +pub(crate) mod child_manager; +pub(crate) mod graceful_switch; +pub(crate) mod pick_first; #[cfg(test)] -pub mod test_utils; +pub(crate) mod test_utils; pub(crate) mod registry; use super::{service_config::LbConfig, subchannel::SubchannelStateWatcher}; @@ -65,7 +65,7 @@ pub(crate) use registry::{LbPolicyRegistry, GLOBAL_LB_REGISTRY}; /// A collection of data configured on the channel that is constructing this /// LbPolicy. -pub struct LbPolicyOptions { +pub(crate) struct LbPolicyOptions { /// A hook into the channel's work scheduler that allows the LbPolicy to /// request the ability to perform operations on the ChannelController. pub work_scheduler: Arc, @@ -75,7 +75,7 @@ pub struct LbPolicyOptions { /// Used to asynchronously request a call into the LbPolicy's work method if /// the LbPolicy needs to provide an update without waiting for an update /// from the channel first. 
-pub trait WorkScheduler: Send + Sync + Debug { +pub(crate) trait WorkScheduler: Send + Sync + Debug { // Schedules a call into the LbPolicy's work method. If there is already a // pending work call that has not yet started, this may not schedule another // call. @@ -86,7 +86,7 @@ pub trait WorkScheduler: Send + Sync + Debug { /// JSON. Hides internal storage details and includes a method to deserialize /// the JSON into a concrete policy struct. #[derive(Debug)] -pub struct ParsedJsonLbConfig { +pub(crate) struct ParsedJsonLbConfig { value: serde_json::Value, } @@ -153,7 +153,7 @@ pub(crate) trait LbPolicyBuilder: Send + Sync + Debug { /// LB policies are responsible for creating connections (modeled as /// Subchannels) and producing Picker instances for picking connections for /// RPCs. -pub trait LbPolicy: Send + Debug { +pub(crate) trait LbPolicy: Send + Debug { /// Called by the channel when the name resolver produces a new set of /// resolved addresses or a new service config. fn resolver_update( @@ -182,7 +182,7 @@ pub trait LbPolicy: Send + Debug { } /// Controls channel behaviors. -pub trait ChannelController: Send + Sync { +pub(crate) trait ChannelController: Send + Sync { /// Creates a new subchannel in IDLE state. fn new_subchannel(&mut self, address: &Address) -> Arc; @@ -197,7 +197,7 @@ pub trait ChannelController: Send + Sync { /// Represents the current state of a Subchannel. #[derive(Debug, Clone)] -pub struct SubchannelState { +pub(crate) struct SubchannelState { /// The connectivity state of the subchannel. See SubChannel for a /// description of the various states and their valid transitions. pub connectivity_state: ConnectivityState, @@ -246,7 +246,7 @@ impl Display for SubchannelState { /// /// If the ConnectivityState is TransientFailure, the Picker should return an /// Err with an error that describes why connections are failing. 
-pub trait Picker: Send + Sync + Debug { +pub(crate) trait Picker: Send + Sync + Debug { /// Picks a connection to use for the request. /// /// This function should not block. If the Picker needs to do blocking or @@ -257,7 +257,7 @@ pub trait Picker: Send + Sync + Debug { } #[derive(Debug)] -pub enum PickResult { +pub(crate) enum PickResult { /// Indicates the Subchannel in the Pick should be used for the request. Pick(Pick), /// Indicates the LbPolicy is attempting to connect to a server to use for @@ -319,7 +319,7 @@ impl Display for PickResult { } /// Data provided by the LB policy. #[derive(Clone, Debug)] -pub struct LbState { +pub(crate) struct LbState { pub connectivity_state: super::ConnectivityState, pub picker: Arc, } @@ -336,10 +336,10 @@ impl LbState { } /// Type alias for the completion callback function. -pub type CompletionCallback = Box; +pub(crate) type CompletionCallback = Box; /// A collection of data used by the channel for routing a request. -pub struct Pick { +pub(crate) struct Pick { /// The Subchannel for the request. pub subchannel: Arc, // Metadata to be added to existing outgoing metadata. @@ -358,7 +358,7 @@ impl Debug for Pick { } } -pub trait DynHash { +pub(crate) trait DynHash { #[allow(clippy::redundant_allocation)] fn dyn_hash(&self, state: &mut Box<&mut dyn Hasher>); } @@ -369,7 +369,7 @@ impl DynHash for T { } } -pub trait DynPartialEq { +pub(crate) trait DynPartialEq { fn dyn_eq(&self, other: &&dyn Any) -> bool; } @@ -386,7 +386,7 @@ mod private { pub trait Sealed {} } -pub trait SealedSubchannel: private::Sealed {} +pub(crate) trait SealedSubchannel: private::Sealed {} /// A Subchannel represents a method of communicating with a server which may be /// connected or disconnected many times across its lifetime. @@ -405,7 +405,7 @@ pub trait SealedSubchannel: private::Sealed {} /// /// When a Subchannel is dropped, it is disconnected automatically, and no /// subsequent state updates will be provided for it to the LB policy. 
-pub trait Subchannel: SealedSubchannel + DynHash + DynPartialEq + Any + Send + Sync { +pub(crate) trait Subchannel: SealedSubchannel + DynHash + DynPartialEq + Any + Send + Sync { /// Returns the address of the Subchannel. /// TODO: Consider whether this should really be public. fn address(&self) -> Address; @@ -568,7 +568,7 @@ impl Display for ExternalSubchannel { } } -pub trait ForwardingSubchannel: DynHash + DynPartialEq + Any + Send + Sync { +pub(crate) trait ForwardingSubchannel: DynHash + DynPartialEq + Any + Send + Sync { fn delegate(&self) -> Arc; fn address(&self) -> Address { @@ -593,7 +593,7 @@ impl private::Sealed for T {} /// QueuingPicker always returns Queue. LB policies that are not actively /// Connecting should not use this picker. #[derive(Debug)] -pub struct QueuingPicker {} +pub(crate) struct QueuingPicker {} impl Picker for QueuingPicker { fn pick(&self, _request: &Request) -> PickResult { @@ -602,7 +602,7 @@ impl Picker for QueuingPicker { } #[derive(Debug)] -pub struct Failing { +pub(crate) struct Failing { pub error: String, } diff --git a/grpc/src/client/load_balancing/pick_first.rs b/grpc/src/client/load_balancing/pick_first.rs index 3a8b13689..4b1806582 100644 --- a/grpc/src/client/load_balancing/pick_first.rs +++ b/grpc/src/client/load_balancing/pick_first.rs @@ -21,7 +21,7 @@ use super::{ SubchannelState, WorkScheduler, }; -pub static POLICY_NAME: &str = "pick_first"; +pub(crate) static POLICY_NAME: &str = "pick_first"; #[derive(Debug)] struct Builder {} @@ -41,7 +41,7 @@ impl LbPolicyBuilder for Builder { } } -pub fn reg() { +pub(crate) fn reg() { super::GLOBAL_LB_REGISTRY.add_builder(Builder {}) } diff --git a/grpc/src/client/load_balancing/registry.rs b/grpc/src/client/load_balancing/registry.rs index d3dbeffb3..fb3c36ac3 100644 --- a/grpc/src/client/load_balancing/registry.rs +++ b/grpc/src/client/load_balancing/registry.rs @@ -7,7 +7,7 @@ use super::LbPolicyBuilder; /// A registry to store and retrieve LB policies. 
LB policies are indexed by /// their names. -pub struct LbPolicyRegistry { +pub(crate) struct LbPolicyRegistry { m: Arc>>>, } @@ -37,4 +37,4 @@ impl Default for LbPolicyRegistry { /// The registry used if a local registry is not provided to a channel or if it /// does not exist in the local registry. -pub static GLOBAL_LB_REGISTRY: LazyLock = LazyLock::new(LbPolicyRegistry::new); +pub(crate) static GLOBAL_LB_REGISTRY: LazyLock = LazyLock::new(LbPolicyRegistry::new); diff --git a/grpc/src/client/load_balancing/test_utils.rs b/grpc/src/client/load_balancing/test_utils.rs index 30d075f3b..3c2c8fa88 100644 --- a/grpc/src/client/load_balancing/test_utils.rs +++ b/grpc/src/client/load_balancing/test_utils.rs @@ -182,7 +182,7 @@ type SubchannelUpdateFn = Arc< /// This struct holds `LbPolicy` trait stub functions that tests are expected to /// implement. #[derive(Clone)] -pub struct StubPolicyFuncs { +pub(crate) struct StubPolicyFuncs { pub resolver_update: Option, pub subchannel_update: Option, } @@ -195,7 +195,7 @@ impl Debug for StubPolicyFuncs { /// Data holds test data that will be passed all to functions in PolicyFuncs #[derive(Debug)] -pub struct StubPolicyData { +pub(crate) struct StubPolicyData { pub test_data: Option>, } @@ -208,7 +208,7 @@ impl StubPolicyData { /// The stub `LbPolicy` that calls the provided functions. #[derive(Debug)] -pub struct StubPolicy { +pub(crate) struct StubPolicy { funcs: StubPolicyFuncs, data: StubPolicyData, } @@ -248,7 +248,7 @@ impl LbPolicy for StubPolicy { /// StubPolicyBuilder builds a StubLbPolicy. 
#[derive(Debug)] -pub struct StubPolicyBuilder { +pub(crate) struct StubPolicyBuilder { name: &'static str, funcs: StubPolicyFuncs, } @@ -286,6 +286,6 @@ impl LbPolicyBuilder for StubPolicyBuilder { } } -pub fn reg_stub_policy(name: &'static str, funcs: StubPolicyFuncs) { +pub(crate) fn reg_stub_policy(name: &'static str, funcs: StubPolicyFuncs) { super::GLOBAL_LB_REGISTRY.add_builder(StubPolicyBuilder { name, funcs }) } diff --git a/grpc/src/client/mod.rs b/grpc/src/client/mod.rs index e896412ae..c4f886201 100644 --- a/grpc/src/client/mod.rs +++ b/grpc/src/client/mod.rs @@ -25,14 +25,15 @@ use std::fmt::Display; pub mod channel; -pub(crate) mod load_balancing; -pub(crate) mod name_resolution; pub mod service_config; mod subchannel; -pub(crate) mod transport; pub use channel::Channel; pub use channel::ChannelOptions; +pub(crate) mod load_balancing; +pub(crate) mod name_resolution; +pub(crate) mod transport; + /// A representation of the current state of a gRPC channel, also used for the /// state of subchannels (individual connections within the channel). /// diff --git a/grpc/src/client/name_resolution/backoff.rs b/grpc/src/client/name_resolution/backoff.rs index 8725706e2..154fad47f 100644 --- a/grpc/src/client/name_resolution/backoff.rs +++ b/grpc/src/client/name_resolution/backoff.rs @@ -26,7 +26,7 @@ use rand::Rng; use std::time::Duration; #[derive(Clone)] -pub struct BackoffConfig { +pub(crate) struct BackoffConfig { /// The amount of time to backoff after the first failure. pub base_delay: Duration, @@ -41,7 +41,7 @@ pub struct BackoffConfig { pub max_delay: Duration, } -pub struct ExponentialBackoff { +pub(crate) struct ExponentialBackoff { config: BackoffConfig, /// The delay for the next retry, without the random jitter. Store as f64 @@ -54,7 +54,7 @@ pub struct ExponentialBackoff { /// /// This should be useful for callers who want to configure backoff with /// non-default values only for a subset of the options. 
-pub const DEFAULT_EXPONENTIAL_CONFIG: BackoffConfig = BackoffConfig { +pub(crate) const DEFAULT_EXPONENTIAL_CONFIG: BackoffConfig = BackoffConfig { base_delay: Duration::from_secs(1), multiplier: 1.6, jitter: 0.2, diff --git a/grpc/src/client/name_resolution/dns/mod.rs b/grpc/src/client/name_resolution/dns/mod.rs index 6475d62c3..dddb62492 100644 --- a/grpc/src/client/name_resolution/dns/mod.rs +++ b/grpc/src/client/name_resolution/dns/mod.rs @@ -83,7 +83,7 @@ fn get_resolving_timeout() -> Duration { /// premature timeouts during resolution, while setting it too high may lead to /// unnecessary delays in service discovery. Choose a value appropriate for your /// specific needs and network environment. -pub fn set_resolving_timeout(duration: Duration) { +pub(crate) fn set_resolving_timeout(duration: Duration) { RESOLVING_TIMEOUT_MS.store(duration.as_millis() as u64, Ordering::Relaxed); } @@ -96,11 +96,11 @@ fn get_min_resolution_interval() -> Duration { /// /// It must be called only at application startup, before any gRPC calls are /// made. 
-pub fn set_min_resolution_interval(duration: Duration) { +pub(crate) fn set_min_resolution_interval(duration: Duration) { MIN_RESOLUTION_INTERVAL_MS.store(duration.as_millis() as u64, Ordering::Relaxed); } -pub fn reg() { +pub(crate) fn reg() { global_registry().add_builder(Box::new(Builder {})); } diff --git a/grpc/src/client/name_resolution/dns/test.rs b/grpc/src/client/name_resolution/dns/test.rs index ee896a03c..f787a8df0 100644 --- a/grpc/src/client/name_resolution/dns/test.rs +++ b/grpc/src/client/name_resolution/dns/test.rs @@ -48,7 +48,7 @@ use super::{DnsOptions, ParseResult}; const DEFAULT_TEST_SHORT_TIMEOUT: Duration = Duration::from_millis(10); #[test] -pub fn target_parsing() { +pub(crate) fn target_parsing() { struct TestCase { input: &'static str, want_result: Result, @@ -191,7 +191,7 @@ impl ChannelController for FakeChannelController { } #[tokio::test] -pub async fn dns_basic() { +pub(crate) async fn dns_basic() { reg(); let builder = global_registry().get("dns").unwrap(); let target = &"dns:///localhost:1234".parse().unwrap(); @@ -220,7 +220,7 @@ pub async fn dns_basic() { } #[tokio::test] -pub async fn invalid_target() { +pub(crate) async fn invalid_target() { reg(); let builder = global_registry().get("dns").unwrap(); let target = &"dns:///:1234".parse().unwrap(); @@ -302,7 +302,7 @@ impl rt::Runtime for FakeRuntime { } #[tokio::test] -pub async fn dns_lookup_error() { +pub(crate) async fn dns_lookup_error() { reg(); let builder = global_registry().get("dns").unwrap(); let target = &"dns:///grpc.io:1234".parse().unwrap(); @@ -338,7 +338,7 @@ pub async fn dns_lookup_error() { } #[tokio::test] -pub async fn dns_lookup_timeout() { +pub(crate) async fn dns_lookup_timeout() { let (work_tx, mut work_rx) = mpsc::unbounded_channel(); let work_scheduler = Arc::new(FakeWorkScheduler { work_tx: work_tx.clone(), @@ -380,7 +380,7 @@ pub async fn dns_lookup_timeout() { } #[tokio::test] -pub async fn rate_limit() { +pub(crate) async fn rate_limit() { let 
(work_tx, mut work_rx) = mpsc::unbounded_channel(); let work_scheduler = Arc::new(FakeWorkScheduler { work_tx: work_tx.clone(), @@ -430,7 +430,7 @@ pub async fn rate_limit() { } #[tokio::test] -pub async fn re_resolution_after_success() { +pub(crate) async fn re_resolution_after_success() { let (work_tx, mut work_rx) = mpsc::unbounded_channel(); let work_scheduler = Arc::new(FakeWorkScheduler { work_tx: work_tx.clone(), @@ -474,7 +474,7 @@ pub async fn re_resolution_after_success() { } #[tokio::test] -pub async fn backoff_on_error() { +pub(crate) async fn backoff_on_error() { let (work_tx, mut work_rx) = mpsc::unbounded_channel(); let work_scheduler = Arc::new(FakeWorkScheduler { work_tx: work_tx.clone(), diff --git a/grpc/src/client/name_resolution/mod.rs b/grpc/src/client/name_resolution/mod.rs index bc2c9ef42..137d955cd 100644 --- a/grpc/src/client/name_resolution/mod.rs +++ b/grpc/src/client/name_resolution/mod.rs @@ -41,7 +41,7 @@ use std::{ mod backoff; mod dns; mod registry; -pub use registry::global_registry; +pub(crate) use registry::global_registry; use url::Url; /// Target represents a target for gRPC, as specified in: @@ -55,7 +55,7 @@ use url::Url; /// (i.e. no corresponding resolver available to resolve the endpoint), we will /// apply the default scheme, and will attempt to reparse it. #[derive(Debug, Clone)] -pub struct Target { +pub(crate) struct Target { url: Url, } @@ -133,7 +133,7 @@ impl Display for Target { /// A name resolver factory that produces Resolver instances used by the channel /// to resolve network addresses for the target URI. -pub trait ResolverBuilder: Send + Sync { +pub(crate) trait ResolverBuilder: Send + Sync { /// Builds a name resolver instance. /// /// Note that build must not fail. Instead, an erroring Resolver may be @@ -164,7 +164,7 @@ pub trait ResolverBuilder: Send + Sync { /// A collection of data configured on the channel that is constructing this /// name resolver. 
#[non_exhaustive] -pub struct ResolverOptions { +pub(crate) struct ResolverOptions { /// The authority that will be used for the channel by default. This refers /// to the `:authority` value sent in HTTP/2 requests — the dataplane /// authority — and not the authority portion of the target URI, which is @@ -184,7 +184,7 @@ pub struct ResolverOptions { } /// Used to asynchronously request a call into the Resolver's work method. -pub trait WorkScheduler: Send + Sync { +pub(crate) trait WorkScheduler: Send + Sync { // Schedules a call into the Resolver's work method. If there is already a // pending work call that has not yet started, this may not schedule another // call. @@ -196,7 +196,7 @@ pub trait WorkScheduler: Send + Sync { // This trait may not need the Sync sub-trait if the channel implementation can // ensure that the resolver is accessed serially. The sub-trait can be removed // in that case. -pub trait Resolver: Send + Sync { +pub(crate) trait Resolver: Send + Sync { /// Asks the resolver to obtain an updated resolver result, if applicable. /// /// This is useful for polling resolvers to decide when to re-resolve. @@ -215,7 +215,7 @@ pub trait Resolver: Send + Sync { /// The `ChannelController` trait provides the resolver with functionality /// to interact with the channel. -pub trait ChannelController: Send + Sync { +pub(crate) trait ChannelController: Send + Sync { /// Notifies the channel about the current state of the name resolver. If /// an error value is returned, the name resolver should attempt to /// re-resolve, if possible. The resolver is responsible for applying an @@ -232,7 +232,7 @@ pub trait ChannelController: Send + Sync { #[non_exhaustive] /// ResolverUpdate contains the current Resolver state relevant to the /// channel. -pub struct ResolverUpdate { +pub(crate) struct ResolverUpdate { /// Attributes contains arbitrary data about the resolver intended for /// consumption by the load balancing policy. 
pub attributes: Attributes, @@ -271,7 +271,7 @@ impl Default for ResolverUpdate { /// which the server can be reached, e.g. via IPv4 and IPv6 addresses. #[derive(Debug, Default, Clone, PartialEq, Eq)] #[non_exhaustive] -pub struct Endpoint { +pub(crate) struct Endpoint { /// Addresses contains a list of addresses used to access this endpoint. pub addresses: Vec
, @@ -289,7 +289,7 @@ impl Hash for Endpoint { /// An Address is an identifier that indicates how to connect to a server. #[non_exhaustive] #[derive(Debug, Clone, Default, Ord, PartialOrd)] -pub struct Address { +pub(crate) struct Address { /// The network type is used to identify what kind of transport to create /// when connecting to this address. Typically TCP_IP_ADDRESS_TYPE. pub network_type: &'static str, @@ -327,7 +327,7 @@ impl Display for Address { /// Indicates the address is an IPv4 or IPv6 address that should be connected to /// via TCP/IP. -pub static TCP_IP_NETWORK_TYPE: &str = "tcp"; +pub(crate) static TCP_IP_NETWORK_TYPE: &str = "tcp"; // A resolver that returns the same result every time its work method is called. // It can be used to return an error to the channel when a resolver fails to diff --git a/grpc/src/client/name_resolution/registry.rs b/grpc/src/client/name_resolution/registry.rs index eb216538c..ca2c2035a 100644 --- a/grpc/src/client/name_resolution/registry.rs +++ b/grpc/src/client/name_resolution/registry.rs @@ -34,7 +34,7 @@ static GLOBAL_RESOLVER_REGISTRY: OnceLock = OnceLock::new(); /// A registry to store and retrieve name resolvers. Resolvers are indexed by /// the URI scheme they are intended to handle. #[derive(Default)] -pub struct ResolverRegistry { +pub(crate) struct ResolverRegistry { inner: Arc>>>, } @@ -90,6 +90,6 @@ impl ResolverRegistry { } /// Global registry for resolver builders. 
-pub fn global_registry() -> &'static ResolverRegistry { +pub(crate) fn global_registry() -> &'static ResolverRegistry { GLOBAL_RESOLVER_REGISTRY.get_or_init(ResolverRegistry::new) } diff --git a/grpc/src/client/transport/registry.rs b/grpc/src/client/transport/registry.rs index 0b4f614ef..9fe246e92 100644 --- a/grpc/src/client/transport/registry.rs +++ b/grpc/src/client/transport/registry.rs @@ -48,5 +48,5 @@ impl TransportRegistry { /// The registry used if a local registry is not provided to a channel or if it /// does not exist in the local registry. -pub static GLOBAL_TRANSPORT_REGISTRY: LazyLock = +pub(crate) static GLOBAL_TRANSPORT_REGISTRY: LazyLock = LazyLock::new(TransportRegistry::new); diff --git a/grpc/src/client/transport/tonic/mod.rs b/grpc/src/client/transport/tonic/mod.rs index 11fcd24e1..57f695bd6 100644 --- a/grpc/src/client/transport/tonic/mod.rs +++ b/grpc/src/client/transport/tonic/mod.rs @@ -264,7 +264,7 @@ impl GrpcService for TonicService { /// A future that resolves to an HTTP response. /// /// This is returned by the `Service::call` on [`Channel`]. -pub struct ResponseFuture { +pub(crate) struct ResponseFuture { inner: BufferResponseFuture, BoxError>>>, } diff --git a/grpc/src/client/transport/tonic/test.rs b/grpc/src/client/transport/tonic/test.rs index 678280e34..2ccdc75da 100644 --- a/grpc/src/client/transport/tonic/test.rs +++ b/grpc/src/client/transport/tonic/test.rs @@ -21,7 +21,7 @@ const DEFAULT_TEST_SHORT_DURATION: Duration = Duration::from_millis(10); // Tests the tonic transport by creating a bi-di stream with a tonic server. 
#[tokio::test] -pub async fn tonic_transport_rpc() { +pub(crate) async fn tonic_transport_rpc() { super::reg(); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); // get the assigned address @@ -110,7 +110,7 @@ pub async fn tonic_transport_rpc() { } #[derive(Debug)] -pub struct EchoService {} +pub(crate) struct EchoService {} #[async_trait] impl Echo for EchoService { From 4d4dbb483a1a47de9bfb79013cf2561de0525529 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 10 Nov 2025 14:52:55 -0800 Subject: [PATCH 06/12] clippy --- .../client/load_balancing/graceful_switch.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs index 813a0e4c4..fe991a8f2 100644 --- a/grpc/src/client/load_balancing/graceful_switch.rs +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -115,7 +115,8 @@ impl LbPolicy for GracefulSwitchPolicy { let res = self .child_manager .resolver_update(update, config, channel_controller)?; - self.maybe_swap(channel_controller) + self.maybe_swap(channel_controller); + Ok(()) } fn subchannel_update( @@ -126,17 +127,17 @@ impl LbPolicy for GracefulSwitchPolicy { ) { self.child_manager .subchannel_update(subchannel, state, channel_controller); - let _ = self.maybe_swap(channel_controller); + self.maybe_swap(channel_controller); } fn work(&mut self, channel_controller: &mut dyn ChannelController) { self.child_manager.work(channel_controller); - let _ = self.maybe_swap(channel_controller); + self.maybe_swap(channel_controller); } fn exit_idle(&mut self, channel_controller: &mut dyn ChannelController) { self.child_manager.exit_idle(channel_controller); - let _ = self.maybe_swap(channel_controller); + self.maybe_swap(channel_controller); } } @@ -190,12 +191,9 @@ impl GracefulSwitchPolicy { Err("no supported policies found in config".into()) } - fn maybe_swap( - &mut 
self, - channel_controller: &mut dyn ChannelController, - ) -> Result<(), Box> { + fn maybe_swap(&mut self, channel_controller: &mut dyn ChannelController) { if !self.child_manager.child_updated() { - return Ok(()); + return; } let active_name = self @@ -235,7 +233,6 @@ impl GracefulSwitchPolicy { } else if active_state_if_updated.is_some() { channel_controller.update_picker(active_state_if_updated.unwrap()); } - return Ok(()); } } From 96da89301628d5b73c54ce6aa4353bde44d638fb Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 10 Nov 2025 16:08:50 -0800 Subject: [PATCH 07/12] use Child temporarily instead of anonymous tuple that keeps growing --- .../client/load_balancing/child_manager.rs | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index ec5543668..8bea3df82 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -226,10 +226,19 @@ where } // Build a map of the old children from their IDs for efficient lookups. + // This leverages a Child to hold all the entries where the + // identifier becomes the index within the old self.children vector. let old_children = old_children.into_iter().enumerate().map(|(old_idx, e)| { ( (e.builder.name(), e.identifier), - (e.policy, e.state, old_idx, e.work_scheduler, e.updated), + Child { + identifier: old_idx, + policy: e.policy, + builder: e.builder, + state: e.state, + updated: e.updated, + work_scheduler: e.work_scheduler, + }, ) }); let mut old_children: HashMap<(&'static str, T), _> = old_children.collect(); @@ -245,26 +254,25 @@ where // subchannel map. 
for (new_idx, (identifier, builder)) in ids_builders.into_iter().enumerate() { let k = (builder.name(), identifier); - if let Some((policy, state, old_idx, work_scheduler, updated)) = old_children.remove(&k) - { + if let Some(old_child) = old_children.remove(&k) { for subchannel in old_child_subchannels_map - .remove(&old_idx) + .remove(&old_child.identifier) .into_iter() .flatten() { self.subchannel_child_map.insert(subchannel, new_idx); } - if old_pending_work.contains(&old_idx) { + if old_pending_work.contains(&old_child.identifier) { pending_work.insert(new_idx); } - *work_scheduler.idx.lock().unwrap() = Some(new_idx); + *old_child.work_scheduler.idx.lock().unwrap() = Some(new_idx); self.children.push(Child { builder, identifier: k.1, - state, - policy, - work_scheduler, - updated, + state: old_child.state, + policy: old_child.policy, + work_scheduler: old_child.work_scheduler, + updated: old_child.updated, }); } else { let work_scheduler = Arc::new(ChildWorkScheduler { @@ -287,8 +295,8 @@ where } // Invalidate all deleted children's work_schedulers. - for (_, (_, _, _, work_scheduler, _)) in old_children { - *work_scheduler.idx.lock().unwrap() = None; + for (_, old_child) in old_children { + *old_child.work_scheduler.idx.lock().unwrap() = None; } // Release the pending_work mutex before calling into the children to From fc16fe5b4cdb6c74e28631db57b7dd7e9bc48be3 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 10 Nov 2025 16:30:22 -0800 Subject: [PATCH 08/12] use a vec when reversing instead of a map --- .../client/load_balancing/child_manager.rs | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index 8bea3df82..daac36425 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -215,14 +215,12 @@ where // Replace the subchannel map with an empty map. 
let old_subchannel_child_map = mem::take(&mut self.subchannel_child_map); - // Reverse the old subchannel map. - let mut old_child_subchannels_map: HashMap> = HashMap::new(); - - for (subchannel, child_idx) in old_subchannel_child_map { - old_child_subchannels_map - .entry(child_idx) - .or_default() - .push(subchannel); + // Reverse the old subchannel map into a vector indexed by the old child ID. + let mut old_child_subchannels: Vec> = Vec::new(); + old_child_subchannels.resize_with(old_children.len(), Vec::new); + + for (subchannel, old_idx) in old_subchannel_child_map { + old_child_subchannels[old_idx].push(subchannel); } // Build a map of the old children from their IDs for efficient lookups. @@ -255,14 +253,11 @@ where for (new_idx, (identifier, builder)) in ids_builders.into_iter().enumerate() { let k = (builder.name(), identifier); if let Some(old_child) = old_children.remove(&k) { - for subchannel in old_child_subchannels_map - .remove(&old_child.identifier) - .into_iter() - .flatten() - { + let old_idx = old_child.identifier; + for subchannel in mem::take(&mut old_child_subchannels[old_idx]) { self.subchannel_child_map.insert(subchannel, new_idx); } - if old_pending_work.contains(&old_child.identifier) { + if old_pending_work.contains(&old_idx) { pending_work.insert(new_idx); } *old_child.work_scheduler.idx.lock().unwrap() = Some(new_idx); From 4a376247d0478db2862dc701f2e3483b741c5355 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 11 Nov 2025 08:10:07 -0800 Subject: [PATCH 09/12] formatting --- grpc/src/client/load_balancing/mod.rs | 4 +++- grpc/src/client/load_balancing/registry.rs | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/grpc/src/client/load_balancing/mod.rs b/grpc/src/client/load_balancing/mod.rs index bbd0661ca..a15e030a1 100644 --- a/grpc/src/client/load_balancing/mod.rs +++ b/grpc/src/client/load_balancing/mod.rs @@ -399,7 +399,9 @@ pub(crate) trait SealedSubchannel: private::Sealed {} /// /// When a Subchannel is 
dropped, it is disconnected automatically, and no /// subsequent state updates will be provided for it to the LB policy. -pub(crate) trait Subchannel: SealedSubchannel + DynHash + DynPartialEq + Any + Send + Sync { +pub(crate) trait Subchannel: + SealedSubchannel + DynHash + DynPartialEq + Any + Send + Sync +{ /// Returns the address of the Subchannel. /// TODO: Consider whether this should really be public. fn address(&self) -> Address; diff --git a/grpc/src/client/load_balancing/registry.rs b/grpc/src/client/load_balancing/registry.rs index fb3c36ac3..45c757c89 100644 --- a/grpc/src/client/load_balancing/registry.rs +++ b/grpc/src/client/load_balancing/registry.rs @@ -37,4 +37,5 @@ impl Default for LbPolicyRegistry { /// The registry used if a local registry is not provided to a channel or if it /// does not exist in the local registry. -pub(crate) static GLOBAL_LB_REGISTRY: LazyLock = LazyLock::new(LbPolicyRegistry::new); +pub(crate) static GLOBAL_LB_REGISTRY: LazyLock = + LazyLock::new(LbPolicyRegistry::new); From 4fd93e621f642dbd94155307e87aa13f795e8afd Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 12 Nov 2025 13:40:39 -0800 Subject: [PATCH 10/12] refactor --- .../client/load_balancing/child_manager.rs | 6 +- .../client/load_balancing/graceful_switch.rs | 69 +++++++++++-------- grpc/src/client/mod.rs | 2 +- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index daac36425..2b183da81 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -59,10 +59,10 @@ pub(crate) struct ChildManager> { #[derive(Debug)] pub(crate) struct Child { pub identifier: T, - pub policy: Box, pub builder: Arc, pub state: LbState, - pub updated: bool, // Set when the child updates its picker; cleared in child_states is called. 
+ pub updated: bool, // Set when the child updates its picker; cleared when accessed. + policy: Box, work_scheduler: Arc, } @@ -111,7 +111,7 @@ where } /// Returns data for all current children. - pub fn children(&self) -> impl Iterator> { + pub fn children(&mut self) -> impl Iterator> { self.children.iter() } diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs index fe991a8f2..dfcd747cc 100644 --- a/grpc/src/client/load_balancing/graceful_switch.rs +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -1,9 +1,9 @@ use crate::client::load_balancing::child_manager::{ - self, ChildManager, ChildUpdate, ResolverUpdateSharder, + self, Child, ChildManager, ChildUpdate, ResolverUpdateSharder, }; use crate::client::load_balancing::{ - ChannelController, LbConfig, LbPolicy, LbPolicyBuilder, ParsedJsonLbConfig, Subchannel, - SubchannelState, GLOBAL_LB_REGISTRY, + ChannelController, LbConfig, LbPolicy, LbPolicyBuilder, LbState, ParsedJsonLbConfig, + Subchannel, SubchannelState, GLOBAL_LB_REGISTRY, }; use crate::client::name_resolution::ResolverUpdate; use crate::client::ConnectivityState; @@ -191,9 +191,15 @@ impl GracefulSwitchPolicy { Err("no supported policies found in config".into()) } - fn maybe_swap(&mut self, channel_controller: &mut dyn ChannelController) { + fn update_picker(&mut self, channel_controller: &mut dyn ChannelController) { + if let Some(update) = self.maybe_swap(channel_controller) { + channel_controller.update_picker(update); + } + } + + fn maybe_swap(&mut self, channel_controller: &mut dyn ChannelController) -> Option { if !self.child_manager.child_updated() { - return; + return None; } let active_name = self @@ -203,36 +209,41 @@ impl GracefulSwitchPolicy { .as_ref() .unwrap() .name(); - let mut could_switch = false; - let mut active_state_if_updated = None; - let mut pending_builder = None; - let mut pending_state = None; + + let mut active_child = None; + let mut pending_child = 
None; for child in self.child_manager.children() { if child.builder.name() == active_name { - could_switch |= child.state.connectivity_state != ConnectivityState::Ready; - if child.updated { - active_state_if_updated = Some(child.state.clone()); - } + active_child = Some(child); } else { - pending_builder = Some(child.builder.clone()); - pending_state = Some(child.state.clone()); - could_switch |= child.state.connectivity_state != ConnectivityState::Connecting; + pending_child = Some(child); } } - if could_switch && pending_builder.is_some() { - self.child_manager - .resolver_update( - ResolverUpdate::default(), - Some(&LbConfig::new(GracefulSwitchLbConfig::Swap( - pending_builder.unwrap(), - ))), - channel_controller, - ) - .expect("resolver_update with an empty update should not fail"); - channel_controller.update_picker(pending_state.unwrap().clone()); - } else if active_state_if_updated.is_some() { - channel_controller.update_picker(active_state_if_updated.unwrap()); + let active_child = active_child.expect("There should always be an active child policy"); + let Some(pending_child) = pending_child else { + return updated_state(active_child); + }; + + if active_child.state.connectivity_state == ConnectivityState::Ready + && pending_child.state.connectivity_state == ConnectivityState::Connecting + { + return updated_state(active_child); } + + let config = &LbConfig::new(GracefulSwitchLbConfig::Swap(pending_child.builder.clone())); + let state = pending_child.state.clone(); + self.child_manager + .resolver_update(ResolverUpdate::default(), Some(config), channel_controller) + .expect("resolver_update with an empty update should not fail"); + return Some(state); + } +} + +fn updated_state(child: &Child<()>) -> Option { + if child.updated { + Some(child.state.clone()) + } else { + None } } diff --git a/grpc/src/client/mod.rs b/grpc/src/client/mod.rs index c4f886201..b141e7f2d 100644 --- a/grpc/src/client/mod.rs +++ b/grpc/src/client/mod.rs @@ -45,7 +45,7 @@ 
pub(crate) mod transport; /// /// Channels may re-enter the Idle state if they are unused for longer than /// their configured idleness timeout. -#[derive(Copy, Clone, PartialEq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ConnectivityState { Idle, Connecting, From 4ca06632fbe35489583860b56f5cbc28a9bab78b Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 12 Nov 2025 14:58:55 -0800 Subject: [PATCH 11/12] remove Child.updated since it wasn't being set correctly. GSB remembers last update state instead --- .../client/load_balancing/child_manager.rs | 5 -- .../client/load_balancing/graceful_switch.rs | 46 ++++++++++--------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index 2b183da81..aa7bb108e 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -61,7 +61,6 @@ pub(crate) struct Child { pub identifier: T, pub builder: Arc, pub state: LbState, - pub updated: bool, // Set when the child updates its picker; cleared when accessed. policy: Box, work_scheduler: Arc, } @@ -169,7 +168,6 @@ where // Update the tracked state if the child produced an update. 
if let Some(state) = channel_controller.picker_update { self.children[child_idx].state = state; - self.children[child_idx].updated = true; self.updated = true; }; } @@ -234,7 +232,6 @@ where policy: e.policy, builder: e.builder, state: e.state, - updated: e.updated, work_scheduler: e.work_scheduler, }, ) @@ -267,7 +264,6 @@ where state: old_child.state, policy: old_child.policy, work_scheduler: old_child.work_scheduler, - updated: old_child.updated, }); } else { let work_scheduler = Arc::new(ChildWorkScheduler { @@ -284,7 +280,6 @@ where state: LbState::initial(), policy, work_scheduler, - updated: false, }); }; } diff --git a/grpc/src/client/load_balancing/graceful_switch.rs b/grpc/src/client/load_balancing/graceful_switch.rs index dfcd747cc..0f4430df9 100644 --- a/grpc/src/client/load_balancing/graceful_switch.rs +++ b/grpc/src/client/load_balancing/graceful_switch.rs @@ -1,5 +1,5 @@ use crate::client::load_balancing::child_manager::{ - self, Child, ChildManager, ChildUpdate, ResolverUpdateSharder, + self, ChildManager, ChildUpdate, ResolverUpdateSharder, }; use crate::client::load_balancing::{ ChannelController, LbConfig, LbPolicy, LbPolicyBuilder, LbState, ParsedJsonLbConfig, @@ -103,6 +103,7 @@ impl UpdateSharder { #[derive(Debug)] pub(crate) struct GracefulSwitchPolicy { child_manager: ChildManager<(), UpdateSharder>, // Child ID is the name of the child policy. + last_update: LbState, // Saves the last output LbState to determine if an update is needed. 
} impl LbPolicy for GracefulSwitchPolicy { @@ -115,7 +116,7 @@ impl LbPolicy for GracefulSwitchPolicy { let res = self .child_manager .resolver_update(update, config, channel_controller)?; - self.maybe_swap(channel_controller); + self.update_picker(channel_controller); Ok(()) } @@ -127,17 +128,17 @@ impl LbPolicy for GracefulSwitchPolicy { ) { self.child_manager .subchannel_update(subchannel, state, channel_controller); - self.maybe_swap(channel_controller); + self.update_picker(channel_controller); } fn work(&mut self, channel_controller: &mut dyn ChannelController) { self.child_manager.work(channel_controller); - self.maybe_swap(channel_controller); + self.update_picker(channel_controller); } fn exit_idle(&mut self, channel_controller: &mut dyn ChannelController) { self.child_manager.exit_idle(channel_controller); - self.maybe_swap(channel_controller); + self.update_picker(channel_controller); } } @@ -152,6 +153,7 @@ impl GracefulSwitchPolicy { pub fn new(runtime: Arc) -> Self { GracefulSwitchPolicy { child_manager: ChildManager::new(UpdateSharder::new(), runtime), + last_update: LbState::initial(), } } @@ -192,11 +194,22 @@ impl GracefulSwitchPolicy { } fn update_picker(&mut self, channel_controller: &mut dyn ChannelController) { - if let Some(update) = self.maybe_swap(channel_controller) { - channel_controller.update_picker(update); + let Some(update) = self.maybe_swap(channel_controller) else { + return; + }; + if self.last_update.connectivity_state == update.connectivity_state + && std::ptr::addr_eq( + Arc::as_ptr(&self.last_update.picker), + Arc::as_ptr(&update.picker), + ) + { + return; } + channel_controller.update_picker(update.clone()); + self.last_update = update; } + // Determines the appropriate state to output fn maybe_swap(&mut self, channel_controller: &mut dyn ChannelController) -> Option { if !self.child_manager.child_updated() { return None; @@ -221,13 +234,13 @@ impl GracefulSwitchPolicy { } let active_child = active_child.expect("There should 
always be an active child policy"); let Some(pending_child) = pending_child else { - return updated_state(active_child); + return Some(active_child.state.clone()); }; if active_child.state.connectivity_state == ConnectivityState::Ready && pending_child.state.connectivity_state == ConnectivityState::Connecting { - return updated_state(active_child); + return Some(active_child.state.clone()); } let config = &LbConfig::new(GracefulSwitchLbConfig::Swap(pending_child.builder.clone())); @@ -239,14 +252,6 @@ impl GracefulSwitchPolicy { } } -fn updated_state(child: &Child<()>) -> Option { - if child.updated { - Some(child.state.clone()) - } else { - None - } -} - #[cfg(test)] mod test { use crate::client::load_balancing::graceful_switch::GracefulSwitchPolicy; @@ -816,11 +821,8 @@ mod test { tcc.as_mut(), ConnectivityState::Connecting, ); - verify_correct_picker_from_policy( - &mut rx_events, - "stub-gracefulswitch_current_leaving_ready-one", - ) - .await; + // This should not produce an update. + assert_channel_empty(&mut rx_events).await; move_subchannel_to_state( &mut *graceful_switch, current_subchannel, From 075f2c075fb2d6b72ca92705fc28a522ad49a026 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Wed, 12 Nov 2025 15:07:00 -0800 Subject: [PATCH 12/12] unnecessary mut --- .../client/load_balancing/child_manager.rs | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/grpc/src/client/load_balancing/child_manager.rs b/grpc/src/client/load_balancing/child_manager.rs index aa7bb108e..d3e8ca5b2 100644 --- a/grpc/src/client/load_balancing/child_manager.rs +++ b/grpc/src/client/load_balancing/child_manager.rs @@ -47,7 +47,7 @@ use super::{Subchannel, SubchannelState}; // An LbPolicy implementation that manages multiple children. 
#[derive(Debug)] pub(crate) struct ChildManager> { - subchannel_child_map: HashMap, + subchannel_to_child_idx: HashMap, children: Vec>, update_sharder: S, pending_work: Arc>>, @@ -101,7 +101,7 @@ where pub fn new(update_sharder: S, runtime: Arc) -> Self { Self { update_sharder, - subchannel_child_map: Default::default(), + subchannel_to_child_idx: Default::default(), children: Default::default(), pending_work: Default::default(), runtime, @@ -110,7 +110,7 @@ where } /// Returns data for all current children. - pub fn children(&mut self) -> impl Iterator> { + pub fn children(&self) -> impl Iterator> { self.children.iter() } @@ -163,7 +163,7 @@ where ) { // Add all created subchannels into the subchannel_child_map. for csc in channel_controller.created_subchannels { - self.subchannel_child_map.insert(csc.into(), child_idx); + self.subchannel_to_child_idx.insert(csc.into(), child_idx); } // Update the tracked state if the child produced an update. if let Some(state) = channel_controller.picker_update { @@ -211,7 +211,7 @@ where let old_children = mem::take(&mut self.children); // Replace the subchannel map with an empty map. - let old_subchannel_child_map = mem::take(&mut self.subchannel_child_map); + let old_subchannel_child_map = mem::take(&mut self.subchannel_to_child_idx); // Reverse the old subchannel map into a vector indexed by the old child ID. let mut old_child_subchannels: Vec> = Vec::new(); @@ -224,19 +224,22 @@ where // Build a map of the old children from their IDs for efficient lookups. // This leverages a Child to hold all the entries where the // identifier becomes the index within the old self.children vector. 
- let old_children = old_children.into_iter().enumerate().map(|(old_idx, e)| { - ( - (e.builder.name(), e.identifier), - Child { - identifier: old_idx, - policy: e.policy, - builder: e.builder, - state: e.state, - work_scheduler: e.work_scheduler, - }, - ) - }); - let mut old_children: HashMap<(&'static str, T), _> = old_children.collect(); + let mut old_children: HashMap<(&'static str, T), _> = old_children + .into_iter() + .enumerate() + .map(|(old_idx, e)| { + ( + (e.builder.name(), e.identifier), + Child { + identifier: old_idx, + policy: e.policy, + builder: e.builder, + state: e.state, + work_scheduler: e.work_scheduler, + }, + ) + }) + .collect(); // Split the child updates into the IDs and builders, and the // ResolverUpdates/LbConfigs. @@ -252,7 +255,7 @@ where if let Some(old_child) = old_children.remove(&k) { let old_idx = old_child.identifier; for subchannel in mem::take(&mut old_child_subchannels[old_idx]) { - self.subchannel_child_map.insert(subchannel, new_idx); + self.subchannel_to_child_idx.insert(subchannel, new_idx); } if old_pending_work.contains(&old_idx) { pending_work.insert(new_idx); @@ -324,7 +327,7 @@ where ) { // Determine which child created this subchannel. let child_idx = *self - .subchannel_child_map + .subchannel_to_child_idx .get(&WeakSubchannel::new(&subchannel)) .unwrap(); let policy = &mut self.children[child_idx].policy;