Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize AccountIndexIterator to reuse last loaded bin-map item range #4729

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 81 additions & 12 deletions accounts-db/src/accounts_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,21 +474,20 @@ pub struct AccountsIndexRootsStats {
pub clean_dead_slot_us: u64,
}

/// A list of `(pubkey, index entry)` pairs; this is the return type of
/// `AccountsIndexIterator::range` and the payload cached in `last_bin_range`.
type RangeItemVec<T> = Vec<(Pubkey, AccountMapEntry<T>)>;

/// Iterator over the accounts index that yields `(Pubkey, AccountMapEntry<T>)` items in
/// batches, walking the per-bin maps in bin order.
///
/// Unless `collect_all_unsorted` is set, a batch is cut off once it holds `ITER_BATCH_SIZE`
/// items; whatever is left of the bin being read at that point is stashed in `last_bin_range`
/// so the following `next()` call can resume from it instead of loading that bin again.
pub struct AccountsIndexIterator<'a, T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> {
/// The per-bin maps that `next()` iterates over (skipping to the start bin).
account_maps: &'a LockMapTypeSlice<T, U>,
/// Borrowed from the index's `bin_calculator` at construction.
/// NOTE(review): presumably used to translate the pubkey bounds into bin indexes — confirm.
bin_calculator: &'a PubkeyBinCalculator24,
/// Lower pubkey bound; passed together with `end_bound` as the range when loading a bin.
start_bound: Bound<Pubkey>,
/// Upper pubkey bound; passed together with `start_bound` as the range when loading a bin.
end_bound: Bound<Pubkey>,
/// Initialized to `false` by the constructor.
/// NOTE(review): presumably set once iteration is exhausted — confirm in `next()`.
is_finished: bool,
/// When `true`, the `ITER_BATCH_SIZE` cutoff is not applied while filling a batch; the flag
/// is also forwarded to `Self::range` when loading a bin.
collect_all_unsorted: bool,
/// `(bin index, remaining items)` left over from the bin that was being read when the
/// previous batch filled up. `next()` takes this value (leaving `None`) and reuses the items
/// if the bin index matches the bin it is about to read; otherwise the bin is loaded afresh.
last_bin_range: Option<(usize, RangeItemVec<T>)>,
}

impl<'a, T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndexIterator<'a, T, U> {
fn range<R>(
map: &AccountMaps<T, U>,
range: R,
collect_all_unsorted: bool,
) -> Vec<(Pubkey, AccountMapEntry<T>)>
fn range<R>(map: &AccountMaps<T, U>, range: R, collect_all_unsorted: bool) -> RangeItemVec<T>
where
R: RangeBounds<Pubkey> + std::fmt::Debug,
{
Expand Down Expand Up @@ -563,6 +562,7 @@ impl<'a, T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndexIter
is_finished: false,
bin_calculator: &index.bin_calculator,
collect_all_unsorted,
last_bin_range: None,
}
}

Expand Down Expand Up @@ -593,16 +593,35 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> Iterator
}
let (start_bin, bin_range) = self.bin_start_and_range();
let mut chunk = Vec::with_capacity(ITER_BATCH_SIZE);
'outer: for i in self.account_maps.iter().skip(start_bin).take(bin_range) {
for (pubkey, account_map_entry) in Self::range(
&i,
(self.start_bound, self.end_bound),
self.collect_all_unsorted,
) {
'outer: for (i, map) in self
.account_maps
.iter()
.skip(start_bin)
.take(bin_range)
.enumerate()
{
let bin = start_bin + i;
let mut range = match self.last_bin_range.take() {
Some((last_bin, r)) if last_bin == bin => {
// we've already loaded this bin from last iteration, so just continue where we left off
r
}
_ => {
// else load the new bin
Self::range(
&map,
(self.start_bound, self.end_bound),
self.collect_all_unsorted,
)
}
};
for (count, (pubkey, account_map_entry)) in range.iter().enumerate() {
if chunk.len() >= ITER_BATCH_SIZE && !self.collect_all_unsorted {
range.drain(0..count);
self.last_bin_range = Some((bin, range));
break 'outer;
}
let item = (pubkey, account_map_entry);
let item = (*pubkey, account_map_entry.clone());
chunk.push(item);
}
}
Expand Down Expand Up @@ -3871,6 +3890,56 @@ pub mod tests {
}
}

#[test]
fn test_account_index_iter() {
    // Build a test index holding exactly two batches' worth of unique pubkeys.
    // The assertions below expect every key to land in the first bin (bin 0),
    // leaving the second bin empty.
    let index = AccountsIndex::<bool, bool>::default_for_tests();
    let total_keys = 2 * ITER_BATCH_SIZE;
    let keys: Vec<Pubkey> = std::iter::repeat_with(Pubkey::new_unique)
        .take(total_keys)
        .collect();

    for key in keys.iter() {
        let mut reclaims = Vec::new();
        index.upsert(
            0,
            0,
            key,
            &AccountSharedData::default(),
            &AccountSecondaryIndexes::default(),
            true,
            &mut reclaims,
            UPSERT_POPULATE_RECLAIMS,
        );
    }

    // Iterate over the full pubkey range, in sorted (batched) mode.
    let mut iter = index.iter(None::<&Range<Pubkey>>, COLLECT_ALL_UNSORTED_FALSE);

    // The first call must hand back one full batch taken from the first bin.
    // The other half of that bin must be parked in `last_bin_range`, so the
    // second call doesn't need to load/filter/sort the first bin again.
    let first_batch = iter.next().unwrap();
    assert_eq!(first_batch.len(), ITER_BATCH_SIZE);
    // Keys must come out in strictly ascending order.
    assert!(first_batch.windows(2).all(|pair| pair[0].0 < pair[1].0));
    // The leftover items must have been cached...
    assert!(iter.last_bin_range.is_some());
    let (cached_bin, cached_items) = iter.last_bin_range.as_ref().unwrap();
    // ...for the first bin...
    assert_eq!(*cached_bin, 0);
    // ...and must be exactly the remaining batch's worth of items.
    assert_eq!(cached_items.len(), ITER_BATCH_SIZE);

    // The second call must return the remaining batch, served from the cache.
    let second_batch = iter.next().unwrap();
    // Still strictly ascending by pubkey.
    assert!(second_batch.windows(2).all(|pair| pair[0].0 < pair[1].0));
    // All remaining pubkeys are returned.
    assert_eq!(second_batch.len(), ITER_BATCH_SIZE);
    // Nothing is left over, so the cache must be empty again.
    assert!(iter.last_bin_range.is_none());
}

#[test]
fn test_bin_start_and_range() {
let index = AccountsIndex::<bool, bool>::default_for_tests();
Expand Down
Loading