diff --git a/trie-db/src/lib.rs b/trie-db/src/lib.rs index 37bfe64e..2cc2f8f6 100644 --- a/trie-db/src/lib.rs +++ b/trie-db/src/lib.rs @@ -22,14 +22,16 @@ extern crate alloc; mod rstd { pub use std::{borrow, boxed, cmp, convert, fmt, hash, iter, marker, mem, ops, rc, result, vec}; pub use std::collections::VecDeque; + pub use std::collections::BTreeMap; pub use std::error::Error; } #[cfg(not(feature = "std"))] mod rstd { - pub use core::{borrow, convert, cmp, iter, fmt, hash, marker, mem, ops, result}; - pub use alloc::{boxed, rc, vec}; + pub use core::{convert, cmp, iter, fmt, hash, marker, mem, ops, result}; + pub use alloc::{borrow, boxed, rc, vec}; pub use alloc::collections::VecDeque; + pub use alloc::collections::btree_map::BTreeMap; pub trait Error {} impl Error for T {} } @@ -71,7 +73,11 @@ pub use crate::node_codec::{NodeCodec, Partial}; pub use crate::iter_build::{trie_visit, ProcessEncodedNode, TrieBuilder, TrieRoot, TrieRootUnhashed}; pub use crate::iterator::TrieDBNodeIterator; -pub use crate::trie_codec::{decode_compact, decode_compact_from_iter, encode_compact}; +pub use crate::trie_codec::{encode_compact, + decode_compact, decode_compact_from_iter, decode_compact_with_known_values, + decode_compact_for_encoded_skipped_values, encode_compact_skip_all_values, + LazyFetcher, compact_conditions, encode_compact_skip_conditional, + encode_compact_skip_conditional_with_key}; #[cfg(feature = "std")] pub use crate::iter_build::TrieRootPrint; diff --git a/trie-db/src/nibble/leftnibbleslice.rs b/trie-db/src/nibble/leftnibbleslice.rs index c31301be..f7d5d31d 100644 --- a/trie-db/src/nibble/leftnibbleslice.rs +++ b/trie-db/src/nibble/leftnibbleslice.rs @@ -92,6 +92,15 @@ impl<'a> LeftNibbleSlice<'a> { // If common nibble prefix is the same, finally compare lengths. self.len().cmp(&other.len()) } + + /// If nibble are aligned (true for any value), return slice to the key. 
+ pub fn as_slice(&self) -> Option<&'a [u8]> { + if self.len % NIBBLE_PER_BYTE == 0 { + Some(&self.bytes[..self.len / NIBBLE_PER_BYTE]) + } else { + None + } + } } impl<'a> PartialEq for LeftNibbleSlice<'a> { @@ -226,4 +235,4 @@ mod tests { Ordering::Equal ); } -} \ No newline at end of file +} diff --git a/trie-db/src/node.rs b/trie-db/src/node.rs index ef50e8d6..a0c90045 100644 --- a/trie-db/src/node.rs +++ b/trie-db/src/node.rs @@ -95,6 +95,14 @@ impl NibbleSlicePlan { } } + /// A empty nibbleslice. + pub fn empty() -> Self { + NibbleSlicePlan { + bytes: 0..0, + offset: 0, + } + } + /// Returns the nibble length of the slice. pub fn len(&self) -> usize { (self.bytes.end - self.bytes.start) * nibble_ops::NIBBLE_PER_BYTE - self.offset diff --git a/trie-db/src/trie_codec.rs b/trie-db/src/trie_codec.rs index 68d956bb..70795916 100644 --- a/trie-db/src/trie_codec.rs +++ b/trie-db/src/trie_codec.rs @@ -30,26 +30,37 @@ use crate::{ CError, ChildReference, DBValue, NibbleVec, NodeCodec, Result, TrieHash, TrieError, TrieDB, TrieDBNodeIterator, TrieLayout, nibble_ops::NIBBLE_LENGTH, node::{Node, NodeHandle, NodeHandlePlan, NodePlan, OwnedNode}, + nibble::LeftNibbleSlice, }; use crate::rstd::{ boxed::Box, convert::TryInto, marker::PhantomData, rc::Rc, result, vec, vec::Vec, + borrow::Cow, cmp::Ordering, mem, }; struct EncoderStackEntry { /// The prefix is the nibble path to the node in the trie. prefix: NibbleVec, + /// Node stacked. node: Rc>, /// The next entry in the stack is a child of the preceding entry at this index. For branch /// nodes, the index is in [0, NIBBLE_LENGTH] and for extension nodes, the index is in [0, 1]. child_index: usize, /// Flags indicating whether each child is omitted in the encoded node. omit_children: Vec, + /// Enum indicating whether we should omit value in the encoded node. + omit_value: OmitValue, /// The encoding of the subtrie nodes rooted at this entry, which is built up in /// `encode_compact`. 
output_index: usize, _marker: PhantomData, } +enum OmitValue { + OmitValue, + EscapeValue, + None, +} + impl EncoderStackEntry { /// Given the prefix of the next child node, identify its index and advance `child_index` to /// that. For a given entry, this must be called sequentially only with strictly increasing @@ -98,7 +109,26 @@ impl EncoderStackEntry { fn encode_node(&self) -> Result, C::HashOut, C::Error> { let node_data = self.node.data(); Ok(match self.node.node_plan() { - NodePlan::Empty | NodePlan::Leaf { .. } => node_data.to_vec(), + NodePlan::Empty => node_data.to_vec(), + NodePlan::Leaf { partial, value } => { + let partial = partial.build(node_data); + + match self.omit_value { + OmitValue::OmitValue => { + C::leaf_node(partial.right(), &[][..]) + }, + OmitValue::EscapeValue => { + if let Some(escaped) = encode_empty_escape(&node_data[value.clone()]) { + C::leaf_node(partial.right(), &escaped[..]) + } else { + node_data.to_vec() + } + }, + OmitValue::None => { + node_data.to_vec() + }, + } + }, NodePlan::Extension { partial, child: _ } => { if !self.omit_children[0] { node_data.to_vec() @@ -109,18 +139,54 @@ impl EncoderStackEntry { } } NodePlan::Branch { value, children } => { + let value = value.clone().map(|range| { + let node_data = &node_data[range]; + match self.omit_value { + OmitValue::OmitValue => { + Cow::Borrowed(&[][..]) + }, + OmitValue::EscapeValue => { + if let Some(escaped) = encode_empty_escape(node_data) { + escaped + } else { + node_data.into() + } + }, + OmitValue::None => { + node_data.into() + }, + } + }); C::branch_node( Self::branch_children(node_data, &children, &self.omit_children)?.iter(), - value.clone().map(|range| &node_data[range]) + value.as_ref().map(|v| &v[..]), ) } NodePlan::NibbledBranch { partial, value, children } => { + let value = value.clone().map(|range| { + let node_data = &node_data[range]; + match self.omit_value { + OmitValue::OmitValue => { + Cow::Borrowed(&[][..]) + }, + OmitValue::EscapeValue => { + if 
let Some(escaped) = encode_empty_escape(node_data) { + escaped + } else { + node_data.into() + } + }, + OmitValue::None => { + node_data.into() + }, + } + }); let partial = partial.build(node_data); C::branch_node_nibbled( partial.right_iter(), partial.len(), Self::branch_children(node_data, &children, &self.omit_children)?.iter(), - value.clone().map(|range| &node_data[range]) + value.as_ref().map(|v| &v[..]), ) } }) @@ -167,7 +233,63 @@ impl EncoderStackEntry { /// references. pub fn encode_compact(db: &TrieDB) -> Result>, TrieHash, CError> where - L: TrieLayout + L: TrieLayout, +{ + encode_compact_skip_values_inner::(db, ()) +} + +/// Variant of 'encode_compact' where all values are removed and replace by empty value. +pub fn encode_compact_skip_all_values<'a, L>(db: &TrieDB) -> Result>, TrieHash, CError> + where + L: TrieLayout, +{ + encode_compact_skip_values_inner::(db, All) +} + +/// Variant of 'encode_compact' where values are removed +/// for a given condition. +/// Condition uses values as parameters. +pub fn encode_compact_skip_conditional<'a, L, F>( + db: &TrieDB, + value_skip_condition: F, + escape_values: bool, +) -> Result>, TrieHash, CError> + where + L: TrieLayout, + F: FnMut(&[u8]) -> bool, +{ + let to_skip = NoKeyCondition(value_skip_condition); + if escape_values { + encode_compact_skip_values_inner::(db, Escape(to_skip)) + } else { + encode_compact_skip_values_inner::(db, to_skip) + } +} + +/// Variant of 'encode_compact' where values are removed +/// for a given condition. +/// Condition uses key and values as parameters. 
+pub fn encode_compact_skip_conditional_with_key<'a, L, F>( + db: &TrieDB, + value_skip_condition: F, + escape_values: bool, +) -> Result>, TrieHash, CError> + where + L: TrieLayout, + F: FnMut(&NibbleVec, &[u8]) -> bool, +{ + let to_skip = WithKeyCondition(value_skip_condition); + if escape_values { + encode_compact_skip_values_inner::(db, Escape(to_skip)) + } else { + encode_compact_skip_values_inner::(db, to_skip) + } +} + +fn encode_compact_skip_values_inner<'a, L, F>(db: &TrieDB, mut to_skip: F) -> Result>, TrieHash, CError> + where + L: TrieLayout, + F: ValuesRemoveCondition, { let mut output = Vec::new(); @@ -221,11 +343,13 @@ pub fn encode_compact(db: &TrieDB) -> Result>, TrieHash, CE NodePlan::Extension { .. } => 1, NodePlan::Branch { .. } | NodePlan::NibbledBranch { .. } => NIBBLE_LENGTH, }; + let omit_value = to_skip.skip_new_node_value(&prefix, &node); stack.push(EncoderStackEntry { prefix, node, child_index: 0, omit_children: vec![false; children_len], + omit_value, output_index: output.len(), _marker: PhantomData::default(), }); @@ -250,6 +374,359 @@ pub fn encode_compact(db: &TrieDB) -> Result>, TrieHash, CE Ok(output) } +trait ValuesRemoveCondition { + const ESCAPE: OmitValue; + const REMOVE_NONE: bool; + const REMOVE_ALL: bool; + const NEED_KEY: bool; + + fn check(&mut self, key: &NibbleVec, value: &[u8]) -> bool; + + // return (omit_value, escape_value) + fn skip_new_node_value(&mut self, prefix: &NibbleVec, node: &Rc>) -> OmitValue { + + if Self::REMOVE_ALL { + return OmitValue::OmitValue; + } + if Self::REMOVE_NONE { + return Self::ESCAPE; + } + let (partial, value) = match node.node_plan() { + NodePlan::NibbledBranch{ partial, value: Some(value), ..} + | NodePlan::Leaf {partial, value} => { + (partial.clone(), value) + }, + NodePlan::Branch{ value: Some(value), ..} => { + (crate::node::NibbleSlicePlan::empty(), value) + }, + _ => return OmitValue::None, + }; + + let node_data = node.data(); + let value = &node_data[value.clone()]; + if 
Self::NEED_KEY { + let mut node_key = prefix.clone(); + let partial = partial.build(node_data); + node_key.append_partial(partial.right()); + return if self.check(&node_key, value) { + OmitValue::OmitValue + } else { + Self::ESCAPE + }; + } else { + return if self.check(&prefix, value) { + OmitValue::OmitValue + } else { + Self::ESCAPE + }; + } + } +} + +impl ValuesRemoveCondition for () { + const REMOVE_NONE: bool = true; + const REMOVE_ALL: bool = false; + const NEED_KEY: bool = false; + const ESCAPE: OmitValue = OmitValue::None; + + fn check(&mut self, _key: &NibbleVec, _value: &[u8]) -> bool { + false + } +} + +struct All; + +impl ValuesRemoveCondition for All { + const REMOVE_NONE: bool = false; + const REMOVE_ALL: bool = true; + const NEED_KEY: bool = false; + const ESCAPE: OmitValue = OmitValue::None; + + fn check(&mut self, _key: &NibbleVec, _value: &[u8]) -> bool { + true + } +} + +struct WithKeyCondition(F); + +impl ValuesRemoveCondition for WithKeyCondition + where F: FnMut(&NibbleVec, &[u8]) -> bool, +{ + const REMOVE_NONE: bool = false; + const REMOVE_ALL: bool = false; + const NEED_KEY: bool = true; + const ESCAPE: OmitValue = OmitValue::None; + + fn check(&mut self, key: &NibbleVec, value: &[u8]) -> bool { + self.0(key, value) + } +} + +struct NoKeyCondition(F); + +impl ValuesRemoveCondition for NoKeyCondition + where F: FnMut(&[u8]) -> bool, +{ + const REMOVE_NONE: bool = false; + const REMOVE_ALL: bool = false; + const NEED_KEY: bool = false; + const ESCAPE: OmitValue = OmitValue::None; + + fn check(&mut self, _key: &NibbleVec, value: &[u8]) -> bool { + self.0(value) + } +} + +struct Escape(F); + +impl ValuesRemoveCondition for Escape + where F: ValuesRemoveCondition, +{ + const REMOVE_NONE: bool = F::REMOVE_NONE; + const REMOVE_ALL: bool = F::REMOVE_ALL; + const NEED_KEY: bool = F::NEED_KEY; + const ESCAPE: OmitValue = OmitValue::EscapeValue; + + fn check(&mut self, key: &NibbleVec, value: &[u8]) -> bool { + self.0.check(key, value) + } +} + 
+impl<'a, F> ValuesRemoveCondition for &'a mut F + where F: ValuesRemoveCondition, +{ + const REMOVE_NONE: bool = F::REMOVE_NONE; + const REMOVE_ALL: bool = F::REMOVE_ALL; + const NEED_KEY: bool = F::NEED_KEY; + const ESCAPE: OmitValue = F::ESCAPE; + + fn check(&mut self, key: &NibbleVec, value: &[u8]) -> bool { + (*self).check(key, value) + } +} + +enum ValuesInsert<'a, I, F> { + None, + KnownKeys(InsertAt<'a, I, F>), + EscapedKnownKeys(InsertAt<'a, I, F>), + EscapedValues(F), + NonEscapedValues(F), +} + +struct InsertAt<'a, I, F> { + key_values: I, + fetcher: F, + next_key_value: Option<&'a [u8]>, +} + +impl< + 'a, + F: LazyFetcher<'a>, + I: Iterator<Item = &'a [u8]> +> InsertAt<'a, I, F> { + fn new(mut key_values: I, fetcher: F) -> Self { + let next_key_value = key_values.next(); + InsertAt { + key_values, + fetcher, + next_key_value, + } + } +} + +/// Since an empty value is not a very common case, its encoding +/// starts with a byte sequence chosen to avoid escaping too often +/// on valid values. +/// +/// The sequence is the escape character (byte 27) followed by 'Esc'. +/// The character repeated when the sequence is itself part of the +/// content is the first byte defined here. +const EMPTY_ESCAPE_SEQUENCE: &'static [u8] = b"\x1bEsc"; + +#[test] +fn escape_bytes_check() { + assert_eq!(EMPTY_ESCAPE_SEQUENCE, [27, 69, 115, 99]); +} + +/// Escape-encode a value. +/// This allows using the encoded empty value to define +/// a skipped value. +/// +/// So we redefine the empty value as a sequence of bytes. +/// We then redefine this sequence followed by n escape characters by appending another escape character. +/// Such that: +/// [] -> [27, 69, 115, 99] +/// [27, 69, 115, 99] -> [27, 69, 115, 99, 27] +/// [27, 69, 115, 99, 27] -> [27, 69, 115, 99, 27, 27] +/// +/// When escaped, return the escaped value. 
+fn encode_empty_escape(value: &[u8]) -> Option> { + if value.len() == 0 { + return Some(EMPTY_ESCAPE_SEQUENCE.into()); + } + + if value.starts_with(EMPTY_ESCAPE_SEQUENCE) { + let mut i = EMPTY_ESCAPE_SEQUENCE.len(); + while Some(&EMPTY_ESCAPE_SEQUENCE[0]) == value.get(i) { + i += 1; + } + if i == value.len() { + let mut value = value.to_vec(); + value.push(EMPTY_ESCAPE_SEQUENCE[0]); + // escaped escape sequence + return Some(value.into()); + } + } + None +} + +/// Get empty escaped value (either empty or value starting with +/// empty prefix minus end escape character). +/// +/// If escaped return the decoded value. +fn decode_empty_escaped(value: &[u8]) -> Option<&[u8]> { + if value.starts_with(EMPTY_ESCAPE_SEQUENCE) { + let mut i = EMPTY_ESCAPE_SEQUENCE.len(); + if value.len() == i { + // escaped empty + return Some(&[]) + } + while Some(&EMPTY_ESCAPE_SEQUENCE[0]) == value.get(i) { + i += 1; + } + if i == value.len() { + // escaped escape sequence + return Some(&value[..value.len() - 1]); + } + } + None +} + +#[test] +fn escape_empty_value() { + let test_set = [ + (&[][..], Some(&[27u8, 69, 115, 99][..])), + (&[27u8, 69, 115], None), + (&[27, 69, 115, 100], None), + (&[27, 69, 115, 99], Some(&[27, 69, 115, 99, 27])), + (&[27, 69, 115, 99, 100], None), + (&[27, 69, 115, 99, 27], Some(&[27, 69, 115, 99, 27, 27])), + (&[27, 69, 115, 99, 27, 100], None), + ]; + + for (input, output) in test_set.iter() { + let encoded = encode_empty_escape(input); + assert_eq!(&encoded.as_ref().map(Cow::as_ref), output); + if let Some(encoded) = output { + let decoded = decode_empty_escaped(encoded); + assert_eq!(decoded, Some(*input)); + } + } +} + +impl< + 'a, + F: LazyFetcher<'a>, + V: Iterator +> ValuesInsert<'a, V, F> { + fn escaped_value( + &self, + ) -> bool { + match self { + ValuesInsert::NonEscapedValues(..) + | ValuesInsert::KnownKeys(..) + | ValuesInsert::None => false, + ValuesInsert::EscapedKnownKeys(..) + | ValuesInsert::EscapedValues(..) 
=> true + } + } + + fn skip_new_node_value( + &mut self, + prefix: &mut NibbleVec, + entry: &mut DecoderStackEntry<'a, C>, + ) -> bool { + + let original_length = prefix.len(); + let (partial, empty_value, escaped_value) = match entry.node { + Node::Leaf(partial, value) + | Node::NibbledBranch(partial, _, Some(value)) => { + (partial, value.is_empty(), if self.escaped_value() { + decode_empty_escaped(value) + } else { + None + }) + }, + Node::Branch(_, Some(value)) => { + (crate::nibble::NibbleSlice::new(&[]), value.is_empty(), if self.escaped_value() { + decode_empty_escaped(value) + } else { + None + }) + }, + _ => return true, + }; + + match self { + ValuesInsert::None => (), + ValuesInsert::EscapedKnownKeys(skipped_keys) + | ValuesInsert::KnownKeys(skipped_keys) => { + if let Some(next) = &skipped_keys.next_key_value { + prefix.append_partial(partial.right()); + // comparison is redundant with previous checks, could be optimized. + let node_key = LeftNibbleSlice::new(prefix.inner()).truncate(prefix.len()); + let next = LeftNibbleSlice::new(next); + let (move_next, result) = match next.cmp(&node_key) { + Ordering::Less => (true, false), + Ordering::Greater => (false, false), + Ordering::Equal => { + (true, true) + }, + }; + prefix.drop_lasts(prefix.len() - original_length); + if result && empty_value { + if let Some(key) = mem::take(&mut skipped_keys.next_key_value) { + if let Some(value) = skipped_keys.fetcher.fetch(key) { + entry.inserted_value = Some(value); + } else { + return false; + } + } + } + if result && !empty_value { + // expected skip value was not skip, can be harmless, but consider invalid + return false; + } + if move_next { + skipped_keys.next_key_value = skipped_keys.key_values.next(); + if !result { + return self.skip_new_node_value(prefix, entry); + } + } + } + }, + ValuesInsert::NonEscapedValues(fetcher) + | ValuesInsert::EscapedValues(fetcher) => { + if empty_value { + prefix.append_partial(partial.right()); + let key = 
LeftNibbleSlice::new(prefix.inner()).truncate(prefix.len()); + if let Some(value) = fetcher.fetch(key.as_slice().expect("Values have keys")) { + entry.inserted_value = Some(value); + prefix.drop_lasts(prefix.len() - original_length); + } else { + prefix.drop_lasts(prefix.len() - original_length); + return false; + } + } + }, + } + if let Some(new_value) = escaped_value { + entry.inserted_value = Some(new_value.into()); + } + true + } +} + struct DecoderStackEntry<'a, C: NodeCodec> { node: Node<'a>, /// The next entry in the stack is a child of the preceding entry at this index. For branch @@ -257,6 +734,8 @@ struct DecoderStackEntry<'a, C: NodeCodec> { child_index: usize, /// The reconstructed child references. children: Vec>>, + /// Value to insert. + inserted_value: Option>, _marker: PhantomData, } @@ -347,12 +826,17 @@ impl<'a, C: NodeCodec> DecoderStackEntry<'a, C> { /// /// Preconditions: /// - if node is an extension node, then `children[0]` is Some. - fn encode_node(self) -> Vec { - match self.node { + fn encode_node(mut self) -> Option> { + Some(match self.node { Node::Empty => C::empty_node().to_vec(), - Node::Leaf(partial, value) => - C::leaf_node(partial.right(), value), + Node::Leaf(partial, value) => { + if let Some(inserted_value) = self.inserted_value.take() { + C::leaf_node(partial.right(), inserted_value.as_ref()) + } else { + C::leaf_node(partial.right(), value) + } + }, Node::Extension(partial, _) => C::extension_node( partial.right_iter(), @@ -360,16 +844,31 @@ impl<'a, C: NodeCodec> DecoderStackEntry<'a, C> { self.children[0] .expect("required by method precondition; qed"), ), - Node::Branch(_, value) => - C::branch_node(self.children.into_iter(), value), - Node::NibbledBranch(partial, _, value) => - C::branch_node_nibbled( - partial.right_iter(), - partial.len(), - self.children.iter(), - value, - ), - } + Node::Branch(_, value) => { + if let Some(inserted_value) = self.inserted_value.take() { + C::branch_node(self.children.into_iter(), 
Some(inserted_value.as_ref())) + } else { + C::branch_node(self.children.into_iter(), value) + } + }, + Node::NibbledBranch(partial, _, value) => { + if let Some(inserted_value) = self.inserted_value.take() { + C::branch_node_nibbled( + partial.right_iter(), + partial.len(), + self.children.iter(), + Some(inserted_value.as_ref()), + ) + } else { + C::branch_node_nibbled( + partial.right_iter(), + partial.len(), + self.children.iter(), + value, + ) + } + }, + }) } } @@ -401,6 +900,73 @@ pub fn decode_compact_from_iter<'a, L, DB, T, I>(db: &mut DB, encoded: I) L: TrieLayout, DB: HashDB, I: IntoIterator, +{ + let skipped = ValuesInsert::, ()>::None; + decode_compact_inner::(db, encoded.into_iter(), skipped) +} + +/// Variant of 'decode_compact' that inject some known key values. +/// Values are only added if the existing one is a zero length value, +/// if the value exist and is not a zero length value, an error +/// is returned. +/// +/// Known key in input must be ordered. +pub fn decode_compact_with_known_values<'a, L, DB, T, I, F, K>( + db: &mut DB, + encoded: I, + fetcher: F, + known_keys: K, + escaped_value: bool, +) -> Result<(TrieHash, usize), TrieHash, CError> + where + L: TrieLayout, + DB: HashDB, + I: IntoIterator, + F: LazyFetcher<'a>, + K: IntoIterator, +{ + let known = if escaped_value { + ValuesInsert::EscapedKnownKeys(InsertAt::new(known_keys.into_iter(), fetcher)) + } else { + ValuesInsert::KnownKeys(InsertAt::new(known_keys.into_iter(), fetcher)) + }; + decode_compact_inner::(db, encoded.into_iter(), known) +} + +/// Variant of 'decode_compact' that try to fetch value when they are +/// skipped. +/// Skipped values are encoded into a 0 length value. 
+pub fn decode_compact_for_encoded_skipped_values<'a, L, DB, T, I, F>( + db: &mut DB, + encoded: I, + fetcher: F, + escaped_value: bool, +) -> Result<(TrieHash, usize), TrieHash, CError> + where + L: TrieLayout, + DB: HashDB, + I: IntoIterator, + F: LazyFetcher<'a>, +{ + let skipped = if escaped_value { + ValuesInsert::EscapedValues(fetcher) + } else { + ValuesInsert::NonEscapedValues(fetcher) + }; + decode_compact_inner::>(db, encoded.into_iter(), skipped) +} + +fn decode_compact_inner<'a, L, DB, T, I, F, V>( + db: &mut DB, + encoded: I, + mut skipped: ValuesInsert<'a, V, F>, +) -> Result<(TrieHash, usize), TrieHash, CError> + where + L: TrieLayout, + DB: HashDB, + I: Iterator, + F: LazyFetcher<'a>, + V: Iterator, { // The stack of nodes through a path in the trie. Each entry is a child node of the preceding // entry. @@ -422,10 +988,14 @@ pub fn decode_compact_from_iter<'a, L, DB, T, I>(db: &mut DB, encoded: I) node, child_index: 0, children: vec![None; children_len], + inserted_value: None, _marker: PhantomData::default(), }; loop { + if !skipped.skip_new_node_value(&mut prefix, &mut last_entry) { + return Err(Box::new(TrieError::IncompleteDatabase(>::default()))); + } if !last_entry.advance_child_index()? { last_entry.push_to_prefix(&mut prefix); stack.push(last_entry); @@ -434,7 +1004,8 @@ pub fn decode_compact_from_iter<'a, L, DB, T, I>(db: &mut DB, encoded: I) // Since `advance_child_index` returned true, the preconditions for `encode_node` are // satisfied. - let node_data = last_entry.encode_node(); + let node_data = last_entry.encode_node() + .ok_or(Box::new(TrieError::IncompleteDatabase(>::default())))?; let node_hash = db.insert(prefix.as_prefix(), node_data.as_ref()); if let Some(entry) = stack.pop() { @@ -451,3 +1022,87 @@ pub fn decode_compact_from_iter<'a, L, DB, T, I>(db: &mut DB, encoded: I) Err(Box::new(TrieError::IncompleteDatabase(>::default()))) } + +/// Simple lazy access to values to insert in proof. 
+pub trait LazyFetcher<'a> { + /// Get actual value as bytes. + /// If the value cannot be fetched, return `None`, resulting + /// in an error in the decode method. + fn fetch(&self, key: &[u8]) -> Option<Cow<'a, [u8]>>; +} + +impl<'a> LazyFetcher<'a> for () { + fn fetch(&self, _key: &[u8]) -> Option<Cow<'a, [u8]>> { + None + } +} + +impl<'a> LazyFetcher<'a> for (&'a [u8], &'a [u8]) { + fn fetch(&self, key: &[u8]) -> Option<Cow<'a, [u8]>> { + if key == self.0 { + Some(Cow::Borrowed(self.1)) + } else { + None + } + } +} + +impl<'a> LazyFetcher<'a> for &'a crate::rstd::BTreeMap<&'a [u8], &'a [u8]> { + fn fetch(&self, key: &[u8]) -> Option<Cow<'a, [u8]>> { + self.get(key).map(|value| Cow::Borrowed(*value)) + } +} + +/// Implementations of conditions to use for removing values. +pub mod compact_conditions { + use super::*; + + /// Threshold size condition for removing values from proof. + pub fn skip_treshold(treshold: usize) -> impl FnMut(&[u8]) -> bool { + move |value: &[u8]| { + value.len() > treshold + } + } + + /// Threshold size condition for removing values from proof, also collecting the keys of removed values. + pub fn skip_treshold_collect_keys<'a>( + treshold: usize, + keys: &'a mut Vec<Vec<u8>>, + ) -> impl FnMut(&NibbleVec, &[u8]) -> bool + 'a { + move |key: &NibbleVec, value: &[u8]| { + if value.len() > treshold { + keys.push(key.as_prefix().0.to_vec()); + true + } else { + false + } + } + } + + /// Skip the values of the keys yielded by an ordered iterator. + pub fn skip_given_ordered_keys<'a>( + iter: impl IntoIterator<Item = &'a [u8]> + 'a, + ) -> impl FnMut(&NibbleVec, &[u8]) -> bool + 'a { + let mut iter = iter.into_iter(); + let mut next_key = iter.next(); + move |node_key: &NibbleVec, _value: &[u8]| { + while let Some(next) = next_key { + // comparison is redundant with previous checks, could be optimized. 
+ let node_key = LeftNibbleSlice::new(node_key.inner()).truncate(node_key.len()); + let next = LeftNibbleSlice::new(next); + match next.cmp(&node_key) { + Ordering::Less => { + next_key = iter.next(); + }, + Ordering::Equal => { + next_key = iter.next(); + return true; + }, + Ordering::Greater => break, + }; + } + + false + } + } +} diff --git a/trie-db/src/triedbmut.rs b/trie-db/src/triedbmut.rs index 6338ba0f..7ba3611b 100644 --- a/trie-db/src/triedbmut.rs +++ b/trie-db/src/triedbmut.rs @@ -1179,7 +1179,7 @@ where None, One(u8), Many, - }; + } let mut used_index = UsedIndex::None; for i in 0..16 { match (children[i].is_none(), &used_index) { @@ -1225,7 +1225,7 @@ where None, One(u8), Many, - }; + } let mut used_index = UsedIndex::None; for i in 0..16 { match (children[i].is_none(), &used_index) { diff --git a/trie-db/test/src/trie_codec.rs b/trie-db/test/src/trie_codec.rs index e4aa2181..d6c7bbab 100644 --- a/trie-db/test/src/trie_codec.rs +++ b/trie-db/test/src/trie_codec.rs @@ -14,19 +14,30 @@ use trie_db::{ - DBValue, encode_compact, decode_compact, + DBValue, Trie, TrieMut, TrieDB, TrieError, TrieDBMut, TrieLayout, Recorder, + decode_compact, }; use hash_db::{HashDB, Hasher, EMPTY_PREFIX}; use reference_trie::{ - ExtensionLayout, NoExtensionLayout, + ExtensionLayout, NoExtensionLayout, AllowEmptyLayout, }; +use std::collections::{BTreeSet, BTreeMap}; type MemoryDB = memory_db::MemoryDB, DBValue>; +enum EncodeType<'a> { + SkipKeys(&'a BTreeSet<&'static [u8]>), + TresholdEscaped(usize), + TresholdCollect(usize, &'a mut Vec>), + All, + None, +} + fn test_encode_compact( entries: Vec<(&'static [u8], &'static [u8])>, keys: Vec<&'static [u8]>, + encode_type: EncodeType, ) -> (::Out, Vec>, Vec<(&'static [u8], Option)>) { // Populate DB with full trie from entries. @@ -63,21 +74,83 @@ fn test_encode_compact( // Compactly encode the partial trie DB. 
let compact_trie = { let trie = >::new(&partial_db, &root).unwrap(); - encode_compact::(&trie).unwrap() + match encode_type { + EncodeType::None => { + trie_db::encode_compact::(&trie).unwrap() + }, + EncodeType::All => { + trie_db::encode_compact_skip_all_values::(&trie).unwrap() + }, + EncodeType::SkipKeys(skip_keys) => { + trie_db::encode_compact_skip_conditional_with_key::( + &trie, + trie_db::compact_conditions::skip_given_ordered_keys( + skip_keys.iter().map(|k| *k), + ), + false, + ).unwrap() + }, + EncodeType::TresholdCollect(treshold, keys) => { + trie_db::encode_compact_skip_conditional_with_key::( + &trie, + trie_db::compact_conditions::skip_treshold_collect_keys(treshold, keys), + false, + ).unwrap() + }, + EncodeType::TresholdEscaped(treshold) => { + trie_db::encode_compact_skip_conditional::( + &trie, + trie_db::compact_conditions::skip_treshold(treshold), + true, + ).unwrap() + }, + } }; (root, compact_trie, items) } +enum DecodeType<'a> { + None, + SkippedValues(&'a BTreeMap<&'static [u8], &'static [u8]>), + Escaped(&'a BTreeMap<&'static [u8], &'static [u8]>), +} + fn test_decode_compact( encoded: &[Vec], items: Vec<(&'static [u8], Option)>, expected_root: ::Out, expected_used: usize, + decode_type: DecodeType, ) { // Reconstruct the partial DB from the compact encoding. 
let mut db = MemoryDB::default(); - let (root, used) = decode_compact::(&mut db, encoded).unwrap(); + let (root, used) = match decode_type { + DecodeType::SkippedValues(skipped_values) => { + trie_db::decode_compact_with_known_values::( + &mut db, + encoded.iter().map(Vec::as_slice), + skipped_values, + skipped_values.keys().map(|k| *k), + false, + ) + }, + DecodeType::None => { + trie_db::decode_compact_from_iter::( + &mut db, + encoded.iter().map(Vec::as_slice), + ) + }, + DecodeType::Escaped(fetcher) => { + trie_db::decode_compact_for_encoded_skipped_values::( + &mut db, + encoded.iter().map(Vec::as_slice), + fetcher, + true, + ) + }, + }.unwrap(); + assert_eq!(root, expected_root); assert_eq!(used, expected_used); @@ -88,24 +161,41 @@ fn test_decode_compact( } } +fn test_set() -> Vec<(&'static [u8], &'static [u8])> { + vec![ + // "alfa" is at a hash-referenced leaf node. + (b"alfa", &[0; 32]), + // "bravo" is at an inline leaf node. + (b"bravo", b"bravo"), + // "do" is at a hash-referenced branch node. + (b"do", b"verb"), + // "dog" is at an inline leaf node. + (b"dog", b"puppy"), + // "doge" is at a hash-referenced leaf node. + (b"doge", &[0; 32]), + // extension node "o" (plus nibble) to next branch. + (b"horse", b"stallion"), + (b"house", b"building"), + ] +} + +// ok proof elements to test with test_set +fn test_proof_default() -> Vec<&'static [u8]> { + vec![ + b"do", + b"dog", + b"doge", + b"bravo", + b"d", // None, witness is a branch partial + b"do\x10", // None, witness is empty branch child + b"halp", // None, witness is branch partial + ] +} + #[test] fn trie_compact_encoding_works_with_ext() { let (root, mut encoded, items) = test_encode_compact::( - vec![ - // "alfa" is at a hash-referenced leaf node. - (b"alfa", &[0; 32]), - // "bravo" is at an inline leaf node. - (b"bravo", b"bravo"), - // "do" is at a hash-referenced branch node. - (b"do", b"verb"), - // "dog" is at an inline leaf node. 
- (b"dog", b"puppy"), - // "doge" is at a hash-referenced leaf node. - (b"doge", &[0; 32]), - // extension node "o" (plus nibble) to next branch. - (b"horse", b"stallion"), - (b"house", b"building"), - ], + test_set(), vec![ b"do", b"dog", @@ -115,55 +205,107 @@ fn trie_compact_encoding_works_with_ext() { b"do\x10", // None, empty branch child b"halp", // None, witness is extension node with non-omitted child ], + EncodeType::None, ); encoded.push(Vec::new()); // Add an extra item to ensure it is not read. - test_decode_compact::(&encoded, items, root, encoded.len() - 1); + test_decode_compact::(&encoded, items, root, encoded.len() - 1, DecodeType::None); } #[test] fn trie_compact_encoding_works_without_ext() { let (root, mut encoded, items) = test_encode_compact::( - vec![ - // "alfa" is at a hash-referenced leaf node. - (b"alfa", &[0; 32]), - // "bravo" is at an inline leaf node. - (b"bravo", b"bravo"), - // "do" is at a hash-referenced branch node. - (b"do", b"verb"), - // "dog" is at an inline leaf node. - (b"dog", b"puppy"), - // "doge" is at a hash-referenced leaf node. - (b"doge", &[0; 32]), - // extension node "o" (plus nibble) to next branch. - (b"horse", b"stallion"), - (b"house", b"building"), - ], - vec![ - b"do", - b"dog", - b"doge", - b"bravo", - b"d", // None, witness is a branch partial - b"do\x10", // None, witness is empty branch child - b"halp", // None, witness is branch partial - ], + test_set(), + test_proof_default(), + EncodeType::None, ); encoded.push(Vec::new()); // Add an extra item to ensure it is not read. 
- test_decode_compact::(&encoded, items, root, encoded.len() - 1); + test_decode_compact::(&encoded, items, root, encoded.len() - 1, DecodeType::None); +} + +#[test] +fn trie_compact_encoding_skip_values() { + let mut to_skip = BTreeSet::new(); + to_skip.extend(&[&b"doge"[..], &b"aaaaaa"[..], &b"do"[..], &b"b"[..]]); + // doge and do will be skip (32 + 4 bytes) + let skip_len = 36; + let (root_no_skip, encoded_no_skip, items_no_skip) = test_encode_compact::( + test_set(), + test_proof_default(), + EncodeType::None, + ); + let (root, encoded, items) = test_encode_compact::( + test_set(), + test_proof_default(), + EncodeType::SkipKeys(&to_skip), + ); + assert_eq!(root_no_skip, root); + assert_eq!(items_no_skip, items); + assert_eq!( + encoded_no_skip.iter().map(|e| e.len()).sum::(), + encoded.iter().map(|e| e.len()).sum::() + skip_len, + ); + let mut encoded = encoded; + encoded.push(Vec::new()); // Add an extra item to ensure it is not read. + let mut skipped_values = BTreeMap::new(); + skipped_values.extend(vec![ + (&b"doge"[..], &[0; 32][..]), + (&b"do"[..], &b"verb"[..]), + (&b"aaaa"[..], &b"dummy"[..]), + (&b"b"[..], &b"dummy"[..]), + ]); + test_decode_compact::( + &encoded, + items, + root, + encoded.len() - 1, + DecodeType::SkippedValues(&skipped_values), + ); +} + +#[test] +fn trie_compact_encoding_skip_all_values() { + let mut values = BTreeMap::new(); + values.extend(test_set()); + let (root_no_skip, _encoded_no_skip, items_no_skip) = test_encode_compact::( + test_set(), + test_proof_default(), + EncodeType::None, + ); + let (root, encoded, items) = test_encode_compact::( + test_set(), + test_proof_default(), + EncodeType::All, + ); + assert_eq!(root_no_skip, root); + assert_eq!(items_no_skip, items); + let mut encoded = encoded; + encoded.push(Vec::new()); // Add an extra item to ensure it is not read. 
+ test_decode_compact::( + &encoded, + items.clone(), + root, + encoded.len() - 1, + DecodeType::Escaped(&values), + ); + test_decode_compact::( + &encoded, + items, + root, + encoded.len() - 1, + DecodeType::SkippedValues(&values), + ); } #[test] fn trie_decoding_fails_with_incomplete_database() { let (_, encoded, _) = test_encode_compact::( - vec![ - (b"alfa", &[0; 32]), - (b"bravo", b"bravo"), - ], + test_set(), vec![ b"alfa", ], + EncodeType::None, ); assert!(encoded.len() > 1); @@ -178,3 +320,56 @@ fn trie_decoding_fails_with_incomplete_database() { _ => panic!("decode was unexpectedly successful"), } } + + +#[test] +fn trie_encode_skip_condition() { + let additional_values = vec![ + (&b"dumy1"[..], &b""[..]), + (&b"dumy1_"[..], &[1; 32]), // force parent to be not inline + (b"dumy2", b"Esc"), + (&b"dumy2_"[..], &[2; 32]), // force parent to be not inline + (b"dumy3", b"Esc"), + (&b"dumy3_"[..], &[3; 32]), // force parent to be not inline + (b"dumy4", b"Esc"), + (&b"dumy4_"[..], &[4; 32]), // force parent to be not inline + (b"dumy5", b"Escd"), + (&b"dumy5_"[..], &[5; 32]), // force parent to be not inline + ]; + let mut test_set = test_set(); + test_set.extend(additional_values.iter().cloned()); + let mut test_proof_default = test_proof_default(); + test_proof_default.extend(additional_values.iter().filter_map(|kv| + if kv.0.len() == 5 { + Some(kv.0) + } else { + None + } + )); + let (_, encoded, _) = test_encode_compact::( + test_set.clone(), + test_proof_default.clone(), + EncodeType::None, + ); + + let none_size = encoded.iter().map(|e| e.len()).sum::(); + let mut keys = Vec::new(); + let (_, encoded, _) = test_encode_compact::( + test_set.clone(), + test_proof_default.clone(), + EncodeType::TresholdCollect(26, &mut keys), + ); + let six_size = encoded.iter().map(|e| e.len()).sum::(); + assert_eq!(keys, vec![b"doge".to_vec()]); + // only one 32 byte value skipped + assert_eq!(none_size, six_size + 32); + let (_, encoded, _) = test_encode_compact::( + 
+ test_set.clone(), + test_proof_default.clone(), + EncodeType::TresholdEscaped(26), + ); + let six_escaped_size = encoded.iter().map(|e| e.len()).sum::<usize>(); + // from additional values: +4 for the escaped empty value and +3 in total + // (+1 each) for the values starting with the escape sequence + assert_eq!(none_size, six_escaped_size + 32 - 4 - 3); +}