diff --git a/data/src/components/vector.rs b/data/src/components/vector.rs index e3f71858b2..3f02a1aa50 100644 --- a/data/src/components/vector.rs +++ b/data/src/components/vector.rs @@ -22,7 +22,9 @@ use perfect_derive::perfect_derive; use range_collections::RangeSet2; use crate::clone::CloneState; +use crate::foldable::EncodeLeaf; use crate::foldable::Fold; +use crate::foldable::FoldLeaf; use crate::foldable::Foldable; use crate::foldable::NodeFold; use crate::foldable::NodeUnfold; @@ -33,7 +35,6 @@ use crate::foldable::seq_tree; use crate::foldable::seq_tree::DepthAdjustedSeqAsTree; use crate::foldable::seq_tree::IndexableSeqAsTree; use crate::hash::Hash; -use crate::hash::HashFold; use crate::hash::PartialHash; use crate::hash::PartialHashFold; use crate::merkle_proof::Deserialiser; @@ -147,13 +148,12 @@ impl Default for Vector { } } -impl> Foldable for Vector { - fn fold(&self, builder: HashFold) -> Hash { +impl> Foldable for Vector { + fn fold(&self, builder: F) -> F::Folded { let mut node = builder.into_node_fold(); let length = self.vector.len(); - let length_node = - Hash::hash_encodable(length as u64).expect("Hashing length should not fail"); + let length_node = EncodeLeaf::new(length as u64, "Serialising length should not fail"); node.add(&length_node); let get_item = |idx: usize| &self.vector[idx]; @@ -164,13 +164,12 @@ impl> Foldable for Vector { } } -impl> Foldable for Vector> { - fn fold(&self, builder: HashFold) -> Hash { +impl> Foldable for Vector> { + fn fold(&self, builder: F) -> F::Folded { let mut node = builder.into_node_fold(); let length = self.vector.unrecorded_len(); - let length_node = - Hash::hash_encodable(length as u64).expect("Hashing length should not fail"); + let length_node = EncodeLeaf::new(length as u64, "Serialising length should not fail"); node.add(&length_node); let get_item = |idx: usize| self.vector.unrecorded_index(idx); diff --git a/data/src/components/vector/tests.rs b/data/src/components/vector/tests.rs index 
6107bd34c6..8af94dd482 100644 --- a/data/src/components/vector/tests.rs +++ b/data/src/components/vector/tests.rs @@ -14,7 +14,6 @@ use proptest::prop_oneof; use proptest::proptest; use proptest::test_runner::TestCaseResult; -use super::NODE_ARITY; use super::Vector; use crate::components::atom::Atom; use crate::components::atom::tests::AtomMutOp; @@ -24,13 +23,9 @@ use crate::components::bytes::BytesMode; use crate::components::bytes::tests::BytesMutOp; use crate::components::bytes::tests::BytesOp; use crate::components::vector::VectorMode; -use crate::foldable::Fold; use crate::foldable::Foldable; -use crate::foldable::NodeFold; use crate::foldable::Unfoldable; -use crate::foldable::seq_tree::IndexableSeqAsTree; use crate::foldable::tests::TestFolder; -use crate::foldable::tests::TestTree; use crate::hash::Hash; use crate::hash::HashFold; use crate::hash::PartialHash; @@ -46,21 +41,6 @@ use crate::mode::Verify; use crate::mode_test; use crate::serialisation::serialise; -impl> Foldable for Vector { - fn fold(&self, builder: TestFolder) -> TestTree { - let length = self.len(); - let length_node = TestTree::Leaf(serialise(length as u64).unwrap()); - - let get_item = |idx: usize| &self[idx]; - let seq_as_tree = IndexableSeqAsTree::new(length, NODE_ARITY, &get_item); - - let mut node = builder.into_node_fold(); - node.add(&length_node); - node.add(&seq_as_tree); - node.done() - } -} - // Test that the Vector doesn't drop any values on construction. mode_test!(len_and_is_empty_match_initial_values, F, { proptest!(|(initial_values in vec(any::(), 0..64))| { diff --git a/data/src/store.rs b/data/src/store.rs index 79ee2e868e..f817dce622 100644 --- a/data/src/store.rs +++ b/data/src/store.rs @@ -4,6 +4,8 @@ //! Content addressable 'blob' store trait and in-memory implementation. 
+pub mod fold; + use std::collections::HashMap; use std::sync::RwLock; @@ -21,7 +23,7 @@ pub trait BlobStore { fn blob_get(&self, key: Hash) -> Result, Self::Error>; /// Store a blob under its hash; should be a no-op if it is already present. - fn blob_set>(&self, blob: HashedData) -> Result<(), Self::Error>; + fn blob_set>(&self, blob: &HashedData) -> Result<(), Self::Error>; /// Remove an item from the store; should be a no-op if it is already absent. fn blob_delete(&self, key: Hash) -> Result<(), Self::Error>; @@ -40,10 +42,7 @@ pub enum InMemoryError { LockPoisoned, } -#[expect( - dead_code, - reason = "Will be used in future PR, see TZX-105 and TZX-106" -)] +#[cfg(test)] impl InMemoryBlobStore { fn new() -> Self { Self(RwLock::new(HashMap::new())) @@ -61,7 +60,7 @@ impl BlobStore for InMemoryBlobStore { } } - fn blob_set>(&self, blob: HashedData) -> Result<(), Self::Error> { + fn blob_set>(&self, blob: &HashedData) -> Result<(), Self::Error> { let mut store = self.0.write().map_err(|_| InMemoryError::LockPoisoned)?; store.insert(blob.hash(), Bytes::copy_from_slice(blob.data())); Ok(()) diff --git a/data/src/store/fold.rs b/data/src/store/fold.rs new file mode 100644 index 0000000000..a43bfb3fbb --- /dev/null +++ b/data/src/store/fold.rs @@ -0,0 +1,225 @@ +// SPDX-FileCopyrightText: 2026 TriliTech +// +// SPDX-License-Identifier: MIT + +//! Implementation of `BlobStoreFold` that allows foldable components to be stored as Merkle trees. + +use std::sync::Arc; + +use super::BlobStore; +use crate::foldable::Fold; +use crate::foldable::FoldLeaf; +use crate::foldable::Foldable; +use crate::foldable::NodeFold; +use crate::hash::Hash; +use crate::hash::HashedData; + +/// A builder type for saving the Merkle tree structure of any `Foldable` type into any `BlobStore`. +pub struct BlobStoreFold { + store: Arc, +} + +/// The node builder type corresponding to `BlobStoreFold`. 
Tracks the concatenated hashes as a +/// byte string, before finally hashing them when the node is `done`. +/// +/// This also has to track the error status of the fold because the `NodeFold` trait defines the +/// method `add` as infallible. In our case, `add` may cause an error because any access of the +/// `BlobStore` may do so. We store that error in the `error` field and subsequently short-circuit +/// on any further calls to `add`, returning the error when `done` is called. +pub struct BlobStoreNodeFold { + store: Arc, + bytes: Vec, + error: Option, +} + +impl Fold for BlobStoreFold { + type Folded = Result; + + type NodeFold = BlobStoreNodeFold; + + fn into_node_fold(self) -> BlobStoreNodeFold { + BlobStoreNodeFold { + store: self.store, + bytes: vec![], + error: None, + } + } +} + +impl FoldLeaf for BlobStoreFold { + fn fold_leaf_raw(self, bytes: &[u8]) -> Result { + let hashed = HashedData::from_data(bytes); + self.store.blob_set(&hashed)?; + Ok(hashed.hash()) + } +} + +impl NodeFold for BlobStoreNodeFold { + type Parent = BlobStoreFold; + + fn add>>(&mut self, child: &T) { + if self.error.is_some() { + return; + } + match child.fold(BlobStoreFold { + store: Arc::clone(&self.store), + }) { + Ok(hash) => { + self.bytes.extend_from_slice(hash.as_ref()); + } + Err(e) => { + self.error = Some(e); + } + }; + } + + fn done(self) -> Result { + if let Some(e) = self.error { + Err(e) + } else { + let hashed = HashedData::from_data(self.bytes); + self.store.blob_set(&hashed)?; + Ok(hashed.hash()) + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::BlobStoreFold; + use crate::components::atom::Atom; + use crate::components::bytes::Bytes; + use crate::components::data_space::DataSpace; + use crate::components::vector::Vector; + use crate::foldable::Foldable; + use crate::hash::Hash; + use crate::hash::HashedData; + use crate::mode::Normal; + use crate::store::BlobStore; + use crate::store::InMemoryBlobStore; + + #[derive(Debug, 
thiserror::Error)] + enum TestError { + #[error("Test error")] + TestError, + } + + struct ErroringBlobStore { + inner: InMemoryBlobStore, + error_hash: Hash, + } + + impl ErroringBlobStore { + fn from_hash(hash: Hash) -> Self { + ErroringBlobStore { + inner: InMemoryBlobStore::new(), + error_hash: hash, + } + } + } + + impl BlobStore for ErroringBlobStore { + type Error = TestError; + + fn blob_get(&self, key: Hash) -> Result, Self::Error> { + Ok(self.inner.blob_get(key).unwrap()) + } + + fn blob_set>(&self, blob: &HashedData) -> Result<(), Self::Error> { + if blob.hash() == self.error_hash { + Err(TestError::TestError) + } else { + self.inner.blob_set(blob).unwrap(); + Ok(()) + } + } + + fn blob_delete(&self, key: Hash) -> Result<(), Self::Error> { + self.inner.blob_delete(key).unwrap(); + Ok(()) + } + } + + type A = Atom; + + fn a(t: T) -> A { + Atom::new(t) + } + + type Data = ( + [(A, A); 4], + Bytes, + (Vector, Normal>, DataSpace, (A, A)), + ); + + #[test] + fn fold_in_memory() { + let data: Data = ( + [(a(1), a(2)), (a(3), a(4)), (a(7), a(8)), (a(9), a(2379))], + Bytes::new(10000), + ( + Vector::new(vec![a(false); 37]), + DataSpace::new(20000), + (a(79), a(false)), + ), + ); + + let store = Arc::new(InMemoryBlobStore::new()); + let folded = data + .fold(BlobStoreFold { + store: Arc::clone(&store), + }) + .unwrap(); + + // Hash agreement between `BlobStoreFold` and `HashFold` + let root_hash = Hash::from_foldable(&data); + assert_eq!(folded, root_hash); + + // a few more hashes we can check are in the store + let hash1 = Hash::hash_encodable(false).unwrap(); + let page_encoding = { + let mut arr = [0u8; 4104]; + // the first eight bytes encode the length, which is 4096, i.e. 
256 * 16 + arr[1] = 16; + arr + }; + let hash2 = Hash::hash_encodable(page_encoding).unwrap(); + let hash3 = Hash::from_foldable(&DataSpace::::new(20000)); + + // 'false' is encoded as [0] + assert_eq!(store.blob_get(hash1).unwrap().as_ref(), [0]); + + // a full page of zeroes + assert_eq!(store.blob_get(hash2).unwrap().as_ref(), page_encoding); + + // the node for a `DataSpace` has two children so hash concatenation is 64 bytes + assert_eq!(store.blob_get(hash3).unwrap().as_ref().len(), 64); + + // the root node has three children so hash concatenation is 96 bytes + assert_eq!(store.blob_get(root_hash).unwrap().as_ref().len(), 96); + } + + #[test] + fn fold_error_from_blob_store() { + let data: (A, A, A) = (a(9), a(2379), a(10)); + let hash1 = Hash::hash_encodable(9u8).unwrap(); + let hash2 = Hash::hash_encodable(2379u32).unwrap(); + let hash3 = Hash::hash_encodable(10u16).unwrap(); + + let error_store = Arc::new(ErroringBlobStore::from_hash(hash2)); + let folded = data.fold(BlobStoreFold { + store: error_store.clone(), + }); + + // fold passes through errors from the blob-store + assert!(folded.is_err()); + + // the first child hash has been stored + assert_eq!(error_store.blob_get(hash1).unwrap().as_ref(), [9]); + + // the final child hash has not been stored (the error caused the fold to short-circuit) + assert!(error_store.inner.blob_get(hash3).is_err()) + } +} diff --git a/pvm/src/storage.rs b/pvm/src/storage.rs index 0c151307d2..1ab7497e8c 100644 --- a/pvm/src/storage.rs +++ b/pvm/src/storage.rs @@ -126,7 +126,7 @@ impl BlobStore for Store { self.load(&key) } - fn blob_set>(&self, blob: HashedData) -> Result<(), Self::Error> { + fn blob_set>(&self, blob: &HashedData) -> Result<(), Self::Error> { let file_name = self.path_of_hash(&blob.hash()); self.write_data_if_new(file_name, blob.data())?; Ok(()) @@ -247,8 +247,8 @@ mod tests { let hash1 = Hash::hash_bytes(data1); let hash2 = Hash::hash_bytes(data2); - store.blob_set(HashedData::from_data(data1)).unwrap(); 
- store.blob_set(HashedData::from_data(data2)).unwrap(); + store.blob_set(&HashedData::from_data(data1)).unwrap(); + store.blob_set(&HashedData::from_data(data2)).unwrap(); assert_eq!(store.blob_get(hash1).unwrap().as_ref(), &[3, 4, 5, 6, 8]); assert_eq!(store.blob_get(hash2).unwrap().as_ref(), &[72, 105]); @@ -261,7 +261,7 @@ mod tests { }; // Both no-ops - store.blob_set(HashedData::from_data(data2)).unwrap(); + store.blob_set(&HashedData::from_data(data2)).unwrap(); store.blob_delete(hash1).unwrap(); assert_eq!(store.blob_get(hash2).unwrap().as_ref(), &[72, 105]);