- Add Hybrid Logical Clock (HLC) for causality tracking across nodes - Implement Merkle tree for efficient diff/sync with BLAKE3 hashing - Add CRDT-aware stores for assertions and votes with vector clocks - Create stemedb-sync crate with anti-entropy and gossip protocols - Add stemedb-rpc crate with gRPC sync service (proto definitions) - Implement SupersessionChain for tracking assertion lifecycles - Add Aphoria application for code analysis/reporting - Add battery11 replication test scaffolding - Fix .gitignore to exclude nested target directories Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
256 lines
8.4 KiB
Rust
256 lines
8.4 KiB
Rust
//! Serialization for Merkle trees using rkyv zero-copy format.
|
|
//!
|
|
//! # Design
|
|
//!
|
|
//! Merkle trees need to be persisted to disk for crash recovery and
|
|
//! transferred over the network for sync. This module provides:
|
|
//!
|
|
//! - **Zero-copy serialization**: Uses rkyv for efficient encoding
|
|
//! - **Validation**: Checks archived data before deserialization
|
|
//! - **Consistency**: Uses same helpers as other StemeDB crates
|
|
//!
|
|
//! # Use Cases
|
|
//!
|
|
//! 1. **Crash recovery**: Persist tree to disk, restore after restart
|
|
//! 2. **Network sync**: Serialize tree state for transfer to peers
|
|
//! 3. **Checkpointing**: Save tree snapshots for fast bootstrap
|
|
//!
|
|
//! # Example
|
|
//!
|
|
//! ```
|
|
//! use stemedb_merkle::{MerkleTree, serialize::serialize_tree, serialize::deserialize_tree};
|
|
//!
|
|
//! let mut tree = MerkleTree::new();
|
|
//! tree.insert([1u8; 32]).expect("insert");
|
|
//! tree.insert([2u8; 32]).expect("insert");
|
|
//!
|
|
//! // Serialize to bytes
|
|
//! let bytes = serialize_tree(&tree).expect("serialize");
|
|
//!
|
|
//! // Deserialize back
|
|
//! let recovered = deserialize_tree(&bytes).expect("deserialize");
|
|
//! assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
|
|
//! assert_eq!(tree.len(), recovered.len());
|
|
//! ```
|
|
//!
|
|
//! # Performance
|
|
//!
|
|
//! - Serialization: O(N) where N is number of leaves
|
|
//! - Deserialization: O(N) with validation
|
|
//! - Memory: Tree size + 4KB scratch buffer
|
|
|
|
use crate::tree::{Hash, MerkleTree};
|
|
use rkyv::ser::serializers::AllocSerializer;
|
|
use rkyv::ser::Serializer;
|
|
use rkyv::Deserialize as RkyvDeserialize;
|
|
use thiserror::Error;
|
|
use tracing::{debug, instrument};
|
|
|
|
/// Default scratch buffer size, in bytes, used while serializing a tree.
///
/// This value is supplied as the const generic argument of the rkyv
/// `AllocSerializer` constructed in `serialize_tree`. 4KB is sufficient for
/// most trees. Larger trees will trigger additional allocation but the
/// operation will still succeed.
const DEFAULT_SCRATCH_SIZE: usize = 4096;
|
|
|
|
/// Errors that can occur during serialization/deserialization.
|
|
#[derive(Debug, Error)]
|
|
pub enum SerializeError {
|
|
/// Failed to serialize the tree.
|
|
#[error("Serialization error: {0}")]
|
|
Serialization(String),
|
|
|
|
/// Failed to validate or deserialize the archived data.
|
|
#[error("Deserialization error: {0}")]
|
|
Deserialization(String),
|
|
}
|
|
|
|
/// Serialize a Merkle tree to bytes using rkyv zero-copy serialization.
|
|
///
|
|
/// This serializes only the leaf hashes. The tree structure and cached
|
|
/// root are rebuilt during deserialization.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```
|
|
/// use stemedb_merkle::{MerkleTree, serialize::serialize_tree};
|
|
///
|
|
/// let mut tree = MerkleTree::new();
|
|
/// tree.insert([1u8; 32]).expect("insert");
|
|
/// tree.insert([2u8; 32]).expect("insert");
|
|
///
|
|
/// let bytes = serialize_tree(&tree).expect("serialize");
|
|
/// assert!(!bytes.is_empty());
|
|
/// ```
|
|
#[instrument(skip(tree), fields(leaf_count = tree.len()))]
|
|
pub fn serialize_tree(tree: &MerkleTree) -> Result<Vec<u8>, SerializeError> {
|
|
debug!("Serializing Merkle tree");
|
|
|
|
// Only serialize the leaves - we'll rebuild the tree on deserialization
|
|
let leaves: Vec<Hash> = tree.leaves().to_vec();
|
|
|
|
let mut serializer = AllocSerializer::<DEFAULT_SCRATCH_SIZE>::default();
|
|
serializer
|
|
.serialize_value(&leaves)
|
|
.map_err(|e| SerializeError::Serialization(e.to_string()))?;
|
|
|
|
let bytes = serializer.into_serializer().into_inner().to_vec();
|
|
debug!(bytes_len = bytes.len(), "Merkle tree serialized");
|
|
Ok(bytes)
|
|
}
|
|
|
|
/// Deserialize bytes back to a Merkle tree using rkyv zero-copy deserialization.
|
|
///
|
|
/// This validates the archived data, deserializes the leaves, and rebuilds
|
|
/// the tree structure (including cached root).
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```
|
|
/// use stemedb_merkle::{MerkleTree, serialize::serialize_tree, serialize::deserialize_tree};
|
|
///
|
|
/// let mut tree = MerkleTree::new();
|
|
/// tree.insert([1u8; 32]).expect("insert");
|
|
/// tree.insert([2u8; 32]).expect("insert");
|
|
///
|
|
/// let bytes = serialize_tree(&tree).expect("serialize");
|
|
/// let recovered = deserialize_tree(&bytes).expect("deserialize");
|
|
///
|
|
/// assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
|
|
/// assert_eq!(tree.len(), recovered.len());
|
|
/// ```
|
|
#[instrument(skip(data), fields(bytes_len = data.len()))]
|
|
pub fn deserialize_tree(data: &[u8]) -> Result<MerkleTree, SerializeError> {
|
|
debug!("Deserializing Merkle tree");
|
|
|
|
// Deserialize the leaves vector
|
|
let archived = rkyv::check_archived_root::<Vec<Hash>>(data)
|
|
.map_err(|e| SerializeError::Deserialization(e.to_string()))?;
|
|
|
|
let leaves: Vec<Hash> = RkyvDeserialize::deserialize(archived, &mut rkyv::Infallible)
|
|
.map_err(|e| SerializeError::Deserialization(e.to_string()))?;
|
|
|
|
// Rebuild the tree from leaves
|
|
let mut tree = MerkleTree::new();
|
|
for hash in leaves {
|
|
tree.insert(hash).map_err(|e| SerializeError::Deserialization(e.to_string()))?;
|
|
}
|
|
|
|
debug!(leaf_count = tree.len(), "Merkle tree deserialized");
|
|
Ok(tree)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tree by inserting `hashes` in iteration order.
    fn build_tree(hashes: impl IntoIterator<Item = [u8; 32]>) -> MerkleTree {
        let mut tree = MerkleTree::new();
        for hash in hashes {
            tree.insert(hash).expect("insert");
        }
        tree
    }

    /// Serializes `tree`, deserializes the result, and returns both the
    /// encoded bytes and the recovered tree.
    fn roundtrip(tree: &MerkleTree) -> (Vec<u8>, MerkleTree) {
        let bytes = serialize_tree(tree).expect("serialize");
        let recovered = deserialize_tree(&bytes).expect("deserialize");
        (bytes, recovered)
    }

    #[test]
    fn test_serialize_deserialize_empty_tree() {
        let original = MerkleTree::new();

        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 0);
        assert!(recovered.is_empty());
    }

    #[test]
    fn test_serialize_deserialize_single_leaf() {
        let original = build_tree([[1u8; 32]]);

        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 1);
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
    }

    #[test]
    fn test_serialize_deserialize_multiple_leaves() {
        let original = build_tree([[1u8; 32], [2u8; 32], [3u8; 32], [4u8; 32]]);

        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 4);
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );

        // Leaf order and content must survive the round trip.
        assert_eq!(original.leaves(), recovered.leaves());
    }

    #[test]
    fn test_serialize_deserialize_large_tree() {
        // 100 distinct leaves that differ only in their first byte.
        let original = build_tree((0u8..100).map(|i| {
            let mut hash = [0u8; 32];
            hash[0] = i;
            hash
        }));

        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 100);
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
    }

    #[test]
    fn test_deserialize_invalid_data() {
        let garbage: Vec<u8> = vec![0, 1, 2, 3, 4, 5];

        assert!(deserialize_tree(&garbage).is_err());
    }

    #[test]
    fn test_deserialize_empty_data() {
        let empty: Vec<u8> = Vec::new();

        assert!(deserialize_tree(&empty).is_err());
    }

    #[test]
    fn test_roundtrip_preserves_structure() {
        let hashes: Vec<[u8; 32]> = (0u8..10).map(|i| [i; 32]).collect();
        let original = build_tree(hashes.iter().copied());

        let (_, recovered) = roundtrip(&original);

        // Every observable property must match.
        assert_eq!(original.len(), recovered.len());
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
        assert_eq!(original.leaves(), recovered.leaves());
        assert_eq!(original.is_empty(), recovered.is_empty());
    }

    #[test]
    fn test_multiple_serialization_roundtrips() {
        let original = build_tree([[1u8; 32]]);

        let (first_bytes, first_tree) = roundtrip(&original);
        let (second_bytes, second_tree) = roundtrip(&first_tree);

        // Repeated round trips must be stable in both root and encoding.
        assert_eq!(
            original.root().expect("root"),
            first_tree.root().expect("root")
        );
        assert_eq!(
            original.root().expect("root"),
            second_tree.root().expect("root")
        );
        assert_eq!(first_bytes, second_bytes);
    }
}
|