stemedb/crates/stemedb-merkle/src/serialize.rs
jordan 2b0923f20e feat: Distributed replication foundation (Phase 6A) - HLC, Merkle trees, CRDT stores, sync protocol
- Add Hybrid Logical Clock (HLC) for causality tracking across nodes
- Implement Merkle tree for efficient diff/sync with BLAKE3 hashing
- Add CRDT-aware stores for assertions and votes with vector clocks
- Create stemedb-sync crate with anti-entropy and gossip protocols
- Add stemedb-rpc crate with gRPC sync service (proto definitions)
- Implement SupersessionChain for tracking assertion lifecycles
- Add Aphoria application for code analysis/reporting
- Add battery11 replication test scaffolding
- Fix .gitignore to exclude nested target directories

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 19:31:54 -07:00

256 lines
8.4 KiB
Rust

//! Serialization for Merkle trees using rkyv zero-copy format.
//!
//! # Design
//!
//! Merkle trees need to be persisted to disk for crash recovery and
//! transferred over the network for sync. This module provides:
//!
//! - **Zero-copy serialization**: Uses rkyv for efficient encoding
//! - **Validation**: Checks archived data before deserialization
//! - **Consistency**: Uses the same helpers as other StemeDB crates
//!
//! # Use Cases
//!
//! 1. **Crash recovery**: Persist tree to disk, restore after restart
//! 2. **Network sync**: Serialize tree state for transfer to peers
//! 3. **Checkpointing**: Save tree snapshots for fast bootstrap
//!
//! # Example
//!
//! ```
//! use stemedb_merkle::{MerkleTree, serialize::serialize_tree, serialize::deserialize_tree};
//!
//! let mut tree = MerkleTree::new();
//! tree.insert([1u8; 32]).expect("insert");
//! tree.insert([2u8; 32]).expect("insert");
//!
//! // Serialize to bytes
//! let bytes = serialize_tree(&tree).expect("serialize");
//!
//! // Deserialize back
//! let recovered = deserialize_tree(&bytes).expect("deserialize");
//! assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
//! assert_eq!(tree.len(), recovered.len());
//! ```
//!
//! # Performance
//!
//! - Serialization: O(N), where N is the number of leaves
//! - Deserialization: O(N) with validation
//! - Memory: Tree size + 4KB scratch buffer
use crate::tree::{Hash, MerkleTree};
use rkyv::ser::serializers::AllocSerializer;
use rkyv::ser::Serializer;
use rkyv::Deserialize as RkyvDeserialize;
use thiserror::Error;
use tracing::{debug, instrument};
/// Default scratch buffer size, in bytes, for serialization.
///
/// Passed as the const generic parameter of `AllocSerializer` in
/// `serialize_tree`.
///
/// 4KB is sufficient for most trees. Larger trees will trigger
/// reallocation but the operation will still succeed.
const DEFAULT_SCRATCH_SIZE: usize = 4096;
/// Errors that can occur during serialization/deserialization.
///
/// Both variants carry the underlying failure rendered to a `String`
/// (via `to_string()` at the call sites), so the original error type is
/// not preserved — only its message.
#[derive(Debug, Error)]
pub enum SerializeError {
/// Failed to serialize the tree.
///
/// Returned by `serialize_tree` when the rkyv serializer reports an
/// error while writing the leaf vector.
#[error("Serialization error: {0}")]
Serialization(String),
/// Failed to validate or deserialize the archived data.
///
/// Returned by `deserialize_tree` when archive validation fails, when
/// the leaf vector cannot be deserialized, or when re-inserting a leaf
/// into the rebuilt tree fails.
#[error("Deserialization error: {0}")]
Deserialization(String),
}
/// Encode a Merkle tree into a byte buffer with rkyv.
///
/// Only the leaf hashes are written. Interior nodes and the cached root
/// are not part of the payload; `deserialize_tree` reconstructs them by
/// re-inserting the leaves.
///
/// # Errors
///
/// Returns [`SerializeError::Serialization`] carrying the serializer's
/// message if rkyv fails to write the leaf vector.
///
/// # Example
///
/// ```
/// use stemedb_merkle::{MerkleTree, serialize::serialize_tree};
///
/// let mut tree = MerkleTree::new();
/// tree.insert([1u8; 32]).expect("insert");
/// tree.insert([2u8; 32]).expect("insert");
///
/// let bytes = serialize_tree(&tree).expect("serialize");
/// assert!(!bytes.is_empty());
/// ```
#[instrument(skip(tree), fields(leaf_count = tree.len()))]
pub fn serialize_tree(tree: &MerkleTree) -> Result<Vec<u8>, SerializeError> {
    debug!("Serializing Merkle tree");

    // The payload is just an owned copy of the leaves; everything else is
    // derived data that gets recomputed on load.
    let leaf_hashes: Vec<Hash> = tree.leaves().to_vec();

    let mut writer = AllocSerializer::<DEFAULT_SCRATCH_SIZE>::default();
    if let Err(err) = writer.serialize_value(&leaf_hashes) {
        return Err(SerializeError::Serialization(err.to_string()));
    }

    // Unwrap the composite serializer down to its output buffer and copy it
    // into a plain `Vec<u8>` for the caller.
    let encoded = writer.into_serializer().into_inner().to_vec();
    debug!(bytes_len = encoded.len(), "Merkle tree serialized");
    Ok(encoded)
}
/// Deserialize bytes back to a Merkle tree using rkyv.
///
/// The input is copied into an aligned buffer, validated as an archived
/// leaf vector, deserialized, and the tree (including its cached root) is
/// rebuilt by re-inserting every leaf in order.
///
/// `data` may have any alignment: callers can pass a sub-slice of a larger
/// network or file buffer directly.
///
/// # Errors
///
/// Returns [`SerializeError::Deserialization`] if the bytes are not a valid
/// archive of the leaf vector, if the leaves cannot be deserialized, or if
/// inserting a recovered leaf into the new tree fails.
///
/// # Example
///
/// ```
/// use stemedb_merkle::{MerkleTree, serialize::serialize_tree, serialize::deserialize_tree};
///
/// let mut tree = MerkleTree::new();
/// tree.insert([1u8; 32]).expect("insert");
/// tree.insert([2u8; 32]).expect("insert");
///
/// let bytes = serialize_tree(&tree).expect("serialize");
/// let recovered = deserialize_tree(&bytes).expect("deserialize");
///
/// assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
/// assert_eq!(tree.len(), recovered.len());
/// ```
#[instrument(skip(data), fields(bytes_len = data.len()))]
pub fn deserialize_tree(data: &[u8]) -> Result<MerkleTree, SerializeError> {
    debug!("Deserializing Merkle tree");

    // rkyv's validator rejects archives whose backing memory is not suitably
    // aligned, and an arbitrary `&[u8]` carries no alignment guarantee.
    // Copying into an `AlignedVec` keeps valid payloads from being refused
    // just because of where the caller's buffer happens to start.
    let mut aligned = rkyv::AlignedVec::with_capacity(data.len());
    aligned.extend_from_slice(data);

    // Validate the archive before touching its contents.
    let archived = rkyv::check_archived_root::<Vec<Hash>>(aligned.as_slice())
        .map_err(|e| SerializeError::Deserialization(e.to_string()))?;
    let leaves: Vec<Hash> = RkyvDeserialize::deserialize(archived, &mut rkyv::Infallible)
        .map_err(|e| SerializeError::Deserialization(e.to_string()))?;

    // Rebuild the tree from leaves; insertion order is the serialized order.
    let mut tree = MerkleTree::new();
    for hash in leaves {
        tree.insert(hash)
            .map_err(|e| SerializeError::Deserialization(e.to_string()))?;
    }

    debug!(leaf_count = tree.len(), "Merkle tree deserialized");
    Ok(tree)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tree by inserting `leaves` in iteration order.
    fn tree_of(leaves: impl IntoIterator<Item = [u8; 32]>) -> MerkleTree {
        let mut tree = MerkleTree::new();
        for leaf in leaves {
            tree.insert(leaf).expect("insert");
        }
        tree
    }

    /// Serializes `tree`, decodes the bytes again, and returns both the
    /// encoded bytes and the recovered tree.
    fn roundtrip(tree: &MerkleTree) -> (Vec<u8>, MerkleTree) {
        let bytes = serialize_tree(tree).expect("serialize");
        let recovered = deserialize_tree(&bytes).expect("deserialize");
        (bytes, recovered)
    }

    #[test]
    fn test_serialize_deserialize_empty_tree() {
        let (_, recovered) = roundtrip(&MerkleTree::new());

        assert_eq!(recovered.len(), 0);
        assert!(recovered.is_empty());
    }

    #[test]
    fn test_serialize_deserialize_single_leaf() {
        let original = tree_of([[1u8; 32]]);
        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 1);
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
    }

    #[test]
    fn test_serialize_deserialize_multiple_leaves() {
        let original = tree_of([[1u8; 32], [2u8; 32], [3u8; 32], [4u8; 32]]);
        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 4);
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
        // Leaf order and content must survive the trip.
        assert_eq!(original.leaves(), recovered.leaves());
    }

    #[test]
    fn test_serialize_deserialize_large_tree() {
        // 100 distinct leaves that differ only in their first byte.
        let original = tree_of((0u8..100).map(|first_byte| {
            let mut leaf = [0u8; 32];
            leaf[0] = first_byte;
            leaf
        }));
        let (_, recovered) = roundtrip(&original);

        assert_eq!(recovered.len(), 100);
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
    }

    #[test]
    fn test_deserialize_invalid_data() {
        let garbage = vec![0u8, 1, 2, 3, 4, 5];

        assert!(deserialize_tree(&garbage).is_err());
    }

    #[test]
    fn test_deserialize_empty_data() {
        let empty: Vec<u8> = Vec::new();

        assert!(deserialize_tree(&empty).is_err());
    }

    #[test]
    fn test_roundtrip_preserves_structure() {
        let original = tree_of((0u8..10).map(|fill| [fill; 32]));
        let (_, recovered) = roundtrip(&original);

        // Every observable property should match.
        assert_eq!(original.len(), recovered.len());
        assert_eq!(
            original.root().expect("root"),
            recovered.root().expect("root")
        );
        assert_eq!(original.leaves(), recovered.leaves());
        assert_eq!(original.is_empty(), recovered.is_empty());
    }

    #[test]
    fn test_multiple_serialization_roundtrips() {
        let original = tree_of([[1u8; 32]]);

        let (first_bytes, first) = roundtrip(&original);
        let (second_bytes, second) = roundtrip(&first);

        // Repeated roundtrips must be stable in both root and encoding.
        assert_eq!(original.root().expect("root"), first.root().expect("root"));
        assert_eq!(original.root().expect("root"), second.root().expect("root"));
        assert_eq!(first_bytes, second_bytes);
    }
}