- Add Hybrid Logical Clock (HLC) for causality tracking across nodes - Implement Merkle tree for efficient diff/sync with BLAKE3 hashing - Add CRDT-aware stores for assertions and votes with vector clocks - Create stemedb-sync crate with anti-entropy and gossip protocols - Add stemedb-rpc crate with gRPC sync service (proto definitions) - Implement SupersessionChain for tracking assertion lifecycles - Add Aphoria application for code analysis/reporting - Add battery11 replication test scaffolding - Fix .gitignore to exclude nested target directories Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
215 lines
7.0 KiB
Rust
215 lines
7.0 KiB
Rust
//! Merkle tree manager with persistence.
|
|
//!
|
|
//! Manages the Merkle tree for assertion hashes with periodic checkpointing
|
|
//! to the KV store for crash recovery.
|
|
//!
|
|
//! # Persistence
|
|
//!
|
|
//! The tree is serialized and stored at key `\x00MERKLE_CHECKPOINT`.
|
|
//! On startup, the manager attempts to load from this checkpoint.
|
|
//! If not found or corrupt, it starts from an empty tree; call `rebuild_from_hashes` to repopulate it.
|
|
//!
|
|
//! # Thread Safety
|
|
//!
|
|
//! All operations are protected by an RwLock, allowing concurrent reads
|
|
//! but exclusive writes.
|
|
|
|
use crate::error::{Result, SyncError};
|
|
use std::sync::Arc;
|
|
use stemedb_merkle::serialize::{deserialize_tree, serialize_tree};
|
|
use stemedb_merkle::{Hash, MerkleTree};
|
|
use stemedb_storage::KVStore;
|
|
use tokio::sync::RwLock;
|
|
use tracing::{debug, info, instrument, warn};
|
|
|
|
/// KV-store key under which the serialized Merkle tree checkpoint is written
/// and from which it is read back on load.
const MERKLE_CHECKPOINT_KEY: &[u8] = b"\x00MERKLE_CHECKPOINT";
|
|
|
|
/// Manages a Merkle tree with persistence.
|
|
pub struct MerkleTreeManager<S> {
|
|
tree: RwLock<MerkleTree>,
|
|
store: Arc<S>,
|
|
}
|
|
|
|
impl<S: KVStore> MerkleTreeManager<S> {
|
|
/// Load the Merkle tree from checkpoint, or create a new empty tree.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `store` - KV store for persistence
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// A manager with the tree loaded from checkpoint if available.
|
|
#[instrument(skip(store))]
|
|
pub async fn load_or_create(store: Arc<S>) -> Result<Self> {
|
|
let tree = match store.get(MERKLE_CHECKPOINT_KEY).await? {
|
|
Some(data) => match deserialize_tree(&data) {
|
|
Ok(tree) => {
|
|
info!(leaf_count = tree.len(), "Loaded Merkle tree from checkpoint");
|
|
tree
|
|
}
|
|
Err(e) => {
|
|
warn!(error = %e, "Failed to deserialize Merkle checkpoint, starting fresh");
|
|
MerkleTree::new()
|
|
}
|
|
},
|
|
None => {
|
|
debug!("No Merkle checkpoint found, starting with empty tree");
|
|
MerkleTree::new()
|
|
}
|
|
};
|
|
|
|
Ok(Self { tree: RwLock::new(tree), store })
|
|
}
|
|
|
|
/// Insert a hash into the Merkle tree.
|
|
///
|
|
/// This operation does NOT automatically checkpoint. Call `checkpoint()`
|
|
/// periodically to persist the tree.
|
|
#[instrument(skip(self, hash), fields(hash = %hex::encode(&hash[..8])))]
|
|
pub async fn insert(&self, hash: Hash) -> Result<()> {
|
|
let mut tree = self.tree.write().await;
|
|
tree.insert(hash)?;
|
|
debug!(leaf_count = tree.len(), "Inserted hash into Merkle tree");
|
|
Ok(())
|
|
}
|
|
|
|
/// Get the current Merkle root.
|
|
///
|
|
/// Returns `None` if the tree is empty.
|
|
pub async fn root(&self) -> Result<Option<Hash>> {
|
|
let tree = self.tree.read().await;
|
|
match tree.root() {
|
|
Ok(root) => Ok(Some(root)),
|
|
Err(stemedb_merkle::TreeError::EmptyTree) => Ok(None),
|
|
Err(e) => Err(SyncError::Merkle(e.to_string())),
|
|
}
|
|
}
|
|
|
|
/// Get the number of leaves in the tree.
|
|
pub async fn len(&self) -> usize {
|
|
self.tree.read().await.len()
|
|
}
|
|
|
|
/// Check if the tree is empty.
|
|
pub async fn is_empty(&self) -> bool {
|
|
self.tree.read().await.is_empty()
|
|
}
|
|
|
|
/// Get all leaf hashes.
|
|
///
|
|
/// Used for diff operations during anti-entropy sync.
|
|
pub async fn leaves(&self) -> Vec<Hash> {
|
|
self.tree.read().await.leaves().to_vec()
|
|
}
|
|
|
|
/// Checkpoint the tree to persistent storage.
|
|
///
|
|
/// Should be called periodically (e.g., every 5 minutes) to ensure
|
|
/// fast recovery after crash.
|
|
#[instrument(skip(self))]
|
|
pub async fn checkpoint(&self) -> Result<()> {
|
|
let tree = self.tree.read().await;
|
|
let data = serialize_tree(&tree)?;
|
|
|
|
self.store.put(MERKLE_CHECKPOINT_KEY, &data).await?;
|
|
info!(leaf_count = tree.len(), bytes = data.len(), "Checkpointed Merkle tree");
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Rebuild the tree from a list of hashes.
|
|
///
|
|
/// Used during recovery if the checkpoint is corrupt or missing.
|
|
#[instrument(skip(self, hashes), fields(hash_count = hashes.len()))]
|
|
pub async fn rebuild_from_hashes(&self, hashes: Vec<Hash>) -> Result<()> {
|
|
let mut tree = self.tree.write().await;
|
|
*tree = MerkleTree::new();
|
|
|
|
for hash in hashes {
|
|
tree.insert(hash)?;
|
|
}
|
|
|
|
info!(leaf_count = tree.len(), "Rebuilt Merkle tree from hashes");
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use stemedb_storage::HybridStore;
    use tempfile::{tempdir, TempDir};

    /// Open a store inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the store because
    /// dropping it deletes the directory. Callers must keep it bound (e.g. as
    /// `_dir`, not `_`) for as long as the store is in use.
    fn create_test_store() -> (TempDir, Arc<HybridStore>) {
        let dir = tempdir().expect("create temp dir");
        let store = Arc::new(HybridStore::open(dir.path()).expect("open store"));
        (dir, store)
    }

    /// A freshly created manager with no checkpoint is empty and has no root.
    #[tokio::test]
    async fn test_empty_tree() {
        let (_dir, store) = create_test_store();
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");

        assert!(manager.is_empty().await);
        assert_eq!(manager.len().await, 0);
        assert!(manager.root().await.expect("root").is_none());
    }

    /// Inserting hashes grows the tree and makes a root available.
    #[tokio::test]
    async fn test_insert_and_root() {
        let (_dir, store) = create_test_store();
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");

        manager.insert([1u8; 32]).await.expect("insert");
        manager.insert([2u8; 32]).await.expect("insert");

        assert_eq!(manager.len().await, 2);
        assert!(!manager.is_empty().await);
        assert!(manager.root().await.expect("root").is_some());
    }

    /// A checkpoint written by one manager is picked up by a manager opened
    /// later on the same path.
    #[tokio::test]
    async fn test_checkpoint_and_restore() {
        let dir = tempdir().expect("create temp dir");
        let path = dir.path().to_path_buf();

        // Create and populate
        {
            let store = Arc::new(HybridStore::open(&path).expect("open store"));
            let manager = MerkleTreeManager::load_or_create(store).await.expect("create");

            manager.insert([1u8; 32]).await.expect("insert");
            manager.insert([2u8; 32]).await.expect("insert");
            manager.insert([3u8; 32]).await.expect("insert");

            manager.checkpoint().await.expect("checkpoint");
        }

        // Reopen and verify
        {
            let store = Arc::new(HybridStore::open(&path).expect("open store"));
            let manager = MerkleTreeManager::load_or_create(store).await.expect("create");

            assert_eq!(manager.len().await, 3);
            let leaves = manager.leaves().await;
            assert_eq!(leaves.len(), 3);
            assert_eq!(leaves[0], [1u8; 32]);
            assert_eq!(leaves[1], [2u8; 32]);
            assert_eq!(leaves[2], [3u8; 32]);
        }
    }

    /// Rebuilding an empty manager from a list of hashes yields one leaf per hash.
    #[tokio::test]
    async fn test_rebuild_from_hashes() {
        let (_dir, store) = create_test_store();
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");

        let hashes = vec![[1u8; 32], [2u8; 32], [3u8; 32]];
        manager.rebuild_from_hashes(hashes).await.expect("rebuild");

        assert_eq!(manager.len().await, 3);
    }

    /// Rebuilding discards whatever the tree held before.
    #[tokio::test]
    async fn test_rebuild_replaces_existing_contents() {
        let (_dir, store) = create_test_store();
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");

        manager.insert([9u8; 32]).await.expect("insert");

        let hashes = vec![[1u8; 32], [2u8; 32], [3u8; 32]];
        manager.rebuild_from_hashes(hashes.clone()).await.expect("rebuild");

        assert_eq!(manager.len().await, 3);
        assert_eq!(manager.leaves().await, hashes);
    }
}
|