feat: Distributed replication foundation (Phase 6A) - HLC, Merkle trees, CRDT stores, sync protocol

- Add Hybrid Logical Clock (HLC) for causality tracking across nodes - Implement Merkle tree for efficient diff/sync with BLAKE3 hashing - Add CRDT-aware stores for assertions and votes with vector clocks - Create stemedb-sync crate with anti-entropy and gossip protocols - Add stemedb-rpc crate with gRPC sync service (proto definitions) - Implement SupersessionChain for tracking assertion lifecycles - Add Aphoria application for code analysis/reporting - Add battery11 replication test scaffolding - Fix .gitignore to exclude nested target directories Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 19:31:54 -07:00 · 2026-02-02 19:31:54 -07:00 · 2b0923f20e
commit 2b0923f20e
parent 137a588ed0
60 changed files with 7366 additions and 3 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,5 @@
 # Rust
-/target/
+**/target/
 **/*.rs.bk
 Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -8,6 +8,9 @@ members = [
    "crates/stemedb-lens",
    "crates/stemedb-sim",
    "crates/stemedb-api",
    "crates/stemedb-merkle",
    "crates/stemedb-rpc",
    "crates/stemedb-sync",
 ]
 resolver = "2"
--- a/applications/aphoria/Cargo.toml
+++ b/applications/aphoria/Cargo.toml
@ -0,0 +1,79 @@
 [package]
 name = "aphoria"
 version = "0.1.0"
 edition = "2021"
 description = "A code-level truth linter powered by Episteme"
 authors = ["Orchard9"]
 license = "MIT"
 # Standalone crate (not part of workspace)
 [workspace]
 [[bin]]
 name = "aphoria"
 path = "src/main.rs"
 [lib]
 name = "aphoria"
 path = "src/lib.rs"
 # Match workspace lint configuration
 [lints.rust]
 unsafe_code = "forbid"
 missing_docs = "warn"
 [lints.clippy]
 unwrap_used = "deny"
 expect_used = "deny"
 panic = "deny"
 print_stdout = "warn"  # CLI uses println for user output
 print_stderr = "warn"
 [dependencies]
 # StemeDB dependencies (relative paths from applications/aphoria/)
 stemedb-core = { path = "../../crates/stemedb-core" }
 stemedb-storage = { path = "../../crates/stemedb-storage" }
 stemedb-ingest = { path = "../../crates/stemedb-ingest" }
 stemedb-query = { path = "../../crates/stemedb-query" }
 # CLI
 clap = { version = "4.5", features = ["derive"] }
 # Async runtime
 tokio = { version = "1", features = ["full"] }
 # File walking
 ignore = "0.4"
 # Pattern matching
 regex = "1.10"
 # Serialization
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 toml = "0.8"
 # Output formatting
 comfy-table = "7.1"
 # Cryptography
 ed25519-dalek = { version = "2.1", features = ["rand_core"] }
 blake3 = "1.5"
 rand = "0.8"
 # Error handling
 thiserror = "1.0"
 # Platform directories
 dirs = "5.0"
 # Logging
 tracing = "0.1"
 tracing-subscriber = "0.3"
 # rkyv for zero-copy (consistent with stemedb)
 rkyv = { version = "0.7", features = ["validation"] }
 bytecheck = "0.6"
 [dev-dependencies]
 tempfile = "3.10"
--- a/applications/aphoria/src/config.rs
+++ b/applications/aphoria/src/config.rs
@ -0,0 +1,260 @@
 //! Configuration parsing for Aphoria.
 use std::path::{Path, PathBuf};
 use serde::Deserialize;
 use crate::AphoriaError;
 /// Top-level Aphoria configuration.
 ///
 /// Loaded from `aphoria.toml` at the project root.
 ///
 /// Each field corresponds to a TOML table of the same name. Because of the
 /// container-level `#[serde(default)]`, a table that is absent from the file
 /// is filled in from that section's `Default` implementation.
 #[derive(Debug, Clone, Default, Deserialize)]
 #[serde(default)]
 pub struct AphoriaConfig {
    /// Project settings (`[project]`).
    pub project: ProjectConfig,
    /// Episteme instance settings (`[episteme]`).
    pub episteme: EpistemeConfig,
    /// Conflict threshold settings (`[thresholds]`).
    pub thresholds: ThresholdConfig,
    /// Extractor settings (`[extractors]`).
    pub extractors: ExtractorConfig,
    /// Scan settings (`[scan]`).
    pub scan: ScanConfig,
    /// Alias suggestion settings (`[aliases]`).
    pub aliases: AliasConfig,
 }
 impl AphoriaConfig {
    /// Load configuration from a TOML file.
    ///
    /// The file is read in a single step and a "not found" failure is mapped
    /// afterwards, so there is no window between an existence check and the
    /// read in which the file could disappear.
    ///
    /// # Errors
    ///
    /// * [`AphoriaError::ConfigNotFound`] if `path` does not exist.
    /// * [`AphoriaError::Io`] for any other failure while reading the file.
    /// * [`AphoriaError::ConfigParse`] if the content is not valid TOML for
    ///   this configuration schema.
    pub fn from_file(path: &Path) -> Result<Self, AphoriaError> {
        let content = match std::fs::read_to_string(path) {
            Ok(content) => content,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                return Err(AphoriaError::ConfigNotFound(path.to_path_buf()));
            }
            Err(err) => return Err(AphoriaError::Io(err)),
        };
        let config: AphoriaConfig = toml::from_str(&content)?;
        Ok(config)
    }
 }
 /// Project identification settings.
 ///
 /// Both fields are optional in the TOML file; an omitted field deserializes
 /// to `None`.
 #[derive(Debug, Clone, Default, Deserialize)]
 #[serde(default)]
 pub struct ProjectConfig {
    /// Project name (auto-detected if not specified).
    pub name: Option<String>,
    /// Primary language (auto-detected if not specified).
    pub language: Option<String>,
 }
 /// Episteme instance configuration.
 ///
 /// Fields missing from the TOML file fall back to the values produced by
 /// this type's `Default` implementation.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct EpistemeConfig {
    /// Path to local Episteme data directory.
    pub data_dir: PathBuf,
    /// Remote Episteme URL (future feature).
    pub url: Option<String>,
 }
 impl Default for EpistemeConfig {
    fn default() -> Self {
        Self { data_dir: dirs_default_data_dir(), url: None }
    }
 }
 /// Conflict threshold configuration.
 ///
 /// Fields missing from the TOML file fall back to the values produced by
 /// this type's `Default` implementation.
 // NOTE(review): nothing visible in this file validates that `flag <= block`
 // or that either value lies in a particular range — confirm where (or
 // whether) that is enforced.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct ThresholdConfig {
    /// Conflict score at or above which to BLOCK.
    pub block: f32,
    /// Conflict score at or above which to FLAG.
    pub flag: f32,
 }
 impl Default for ThresholdConfig {
    fn default() -> Self {
        Self { block: 0.7, flag: 0.4 }
    }
 }
 /// Extractor configuration.
 ///
 /// Fields missing from the TOML file fall back to the values produced by
 /// this type's `Default` implementation.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct ExtractorConfig {
    /// Enabled extractors.
    pub enabled: Vec<String>,
    /// Disabled extractors (alternative to enabled list).
    pub disabled: Vec<String>,
    /// Timeout extractor settings (nested table `timeout_config`).
    pub timeout_config: TimeoutExtractorConfig,
    /// Dependency version extractor settings (nested table `dep_versions`).
    pub dep_versions: DepVersionConfig,
 }
 impl Default for ExtractorConfig {
    fn default() -> Self {
        Self {
            enabled: vec![
                "tls_verify".to_string(),
                "jwt_config".to_string(),
                "hardcoded_secrets".to_string(),
                "timeout_config".to_string(),
                "dep_versions".to_string(),
                "cors_config".to_string(),
                "rate_limit".to_string(),
            ],
            disabled: vec![],
            timeout_config: TimeoutExtractorConfig::default(),
            dep_versions: DepVersionConfig::default(),
        }
    }
 }
 /// Timeout extractor configuration.
 ///
 /// Fields missing from the TOML file fall back to the values produced by
 /// this type's `Default` implementation.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct TimeoutExtractorConfig {
    /// Minimum reasonable timeout in milliseconds.
    pub min_reasonable_ms: u64,
    /// Maximum reasonable timeout in milliseconds.
    pub max_reasonable_ms: u64,
 }
 impl Default for TimeoutExtractorConfig {
    fn default() -> Self {
        Self { min_reasonable_ms: 1000, max_reasonable_ms: 300_000 }
    }
 }
 /// Dependency version extractor configuration.
 ///
 /// If `advisory_db` is missing from the TOML file it falls back to the value
 /// produced by this type's `Default` implementation.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct DepVersionConfig {
    /// Path to advisory database.
    pub advisory_db: PathBuf,
 }
 impl Default for DepVersionConfig {
    fn default() -> Self {
        Self { advisory_db: dirs_default_advisory_db() }
    }
 }
 /// Scan configuration.
 ///
 /// Fields missing from the TOML file fall back to the values produced by
 /// this type's `Default` implementation. A provided `exclude` list replaces
 /// the default list rather than extending it, since the field is
 /// deserialized as a whole.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct ScanConfig {
    /// Directories to exclude from scanning.
    pub exclude: Vec<String>,
    /// Maximum file size to scan (bytes).
    pub max_file_size: u64,
    /// Whether to include test files.
    pub include_tests: bool,
 }
 impl Default for ScanConfig {
    fn default() -> Self {
        Self {
            exclude: vec![
                "target/".to_string(),
                "node_modules/".to_string(),
                ".git/".to_string(),
                "vendor/".to_string(),
            ],
            max_file_size: 1_048_576, // 1MB
            include_tests: false,
        }
    }
 }
 /// Alias suggestion configuration.
 ///
 /// Fields missing from the TOML file fall back to the values produced by
 /// this type's `Default` implementation.
 #[derive(Debug, Clone, Deserialize)]
 #[serde(default)]
 pub struct AliasConfig {
    /// Whether to auto-suggest aliases for shared concepts.
    pub auto_suggest: bool,
    /// Whether to auto-accept aliases to Tier 0 sources.
    pub auto_accept_tier0: bool,
 }
 impl Default for AliasConfig {
    fn default() -> Self {
        Self { auto_suggest: true, auto_accept_tier0: true }
    }
 }
 /// Resolve the default Aphoria data directory.
 ///
 /// Returns `<home>/.aphoria/db` when a home directory can be determined and
 /// the relative path `.aphoria/db` otherwise.
 fn dirs_default_data_dir() -> PathBuf {
    match dirs::home_dir() {
        Some(home) => home.join(".aphoria").join("db"),
        None => PathBuf::from(".aphoria/db"),
    }
 }
 /// Resolve the default advisory database directory.
 ///
 /// Returns `<home>/.aphoria/advisory-db` when a home directory can be
 /// determined and the relative path `.aphoria/advisory-db` otherwise.
 fn dirs_default_advisory_db() -> PathBuf {
    match dirs::home_dir() {
        Some(home) => home.join(".aphoria").join("advisory-db"),
        None => PathBuf::from(".aphoria/advisory-db"),
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// The built-in defaults match the documented thresholds and lists.
    #[test]
    fn test_default_config() {
        let config = AphoriaConfig::default();
        assert_eq!(config.thresholds.block, 0.7);
        assert_eq!(config.thresholds.flag, 0.4);
        assert!(config.extractors.enabled.contains(&"tls_verify".to_string()));
        assert!(config.scan.exclude.contains(&"target/".to_string()));
    }
    /// Values present in the TOML override defaults; everything else keeps them.
    ///
    /// Returns `Result` and uses `?` instead of `.expect()`, because the crate
    /// manifest denies `clippy::expect_used`.
    #[test]
    fn test_config_parse() -> Result<(), toml::de::Error> {
        let toml = r#"
 [project]
 name = "testproject"
 language = "rust"
 [thresholds]
 block = 0.8
 flag = 0.5
 [scan]
 exclude = ["build/", "dist/"]
 "#;
        let config: AphoriaConfig = toml::from_str(toml)?;
        assert_eq!(config.project.name, Some("testproject".to_string()));
        assert_eq!(config.project.language, Some("rust".to_string()));
        assert_eq!(config.thresholds.block, 0.8);
        assert_eq!(config.thresholds.flag, 0.5);
        assert!(config.scan.exclude.contains(&"build/".to_string()));
        // A supplied list replaces the default list instead of extending it.
        assert!(!config.scan.exclude.contains(&"target/".to_string()));
        // Fields and sections left out of the TOML keep their defaults.
        assert_eq!(config.scan.max_file_size, 1_048_576);
        assert!(!config.scan.include_tests);
        assert!(config.aliases.auto_suggest);
        Ok(())
    }
 }
--- a/applications/aphoria/src/error.rs
+++ b/applications/aphoria/src/error.rs
@ -0,0 +1,65 @@
 //! Error types for Aphoria.
 use std::path::PathBuf;
 use thiserror::Error;
 /// Errors that can occur during Aphoria operations.
 ///
 /// The `Display` text of each variant comes from its `#[error(...)]`
 /// attribute. Variants marked `#[from]` can be produced with `?` directly
 /// from the wrapped error type.
 #[derive(Error, Debug)]
 pub enum AphoriaError {
    /// Configuration file error, described by a free-form message.
    #[error("Configuration error: {0}")]
    Config(String),
    /// Configuration file not found; carries the path that was looked up.
    #[error("Configuration file not found: {0}")]
    ConfigNotFound(PathBuf),
    /// Invalid configuration format; converted from `toml::de::Error`.
    #[error("Invalid configuration: {0}")]
    ConfigParse(#[from] toml::de::Error),
    /// Project not found; carries the project path.
    #[error("Project not found: {0}")]
    ProjectNotFound(PathBuf),
    /// File system error; converted from `std::io::Error`.
    #[error("File system error: {0}")]
    Io(#[from] std::io::Error),
    /// Walker error during file traversal.
    #[error("Walker error: {0}")]
    Walker(String),
    /// Extractor error during claim extraction.
    #[error("Extraction error in {extractor}: {message}")]
    Extraction {
        /// The extractor that failed.
        extractor: String,
        /// The error message.
        message: String,
    },
    /// Episteme storage error.
    #[error("Storage error: {0}")]
    Storage(String),
    /// Query error during conflict detection.
    #[error("Query error: {0}")]
    Query(String),
    /// Report generation error.
    #[error("Report error: {0}")]
    Report(String),
    /// Baseline not found; the message tells the user how to create one.
    #[error("No baseline set. Run `aphoria baseline` first.")]
    NoBaseline,
    /// Initialization error.
    #[error("Initialization error: {0}")]
    Init(String),
    /// Acknowledgment error.
    #[error("Acknowledgment error: {0}")]
    Acknowledge(String),
 }
--- a/applications/aphoria/src/extractors/mod.rs
+++ b/applications/aphoria/src/extractors/mod.rs
@ -0,0 +1,103 @@
 //! Claim extractors for finding implicit decisions in source code.
 // Skeleton phase: allow unused until extractors are implemented
 #![allow(dead_code)]
 //!
 //! Each extractor looks for specific patterns that represent implicit claims:
 //! - `tls_verify`: TLS certificate verification settings
 //! - `jwt_config`: JWT validation configuration
 //! - `hardcoded_secrets`: Credentials in source code
 //! - `timeout_config`: HTTP/DB/Redis timeout values
 //! - `dep_versions`: Vulnerable dependency versions
 //! - `cors_config`: CORS allow-origin settings
 //! - `rate_limit`: Rate limiting configuration
 use crate::types::{ExtractedClaim, Language};
 /// Trait for claim extractors.
 ///
 /// Extractors scan file content and return claims about implicit decisions.
 ///
 /// Implementors must be `Send + Sync`; the registry stores them as
 /// `Box<dyn Extractor>` trait objects.
 pub trait Extractor: Send + Sync {
    /// Unique identifier for this extractor.
    fn name(&self) -> &str;
    /// File types this extractor operates on.
    ///
    /// The registry only hands an extractor files whose language appears in
    /// this slice.
    fn languages(&self) -> &[Language];
    /// Extract claims from a file's content.
    ///
    /// # Arguments
    ///
    /// * `path_segments` - ConceptPath segments derived from the file's location
    /// * `content` - The file content as a string
    /// * `language` - The detected language of the file
    ///
    /// # Returns
    ///
    /// Zero or more extracted claims.
    fn extract(
        &self,
        path_segments: &[String],
        content: &str,
        language: Language,
    ) -> Vec<ExtractedClaim>;
 }
 /// Registry of available extractors.
 ///
 /// Owns each extractor as a boxed trait object and selects among them by
 /// language.
 pub struct ExtractorRegistry {
    // Registered extractors, queried in insertion order.
    extractors: Vec<Box<dyn Extractor>>,
 }
 impl Default for ExtractorRegistry {
    fn default() -> Self {
        Self::new()
    }
 }
 impl ExtractorRegistry {
    /// Build a registry intended to hold all built-in extractors.
    ///
    /// No extractors are registered yet, so the registry starts out empty.
    pub fn new() -> Self {
        // TODO: Register built-in extractors
        let extractors: Vec<Box<dyn Extractor>> = Vec::new();
        Self { extractors }
    }
    /// Collect the extractors whose `languages()` list includes `language`,
    /// in registration order.
    pub fn for_language(&self, language: Language) -> Vec<&dyn Extractor> {
        let mut applicable: Vec<&dyn Extractor> = Vec::new();
        for extractor in &self.extractors {
            if extractor.languages().contains(&language) {
                applicable.push(extractor.as_ref());
            }
        }
        applicable
    }
    /// Run every extractor applicable to `language` over `content` and
    /// concatenate the claims they return.
    pub fn extract_all(
        &self,
        path_segments: &[String],
        content: &str,
        language: Language,
    ) -> Vec<ExtractedClaim> {
        let mut claims = Vec::new();
        for extractor in self.for_language(language) {
            claims.extend(extractor.extract(path_segments, content, language));
        }
        claims
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// A fresh registry has nothing registered for Rust yet.
    #[test]
    fn test_registry_creation() {
        // Currently empty, will be populated when extractors are implemented
        let rust_extractors = ExtractorRegistry::new().for_language(Language::Rust);
        assert!(rust_extractors.is_empty());
    }
    /// With no extractors registered, extraction yields no claims.
    #[test]
    fn test_extract_all_empty() {
        let segments = ["rust".to_string()];
        let source = "fn main() {}";
        let registry = ExtractorRegistry::new();
        let claims = registry.extract_all(&segments, source, Language::Rust);
        assert!(claims.is_empty());
    }
 }
--- a/applications/aphoria/src/lib.rs
+++ b/applications/aphoria/src/lib.rs
@ -0,0 +1,170 @@
 //! Aphoria - A code-level truth linter powered by Episteme
 //!
 // Skeleton phase: allow unused code until extractors are implemented
 #![allow(dead_code, unused_imports, unused_variables)]
 //!
 //! Aphoria scans a codebase, extracts the decisions embedded in config and code,
 //! and checks them against authoritative sources. It finds the places where what
 //! your code *does* contradicts what the specs *say*.
 //!
 //! # Architecture
 //!
 //! ```text
 //! ┌──────────────────────────────────────────────────────────────┐
 //! │                        aphoria CLI                           │
 //! │                                                              │
 //! │  ┌──────────┐   ┌────────────┐   ┌──────────┐   ┌────────┐  │
 //! │  │ Walker   │──▶│ Extractors │──▶│ Ingester │──▶│ Report │  │
 //! │  └──────────┘   └────────────┘   └──────────┘   └────────┘  │
 //! │                                       │              ▲       │
 //! │                                       ▼              │       │
 //! │                              ┌──────────────┐        │       │
 //! │                              │   Episteme   │────────┘       │
 //! │                              │   (local)    │                │
 //! │                              └──────────────┘                │
 //! └──────────────────────────────────────────────────────────────┘
 //! ```
 //!
 //! # Example
 //!
 //! ```ignore
 //! use aphoria::{run_scan, AphoriaConfig, ScanArgs};
 //!
 //! let args = ScanArgs {
 //!     path: ".".into(),
 //!     format: "table".to_string(),
 //!     exit_code_enabled: false,
 //! };
 //! let config = AphoriaConfig::default();
 //! let result = run_scan(args, &config).await?;
 //!
 //! println!("{}", result.display());
 //! ```
 // Module declarations
 mod config;
 mod error;
 mod extractors;
 mod report;
 mod types;
 mod walker;
 // Public re-exports
 pub use config::AphoriaConfig;
 pub use error::AphoriaError;
 pub use types::{AcknowledgeArgs, ConflictResult, ExtractedClaim, ScanArgs, ScanResult, Verdict};
 /// Run a scan on the specified project.
 ///
 /// This is the main entry point for scanning a codebase. It:
 /// 1. Walks the project directory
 /// 2. Extracts claims from config and code
 /// 3. Ingests claims into the local Episteme instance
 /// 4. Queries for conflicts against authoritative sources
 /// 5. Returns a formatted report
 pub async fn run_scan(args: ScanArgs, config: &AphoriaConfig) -> Result<ScanResult, AphoriaError> {
    tracing::info!(path = %args.path.display(), format = %args.format, "Starting scan");
    // TODO: Implement full scan pipeline
    // For now, return a stub result to validate the CLI works
    Ok(ScanResult::stub(&args.path, &args.format))
 }
 /// Acknowledge a conflict as intentional.
 ///
 /// Creates an assertion in Episteme recording that this conflict has been
 /// reviewed and accepted. The conflict still appears in reports but marked as ACK.
 pub async fn acknowledge(
    args: AcknowledgeArgs,
    _config: &AphoriaConfig,
 ) -> Result<(), AphoriaError> {
    tracing::info!(
        concept_path = %args.concept_path,
        reason = %args.reason,
        "Acknowledging conflict"
    );
    // TODO: Create acknowledgment assertion in Episteme
    Ok(())
 }
 /// Set the current scan as the baseline.
 ///
 /// Future `aphoria diff` commands will compare against this baseline.
 /// For now this only logs the request and reports success.
 pub async fn set_baseline(_config: &AphoriaConfig) -> Result<(), AphoriaError> {
    // TODO: Record baseline scan ID
    tracing::info!("Setting baseline");
    Result::Ok(())
 }
 /// Show changes since the last baseline.
 pub async fn show_diff(_config: &AphoriaConfig) -> Result<String, AphoriaError> {
    tracing::info!("Showing diff");
    // TODO: Compare current scan against baseline
    Ok("No baseline set. Run `aphoria baseline` first.".to_string())
 }
 /// Show current scan status.
 ///
 /// Until the local Episteme summary is implemented, this always reports
 /// that Aphoria has not been initialized.
 pub async fn show_status(_config: &AphoriaConfig) -> Result<String, AphoriaError> {
    tracing::info!("Showing status");
    // TODO: Show summary of local Episteme instance
    let status = String::from("Aphoria status: Not initialized. Run `aphoria init` first.");
    Ok(status)
 }
 /// Initialize Aphoria with the authoritative corpus.
 ///
 /// Planned to download and ingest:
 /// - RFC corpus (auth, crypto, TLS)
 /// - OWASP cheat sheets
 ///
 /// For now this only logs the request and reports success.
 pub async fn initialize(_config: &AphoriaConfig) -> Result<(), AphoriaError> {
    // TODO: Download and ingest authoritative corpus
    tracing::info!("Initializing Aphoria");
    Result::Ok(())
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::path::PathBuf;
    // These tests return `Result` and use `?` instead of `.expect()`, because
    // the crate manifest denies `clippy::expect_used`.

    /// The stubbed scan succeeds and reports no BLOCK or FLAG conflicts.
    #[tokio::test]
    async fn test_scan_returns_stub_result() -> Result<(), AphoriaError> {
        let args = ScanArgs {
            path: PathBuf::from("."),
            format: "table".to_string(),
            exit_code_enabled: false,
        };
        let config = AphoriaConfig::default();
        let scan_result = run_scan(args, &config).await?;
        assert!(!scan_result.has_blocks());
        assert!(!scan_result.has_flags());
        Ok(())
    }
    /// Acknowledging a conflict currently always succeeds.
    #[tokio::test]
    async fn test_acknowledge_succeeds() -> Result<(), AphoriaError> {
        let args = AcknowledgeArgs {
            concept_path: "code://rust/test/jwt/audience_validation".to_string(),
            reason: "Internal service".to_string(),
        };
        let config = AphoriaConfig::default();
        acknowledge(args, &config).await
    }
    /// Status output tells the user to initialize first.
    #[tokio::test]
    async fn test_status_before_init() -> Result<(), AphoriaError> {
        let config = AphoriaConfig::default();
        let status = show_status(&config).await?;
        assert!(status.contains("Not initialized"));
        Ok(())
    }
 }
--- a/applications/aphoria/src/main.rs
+++ b/applications/aphoria/src/main.rs
@ -0,0 +1,186 @@
 //! Aphoria CLI - A code-level truth linter powered by Episteme
 //!
 //! CLI binaries use println! for user-facing output (not tracing)
 #![allow(clippy::print_stdout, clippy::print_stderr)]
 use std::path::PathBuf;
 use std::process::ExitCode;
 use clap::{Parser, Subcommand};
 use aphoria::{run_scan, AcknowledgeArgs, AphoriaConfig, ScanArgs};
 // NOTE: clap's derive macros use the `///` comments on this struct and its
 // fields as `--help` text, so treat them as user-facing strings when editing.
 /// A code-level truth linter powered by Episteme.
 ///
 /// Aphoria scans a codebase, extracts the decisions embedded in config and code,
 /// and checks them against authoritative sources. It finds the places where what
 /// your code *does* contradicts what the specs *say*.
 #[derive(Parser)]
 #[command(name = "aphoria")]
 #[command(version, about, long_about = None)]
 struct Cli {
    /// Path to aphoria.toml configuration file
    // `global = true` makes the option accepted after any subcommand as well.
    #[arg(short, long, global = true)]
    config: Option<PathBuf>,
    #[command(subcommand)]
    command: Commands,
 }
 // NOTE: clap's derive macros use the `///` comments on the variants and
 // fields below as `--help` text, so treat them as user-facing strings.
 #[derive(Subcommand)]
 enum Commands {
    /// Scan a project for epistemic drift
    Scan {
        /// Path to the project root to scan
        #[arg(default_value = ".")]
        path: PathBuf,
        /// Output format: table, json, sarif, markdown
        #[arg(short, long, default_value = "table")]
        format: String,
        /// Exit with non-zero code if conflicts found
        #[arg(long)]
        exit_code: bool,
    },
    /// Acknowledge a conflict (mark as intentional)
    Ack {
        /// The concept path to acknowledge
        concept_path: String,
        /// Reason for acknowledgment
        #[arg(short, long)]
        reason: String,
    },
    /// Set the current scan as the baseline
    Baseline,
    /// Show changes since last baseline
    Diff,
    /// Show current scan status
    Status,
    /// Initialize Aphoria with authoritative corpus
    Init,
 }
 /// Exit status when `--exit-code` is set and only FLAG-level conflicts exist.
 const EXIT_FLAGGED: u8 = 1;
 /// Exit status when `--exit-code` is set and a BLOCK-level conflict exists.
 const EXIT_BLOCKED: u8 = 2;
 /// Exit status for configuration errors and failed commands.
 const EXIT_FAILURE: u8 = 3;

 /// Install the global tracing subscriber with log output on stderr.
 ///
 /// Reports are printed on stdout, so internal log lines must not share that
 /// stream; otherwise machine-readable output (JSON, SARIF) gets corrupted.
 /// Filtering follows `RUST_LOG` when it is set and parseable as target
 /// directives, and defaults to INFO when it is unset.
 fn init_tracing() {
    use tracing_subscriber::filter::{LevelFilter, Targets};
    use tracing_subscriber::layer::SubscriberExt;
    use tracing_subscriber::util::SubscriberInitExt;

    let filter = match std::env::var("RUST_LOG") {
        Ok(spec) => spec.parse::<Targets>().unwrap_or_else(|err| {
            eprintln!("Ignoring `RUST_LOG={spec:?}`: {err}");
            Targets::default()
        }),
        Err(_) => Targets::new().with_default(LevelFilter::INFO),
    };
    tracing_subscriber::fmt()
        // Let the `Targets` layer decide what is enabled.
        .with_max_level(LevelFilter::TRACE)
        .with_writer(std::io::stderr)
        .finish()
        .with(filter)
        .init();
 }

 /// Print `"<context>: <err>"` to stderr and return the failure exit code.
 fn fail(context: &str, err: impl std::fmt::Display) -> ExitCode {
    eprintln!("{context}: {err}");
    ExitCode::from(EXIT_FAILURE)
 }

 /// CLI entry point: parse arguments, load configuration, run the subcommand.
 ///
 /// Exit codes: 0 on success, 1 or 2 for FLAG or BLOCK conflicts when
 /// `--exit-code` is given, 3 for configuration or command errors.
 #[tokio::main]
 async fn main() -> ExitCode {
    // Initialize tracing for internal logging
    init_tracing();
    let cli = Cli::parse();
    // Load configuration
    let config = match load_config(cli.config.as_deref()) {
        Ok(cfg) => cfg,
        Err(e) => return fail("Error loading configuration", e),
    };
    match cli.command {
        Commands::Scan { path, format, exit_code } => {
            let args = ScanArgs { path, format, exit_code_enabled: exit_code };
            match run_scan(args, &config).await {
                Ok(result) => {
                    println!("{}", result.display());
                    // Conflicts only affect the exit status when asked to.
                    if exit_code && result.has_blocks() {
                        ExitCode::from(EXIT_BLOCKED)
                    } else if exit_code && result.has_flags() {
                        ExitCode::from(EXIT_FLAGGED)
                    } else {
                        ExitCode::SUCCESS
                    }
                }
                Err(e) => fail("Scan error", e),
            }
        }
        Commands::Ack { concept_path, reason } => {
            let args = AcknowledgeArgs { concept_path, reason };
            match aphoria::acknowledge(args, &config).await {
                Ok(()) => {
                    println!("Conflict acknowledged.");
                    ExitCode::SUCCESS
                }
                Err(e) => fail("Acknowledge error", e),
            }
        }
        Commands::Baseline => match aphoria::set_baseline(&config).await {
            Ok(()) => {
                println!("Baseline set.");
                ExitCode::SUCCESS
            }
            Err(e) => fail("Baseline error", e),
        },
        Commands::Diff => match aphoria::show_diff(&config).await {
            Ok(output) => {
                println!("{output}");
                ExitCode::SUCCESS
            }
            Err(e) => fail("Diff error", e),
        },
        Commands::Status => match aphoria::show_status(&config).await {
            Ok(output) => {
                println!("{output}");
                ExitCode::SUCCESS
            }
            Err(e) => fail("Status error", e),
        },
        Commands::Init => match aphoria::initialize(&config).await {
            Ok(()) => {
                println!("Aphoria initialized. Run `aphoria scan <project>` to begin.");
                ExitCode::SUCCESS
            }
            Err(e) => fail("Init error", e),
        },
    }
 }
 /// Load configuration from file or use defaults.
 ///
 /// An explicit `path` is always loaded (and its errors returned). Without
 /// one, the first existing default location is loaded; if none exists the
 /// built-in defaults are used.
 fn load_config(path: Option<&std::path::Path>) -> Result<AphoriaConfig, aphoria::AphoriaError> {
    // Default locations, tried in order relative to the working directory.
    const DEFAULT_LOCATIONS: [&str; 2] = ["aphoria.toml", ".aphoria/config.toml"];
    if let Some(explicit) = path {
        return AphoriaConfig::from_file(explicit);
    }
    let first_existing = DEFAULT_LOCATIONS
        .into_iter()
        .map(std::path::Path::new)
        .find(|candidate| candidate.exists());
    match first_existing {
        Some(found) => AphoriaConfig::from_file(found),
        // No config file found, use defaults
        None => Ok(AphoriaConfig::default()),
    }
 }
--- a/applications/aphoria/src/report/json.rs
+++ b/applications/aphoria/src/report/json.rs
@ -0,0 +1,14 @@
 //! JSON output format for programmatic consumption.
 use crate::types::ScanResult;
 use super::ReportFormatter;
 /// JSON report formatter.
 pub struct JsonReport;
 impl ReportFormatter for JsonReport {
    fn format(&self, result: &ScanResult) -> String {
        result.display()
    }
 }
--- a/applications/aphoria/src/report/markdown.rs
+++ b/applications/aphoria/src/report/markdown.rs
@ -0,0 +1,14 @@
 //! Markdown output format for documentation.
 use crate::types::ScanResult;
 use super::ReportFormatter;
 /// Markdown report formatter.
 pub struct MarkdownReport;
 impl ReportFormatter for MarkdownReport {
    fn format(&self, result: &ScanResult) -> String {
        result.display()
    }
 }
--- a/applications/aphoria/src/report/mod.rs
+++ b/applications/aphoria/src/report/mod.rs
@ -0,0 +1,59 @@
 //! Report generation for scan results.
 // Skeleton phase: allow unused until report pipeline is wired up
 #![allow(dead_code)]
 //!
 //! Supports multiple output formats:
 //! - `table`: Terminal table output (default)
 //! - `json`: JSON for programmatic consumption
 //! - `sarif`: SARIF for CI integration (GitHub, GitLab, Azure DevOps)
 //! - `markdown`: Markdown for documentation
 mod json;
 mod markdown;
 mod sarif;
 mod table;
 pub use json::JsonReport;
 pub use markdown::MarkdownReport;
 pub use sarif::SarifReport;
 pub use table::TableReport;
 use crate::types::ScanResult;
 /// Trait for report formatters.
 ///
 /// Implemented by one unit struct per output format; `get_formatter` returns
 /// them as `Box<dyn ReportFormatter>`.
 pub trait ReportFormatter {
    /// Format the scan result as a string.
    fn format(&self, result: &ScanResult) -> String;
 }
 /// Look up the report formatter for a format name.
 ///
 /// Recognizes `"json"`, `"sarif"` and `"markdown"`; any other name,
 /// including `"table"`, yields the table formatter.
 pub fn get_formatter(name: &str) -> Box<dyn ReportFormatter> {
    if name == "json" {
        return Box::new(JsonReport);
    }
    if name == "sarif" {
        return Box::new(SarifReport);
    }
    if name == "markdown" {
        return Box::new(MarkdownReport);
    }
    Box::new(TableReport)
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::path::Path;
    /// Format a stub scan of `.` with the formatter registered under `name`.
    fn render_stub(name: &str) -> String {
        let result = ScanResult::stub(Path::new("."), "table");
        get_formatter(name).format(&result)
    }
    /// The table formatter output mentions the scan.
    #[test]
    fn test_get_formatter_table() {
        assert!(render_stub("table").contains("Scanning"));
    }
    /// An unrecognized name falls back to the table formatter.
    #[test]
    fn test_get_formatter_unknown_defaults_to_table() {
        assert!(render_stub("unknown").contains("Scanning"));
    }
 }
--- a/applications/aphoria/src/report/sarif.rs
+++ b/applications/aphoria/src/report/sarif.rs
@ -0,0 +1,19 @@
 //! SARIF output format for CI integration.
 //!
 //! SARIF (Static Analysis Results Interchange Format) is supported by:
 //! - GitHub Code Scanning
 //! - GitLab SAST
 //! - Azure DevOps
 use crate::types::ScanResult;
 use super::ReportFormatter;
 /// SARIF report formatter.
 pub struct SarifReport;
 impl ReportFormatter for SarifReport {
    fn format(&self, result: &ScanResult) -> String {
        result.display()
    }
 }
--- a/applications/aphoria/src/report/table.rs
+++ b/applications/aphoria/src/report/table.rs
@ -0,0 +1,14 @@
 //! Table output format for terminal display.
 use crate::types::ScanResult;
 use super::ReportFormatter;
 /// Table report formatter.
 pub struct TableReport;
 impl ReportFormatter for TableReport {
    fn format(&self, result: &ScanResult) -> String {
        result.display()
    }
 }
--- a/applications/aphoria/src/types.rs
+++ b/applications/aphoria/src/types.rs
@ -0,0 +1,415 @@
 //! Core types for Aphoria.
 // Skeleton phase: allow unused until scan pipeline is wired up
 #![allow(dead_code)]
 use std::fmt;
 use std::path::{Path, PathBuf};
 use stemedb_core::types::{ObjectValue, SourceClass};
 /// Arguments for the scan command.
 #[derive(Debug, Clone)]
 pub struct ScanArgs {
    /// Path to the project root.
    pub path: PathBuf,
    /// Output format name (`table`, `json`, `sarif`, `markdown`).
    pub format: String,
    /// Whether to enable non-zero exit codes on conflicts.
    pub exit_code_enabled: bool,
 }
 /// Arguments for the acknowledge command.
 #[derive(Debug, Clone)]
 pub struct AcknowledgeArgs {
    /// The concept path of the conflict being acknowledged.
    pub concept_path: String,
    /// Free-text reason given for the acknowledgment.
    pub reason: String,
 }
 /// Result of a scan operation.
 ///
 /// Carries the scan counters, the detected conflicts, and the name of the
 /// output format that `display` uses to pick a renderer.
 #[derive(Debug, Clone)]
 pub struct ScanResult {
    /// Project name.
    pub project: String,
    /// Scan ID (for baseline comparison).
    pub scan_id: String,
    /// Number of files scanned.
    pub files_scanned: usize,
    /// Number of claims extracted.
    pub claims_extracted: usize,
    /// Conflicts found, one entry per conflicting claim.
    pub conflicts: Vec<ConflictResult>,
    /// Output format name; anything other than `json`, `sarif` or `markdown`
    /// is rendered as a table.
    pub format: String,
 }
 impl ScanResult {
    /// Build a placeholder result (no files, no claims, no conflicts) for
    /// exercising the CLI before the scan pipeline exists.
    ///
    /// The project name is the final component of `path`, or `"unknown"` when
    /// the path has none or it is not valid UTF-8.
    pub fn stub(path: &Path, format: &str) -> Self {
        let project = path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("unknown")
            .to_string();
        Self {
            project,
            scan_id: "stub-scan-id".to_string(),
            files_scanned: 0,
            claims_extracted: 0,
            conflicts: Vec::new(),
            format: format.to_string(),
        }
    }
    /// Returns `true` when at least one conflict carries a BLOCK verdict.
    pub fn has_blocks(&self) -> bool {
        self.conflicts.iter().any(|conflict| matches!(conflict.verdict, Verdict::Block))
    }
    /// Returns `true` when at least one conflict carries a FLAG verdict.
    pub fn has_flags(&self) -> bool {
        self.conflicts.iter().any(|conflict| matches!(conflict.verdict, Verdict::Flag))
    }
    /// Number of conflicts whose verdict equals `verdict`.
    pub fn count_by_verdict(&self, verdict: Verdict) -> usize {
        let mut matching = 0;
        for conflict in &self.conflicts {
            if conflict.verdict == verdict {
                matching += 1;
            }
        }
        matching
    }
    /// Render the result in the format named by `self.format`.
    ///
    /// Unrecognised format names fall back to the table renderer.
    pub fn display(&self) -> String {
        let requested = self.format.as_str();
        if requested == "json" {
            self.display_json()
        } else if requested == "sarif" {
            self.display_sarif()
        } else if requested == "markdown" {
            self.display_markdown()
        } else {
            self.display_table()
        }
    }
    /// Plain-text rendering: header, one entry per conflict, summary line.
    fn display_table(&self) -> String {
        let mut report = format!("Scanning {} ...\n\n", self.project);
        if self.conflicts.is_empty() {
            report.push_str("No conflicts found.\n");
        }
        // No-op when there are no conflicts.
        for conflict in &self.conflicts {
            report.push_str(&format!("{}\n\n", conflict));
        }
        let summary = format!(
            "{} files scanned, {} claims extracted, {} conflicts ({} BLOCK, {} FLAG)\n",
            self.files_scanned,
            self.claims_extracted,
            self.conflicts.len(),
            self.count_by_verdict(Verdict::Block),
            self.count_by_verdict(Verdict::Flag),
        );
        report.push_str(&summary);
        report
    }
    /// JSON rendering: project, scan id and summary counters.
    fn display_json(&self) -> String {
        // TODO: Implement JSON output (the "conflicts" array is still always empty)
        let blocks = self.count_by_verdict(Verdict::Block);
        let flags = self.count_by_verdict(Verdict::Flag);
        let document = serde_json::json!({
            "project": self.project,
            "scan_id": self.scan_id,
            "summary": {
                "files_scanned": self.files_scanned,
                "claims_extracted": self.claims_extracted,
                "conflicts": self.conflicts.len(),
                "blocks": blocks,
                "flags": flags,
            },
            "conflicts": []
        });
        document.to_string()
    }
    /// SARIF 2.1.0 rendering: tool metadata with an empty result list.
    fn display_sarif(&self) -> String {
        // TODO: Implement SARIF output (the "results" array is still always empty)
        let document = serde_json::json!({
            "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json",
            "version": "2.1.0",
            "runs": [{
                "tool": {
                    "driver": {
                        "name": "aphoria",
                        "version": env!("CARGO_PKG_VERSION"),
                    }
                },
                "results": []
            }]
        });
        document.to_string()
    }
    /// Markdown rendering: title, summary line, then one section per conflict.
    fn display_markdown(&self) -> String {
        let mut doc = format!("# Aphoria Scan: {}\n\n", self.project);
        doc.push_str(&format!(
            "**Summary:** {} files, {} claims, {} conflicts\n\n",
            self.files_scanned,
            self.claims_extracted,
            self.conflicts.len()
        ));
        if self.conflicts.is_empty() {
            doc.push_str("No conflicts found.\n");
            return doc;
        }
        doc.push_str("## Conflicts\n\n");
        for conflict in &self.conflicts {
            let claim = &conflict.claim;
            doc.push_str(&format!("### {}\n\n", claim.concept_path));
            doc.push_str(&format!("- **Verdict:** {:?}\n", conflict.verdict));
            doc.push_str(&format!("- **Score:** {:.2}\n", conflict.conflict_score));
            doc.push_str(&format!("- **File:** {}:{}\n\n", claim.file, claim.line));
        }
        doc
    }
 }
 /// A claim extracted from source code.
 ///
 /// Records what was claimed (`concept_path`, `predicate`, `value`) together
 /// with where it was found (`file`, `line`, `matched_text`).
 #[derive(Debug, Clone)]
 pub struct ExtractedClaim {
    /// The full ConceptPath for this claim.
    pub concept_path: String,
    /// The predicate describing what aspect this claims.
    pub predicate: String,
    /// The extracted value.
    pub value: ObjectValue,
    /// Source file path relative to project root.
    pub file: String,
    /// Line number in the source file (1-indexed).
    pub line: usize,
    /// The matched source text.
    pub matched_text: String,
    /// Confidence of extraction (0.0 to 1.0).
    pub confidence: f32,
    /// Human-readable description; shown as "Your code" in the table report.
    pub description: String,
 }
 /// A source that conflicts with the code claim.
 #[derive(Debug, Clone)]
 pub struct ConflictingSource {
    /// The concept path of the authoritative source.
    pub path: String,
    /// The source class (tier); its `tier()` is printed in the table report.
    pub source_class: SourceClass,
    /// The authoritative value.
    pub value: ObjectValue,
    /// Confidence of the authoritative assertion.
    pub confidence: f32,
 }
 /// Result of conflict detection for a single claim.
 ///
 /// Its `Display` implementation produces the multi-line entry used by the
 /// table report.
 #[derive(Debug, Clone)]
 pub struct ConflictResult {
    /// The extracted claim.
    pub claim: ExtractedClaim,
    /// Sources that conflict with this claim.
    pub conflicts: Vec<ConflictingSource>,
    /// Computed conflict score (0.0 to 1.0).
    pub conflict_score: f32,
    /// The verdict based on thresholds.
    pub verdict: Verdict,
    /// Acknowledgment details, or `None` if the conflict has not been acknowledged.
    pub acknowledged: Option<AcknowledgmentInfo>,
 }
 impl fmt::Display for ConflictResult {
    /// Write the multi-line table entry: verdict and concept path, the
    /// offending code location, each conflicting source, the score, and the
    /// acknowledgment details when present.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let claim = &self.claim;
        let label = match self.verdict {
            Verdict::Block => "BLOCK",
            Verdict::Flag => "FLAG",
            Verdict::Pass => "PASS",
            Verdict::Ack => "ACK",
        };
        writeln!(f, "  {}    {}", label, claim.concept_path)?;
        writeln!(
            f,
            "         Your code:  {} ({}:{})",
            claim.description, claim.file, claim.line
        )?;
        self.conflicts.iter().try_for_each(|source| {
            writeln!(
                f,
                "         {:?}:   {:?} (Tier {})",
                source.source_class,
                source.value,
                source.source_class.tier()
            )
        })?;
        writeln!(f, "         Conflict:   {:.2}", self.conflict_score)?;
        match &self.acknowledged {
            Some(ack) => {
                writeln!(f, "         Acknowledged: {} by {}", ack.timestamp, ack.by)?;
                writeln!(f, "         Reason: \"{}\"", ack.reason)
            }
            None => Ok(()),
        }
    }
 }
 /// Information about an acknowledgment.
 #[derive(Debug, Clone)]
 pub struct AcknowledgmentInfo {
    /// When the acknowledgment was made (stored as text and printed verbatim).
    pub timestamp: String,
    /// Who made the acknowledgment.
    pub by: String,
    /// The reason given.
    pub reason: String,
 }
 /// Verdict for a conflict.
 ///
 /// Rendered as `BLOCK`, `FLAG`, `PASS` or `ACK` in the table report.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Verdict {
    /// Conflict score >= block threshold. Must fix or acknowledge.
    Block,
    /// Conflict score >= flag threshold. Review recommended.
    Flag,
    /// Conflict score below thresholds. No action needed.
    Pass,
    /// Conflict exists but has been acknowledged.
    Ack,
 }
 /// Language or file kind assigned to a scanned file.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Language {
    /// Rust source files (`.rs`).
    Rust,
    /// Go source files (`.go`).
    Go,
    /// Python source files (`.py`).
    Python,
    /// TypeScript source files (`.ts`, `.tsx`).
    TypeScript,
    /// JavaScript source files (`.js`, `.jsx`).
    JavaScript,
    /// YAML configuration files (`.yaml`, `.yml`).
    Yaml,
    /// TOML configuration files (`.toml`).
    Toml,
    /// JSON configuration files (`.json`).
    Json,
    /// Dotenv files (names starting with `.env`).
    Dotenv,
    /// Docker files (names starting with `Dockerfile` or `docker-compose`).
    Docker,
    /// Cargo manifest (`Cargo.toml`).
    CargoManifest,
    /// Go module file (`go.mod`).
    GoMod,
    /// NPM manifest (`package.json`).
    NpmManifest,
    /// Python manifest (`requirements.txt`, `pyproject.toml`).
    PythonManifest,
    /// Anything not recognised by name or extension.
    Unknown,
 }
 impl Language {
    /// Classify a file by its name, falling back to its extension.
    ///
    /// Well-known file names (manifests, Docker and dotenv files) take
    /// precedence over the extension, so `Cargo.toml` is a
    /// [`Language::CargoManifest`] rather than [`Language::Toml`].
    pub fn from_path(path: &Path) -> Self {
        let file_name = path.file_name().and_then(|name| name.to_str()).unwrap_or("");
        // Special file names win over the generic extension table below.
        let by_name = match file_name {
            "Cargo.toml" => Some(Language::CargoManifest),
            "go.mod" => Some(Language::GoMod),
            "package.json" => Some(Language::NpmManifest),
            "requirements.txt" | "pyproject.toml" => Some(Language::PythonManifest),
            name if name.starts_with("Dockerfile") || name.starts_with("docker-compose") => {
                Some(Language::Docker)
            }
            name if name.starts_with(".env") => Some(Language::Dotenv),
            _ => None,
        };
        if let Some(language) = by_name {
            return language;
        }
        // A missing or non-UTF-8 extension ends up as `Unknown`.
        match path.extension().and_then(|ext| ext.to_str()) {
            Some("rs") => Language::Rust,
            Some("go") => Language::Go,
            Some("py") => Language::Python,
            Some("ts") | Some("tsx") => Language::TypeScript,
            Some("js") | Some("jsx") => Language::JavaScript,
            Some("yaml") | Some("yml") => Language::Yaml,
            Some("toml") => Language::Toml,
            Some("json") => Language::Json,
            _ => Language::Unknown,
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_language_detection() {
        // (path, expected classification)
        let cases = [
            ("src/main.rs", Language::Rust),
            ("main.go", Language::Go),
            ("app.py", Language::Python),
            ("Cargo.toml", Language::CargoManifest),
            ("go.mod", Language::GoMod),
            (".env.production", Language::Dotenv),
            ("Dockerfile", Language::Docker),
        ];
        for (path, expected) in cases.iter() {
            assert_eq!(Language::from_path(Path::new(path)), *expected);
        }
    }
    #[test]
    fn test_scan_result_has_blocks() {
        // A result without conflicts has neither blocks nor flags.
        let empty = ScanResult {
            project: "test".to_string(),
            scan_id: "id".to_string(),
            files_scanned: 0,
            claims_extracted: 0,
            conflicts: Vec::new(),
            format: "table".to_string(),
        };
        assert!(!empty.has_blocks());
        assert!(!empty.has_flags());
    }
    #[test]
    fn test_verdict_equality() {
        let block = Verdict::Block;
        assert_eq!(block, Verdict::Block);
        assert_ne!(block, Verdict::Flag);
    }
 }
--- a/applications/aphoria/src/walker/language.rs
+++ b/applications/aphoria/src/walker/language.rs
@ -0,0 +1,73 @@
 //! Language detection for projects.
 #![allow(dead_code)]
 use std::path::Path;
 use crate::types::Language;
 /// Detect the primary language of a project from the manifest files in `root`.
 ///
 /// Checks are made in this order, and the first hit wins:
 /// 1. `Cargo.toml` -> Rust
 /// 2. `go.mod` -> Go
 /// 3. `package.json` -> TypeScript if `tsconfig.json` is also present,
 ///    otherwise JavaScript
 /// 4. `pyproject.toml` or `requirements.txt` -> Python
 ///
 /// Returns [`Language::Unknown`] when none of these exist. An explicit
 /// language from the configuration is expected to be handled by the caller.
 pub fn detect_project_language(root: &Path) -> Language {
    let has = |manifest: &str| root.join(manifest).exists();
    if has("Cargo.toml") {
        Language::Rust
    } else if has("go.mod") {
        Language::Go
    } else if has("package.json") {
        // Could be TypeScript or JavaScript; tsconfig.json decides.
        if has("tsconfig.json") {
            Language::TypeScript
        } else {
            Language::JavaScript
        }
    } else if has("pyproject.toml") || has("requirements.txt") {
        Language::Python
    } else {
        Language::Unknown
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use tempfile::TempDir;
    /// Create a temporary project directory containing the given
    /// `(file name, contents)` pairs.
    ///
    /// I/O errors are propagated with `?` rather than `expect`, because the
    /// crate denies `clippy::expect_used`.
    fn project_with(files: &[(&str, &str)]) -> std::io::Result<TempDir> {
        let dir = TempDir::new()?;
        for (name, contents) in files {
            std::fs::write(dir.path().join(name), contents)?;
        }
        Ok(dir)
    }
    #[test]
    fn test_detect_rust_project() -> std::io::Result<()> {
        let dir = project_with(&[("Cargo.toml", "[package]")])?;
        assert_eq!(detect_project_language(dir.path()), Language::Rust);
        Ok(())
    }
    #[test]
    fn test_detect_go_project() -> std::io::Result<()> {
        let dir = project_with(&[("go.mod", "module test")])?;
        assert_eq!(detect_project_language(dir.path()), Language::Go);
        Ok(())
    }
    #[test]
    fn test_detect_typescript_project() -> std::io::Result<()> {
        let dir = project_with(&[("package.json", "{}"), ("tsconfig.json", "{}")])?;
        assert_eq!(detect_project_language(dir.path()), Language::TypeScript);
        Ok(())
    }
    #[test]
    fn test_detect_javascript_project() -> std::io::Result<()> {
        // package.json without tsconfig.json means plain JavaScript.
        let dir = project_with(&[("package.json", "{}")])?;
        assert_eq!(detect_project_language(dir.path()), Language::JavaScript);
        Ok(())
    }
    #[test]
    fn test_detect_python_project() -> std::io::Result<()> {
        let dir = project_with(&[("requirements.txt", "")])?;
        assert_eq!(detect_project_language(dir.path()), Language::Python);
        Ok(())
    }
    #[test]
    fn test_detect_unknown() -> std::io::Result<()> {
        let dir = project_with(&[])?;
        assert_eq!(detect_project_language(dir.path()), Language::Unknown);
        Ok(())
    }
 }
--- a/applications/aphoria/src/walker/mod.rs
+++ b/applications/aphoria/src/walker/mod.rs
@ -0,0 +1,129 @@
 //! Project walker for traversing and analyzing codebases.
 // Skeleton phase: allow unused until scan pipeline is wired up
 #![allow(dead_code)]
 //!
 //! The walker:
 //! 1. Traverses the project directory (respecting .gitignore)
 //! 2. Detects the primary language
 //! 3. Maps file paths to ConceptPath segments
 //! 4. Filters files based on configuration
 mod language;
 mod path_mapper;
 pub use language::detect_project_language;
 pub use path_mapper::PathMapper;
 use std::path::Path;
 use ignore::WalkBuilder;
 use crate::config::AphoriaConfig;
 use crate::types::Language;
 use crate::AphoriaError;
 /// A file discovered during walking.
 #[derive(Debug)]
 pub struct WalkedFile {
    /// Path to the file as yielded by the directory walker.
    // NOTE(review): this is only absolute if the walk root was absolute — confirm against callers.
    pub path: std::path::PathBuf,
    /// Path relative to project root.
    pub relative_path: String,
    /// Detected language.
    pub language: Language,
    /// ConceptPath segments derived from the path.
    pub path_segments: Vec<String>,
 }
 /// Walk a project directory and collect the files eligible for extraction.
 ///
 /// A file is kept when it is not hidden or git-ignored, is not larger than
 /// `config.scan.max_file_size`, is not under one of the `config.scan.exclude`
 /// paths, has a recognised [`Language`], and (unless
 /// `config.scan.include_tests` is set) does not look like a test file.
 ///
 /// # Errors
 ///
 /// Returns [`AphoriaError::ProjectNotFound`] when `root` does not exist and
 /// [`AphoriaError::Walker`] when directory traversal fails or an entry is not
 /// located under `root`.
 pub fn walk_project(root: &Path, config: &AphoriaConfig) -> Result<Vec<WalkedFile>, AphoriaError> {
    if !root.exists() {
        return Err(AphoriaError::ProjectNotFound(root.to_path_buf()));
    }
    let mut files = Vec::new();
    let mapper = PathMapper::new(root, config);
    // NOTE(review): `hidden(true)` also skips dotfiles such as `.env`, so files
    // that `Language::from_path` would classify as Dotenv may never be visited
    // — confirm this is intended.
    let walker = WalkBuilder::new(root)
        .hidden(true) // Skip hidden files
        .git_ignore(true) // Respect .gitignore
        .build();
    for entry in walker {
        let entry = entry.map_err(|e| AphoriaError::Walker(e.to_string()))?;
        let path = entry.path();
        // Skip directories
        if path.is_dir() {
            continue;
        }
        // Skip files that are too large (best effort: unreadable metadata does
        // not exclude the file)
        if let Ok(metadata) = path.metadata() {
            if metadata.len() > config.scan.max_file_size {
                continue;
            }
        }
        // Get relative path
        let relative = path.strip_prefix(root).map_err(|e| AphoriaError::Walker(e.to_string()))?;
        // Check exclusions component-wise: excluding `target` must not also
        // drop `targets/…` or `target_utils.rs`, which a raw string prefix
        // comparison would do.
        if config.scan.exclude.iter().any(|ex| relative.starts_with(ex.trim_end_matches('/'))) {
            continue;
        }
        let relative_str = relative.to_string_lossy().to_string();
        // Detect language
        let language = Language::from_path(path);
        // Skip unknown file types
        if language == Language::Unknown {
            continue;
        }
        // Skip test files if configured
        if !config.scan.include_tests && is_test_file(&relative_str) {
            continue;
        }
        // Map to concept path segments
        let path_segments = mapper.to_segments(&relative_str, language);
        files.push(WalkedFile {
            path: path.to_path_buf(),
            relative_path: relative_str,
            language,
            path_segments,
        });
    }
    Ok(files)
 }
 /// Check if a file is a test file.
 ///
 /// Matching is case-insensitive and works on whole path components, so
 /// ordinary names that merely contain "test" or "spec" (`latest.rs`,
 /// `inspect.rs`, `attestation.rs`) are not misclassified.
 ///
 /// A path is a test file when either:
 /// - one of its directories is `test`, `tests`, `__tests__`, `spec`, `specs`
 ///   or `testdata`; or
 /// - its file name follows a common test naming convention: `test.*`,
 ///   `tests.*`, `test_*`, `*_test.*`, `*_tests.*`, `*_spec.*`, `*.test.*` or
 ///   `*.spec.*`.
 ///
 /// Both `/` and `\` are accepted as path separators.
 fn is_test_file(path: &str) -> bool {
    // Directory names that conventionally hold only tests or test fixtures.
    const TEST_DIRS: [&str; 6] = ["test", "tests", "__tests__", "spec", "specs", "testdata"];
    let lower = path.to_lowercase();
    // Walk components right-to-left: the first one is the file name, the rest
    // are its parent directories.
    let mut parts = lower.rsplit(|c: char| c == '/' || c == '\\');
    let file_name = parts.next().unwrap_or("");
    if parts.any(|dir| TEST_DIRS.contains(&dir)) {
        return true;
    }
    // Everything before the first dot, e.g. `jwt_test` for `jwt_test.rs`.
    let stem = file_name.split('.').next().unwrap_or(file_name);
    stem == "test"
        || stem == "tests"
        || stem.starts_with("test_")
        || stem.ends_with("_test")
        || stem.ends_with("_tests")
        || stem.ends_with("_spec")
        || file_name.contains(".test.")
        || file_name.contains(".spec.")
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_is_test_file() {
        // Paths that follow a test naming or directory convention.
        let test_paths = ["src/auth/jwt_test.rs", "tests/integration.rs", "src/__tests__/app.tsx"];
        for path in test_paths.iter() {
            assert!(is_test_file(path));
        }
        // An ordinary source file must not be flagged.
        assert!(!is_test_file("src/auth/jwt.rs"));
    }
 }
--- a/applications/aphoria/src/walker/path_mapper.rs
+++ b/applications/aphoria/src/walker/path_mapper.rs
@ -0,0 +1,196 @@
 //! Path mapping from file paths to ConceptPath segments.
 #![allow(dead_code)]
 use std::path::Path;
 use crate::config::AphoriaConfig;
 use crate::types::Language;
 /// Translates project-relative file paths into ConceptPath segments.
 pub struct PathMapper {
    /// Project name, emitted as the second segment of every mapped path.
    project_name: String,
 }
 impl PathMapper {
    /// Build a mapper for the project rooted at `root`.
    ///
    /// The project name is taken from, in order of preference: the configured
    /// name, a manifest found in `root`, the final component of `root`, and
    /// finally the literal `"unknown"`.
    pub fn new(root: &Path, config: &AphoriaConfig) -> Self {
        let resolved = config.project.name.clone().or_else(|| detect_project_name(root));
        let project_name = match resolved {
            Some(name) => name,
            None => root.file_name().and_then(|s| s.to_str()).unwrap_or("unknown").to_string(),
        };
        Self { project_name }
    }
    /// Convert a relative file path to ConceptPath segments.
    ///
    /// The result is `[language prefix, project name, directories.., file stem]`:
    /// boilerplate directories for the language are dropped and the final
    /// component loses its extension.
    pub fn to_segments(&self, relative_path: &str, language: Language) -> Vec<String> {
        // Language prefix: manifests share the prefix of their ecosystem.
        let lang_prefix = match language {
            Language::Rust | Language::CargoManifest => "rust",
            Language::Go | Language::GoMod => "go",
            Language::Python | Language::PythonManifest => "python",
            Language::TypeScript => "typescript",
            Language::JavaScript | Language::NpmManifest => "javascript",
            Language::Yaml | Language::Toml | Language::Json | Language::Dotenv => "config",
            Language::Docker => "docker",
            Language::Unknown => "unknown",
        };
        // Split the path and apply language-specific stripping. Components
        // that are not valid UTF-8 are dropped.
        let components: Vec<&str> = Path::new(relative_path)
            .components()
            .filter_map(|c| c.as_os_str().to_str())
            .collect();
        let stripped = strip_boilerplate(&components, language);
        let mut segments = Vec::with_capacity(stripped.len() + 2);
        segments.push(lang_prefix.to_string());
        segments.push(self.project_name.clone());
        if let Some((last, rest)) = stripped.split_last() {
            segments.extend(rest.iter().map(|dir| (*dir).to_string()));
            // Only the final component loses its extension.
            let stem = Path::new(last).file_stem().and_then(|s| s.to_str()).unwrap_or(last);
            segments.push(stem.to_string());
        }
        segments
    }
 }
 /// Drop path components that are structural boilerplate for `language`.
 ///
 /// Components equal to one of these names are removed wherever they occur
 /// in the path; all others are kept in their original order:
 /// - Rust: `src`, `crates`
 /// - Go: `cmd`, `internal`, `pkg`
 /// - Python: `src`, `lib`
 /// - JS/TS: `src`, `lib`
 ///
 /// Every other language keeps all of its components.
 fn strip_boilerplate<'a>(components: &'a [&'a str], language: Language) -> Vec<&'a str> {
    let boilerplate: &[&str] = match language {
        Language::Rust | Language::CargoManifest => &["src", "crates"],
        Language::Go | Language::GoMod => &["cmd", "internal", "pkg"],
        Language::Python | Language::PythonManifest => &["src", "lib"],
        Language::TypeScript | Language::JavaScript | Language::NpmManifest => &["src", "lib"],
        Language::Yaml
        | Language::Toml
        | Language::Json
        | Language::Dotenv
        | Language::Docker
        | Language::Unknown => &[],
    };
    let mut kept = Vec::new();
    for &component in components {
        if !boilerplate.contains(&component) {
            kept.push(component);
        }
    }
    kept
 }
 /// Detect project name from manifest files.
 ///
 /// Tries, in order, `package.name` in `Cargo.toml`, the module path in
 /// `go.mod`, and `name` in `package.json`. A manifest that is missing,
 /// unreadable, unparsable, or lacks a name is skipped. Returns `None` when no
 /// manifest yields a name.
 fn detect_project_name(root: &Path) -> Option<String> {
    // Try Cargo.toml
    let cargo_name = std::fs::read_to_string(root.join("Cargo.toml"))
        .ok()
        .and_then(|content| content.parse::<toml::Table>().ok())
        .and_then(|manifest| {
            manifest.get("package")?.as_table()?.get("name")?.as_str().map(str::to_string)
        });
    if cargo_name.is_some() {
        return cargo_name;
    }
    // Try go.mod
    if let Ok(content) = std::fs::read_to_string(root.join("go.mod")) {
        if let Some(name) = go_module_name(&content) {
            return Some(name);
        }
    }
    // Try package.json
    std::fs::read_to_string(root.join("package.json"))
        .ok()
        .and_then(|content| serde_json::from_str::<serde_json::Value>(&content).ok())
        .and_then(|manifest| manifest.get("name")?.as_str().map(str::to_string))
 }
 /// Extract the project name from the contents of a `go.mod` file.
 ///
 /// Uses the first `module ` directive: any trailing `//` comment and
 /// surrounding quotes are removed, then the last path segment is taken. A
 /// major-version suffix such as `/v2` is skipped, so
 /// `github.com/acme/widget/v2` yields `widget` rather than `v2`.
 fn go_module_name(go_mod: &str) -> Option<String> {
    let declared = go_mod.lines().find_map(|line| line.strip_prefix("module "))?;
    // A module path never contains `//`, so anything after it is a comment.
    let module = declared.split("//").next().unwrap_or(declared).trim().trim_matches('"');
    let mut segments = module.rsplit('/');
    let last = segments.next()?;
    let is_major_suffix = last
        .strip_prefix('v')
        .map_or(false, |digits| !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()));
    let name = if is_major_suffix { segments.next().unwrap_or(last) } else { last };
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use tempfile::TempDir;
    /// Build a mapper for an empty temporary project whose name is set
    /// explicitly in the configuration.
    ///
    /// I/O errors are propagated with `?` rather than `expect`, because the
    /// crate denies `clippy::expect_used`.
    fn mapper_named(name: &str) -> std::io::Result<PathMapper> {
        let dir = TempDir::new()?;
        let config = AphoriaConfig {
            project: crate::config::ProjectConfig {
                name: Some(name.to_string()),
                language: None,
            },
            ..Default::default()
        };
        Ok(PathMapper::new(dir.path(), &config))
    }
    #[test]
    fn test_rust_path_mapping() -> std::io::Result<()> {
        let mapper = mapper_named("citadeldb")?;
        let segments = mapper.to_segments("crates/citadeldb/src/auth/jwt.rs", Language::Rust);
        assert_eq!(segments, vec!["rust", "citadeldb", "citadeldb", "auth", "jwt"]);
        Ok(())
    }
    #[test]
    fn test_go_path_mapping() -> std::io::Result<()> {
        let mapper = mapper_named("myapp")?;
        let segments = mapper.to_segments("internal/auth/jwt/validator.go", Language::Go);
        assert_eq!(segments, vec!["go", "myapp", "auth", "jwt", "validator"]);
        Ok(())
    }
    #[test]
    fn test_config_path_mapping() -> std::io::Result<()> {
        let mapper = mapper_named("myapp")?;
        let segments = mapper.to_segments("config/production.yaml", Language::Yaml);
        assert_eq!(segments, vec!["config", "myapp", "config", "production"]);
        Ok(())
    }
    #[test]
    fn test_strip_boilerplate() {
        let components = vec!["src", "auth", "jwt.rs"];
        let result = strip_boilerplate(&components, Language::Rust);
        assert_eq!(result, vec!["auth", "jwt.rs"]);
        // Multiple boilerplate dirs (crates/xxx/src/)
        let components = vec!["crates", "mylib", "src", "auth", "jwt.rs"];
        let result = strip_boilerplate(&components, Language::Rust);
        assert_eq!(result, vec!["mylib", "auth", "jwt.rs"]);
        let components = vec!["internal", "auth", "jwt", "validator.go"];
        let result = strip_boilerplate(&components, Language::Go);
        assert_eq!(result, vec!["auth", "jwt", "validator.go"]);
    }
 }
--- a/crates/stemedb-api/src/handlers/supersede.rs
+++ b/crates/stemedb-api/src/handlers/supersede.rs
@ -114,12 +114,16 @@ pub async fn supersede(
    let supersession_type: SupersessionType = req.supersession_type.into();
    // Create supersession record
    // NOTE: hlc_timestamp is None for API-created supersessions. In distributed mode,
    // supersessions flow through the IngestWorker which generates HLC timestamps.
    // Direct API creation is for single-node deployments or manual corrections.
    let supersession = Supersession {
        target_hash,
        supersession_type,
        reason: req.reason.clone(),
        new_hash,
        timestamp,
        hlc_timestamp: None, // Single-node mode; distributed mode uses IngestWorker
        agent_id,
        signature,
    };
--- a/crates/stemedb-core/Cargo.toml
+++ b/crates/stemedb-core/Cargo.toml
@ -22,5 +22,8 @@ bytecheck = "0.6" # Required for rkyv validation
 # Cryptography
 ed25519-dalek = { version = "2.1", features = ["rand_core"] }
 # Hybrid Logical Clocks for distributed causal ordering
 uhlc = "0.7"
 # Visual Provenance
 image_hasher = "3.1"
--- a/crates/stemedb-core/src/lib.rs
+++ b/crates/stemedb-core/src/lib.rs
@ -167,6 +167,7 @@ mod tests {
            reason: "Proposal treated as approved. See incident INC-2024-001".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None, // Legacy: no HLC for backward compat test
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };
@ -209,6 +210,7 @@ mod tests {
                reason: format!("{:?} test", stype),
                new_hash: None,
                timestamp: 0,
                hlc_timestamp: None,
                agent_id: [0u8; 32],
                signature: [0u8; 64],
            };
--- a/crates/stemedb-core/src/types/hlc.rs
+++ b/crates/stemedb-core/src/types/hlc.rs
@ -0,0 +1,361 @@
 //! Hybrid Logical Clock types for distributed causal ordering.
 //!
 //! HLCs combine physical time with node identity to provide:
 //! - Causal ordering across distributed nodes
 //! - Monotonic timestamps even with clock skew
 //! - Total ordering when combined with node ID
 //!
 //! # Design
 //!
 //! This module provides a serializable wrapper around [`uhlc::Timestamp`] that
 //! is compatible with rkyv zero-copy serialization. The wrapper stores:
 //!
 //! - `time_ntp64`: NTP64-encoded time (physical time in the upper bits, logical counter in the lowest bits)
 //! - `node_id`: 16-byte identifier for total ordering tiebreaker
 //!
 //! # Use Cases
 //!
 //! - **Supersession ordering**: Determine which supersession happened first
 //!   across multiple nodes, even with clock skew
 //! - **Conflict resolution**: Break ties in Last-Write-Wins (LWW) semantics
 //! - **Replication**: Ensure causal consistency during CRDT merges
 //!
 //! # Example
 //!
 //! ```ignore
 //! use stemedb_core::types::HlcTimestamp;
 //!
 //! // Create from uhlc::Timestamp
 //! let hlc = HlcTimestamp::from_uhlc(&timestamp);
 //!
 //! // HLC timestamps are totally ordered
 //! assert!(hlc1 < hlc2 || hlc1 > hlc2 || hlc1 == hlc2);
 //! ```
 use rkyv::{Archive, Deserialize, Serialize};
 use std::cmp::Ordering;
 /// A serializable Hybrid Logical Clock timestamp.
 ///
 /// Provides causal ordering guarantees across distributed nodes. When comparing
 /// two HLC timestamps:
 ///
 /// 1. First compare `time_ntp64` (NTP64 encoded time with logical counter)
 /// 2. If equal, compare `node_id` for total ordering
 ///
 /// This ensures a total order even for concurrent events on different nodes.
 ///
 /// # Serialization
 ///
 /// Uses rkyv for zero-copy serialization, compatible with StemeDB's storage layer.
 /// The archived form has identical layout for O(1) access.
 ///
 /// # Default
 ///
 /// The derived `Default` yields `time_ntp64 == 0` and an all-zero `node_id`.
 /// An all-zero `node_id` is treated as an invalid `uhlc::ID` (see
 /// `bytes_to_id`), so a default value cannot be converted back with `to_uhlc`.
 #[derive(Archive, Deserialize, Serialize, Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
 #[archive(check_bytes)]
 pub struct HlcTimestamp {
    /// NTP64-encoded time with embedded logical counter.
    ///
    /// The upper bits contain the physical time (seconds since NTP epoch),
    /// and the lower bits may contain a logical counter for disambiguation
    /// of events at the same physical time.
    ///
    /// NOTE(review): `uhlc`'s default system clock appears to count seconds
    /// from the Unix epoch rather than the 1900 NTP epoch - confirm which
    /// epoch the producers of this value actually use.
    pub time_ntp64: u64,
    /// Node identifier for total ordering tiebreaker.
    ///
    /// When NTP64 time is equal (concurrent events on different nodes),
    /// the node ID provides a deterministic tiebreaker.
    /// Typically derived from a UUID or random bytes at node startup.
    ///
    /// Stored as the little-endian bytes of the `uhlc::ID` (see `from_uhlc`).
    pub node_id: [u8; 16],
 }
 impl HlcTimestamp {
    /// Creates a new HLC timestamp with the given components.
    ///
    /// # Arguments
    ///
    /// * `time_ntp64` - NTP64-encoded time value
    /// * `node_id` - 16-byte node identifier
    pub fn new(time_ntp64: u64, node_id: [u8; 16]) -> Self {
        Self { time_ntp64, node_id }
    }
    /// Creates an HLC timestamp from a `uhlc::Timestamp`.
    ///
    /// This is the primary constructor when using the `uhlc` crate for
    /// clock management. The node ID is stored as its little-endian bytes.
    ///
    /// # Arguments
    ///
    /// * `ts` - A timestamp from the `uhlc` crate
    pub fn from_uhlc(ts: &uhlc::Timestamp) -> Self {
        Self { time_ntp64: ts.get_time().as_u64(), node_id: ts.get_id().to_le_bytes() }
    }
    /// Creates an HLC timestamp from the current time on a `uhlc::HLC` clock.
    ///
    /// This generates a new timestamp that is guaranteed to be greater than
    /// all previously generated timestamps from this clock.
    pub fn now(clock: &uhlc::HLC) -> Self {
        let ts = clock.new_timestamp();
        Self::from_uhlc(&ts)
    }
    /// Returns the physical time in whole milliseconds since the clock's epoch.
    ///
    /// The NTP64 value is a 32.32 fixed-point number: whole seconds in the
    /// upper 32 bits, a binary fraction of a second in the lower 32 bits.
    ///
    /// `uhlc` does not attach an epoch to its NTP64 values, and its default
    /// system clock counts from the Unix epoch. For timestamps produced via
    /// [`HlcTimestamp::now`] / [`HlcTimestamp::from_uhlc`] the result is
    /// therefore milliseconds since the Unix epoch, directly comparable with
    /// a Unix timestamp expressed in milliseconds.
    ///
    /// No 1900-to-1970 offset is applied. Subtracting it would saturate to
    /// zero for every Unix-epoch timestamp before roughly the year 2040,
    /// leaving only the sub-second part.
    #[must_use]
    pub fn millis(&self) -> u64 {
        let seconds = self.time_ntp64 >> 32;
        let fractions = self.time_ntp64 & 0xFFFF_FFFF;
        // `fractions` < 2^32, so `fractions * 1000` < 2^42: no u64 overflow.
        // Any logical counter folded into the lowest fraction bits is far
        // below millisecond resolution and is truncated away by the shift.
        let millis_from_fractions = (fractions * 1000) >> 32;
        // `seconds` < 2^32, so `seconds * 1000` cannot overflow either.
        seconds * 1000 + millis_from_fractions
    }
    /// Returns the raw NTP64 time value for precise comparison.
    #[must_use]
    pub fn as_ntp64(&self) -> u64 {
        self.time_ntp64
    }
    /// Checks if this timestamp is causally before another.
    ///
    /// Note: This is based on the NTP64 time only, not the node ID.
    /// Two timestamps may be concurrent if they have the same time
    /// but different node IDs.
    #[must_use]
    pub fn is_before(&self, other: &Self) -> bool {
        self.time_ntp64 < other.time_ntp64
    }
    /// Returns true if this timestamp and another are concurrent.
    ///
    /// Concurrent means they have the same NTP64 time but different node IDs,
    /// indicating they were generated at the "same time" on different nodes
    /// without a causal relationship.
    #[must_use]
    pub fn is_concurrent_with(&self, other: &Self) -> bool {
        self.time_ntp64 == other.time_ntp64 && self.node_id != other.node_id
    }
    /// Converts this timestamp back to a `uhlc::Timestamp`.
    ///
    /// Useful for updating a clock with a received timestamp.
    ///
    /// Returns `None` when `node_id` cannot be converted into a `uhlc::ID`
    /// (for example the all-zero ID of a default-constructed timestamp).
    #[must_use]
    pub fn to_uhlc(&self) -> Option<uhlc::Timestamp> {
        let id = uhlc::ID::try_from(&self.node_id[..]).ok()?;
        let time = uhlc::NTP64(self.time_ntp64);
        Some(uhlc::Timestamp::new(time, id))
    }
 }
 /// Total ordering for HLC timestamps.
 ///
 /// The NTP64 clock value (physical time plus logical counter) decides first.
 /// Only when both clock values are identical does the node ID settle the
 /// tie, compared lexicographically byte by byte.
 impl Ord for HlcTimestamp {
    fn cmp(&self, other: &Self) -> Ordering {
        self.time_ntp64
            .cmp(&other.time_ntp64)
            .then_with(|| self.node_id.cmp(&other.node_id))
    }
 }
 /// Partial ordering for HLC timestamps.
 ///
 /// Always returns `Some`, delegating to the total order defined by `Ord::cmp`
 /// so the two implementations can never disagree.
 impl PartialOrd for HlcTimestamp {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }
 /// Converts a `uhlc::ID` to a 16-byte array.
 ///
 /// The bytes are in little-endian order, matching the encoding used for
 /// `HlcTimestamp::node_id` in `HlcTimestamp::from_uhlc`.
 // NOTE(review): `dead_code` is presumably allowed because this helper has no
 // caller in this module - confirm whether it is still needed.
 #[allow(dead_code)]
 pub fn id_to_bytes(id: &uhlc::ID) -> [u8; 16] {
    id.to_le_bytes()
 }
 /// Creates a `uhlc::ID` from a 16-byte array.
 ///
 /// Returns None if the bytes represent an invalid ID (all zeros).
 ///
 /// The conversion error itself is discarded; callers only learn that the
 /// bytes were rejected.
 pub fn bytes_to_id(bytes: [u8; 16]) -> Option<uhlc::ID> {
    uhlc::ID::try_from(&bytes[..]).ok()
 }
 /// Default skew threshold for clock drift detection, in milliseconds.
 ///
 /// If the difference between local and remote physical time exceeds this
 /// threshold, the clock should log a warning. This helps detect nodes
 /// with significantly drifted clocks.
 ///
 /// The value is plain milliseconds (the unit returned by
 /// `HlcTimestamp::millis`), not a raw NTP64 quantity. For reference,
 /// 500ms = 0.5 seconds, which in NTP64 is approximately 0x80000000 (half of
 /// the fractional second range in the lower 32 bits).
 pub const SKEW_THRESHOLD_MS: u64 = 500;
 /// Reports clock skew between two HLC timestamps when it is excessive.
 ///
 /// Both timestamps are reduced to milliseconds via `HlcTimestamp::millis`
 /// and their absolute difference is compared against `SKEW_THRESHOLD_MS`.
 ///
 /// # Returns
 ///
 /// `Some(skew_ms)` when the difference is strictly greater than the
 /// threshold, `None` otherwise (a difference equal to the threshold is
 /// not reported).
 ///
 /// # Use Case
 ///
 /// Call this while merging CRDT state received from a remote node and log a
 /// warning on `Some`, so operators can spot nodes that need NTP
 /// synchronization.
 pub fn detect_clock_skew(local: &HlcTimestamp, remote: &HlcTimestamp) -> Option<u64> {
    let skew_ms = local.millis().abs_diff(remote.millis());
    Some(skew_ms).filter(|&ms| ms > SKEW_THRESHOLD_MS)
 }
 // Unit tests for `HlcTimestamp` ordering, skew detection, `uhlc` interop
 // and serialization.
 #[cfg(test)]
 mod tests {
    use super::*;
    // Same node, different clock values: the clock value decides the order.
    #[test]
    fn test_hlc_ordering_time() {
        let node_id = [1u8; 16];
        let t1 = HlcTimestamp::new(1000, node_id);
        let t2 = HlcTimestamp::new(2000, node_id);
        assert!(t1 < t2);
        assert!(t2 > t1);
    }
    // Same clock value, different nodes: the node ID breaks the tie.
    #[test]
    fn test_hlc_ordering_node_id() {
        let t1 = HlcTimestamp::new(1000, [1u8; 16]);
        let t2 = HlcTimestamp::new(1000, [2u8; 16]);
        assert!(t1 < t2);
        assert!(t2 > t1);
    }
    // Identical components compare equal.
    #[test]
    fn test_hlc_equality() {
        let t1 = HlcTimestamp::new(1000, [1u8; 16]);
        let t2 = HlcTimestamp::new(1000, [1u8; 16]);
        assert_eq!(t1, t2);
    }
    // `is_before` looks at the clock value only and is strict.
    #[test]
    fn test_is_before() {
        let node_id = [1u8; 16];
        let t1 = HlcTimestamp::new(1000, node_id);
        let t2 = HlcTimestamp::new(2000, node_id);
        let t3 = HlcTimestamp::new(3000, node_id);
        assert!(t1.is_before(&t2));
        assert!(t1.is_before(&t3));
        assert!(t2.is_before(&t3));
        assert!(!t2.is_before(&t1));
    }
    // Concurrency requires equal clock values and different node IDs.
    #[test]
    fn test_is_concurrent() {
        let t1 = HlcTimestamp::new(1000, [1u8; 16]);
        let t2 = HlcTimestamp::new(1000, [2u8; 16]);
        let t3 = HlcTimestamp::new(2000, [1u8; 16]);
        assert!(t1.is_concurrent_with(&t2));
        assert!(!t1.is_concurrent_with(&t3));
    }
    // Skew is reported only when the millisecond difference exceeds the threshold.
    #[test]
    fn test_detect_clock_skew() {
        // Test with realistic NTP64 values (after NTP epoch offset)
        // NTP epoch offset: 2208988800 seconds from 1900 to 1970
        // 1 second in NTP64 = 1 << 32 (upper 32 bits are seconds)
        let ntp_seconds = |s: u64| s << 32;
        // Use a time after the NTP-Unix offset so millis() returns positive values
        // NTP_UNIX_OFFSET = 2_208_988_800, so use a time after that
        const BASE_NTP_SECONDS: u64 = 2_208_988_800 + 1000; // 1000 seconds after Unix epoch
        let local = HlcTimestamp::new(ntp_seconds(BASE_NTP_SECONDS), [1u8; 16]);
        let remote_ok = HlcTimestamp::new(ntp_seconds(BASE_NTP_SECONDS), [2u8; 16]);
        // No skew - same time
        assert!(detect_clock_skew(&local, &remote_ok).is_none());
        // Create a timestamp 1 second ahead (1000ms > 500ms threshold)
        let remote_skew = HlcTimestamp::new(ntp_seconds(BASE_NTP_SECONDS + 1), [2u8; 16]);
        // This should detect skew (1000ms > 500ms threshold)
        let skew = detect_clock_skew(&local, &remote_skew);
        assert!(skew.is_some(), "Expected skew detection, got None");
        // The skew should be ~1000ms (1 second)
        assert_eq!(skew, Some(1000));
        // Test with 400ms difference (below threshold)
        // 400ms = 0.4 * 2^32 ≈ 1717986918 in NTP64 fractional part
        let remote_under_threshold =
            HlcTimestamp::new(ntp_seconds(BASE_NTP_SECONDS) + 1717986918, [2u8; 16]);
        assert!(detect_clock_skew(&local, &remote_under_threshold).is_none());
    }
    // uhlc -> HlcTimestamp -> uhlc must reproduce the original timestamp.
    #[test]
    fn test_from_uhlc_roundtrip() {
        // Create a uhlc clock and generate a timestamp
        let clock = uhlc::HLCBuilder::new().build();
        let ts = clock.new_timestamp();
        // Convert to our format
        let hlc = HlcTimestamp::from_uhlc(&ts);
        // Convert back
        let recovered = hlc.to_uhlc().expect("should convert back");
        // Should be equal
        assert_eq!(ts, recovered);
    }
    // Consecutive timestamps from one clock never go backwards and share a node ID.
    #[test]
    fn test_hlc_now() {
        let clock = uhlc::HLCBuilder::new().build();
        let t1 = HlcTimestamp::now(&clock);
        let t2 = HlcTimestamp::now(&clock);
        // t2 should be >= t1 (monotonic)
        assert!(t2 >= t1);
        // Both should have the same node ID
        assert_eq!(t1.node_id, t2.node_id);
    }
    // Serializing and deserializing through the crate's serde helpers is lossless.
    #[test]
    fn test_serialization_roundtrip() {
        use crate::serde::{deserialize, serialize};
        let ts = HlcTimestamp::new(
            12345678901234,
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        );
        let bytes = serialize(&ts).expect("serialize");
        let deserialized: HlcTimestamp = deserialize(&bytes).expect("deserialize");
        assert_eq!(ts, deserialized);
    }
    // The derived `Default` is all zeros.
    #[test]
    fn test_default() {
        let ts = HlcTimestamp::default();
        assert_eq!(ts.time_ntp64, 0);
        assert_eq!(ts.node_id, [0u8; 16]);
    }
 }
--- a/crates/stemedb-core/src/types/mod.rs
+++ b/crates/stemedb-core/src/types/mod.rs
@ -103,6 +103,7 @@ mod concept;
 mod epoch;
 mod escalation;
 mod gold_standard;
 mod hlc;
 mod lifecycle;
 mod materialized;
 mod query;
@ -119,6 +120,7 @@ pub use concept::{AliasOrigin, ConceptAlias, ConceptPath, ConceptPathError, Sour
 pub use epoch::Epoch;
 pub use escalation::{EscalationEvent, EscalationLevel, EscalationPolicy};
 pub use gold_standard::GoldStandard;
 pub use hlc::{bytes_to_id, detect_clock_skew, HlcTimestamp, SKEW_THRESHOLD_MS};
 pub use lifecycle::LifecycleStage;
 pub use materialized::{ChangeEntry, MaterializedView};
 pub use query::{ContributingAssertion, QueryAudit, QueryParams};
--- a/crates/stemedb-core/src/types/supersession.rs
+++ b/crates/stemedb-core/src/types/supersession.rs
@ -2,6 +2,7 @@
 use rkyv::{Archive, Deserialize, Serialize};
 use super::hlc::HlcTimestamp;
 use super::Hash;
 /// Defines the nature of a paradigm shift or error correction.
@ -68,6 +69,7 @@ pub enum SupersessionType {
 ///     reason: "Proposal treated as approved. See incident INC-2024-001".to_string(),
 ///     new_hash: Some(corrected_assertion_hash),
 ///     timestamp: now(),
 ///     hlc_timestamp: Some(HlcTimestamp::now(&hlc_clock)), // For distributed causal ordering
 ///     agent_id: supervisor_public_key,
 ///     signature: supervisor_signature,
 /// };
@ -86,10 +88,143 @@ pub struct Supersession {
    /// None for RequiresReview (flagging, not replacing) or pure invalidation.
    pub new_hash: Option<Hash>,
    /// Unix timestamp when the supersession was created.
    ///
    /// Kept for backward compatibility. New supersessions should also set
    /// `hlc_timestamp` for distributed causal ordering.
    pub timestamp: u64,
    /// Hybrid Logical Clock timestamp for distributed causal ordering.
    ///
    /// Provides causal ordering guarantees across distributed nodes. When
    /// comparing supersessions from different nodes, HLC comparison is
    /// preferred over `timestamp` when available.
    ///
    /// # Migration
    ///
    /// - New supersessions: Set both `timestamp` and `hlc_timestamp`
    /// - Existing supersessions: Have `hlc_timestamp: None`
    /// - Comparison: Use HLC when available, fall back to `timestamp`
    pub hlc_timestamp: Option<HlcTimestamp>,
    /// Ed25519 public key of the agent creating the supersession.
    pub agent_id: [u8; 32],
    /// Ed25519 signature over the supersession content.
    /// Signs: BLAKE3(target_hash || type || reason || new_hash || timestamp)
    pub signature: [u8; 64],
 }
 impl Supersession {
    /// Compares two supersessions by their temporal ordering.
    ///
    /// Uses HLC timestamp when available for causal ordering, otherwise
    /// falls back to Unix timestamp comparison.
    ///
    /// When only one side carries an HLC, its physical time in milliseconds
    /// (`HlcTimestamp::millis`) is compared against the other side's legacy
    /// `timestamp`, which is assumed to be in seconds and is scaled to
    /// milliseconds. The scaling saturates instead of overflowing, so an
    /// implausibly large `timestamp` (for example from a corrupt or hostile
    /// record) sorts last rather than panicking or wrapping around.
    ///
    /// # Returns
    ///
    /// - `std::cmp::Ordering::Less` if `self` is before `other`
    /// - `std::cmp::Ordering::Greater` if `self` is after `other`
    /// - `std::cmp::Ordering::Equal` if they have the same timestamp
    pub fn temporal_cmp(&self, other: &Self) -> std::cmp::Ordering {
        match (&self.hlc_timestamp, &other.hlc_timestamp) {
            // Both have HLC: use causal ordering
            (Some(a), Some(b)) => a.cmp(b),
            // Only self has HLC: HLC physical time vs other's legacy timestamp
            (Some(a), None) => {
                let self_ms = a.millis();
                let other_ms = other.timestamp.saturating_mul(1000); // seconds -> millis
                self_ms.cmp(&other_ms)
            }
            // Only other has HLC: self's legacy timestamp vs HLC physical time
            (None, Some(b)) => {
                let self_ms = self.timestamp.saturating_mul(1000); // seconds -> millis
                let other_ms = b.millis();
                self_ms.cmp(&other_ms)
            }
            // Neither has HLC: fall back to Unix timestamp
            (None, None) => self.timestamp.cmp(&other.timestamp),
        }
    }
    /// Returns true if this supersession has causal ordering information.
    ///
    /// Supersessions with HLC timestamps can be reliably ordered across
    /// distributed nodes, even in the presence of clock skew.
    pub fn has_hlc(&self) -> bool {
        self.hlc_timestamp.is_some()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::cmp::Ordering;
    /// Builds a minimal supersession whose only interesting fields are the
    /// legacy `timestamp` and the optional HLC timestamp.
    fn create_supersession(timestamp: u64, hlc: Option<HlcTimestamp>) -> Supersession {
        Supersession {
            target_hash: [0u8; 32],
            supersession_type: SupersessionType::Temporal,
            reason: "test".to_string(),
            new_hash: None,
            timestamp,
            hlc_timestamp: hlc,
            agent_id: [0u8; 32],
            signature: [0u8; 64],
        }
    }
    #[test]
    fn test_temporal_cmp_both_hlc() {
        // When both have HLC, use HLC comparison
        let hlc1 = HlcTimestamp::new(1000, [1u8; 16]);
        let hlc2 = HlcTimestamp::new(2000, [1u8; 16]);
        let s1 = create_supersession(100, Some(hlc1));
        let s2 = create_supersession(200, Some(hlc2));
        // HLC timestamps should determine order (s1 < s2)
        assert_eq!(s1.temporal_cmp(&s2), Ordering::Less);
        assert_eq!(s2.temporal_cmp(&s1), Ordering::Greater);
    }
    #[test]
    fn test_temporal_cmp_neither_hlc() {
        // When neither has HLC, use Unix timestamp
        let s1 = create_supersession(100, None);
        let s2 = create_supersession(200, None);
        assert_eq!(s1.temporal_cmp(&s2), Ordering::Less);
        assert_eq!(s2.temporal_cmp(&s1), Ordering::Greater);
    }
    #[test]
    fn test_temporal_cmp_only_first_has_hlc() {
        // Only the first side has an HLC: its millis() is compared against
        // the other side's timestamp scaled to milliseconds.
        // 100 seconds in the upper 32 bits of the NTP64 value.
        let hlc = HlcTimestamp::new(100_u64 << 32, [1u8; 16]);
        let s1 = create_supersession(100, Some(hlc));
        let s2 = create_supersession(200, None);
        // The HLC's physical time is at most 100s once converted by millis(),
        // which is earlier than the legacy timestamp of 200s (200_000ms).
        assert_eq!(s1.temporal_cmp(&s2), Ordering::Less);
    }
    #[test]
    fn test_temporal_cmp_only_second_has_hlc() {
        // Mirror image of the previous test: exercises the (None, Some) arm.
        let hlc = HlcTimestamp::new(100_u64 << 32, [1u8; 16]);
        let s1 = create_supersession(200, None);
        let s2 = create_supersession(100, Some(hlc));
        // 200_000ms on the legacy side is later than at most 100s on the HLC side.
        assert_eq!(s1.temporal_cmp(&s2), Ordering::Greater);
    }
    #[test]
    fn test_temporal_cmp_equal() {
        let s1 = create_supersession(100, None);
        let s2 = create_supersession(100, None);
        assert_eq!(s1.temporal_cmp(&s2), Ordering::Equal);
    }
    #[test]
    fn test_has_hlc() {
        let s_without = create_supersession(100, None);
        let s_with = create_supersession(100, Some(HlcTimestamp::new(1000, [1u8; 16])));
        assert!(!s_without.has_hlc());
        assert!(s_with.has_hlc());
    }
 }
--- a/crates/stemedb-ingest/Cargo.toml
+++ b/crates/stemedb-ingest/Cargo.toml
@ -19,6 +19,10 @@ thiserror = "1.0"
 blake3 = "1.5"
 hex = "0.4"
 ed25519-dalek = { version = "2.1", features = ["rand_core"] }
 # Hybrid Logical Clocks for distributed causal ordering
 uhlc = "0.7"
 # Async traits
 async-trait = "0.1"
 [dev-dependencies]
 tempfile = "3.10"
--- a/crates/stemedb-ingest/src/gossip.rs
+++ b/crates/stemedb-ingest/src/gossip.rs
@ -0,0 +1,129 @@
 //! Gossip broadcast trait for distributed replication.
 //!
 //! This module defines the `GossipBroadcast` trait that allows the IngestWorker
 //! to broadcast newly ingested assertions to peer nodes.
 //!
 //! # Design
 //!
 //! The trait is defined here in stemedb-ingest to avoid a cyclic dependency:
 //! - stemedb-ingest needs the trait for IngestWorker
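 //! - stemedb-sync implements the trait, so it must depend on the crate that defines it;
 //!   if the trait lived in stemedb-sync, stemedb-ingest would have to depend on
 //!   stemedb-sync in turn, creating a cycle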
 //!
 //! By defining the trait here, stemedb-sync can implement it without the cycle.
 use async_trait::async_trait;
 use stemedb_core::types::HlcTimestamp;
 use thiserror::Error;
 /// Error type for gossip operations.
 ///
 /// Returned by `GossipBroadcast::broadcast`. The `Network` and
 /// `Serialization` variants carry a human-readable description rather than
 /// the underlying error value.
 #[derive(Debug, Error)]
 pub enum GossipError {
    /// Network error during broadcast.
    #[error("Network error: {0}")]
    Network(String),
    /// Serialization error.
    #[error("Serialization error: {0}")]
    Serialization(String),
    /// All peers failed to receive the message.
    #[error("All peers failed")]
    AllPeersFailed,
 }
 /// Trait for broadcasting assertions to peer nodes.
 ///
 /// Implementations should be:
 /// - **Non-blocking**: Don't wait for all peers to acknowledge
 /// - **Best-effort**: Log failures but don't block the ingestion pipeline
 /// - **Idempotent-friendly**: Receivers handle duplicates gracefully
 ///
 /// `enable` and `disable` take `&self`, so an implementation that supports
 /// toggling needs interior mutability for its enabled flag.
 ///
 /// # Example
 ///
 /// ```ignore
 /// use stemedb_ingest::gossip::GossipBroadcast;
 ///
 /// struct MyBroadcaster { /* ... */ }
 ///
 /// #[async_trait]
 /// impl GossipBroadcast for MyBroadcaster {
 ///     async fn broadcast(&self, hash: &[u8; 32], data: &[u8], hlc: &HlcTimestamp) -> Result<(), GossipError> {
 ///         // Send to peers...
 ///         Ok(())
 ///     }
 ///
 ///     fn is_enabled(&self) -> bool { true }
 ///     fn enable(&self) {}
 ///     fn disable(&self) {}
 /// }
 /// ```
 #[async_trait]
 pub trait GossipBroadcast: Send + Sync {
    /// Broadcast an assertion to peer nodes.
    ///
    /// # Arguments
    ///
    /// * `hash` - BLAKE3 hash of the assertion (32 bytes)
    /// * `data` - Serialized assertion data (rkyv format)
    /// * `hlc` - HLC timestamp for causal ordering
    ///
    /// # Returns
    ///
    /// `Ok(())` if at least one peer received the message, or if no peers
    /// are configured. The method should not fail the ingestion pipeline.
    ///
    /// # Errors
    ///
    /// Returns a `GossipError` describing why the broadcast failed.
    async fn broadcast(
        &self,
        hash: &[u8; 32],
        data: &[u8],
        hlc: &HlcTimestamp,
    ) -> Result<(), GossipError>;
    /// Check if broadcasting is currently enabled.
    fn is_enabled(&self) -> bool;
    /// Enable broadcasting.
    fn enable(&self);
    /// Disable broadcasting (e.g., for testing or during recovery).
    fn disable(&self);
 }
 /// A no-op implementation for single-node deployments or testing.
 ///
 /// The type carries no state, so it derives the common traits: it can be
 /// debug-printed, copied freely, and built with `Default::default()`.
 #[derive(Debug, Clone, Copy, Default)]
 pub struct NoOpGossipBroadcast;
 // No-op broadcaster: accepts every broadcast without sending anything and
 // always reports itself as disabled.
 #[async_trait]
 impl GossipBroadcast for NoOpGossipBroadcast {
    // Ignores all arguments and succeeds unconditionally.
    async fn broadcast(
        &self,
        _hash: &[u8; 32],
        _data: &[u8],
        _hlc: &HlcTimestamp,
    ) -> Result<(), GossipError> {
        // Do nothing
        Ok(())
    }
    // Always false, so callers that check `is_enabled` first never call `broadcast`.
    fn is_enabled(&self) -> bool {
        false
    }
    // Toggling has no effect: `is_enabled` stays false regardless.
    fn enable(&self) {}
    fn disable(&self) {}
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    // The no-op broadcaster must accept any input and report itself disabled.
    #[tokio::test]
    async fn test_noop_broadcast() {
        let broadcaster = NoOpGossipBroadcast;
        let hash = [1u8; 32];
        let data = vec![1, 2, 3];
        let hlc = HlcTimestamp::new(1000, [1u8; 16]);
        // Should always succeed
        broadcaster.broadcast(&hash, &data, &hlc).await.expect("broadcast");
        assert!(!broadcaster.is_enabled());
    }
 }
--- a/crates/stemedb-ingest/src/lib.rs
+++ b/crates/stemedb-ingest/src/lib.rs
@ -13,11 +13,14 @@
 /// Error types and Result wrapper for ingestion.
 pub mod error;
 /// Gossip broadcast trait for distributed replication.
 pub mod gossip;
 /// High-level ingestor manager.
 pub mod ingestor;
 /// Background worker logic for processing the WAL.
 pub mod worker;
 pub use error::{IngestError, Result};
 pub use gossip::{GossipBroadcast, GossipError, NoOpGossipBroadcast};
 pub use ingestor::Ingestor;
 pub use worker::{serialize_assertion, serialize_epoch, serialize_vote, IngestWorker, RecordType};
--- a/crates/stemedb-ingest/src/worker/mod.rs
+++ b/crates/stemedb-ingest/src/worker/mod.rs
@ -13,12 +13,14 @@
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 use stemedb_core::types::HlcTimestamp;
 use stemedb_storage::{GenericIndexStore, GenericVoteStore, KVStore, VectorIndex, VisualIndex};
 use stemedb_wal::{Journal, HEADER_SIZE};
 use tokio::sync::{Mutex, Notify};
 use tracing::{debug, info, warn};
 use crate::error::{IngestError, Result};
 use crate::gossip::GossipBroadcast;
 // Module declarations
 mod processing;
@ -52,6 +54,16 @@ pub struct IngestWorker<S> {
    /// Shutdown signal shared with Ingestor.
    /// When set to true, the run() loop exits gracefully.
    shutdown: Arc<AtomicBool>,
    /// Hybrid Logical Clock for distributed causal ordering.
    ///
    /// Used to generate HLC timestamps for supersessions and epoch
    /// ingestion. Provides causal ordering guarantees across distributed
    /// nodes, even with clock skew.
    hlc: uhlc::HLC,
    /// Optional gossip broadcaster for distributed replication.
    ///
    /// When set, the worker broadcasts newly ingested assertions to peer nodes.
    gossip_broadcaster: Option<Arc<dyn GossipBroadcast>>,
 }
 impl<S: KVStore + 'static> IngestWorker<S> {
@ -85,6 +97,9 @@ impl<S: KVStore + 'static> IngestWorker<S> {
                HEADER_SIZE as u64
            }
        };
        // Initialize HLC with random node ID
        let hlc = uhlc::HLCBuilder::new().build();
        Ok(Self {
            journal,
            store,
@ -95,6 +110,8 @@ impl<S: KVStore + 'static> IngestWorker<S> {
            vector_index: None,
            visual_index: None,
            shutdown: Arc::new(AtomicBool::new(false)),
            hlc,
            gossip_broadcaster: None,
        })
    }
@ -160,4 +177,91 @@ impl<S: KVStore + 'static> IngestWorker<S> {
        self.visual_index = Some(index);
        self
    }
    /// Rebuilds the worker's HLC so that it stamps events with `node_id`.
    ///
    /// Give every node in a distributed cluster its own identifier: the node
    /// ID is what orders concurrent events that carry the same clock value.
    ///
    /// A fresh clock is built and replaces the one the worker held before.
    ///
    /// # Example
    /// ```ignore
    /// let node_id = uhlc::ID::try_from(&node_uuid.as_bytes()[..]).unwrap();
    /// let worker = IngestWorker::new(journal, store)
    ///     .await?
    ///     .with_node_id(node_id);
    /// ```
    pub fn with_node_id(mut self, node_id: uhlc::ID) -> Self {
        let clock = uhlc::HLCBuilder::new().with_id(node_id).build();
        self.hlc = clock;
        self
    }
    /// Attach a gossip broadcaster for distributed replication.
    ///
    /// When set, newly ingested assertions are broadcast to peer nodes
    /// for low-latency replication. The gossip layer is best-effort:
    /// failures are logged but don't block the ingestion pipeline.
    ///
    /// Calling this again replaces any previously attached broadcaster.
    ///
    /// # Example
    /// ```ignore
    /// let broadcaster = GossipBroadcaster::new(peers).await?;
    /// let worker = IngestWorker::new(journal, store)
    ///     .await?
    ///     .with_gossip_broadcaster(Arc::new(broadcaster));
    /// ```
    pub fn with_gossip_broadcaster(mut self, broadcaster: Arc<dyn GossipBroadcast>) -> Self {
        self.gossip_broadcaster = Some(broadcaster);
        self
    }
    /// Returns the gossip broadcaster if configured.
    ///
    /// Yields a borrowed handle; `None` means no broadcaster was attached via
    /// `with_gossip_broadcaster`.
    pub fn gossip_broadcaster(&self) -> Option<&Arc<dyn GossipBroadcast>> {
        self.gossip_broadcaster.as_ref()
    }
    /// Produces a fresh HLC timestamp from this worker's clock.
    ///
    /// Each call yields a value greater than every timestamp this worker has
    /// handed out before, even if the system clock goes backwards.
    ///
    /// Intended for supersessions and any other record that must be causally
    /// ordered across distributed nodes.
    pub fn generate_hlc_timestamp(&self) -> HlcTimestamp {
        let stamp = self.hlc.new_timestamp();
        HlcTimestamp::from_uhlc(&stamp)
    }
    /// Updates the HLC with a timestamp from a remote node.
    ///
    /// Call this when receiving data from another node to ensure the local
    /// clock stays synchronized. The HLC will advance to at least the
    /// remote timestamp, maintaining causal ordering.
    ///
    /// # Arguments
    ///
    /// * `remote` - HLC timestamp received from a remote node
    ///
    /// # Returns
    ///
    /// `Ok(())` if the clock was updated. `Ok(())` is also returned, with the
    /// clock left untouched and a warning logged, when `remote` carries a
    /// node ID that cannot be converted to a `uhlc::ID` (e.g. all zeros).
    ///
    /// # Errors
    ///
    /// Returns `IngestError::InputValidation` if the underlying clock rejects
    /// the timestamp, e.g. because it is too far in the future (clock skew
    /// protection).
    pub fn update_hlc_from_remote(&self, remote: &HlcTimestamp) -> Result<()> {
        let ts = match remote.to_uhlc() {
            Some(ts) => ts,
            None => {
                // Best-effort: an unusable remote timestamp must not fail the
                // caller, but skipping the update silently would hide bad data.
                warn!(
                    remote_time = remote.time_ntp64,
                    "Ignoring remote HLC timestamp with invalid node ID; local clock not updated"
                );
                return Ok(());
            }
        };
        self.hlc.update_with_timestamp(&ts).map_err(|e| {
            warn!(
                remote_time = remote.time_ntp64,
                error = %e,
                "Failed to update HLC from remote timestamp (clock skew?)"
            );
            IngestError::InputValidation(format!("HLC update failed: {}", e))
        })?;
        Ok(())
    }
    /// Returns the node ID of this worker's HLC as 16 little-endian bytes.
    ///
    /// Handy for embedding the node identity in CRDT state or other
    /// distributed data structures.
    pub fn hlc_node_id(&self) -> [u8; 16] {
        let id = self.hlc.get_id();
        id.to_le_bytes()
    }
 }
--- a/crates/stemedb-ingest/src/worker/processing.rs
+++ b/crates/stemedb-ingest/src/worker/processing.rs
@ -192,6 +192,26 @@ impl<S: KVStore + 'static> IngestWorker<S> {
            }
        }
        // Broadcast to peers if gossip is configured
        if let Some(ref broadcaster) = self.gossip_broadcaster {
            if broadcaster.is_enabled() {
                let hlc = self.generate_hlc_timestamp();
                if let Err(e) = broadcaster.broadcast(&assertion_hash, data, &hlc).await {
                    // Log but don't fail - gossip is best-effort
                    warn!(
                        hash = %hash_hex,
                        error = %e,
                        "Failed to broadcast assertion to peers"
                    );
                } else {
                    debug!(
                        hash = %hash_hex,
                        "Broadcast assertion to peers"
                    );
                }
            }
        }
        Ok(())
    }
--- a/crates/stemedb-merkle/Cargo.toml
+++ b/crates/stemedb-merkle/Cargo.toml
@ -0,0 +1,27 @@
 [package]
 name = "stemedb-merkle"
 version = "0.1.0"
 edition = "2021"
 description = "BLAKE3-based Merkle tree for append-only assertion diff detection"
 # Inherit workspace lints
 [lints]
 workspace = true
 [dependencies]
 # Hashing
 blake3 = "1.5"
 # Serialization
 rkyv = { version = "0.7", features = ["validation", "strict"] }
 bytecheck = "0.6"
 # Error handling
 thiserror = "1.0"
 # Logging
 tracing = "0.1"
 [dev-dependencies]
 # Testing utilities
 stemedb-core = { path = "../stemedb-core" }
--- a/crates/stemedb-merkle/README.md
+++ b/crates/stemedb-merkle/README.md
@ -0,0 +1,129 @@
 # stemedb-merkle
 BLAKE3-based Merkle tree for append-only assertion diff detection in StemeDB.
 ## Overview
 This crate provides an efficient Merkle tree implementation optimized for StemeDB's append-only assertion store. The primary use case is **incremental sync between distributed nodes** - quickly identifying which assertions differ between local and remote stores.
 ## Design Principles
 - **Append-Only**: Trees grow monotonically with O(log N) insert performance
 - **Content-Addressed**: Uses BLAKE3 for cryptographic hash verification
 - **Efficient Diff**: O(1) root comparison to detect divergence, then an O(N) set-based leaf diff (a subtree-based diff is planned, see Future Optimizations)
 - **Zero-Copy Serialization**: Uses rkyv for fast persistence and network transfer
 - **No unwrap/expect**: All operations return `Result` for defensive error handling
 ## Architecture
 The tree is a binary Merkle tree where:
 - **Leaves** contain assertion hashes (BLAKE3 digests)
 - **Internal nodes** contain `BLAKE3(left_hash || right_hash)`
 - **Root hash** represents the entire assertion set
 ```
       root (BLAKE3(h12 || h34))
      /                        \
    h12 (BLAKE3(h1 || h2))    h34 (BLAKE3(h3 || h4))
   /    \                     /    \
  h1    h2                   h3    h4
  |     |                    |     |
 a1    a2                   a3    a4  (assertion hashes)
 ```
 ## Example Usage
 ### Basic Tree Operations
 ```rust
 use stemedb_merkle::MerkleTree;
 // Create a tree and insert assertion hashes
 let mut tree = MerkleTree::new();
 tree.insert([1u8; 32]).expect("insert");
 tree.insert([2u8; 32]).expect("insert");
 tree.insert([3u8; 32]).expect("insert");
 // Get root hash (O(1) - cached)
 let root = tree.root().expect("root");
 // Check tree size
 assert_eq!(tree.len(), 3);
 ```
 ### Incremental Sync (Fast Diff)
 ```rust
 use stemedb_merkle::{MerkleTree, DiffResult, roots_equal};
 let mut local = MerkleTree::new();
 local.insert([1u8; 32]).expect("insert");
 local.insert([2u8; 32]).expect("insert");
 let mut remote = MerkleTree::new();
 remote.insert([1u8; 32]).expect("insert");
 remote.insert([2u8; 32]).expect("insert");
 remote.insert([3u8; 32]).expect("insert");  // New assertion
 remote.insert([4u8; 32]).expect("insert");  // New assertion
 // Quick check: do we need to sync? (O(1))
 if !roots_equal(&local, &remote) {
    // Find what remote has that local doesn't (O(N))
    let diff = DiffResult::diff(&local, &remote);
    println!("Need to fetch {} assertions", diff.len());
    // Request missing assertions: [3, 4]
    for hash in diff.missing_hashes {
        // fetch_assertion(hash)...
    }
 }
 ```
 ### Persistence (Crash Recovery)
 ```rust
 use stemedb_merkle::{MerkleTree, serialize::{serialize_tree, deserialize_tree}};
 let mut tree = MerkleTree::new();
 tree.insert([1u8; 32]).expect("insert");
 tree.insert([2u8; 32]).expect("insert");
 // Serialize to disk
 let bytes = serialize_tree(&tree).expect("serialize");
 std::fs::write("merkle_tree.bin", &bytes).expect("write");
 // Restore after crash
 let bytes = std::fs::read("merkle_tree.bin").expect("read");
 let recovered = deserialize_tree(&bytes).expect("deserialize");
 assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
 ```
 ## Performance Characteristics
 | Operation | Complexity | Notes |
 |-----------|------------|-------|
 | Insert | O(log N) | Recompute path from leaf to root |
 | Root | O(1) | Cached after each insert |
 | Diff | O(N) | Set-based comparison of leaves |
 | Serialize | O(N) | Write all leaves to bytes |
 | Deserialize | O(N log N) | Rebuild tree from leaves |
 ## Future Optimizations
 For very large trees (millions of assertions), we plan to implement:
 - **Subtree-based diff**: Skip identical subtrees by comparing intermediate hashes
  - Reduces diff from O(N) to O(diff_size * log N)
 - **Incremental serialization**: Only persist changes since last checkpoint
 - **Range queries**: Find assertions inserted between timestamps
 ## Integration with StemeDB
 This crate will be used by:
 1. **Write-ahead log (WAL)**: Build Merkle tree as assertions are appended
 2. **Replication**: Exchange root hashes to detect drift, then sync missing data
 3. **Checkpointing**: Persist tree state for fast bootstrap after restart
 See `docs/research/distributed-write-path.md` for architecture details.
--- a/crates/stemedb-merkle/src/diff.rs
+++ b/crates/stemedb-merkle/src/diff.rs
@ -0,0 +1,367 @@
 //! Merkle tree diff operations for efficient sync.
 //!
 //! # Design Philosophy
 //!
 //! The diff algorithm is optimized for StemeDB's append-only model:
 //! - **Fast identity check**: O(1) root comparison before expensive traversal
 //! - **Minimal data transfer**: Return only hashes that differ
 //! - **Set semantics**: Identify assertions in remote but not in local
 //!
 //! # Use Case
 //!
 //! When a StemeDB node connects to a peer:
 //! 1. Exchange root hashes: O(1) to check if sync needed
 //! 2. If roots differ, call `diff()` to find missing assertions
 //! 3. Request missing assertions by hash
 //! 4. Insert into local store
 //!
 //! # Example
 //!
 //! ```
 //! use stemedb_merkle::{MerkleTree, roots_equal};
 //!
 //! let mut local = MerkleTree::new();
 //! local.insert([1u8; 32]).expect("insert");
 //! local.insert([2u8; 32]).expect("insert");
 //!
 //! let mut remote = MerkleTree::new();
 //! remote.insert([1u8; 32]).expect("insert");
 //! remote.insert([2u8; 32]).expect("insert");
 //! remote.insert([3u8; 32]).expect("insert");
 //!
 //! // Quick check: do we need to sync?
 //! if !roots_equal(&local, &remote) {
 //!     // Find what remote has that local doesn't
 //!     let diff = stemedb_merkle::DiffResult::diff(&local, &remote);
 //!     // Request missing assertions [3]
 //! }
 //! ```
 use crate::tree::{Hash, MerkleTree};
 use std::collections::HashSet;
 use tracing::instrument;
 /// Report whether two trees currently expose the same root hash.
 ///
 /// Only the two `root()` results are compared, which makes this the cheap
 /// pre-check to run before computing a full diff. Matching roots are taken to
 /// mean matching trees (a property of the hash).
 ///
 /// When neither tree yields a root (both empty) they are considered equal;
 /// when exactly one does, they are not.
 ///
 /// # Example
 ///
 /// ```
 /// use stemedb_merkle::{MerkleTree, roots_equal};
 ///
 /// let mut tree1 = MerkleTree::new();
 /// tree1.insert([1u8; 32]).expect("insert");
 ///
 /// let mut tree2 = MerkleTree::new();
 /// tree2.insert([1u8; 32]).expect("insert");
 ///
 /// assert!(roots_equal(&tree1, &tree2));
 /// ```
 pub fn roots_equal(a: &MerkleTree, b: &MerkleTree) -> bool {
    let (lhs, rhs) = (a.root(), b.root());
    if let (Ok(left_root), Ok(right_root)) = (&lhs, &rhs) {
        return left_root == right_root;
    }
    // At least one side has no root: equal only when neither side has one.
    lhs.is_err() && rhs.is_err()
 }
 /// Result of a Merkle tree diff operation.
 ///
 /// Contains the set of assertion hashes present in the remote tree
 /// but missing from the local tree.
 ///
 /// The result is one-directional: hashes that only the local tree holds are
 /// not recorded here. Values are produced by [`DiffResult::diff`].
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct DiffResult {
    /// Hashes present in remote but not in local.
    ///
    /// Listed in the order the remote tree's leaves are iterated.
    pub missing_hashes: Vec<Hash>,
 }
 impl DiffResult {
    /// Compute the diff between local and remote trees.
    ///
    /// Returns every assertion hash that exists in `remote` but not in `local`,
    /// i.e. what the local node has to fetch to catch up. Each missing hash is
    /// reported once, in the order it first appears among the remote leaves.
    /// Hashes that only `local` holds are not reported.
    ///
    /// # Algorithm
    ///
    /// For append-only trees, we use a set-based approach:
    /// 1. Return an empty result immediately if the roots match
    /// 2. Build a HashSet from the local leaves: O(N_local)
    /// 3. Walk the remote leaves, keeping each hash that is new to the set: O(N_remote)
    ///
    /// This is simple and correct for append-only semantics where:
    /// - Leaves are never removed
    /// - Order matters only for root hash, not for membership
    ///
    /// # Future Optimization
    ///
    /// For very large trees (millions of assertions), implement subtree-based
    /// diff that exploits tree structure to skip identical subtrees:
    /// - Compare subtree roots before descending
    /// - Skip entire subtrees with matching hashes
    /// - Reduces comparison from O(N) to O(diff_size * log N)
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::{MerkleTree, DiffResult};
    ///
    /// let mut local = MerkleTree::new();
    /// local.insert([1u8; 32]).expect("insert");
    /// local.insert([2u8; 32]).expect("insert");
    ///
    /// let mut remote = MerkleTree::new();
    /// remote.insert([1u8; 32]).expect("insert");
    /// remote.insert([2u8; 32]).expect("insert");
    /// remote.insert([3u8; 32]).expect("insert");
    /// remote.insert([4u8; 32]).expect("insert");
    ///
    /// let diff = DiffResult::diff(&local, &remote);
    /// assert_eq!(diff.missing_hashes.len(), 2);
    /// assert!(diff.missing_hashes.contains(&[3u8; 32]));
    /// assert!(diff.missing_hashes.contains(&[4u8; 32]));
    /// ```
    #[instrument(skip(local, remote), fields(
        local_len = local.len(),
        remote_len = remote.len()
    ))]
    pub fn diff(local: &MerkleTree, remote: &MerkleTree) -> Self {
        // Fast path: if roots are equal, no diff needed
        if roots_equal(local, remote) {
            return Self { missing_hashes: Vec::new() };
        }
        // Seed the set with every local leaf. Each remote leaf is then inserted as
        // it is visited: `insert` returns `true` only for a hash the set has not
        // seen yet, i.e. one that is absent from `local` AND has not already been
        // reported. This keeps the result duplicate-free even if `remote` holds
        // the same hash more than once, so callers never fetch a hash twice.
        let mut seen: HashSet<Hash> = local.leaves().iter().copied().collect();
        let missing_hashes: Vec<Hash> =
            remote.leaves().iter().copied().filter(|hash| seen.insert(*hash)).collect();
        tracing::debug!(missing_count = missing_hashes.len(), "Computed Merkle diff");
        Self { missing_hashes }
    }
    /// Check if the diff is empty (trees are identical).
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::{MerkleTree, DiffResult};
    ///
    /// let mut tree1 = MerkleTree::new();
    /// tree1.insert([1u8; 32]).expect("insert");
    ///
    /// let mut tree2 = MerkleTree::new();
    /// tree2.insert([1u8; 32]).expect("insert");
    ///
    /// let diff = DiffResult::diff(&tree1, &tree2);
    /// assert!(diff.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self.missing_hashes.is_empty()
    }
    /// Get the number of missing hashes.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::{MerkleTree, DiffResult};
    ///
    /// let mut local = MerkleTree::new();
    /// local.insert([1u8; 32]).expect("insert");
    ///
    /// let mut remote = MerkleTree::new();
    /// remote.insert([1u8; 32]).expect("insert");
    /// remote.insert([2u8; 32]).expect("insert");
    /// remote.insert([3u8; 32]).expect("insert");
    ///
    /// let diff = DiffResult::diff(&local, &remote);
    /// assert_eq!(diff.len(), 2);
    /// ```
    pub fn len(&self) -> usize {
        self.missing_hashes.len()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Build a tree whose leaves are `[byte; 32]` for each given byte, inserted in order.
    fn tree_of(fill_bytes: &[u8]) -> MerkleTree {
        let mut tree = MerkleTree::new();
        for &byte in fill_bytes {
            tree.insert([byte; 32]).expect("insert");
        }
        tree
    }
    /// Assert that `diff` holds `expected.len()` hashes and contains `[byte; 32]` for each byte.
    fn assert_missing(diff: &DiffResult, expected: &[u8]) {
        assert_eq!(diff.len(), expected.len());
        for &byte in expected {
            assert!(diff.missing_hashes.contains(&[byte; 32]));
        }
    }
    #[test]
    fn test_roots_equal_empty_trees() {
        assert!(roots_equal(&MerkleTree::new(), &MerkleTree::new()));
    }
    #[test]
    fn test_roots_equal_identical_trees() {
        let (tree1, tree2) = (tree_of(&[1, 2]), tree_of(&[1, 2]));
        assert!(roots_equal(&tree1, &tree2));
    }
    #[test]
    fn test_roots_not_equal_different_trees() {
        let (tree1, tree2) = (tree_of(&[1]), tree_of(&[2]));
        assert!(!roots_equal(&tree1, &tree2));
    }
    #[test]
    fn test_roots_not_equal_empty_vs_nonempty() {
        let tree1 = MerkleTree::new();
        let tree2 = tree_of(&[1]);
        assert!(!roots_equal(&tree1, &tree2));
    }
    #[test]
    fn test_diff_identical_trees() {
        let diff = DiffResult::diff(&tree_of(&[1, 2]), &tree_of(&[1, 2]));
        assert!(diff.is_empty());
        assert_eq!(diff.len(), 0);
    }
    #[test]
    fn test_diff_remote_has_extra() {
        let local = tree_of(&[1, 2]);
        let remote = tree_of(&[1, 2, 3]);
        let diff = DiffResult::diff(&local, &remote);
        assert_eq!(diff.len(), 1);
        assert_eq!(diff.missing_hashes, vec![[3u8; 32]]);
    }
    #[test]
    fn test_diff_remote_has_multiple_extra() {
        let local = tree_of(&[1]);
        let remote = tree_of(&[1, 2, 3, 4]);
        assert_missing(&DiffResult::diff(&local, &remote), &[2, 3, 4]);
    }
    #[test]
    fn test_diff_local_has_extra() {
        let local = tree_of(&[1, 2, 3]);
        let remote = tree_of(&[1]);
        let diff = DiffResult::diff(&local, &remote);
        // Remote lacks [2] and [3], but a diff only lists what remote has that local doesn't
        assert!(diff.is_empty());
    }
    #[test]
    fn test_diff_disjoint_trees() {
        let local = tree_of(&[1, 2]);
        let remote = tree_of(&[3, 4]);
        assert_missing(&DiffResult::diff(&local, &remote), &[3, 4]);
    }
    #[test]
    fn test_diff_empty_local() {
        let local = MerkleTree::new();
        let remote = tree_of(&[1, 2]);
        assert_missing(&DiffResult::diff(&local, &remote), &[1, 2]);
    }
    #[test]
    fn test_diff_empty_remote() {
        let local = tree_of(&[1]);
        let remote = MerkleTree::new();
        assert!(DiffResult::diff(&local, &remote).is_empty());
    }
    #[test]
    fn test_diff_both_empty() {
        let diff = DiffResult::diff(&MerkleTree::new(), &MerkleTree::new());
        assert!(diff.is_empty());
    }
    #[test]
    fn test_diff_partial_overlap() {
        let local = tree_of(&[1, 2, 3]);
        let remote = tree_of(&[2, 3, 4, 5]);
        assert_missing(&DiffResult::diff(&local, &remote), &[4, 5]);
    }
 }
--- a/crates/stemedb-merkle/src/lib.rs
+++ b/crates/stemedb-merkle/src/lib.rs
@ -0,0 +1,67 @@
 //! BLAKE3-based Merkle tree for append-only assertion diff detection.
 //!
 //! This crate provides an efficient Merkle tree implementation optimized for
 //! StemeDB's append-only assertion store. The primary use case is incremental
 //! sync between nodes: quickly identify which assertions differ between local
 //! and remote stores.
 //!
 //! # Design Philosophy
 //!
 //! - **Append-Only**: Trees grow monotonically, optimized for O(log N) inserts
 //! - **Content-Addressed**: Uses BLAKE3 for cryptographic hash verification
 //! - **Efficient Diff**: O(1) root comparison to detect divergence, then an O(N) set-based leaf diff
 //! - **Zero-Copy Serialization**: Uses rkyv for fast network transfer
 //!
 //! # Architecture
 //!
 //! The tree is a binary Merkle tree where:
 //! - Leaves contain assertion hashes (BLAKE3 digests)
 //! - Internal nodes contain BLAKE3(left_hash || right_hash)
 //! - Root hash represents the entire assertion set
 //!
 //! # Example
 //!
 //! ```
 //! use stemedb_merkle::MerkleTree;
 //!
 //! // Create a tree and insert assertion hashes
 //! let mut tree = MerkleTree::new();
 //! tree.insert([1u8; 32]).expect("insert failed");
 //! tree.insert([2u8; 32]).expect("insert failed");
 //!
 //! // Get the root hash (identifies the entire tree)
 //! let root = tree.root().expect("empty tree");
 //! assert_eq!(tree.len(), 2);
 //!
 //! // Compare with another tree
 //! let mut other = MerkleTree::new();
 //! other.insert([1u8; 32]).expect("insert failed");
 //! other.insert([3u8; 32]).expect("insert failed");
 //!
 //! // Roots differ because trees contain different assertions
 //! assert_ne!(tree.root().expect("root"), other.root().expect("root"));
 //! ```
 //!
 //! # Performance Characteristics
 //!
 //! - Insert: O(log N) - recompute path from leaf to root
 //! - Root: O(1) - cached after each insert
 //! - Diff: O(N) - set-based comparison of leaf hashes (O(1) when the roots already match)
 //! - Serialize: O(N) - write all leaf hashes to bytes
 //!
 //! # Crash Recovery
 //!
 //! The tree can be serialized to disk and restored after crash. Combined with
 //! StemeDB's WAL, this enables fast reconstruction of the tree state without
 //! replaying all assertions.
 #![forbid(unsafe_code)]
 #![warn(missing_docs)]
 mod diff;
 pub mod serialize;
 mod tree;
 pub use diff::{roots_equal, DiffResult};
 pub use serialize::SerializeError;
 pub use tree::{Hash, MerkleTree, TreeError};
--- a/crates/stemedb-merkle/src/serialize.rs
+++ b/crates/stemedb-merkle/src/serialize.rs
@ -0,0 +1,255 @@
 //! Serialization for Merkle trees using rkyv zero-copy format.
 //!
 //! # Design
 //!
 //! Merkle trees need to be persisted to disk for crash recovery and
 //! transferred over the network for sync. This module provides:
 //!
 //! - **Zero-copy serialization**: Uses rkyv for efficient encoding
 //! - **Validation**: Checks archived data before deserialization
 //! - **Consistency**: Uses same helpers as other StemeDB crates
 //!
 //! # Use Cases
 //!
 //! 1. **Crash recovery**: Persist tree to disk, restore after restart
 //! 2. **Network sync**: Serialize tree state for transfer to peers
 //! 3. **Checkpointing**: Save tree snapshots for fast bootstrap
 //!
 //! # Example
 //!
 //! ```
 //! use stemedb_merkle::{MerkleTree, serialize::serialize_tree, serialize::deserialize_tree};
 //!
 //! let mut tree = MerkleTree::new();
 //! tree.insert([1u8; 32]).expect("insert");
 //! tree.insert([2u8; 32]).expect("insert");
 //!
 //! // Serialize to bytes
 //! let bytes = serialize_tree(&tree).expect("serialize");
 //!
 //! // Deserialize back
 //! let recovered = deserialize_tree(&bytes).expect("deserialize");
 //! assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
 //! assert_eq!(tree.len(), recovered.len());
 //! ```
 //!
 //! # Performance
 //!
 //! - Serialization: O(N) where N is number of leaves
 //! - Deserialization: O(N log N) - validate the archive, then rebuild the tree by re-inserting every leaf
 //! - Memory: Tree size + 4KB scratch buffer
 use crate::tree::{Hash, MerkleTree};
 use rkyv::ser::serializers::AllocSerializer;
 use rkyv::ser::Serializer;
 use rkyv::Deserialize as RkyvDeserialize;
 use thiserror::Error;
 use tracing::{debug, instrument};
 /// Scratch-space size, in bytes, passed to rkyv's `AllocSerializer` when
 /// serializing a tree.
 ///
 /// 4KB is sufficient for most trees. Serialization that needs more scratch
 /// space than this still succeeds; it only costs extra allocation.
 const DEFAULT_SCRATCH_SIZE: usize = 4096;
 /// Errors that can occur during serialization/deserialization.
 ///
 /// Both variants carry the underlying failure rendered as a string.
 #[derive(Debug, Error)]
 pub enum SerializeError {
    /// Failed to serialize the tree.
    ///
    /// Returned by `serialize_tree` when the rkyv serializer reports an error.
    #[error("Serialization error: {0}")]
    Serialization(String),
    /// Failed to validate or deserialize the archived data.
    ///
    /// Returned by `deserialize_tree` when archive validation fails, when the
    /// leaves cannot be deserialized, or when re-inserting a leaf into the
    /// rebuilt tree fails.
    #[error("Deserialization error: {0}")]
    Deserialization(String),
 }
 /// Encode a Merkle tree as an rkyv archive of its leaf hashes.
 ///
 /// Only the leaves are written, in their current order. Internal nodes and the
 /// cached root are not part of the output; `deserialize_tree` rebuilds them.
 ///
 /// # Errors
 ///
 /// Returns [`SerializeError::Serialization`] if the rkyv serializer fails.
 ///
 /// # Example
 ///
 /// ```
 /// use stemedb_merkle::{MerkleTree, serialize::serialize_tree};
 ///
 /// let mut tree = MerkleTree::new();
 /// tree.insert([1u8; 32]).expect("insert");
 /// tree.insert([2u8; 32]).expect("insert");
 ///
 /// let bytes = serialize_tree(&tree).expect("serialize");
 /// assert!(!bytes.is_empty());
 /// ```
 #[instrument(skip(tree), fields(leaf_count = tree.len()))]
 pub fn serialize_tree(tree: &MerkleTree) -> Result<Vec<u8>, SerializeError> {
    debug!("Serializing Merkle tree");
    // The archive is just the leaf list; the tree shape is derived data.
    let leaf_hashes: Vec<Hash> = tree.leaves().to_vec();
    let mut writer = AllocSerializer::<DEFAULT_SCRATCH_SIZE>::default();
    if let Err(err) = writer.serialize_value(&leaf_hashes) {
        return Err(SerializeError::Serialization(err.to_string()));
    }
    let encoded = writer.into_serializer().into_inner().to_vec();
    debug!(bytes_len = encoded.len(), "Merkle tree serialized");
    Ok(encoded)
 }
 /// Deserialize bytes produced by `serialize_tree` back into a Merkle tree.
 ///
 /// The input is validated as an rkyv archive of leaf hashes, the leaves are
 /// deserialized, and the tree (including its cached root) is rebuilt by
 /// inserting the leaves in their archived order.
 ///
 /// `data` may have any alignment: it is copied into an aligned buffer before
 /// validation, so sub-slices of larger buffers (network frames, memory-mapped
 /// files) are accepted as well as freshly allocated vectors.
 ///
 /// # Errors
 ///
 /// Returns [`SerializeError::Deserialization`] if `data` is not a valid
 /// archive, or if a leaf cannot be inserted into the rebuilt tree.
 ///
 /// # Example
 ///
 /// ```
 /// use stemedb_merkle::{MerkleTree, serialize::serialize_tree, serialize::deserialize_tree};
 ///
 /// let mut tree = MerkleTree::new();
 /// tree.insert([1u8; 32]).expect("insert");
 /// tree.insert([2u8; 32]).expect("insert");
 ///
 /// let bytes = serialize_tree(&tree).expect("serialize");
 /// let recovered = deserialize_tree(&bytes).expect("deserialize");
 ///
 /// assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
 /// assert_eq!(tree.len(), recovered.len());
 /// ```
 #[instrument(skip(data), fields(bytes_len = data.len()))]
 pub fn deserialize_tree(data: &[u8]) -> Result<MerkleTree, SerializeError> {
    debug!("Deserializing Merkle tree");
    // rkyv validation rejects archives whose buffer is not suitably aligned for
    // the archived type, and a plain `&[u8]` carries no alignment guarantee.
    // Copying into an `AlignedVec` makes validation depend only on the bytes.
    let mut aligned = rkyv::AlignedVec::new();
    aligned.extend_from_slice(data);
    // Validate the archive before touching its contents
    let archived = rkyv::check_archived_root::<Vec<Hash>>(&aligned)
        .map_err(|e| SerializeError::Deserialization(e.to_string()))?;
    let leaves: Vec<Hash> = RkyvDeserialize::deserialize(archived, &mut rkyv::Infallible)
        .map_err(|e| SerializeError::Deserialization(e.to_string()))?;
    // Rebuild the tree from leaves
    let mut tree = MerkleTree::new();
    for hash in leaves {
        tree.insert(hash).map_err(|e| SerializeError::Deserialization(e.to_string()))?;
    }
    debug!(leaf_count = tree.len(), "Merkle tree deserialized");
    Ok(tree)
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Build a tree whose leaves are `[byte; 32]` for each given byte, inserted in order.
    fn tree_of(fill_bytes: &[u8]) -> MerkleTree {
        let mut tree = MerkleTree::new();
        for &byte in fill_bytes {
            tree.insert([byte; 32]).expect("insert");
        }
        tree
    }
    /// Serialize `tree` and immediately deserialize the produced bytes.
    fn roundtrip(tree: &MerkleTree) -> MerkleTree {
        let encoded = serialize_tree(tree).expect("serialize");
        deserialize_tree(&encoded).expect("deserialize")
    }
    #[test]
    fn test_serialize_deserialize_empty_tree() {
        let recovered = roundtrip(&MerkleTree::new());
        assert_eq!(recovered.len(), 0);
        assert!(recovered.is_empty());
    }
    #[test]
    fn test_serialize_deserialize_single_leaf() {
        let tree = tree_of(&[1]);
        let recovered = roundtrip(&tree);
        assert_eq!(recovered.len(), 1);
        assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
    }
    #[test]
    fn test_serialize_deserialize_multiple_leaves() {
        let tree = tree_of(&[1, 2, 3, 4]);
        let recovered = roundtrip(&tree);
        assert_eq!(recovered.len(), 4);
        assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
        // Leaf order and content must survive the roundtrip
        assert_eq!(tree.leaves(), recovered.leaves());
    }
    #[test]
    fn test_serialize_deserialize_large_tree() {
        let mut tree = MerkleTree::new();
        for first_byte in 0u8..100 {
            // Only the first byte varies; the remaining 31 stay zero
            let mut hash = [0u8; 32];
            hash[0] = first_byte;
            tree.insert(hash).expect("insert");
        }
        let recovered = roundtrip(&tree);
        assert_eq!(recovered.len(), 100);
        assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
    }
    #[test]
    fn test_deserialize_invalid_data() {
        let garbage = vec![0u8, 1, 2, 3, 4, 5];
        assert!(deserialize_tree(&garbage).is_err());
    }
    #[test]
    fn test_deserialize_empty_data() {
        let empty: Vec<u8> = Vec::new();
        assert!(deserialize_tree(&empty).is_err());
    }
    #[test]
    fn test_roundtrip_preserves_structure() {
        let fill_bytes: Vec<u8> = (0..10).collect();
        let tree = tree_of(&fill_bytes);
        let recovered = roundtrip(&tree);
        // Every observable property must match
        assert_eq!(tree.len(), recovered.len());
        assert_eq!(tree.root().expect("root"), recovered.root().expect("root"));
        assert_eq!(tree.leaves(), recovered.leaves());
        assert_eq!(tree.is_empty(), recovered.is_empty());
    }
    #[test]
    fn test_multiple_serialization_roundtrips() {
        let tree = tree_of(&[1]);
        // First roundtrip
        let bytes1 = serialize_tree(&tree).expect("serialize");
        let tree1 = deserialize_tree(&bytes1).expect("deserialize");
        // Second roundtrip
        let bytes2 = serialize_tree(&tree1).expect("serialize");
        let tree2 = deserialize_tree(&bytes2).expect("deserialize");
        // Roots and encoded bytes must be stable across roundtrips
        let original_root = tree.root().expect("root");
        assert_eq!(original_root, tree1.root().expect("root"));
        assert_eq!(original_root, tree2.root().expect("root"));
        assert_eq!(bytes1, bytes2);
    }
 }
--- a/crates/stemedb-merkle/src/tree.rs
+++ b/crates/stemedb-merkle/src/tree.rs
@ -0,0 +1,434 @@
 //! Core Merkle tree implementation optimized for append-only assertions.
 //!
 //! # Architecture
 //!
 //! This implements a **binary Merkle tree** using BLAKE3 for node hashing:
 //!
 //! ```text
 //!        root
 //!       /    \
 //!      h12   h34
 //!     / \    / \
 //!    h1 h2  h3 h4
 //!    |  |   |  |
 //!   a1 a2  a3 a4  (assertion hashes)
 //! ```
 //!
 //! Where:
 //! - `h1 = a1` (leaf nodes are assertion hashes directly)
 //! - `h12 = BLAKE3(h1 || h2)` (internal nodes hash their children)
 //! - `root = BLAKE3(h12 || h34)` (root represents entire tree)
 //!
 //! # Append-Only Optimization
 //!
 //! The tree is designed for sequential inserts (common in StemeDB):
 //! - New leaves are added to the right edge
 //! - The root is currently rebuilt from all leaves on every insert: O(N) hashes
 //!   (recomputing only the leaf-to-root path, O(log N), is not implemented yet)
 //! - Root hash is cached for O(1) access
 //!
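 //! # Storage Layout
 //!
 //! Only the leaf hashes (in insertion order) and the cached root are stored.
 //! Internal nodes are not materialized; they are recomputed level by level
 //! whenever the root is rebuilt:
 //! - Adjacent nodes of a level are paired and hashed into the level above
 //! - When a level has an odd number of nodes, the last node is carried up unchanged
 //!
 //! Keeping the leaves in a flat vector lets the diff algorithm read them directly.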
 use blake3::Hasher;
 use thiserror::Error;
 use tracing::{debug, instrument};
 /// A BLAKE3 hash (256 bits / 32 bytes).
 ///
 /// Used both for the leaf values supplied by callers and for the internal
 /// node and root hashes computed by the tree.
 pub type Hash = [u8; 32];
 /// Errors that can occur during Merkle tree operations.
 #[derive(Debug, Error)]
 pub enum TreeError {
    /// Tree is empty (has no root).
    ///
    /// Returned by `MerkleTree::root` when no leaf has been inserted.
    #[error("Tree is empty")]
    EmptyTree,
    /// Internal consistency error (should never happen).
    ///
    /// The payload is a description of the violated invariant and is
    /// included verbatim in the error message.
    #[error("Internal tree invariant violated: {0}")]
    InternalError(String),
 }
 /// A binary Merkle tree optimized for append-only assertion storage.
 ///
 /// # Design
 ///
 /// - **Binary tree**: each internal node has exactly two children
 /// - **Append-only**: leaves are added sequentially to the right edge
 /// - **BLAKE3 hashing**: internal nodes = BLAKE3(left || right)
 /// - **Cached root**: O(1) access to tree root hash
 /// - **Odd levels**: an unpaired last node is carried up to the next level unchanged
 ///
 /// Only the leaves and the cached root are stored; internal nodes are
 /// recomputed whenever the root is rebuilt.
 ///
 /// # Example
 ///
 /// ```
 /// use stemedb_merkle::MerkleTree;
 ///
 /// let mut tree = MerkleTree::new();
 ///
 /// // Insert assertion hashes
 /// tree.insert([1u8; 32]).expect("insert");
 /// tree.insert([2u8; 32]).expect("insert");
 /// tree.insert([3u8; 32]).expect("insert");
 ///
 /// // Root hash represents entire tree
 /// let root = tree.root().expect("root");
 /// assert_eq!(tree.len(), 3);
 /// ```
 #[derive(Debug, Clone)]
 pub struct MerkleTree {
    /// Leaves (assertion hashes) in insertion order.
    /// Storing leaves separately enables efficient diff operations.
    /// Leaves are kept exactly as supplied; they are not re-hashed.
    leaves: Vec<Hash>,
    /// Cached root hash (None if tree is empty).
    /// Recomputed on each insert to maintain O(1) root access.
    /// For a single-leaf tree this equals that leaf.
    cached_root: Option<Hash>,
 }
 impl MerkleTree {
    /// Create a new empty Merkle tree.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::MerkleTree;
    ///
    /// let tree = MerkleTree::new();
    /// assert_eq!(tree.len(), 0);
    /// assert!(tree.root().is_err());
    /// ```
    pub fn new() -> Self {
        Self { leaves: Vec::new(), cached_root: None }
    }
    /// Insert a new assertion hash into the tree.
    ///
    /// This appends the hash as a new rightmost leaf and then rebuilds the
    /// cached root from all leaves.
    ///
    /// Complexity: O(N) hash operations, where N is the number of leaves
    /// after the insert, because the root is recomputed from scratch rather
    /// than only along the leaf-to-root path.
    ///
    /// No de-duplication is performed: inserting the same hash twice adds
    /// two leaves.
    ///
    /// # Errors
    ///
    /// Propagates any [`TreeError`] reported while recomputing the root.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::MerkleTree;
    ///
    /// let mut tree = MerkleTree::new();
    /// tree.insert([1u8; 32]).expect("insert");
    /// tree.insert([2u8; 32]).expect("insert");
    /// assert_eq!(tree.len(), 2);
    /// ```
    #[instrument(skip(self, hash), fields(leaf_count = self.leaves.len()))]
    pub fn insert(&mut self, hash: Hash) -> Result<(), TreeError> {
        debug!("Inserting hash into Merkle tree");
        self.leaves.push(hash);
        // The recomputation result is already `Result<(), TreeError>`; return it directly.
        self.recompute_root()
    }
    /// Return the cached root hash of the tree.
    ///
    /// Fails with [`TreeError::EmptyTree`] when no leaf has been inserted.
    /// Complexity: O(1), since the root is cached on insert.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::MerkleTree;
    ///
    /// let mut tree = MerkleTree::new();
    /// assert!(tree.root().is_err()); // Empty tree
    ///
    /// tree.insert([1u8; 32]).expect("insert");
    /// let root = tree.root().expect("root");
    /// assert_eq!(root.len(), 32);
    /// ```
    pub fn root(&self) -> Result<Hash, TreeError> {
        match self.cached_root {
            Some(root_hash) => Ok(root_hash),
            None => Err(TreeError::EmptyTree),
        }
    }
    /// Get the number of leaves (assertion hashes) in the tree.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::MerkleTree;
    ///
    /// let mut tree = MerkleTree::new();
    /// assert_eq!(tree.len(), 0);
    ///
    /// tree.insert([1u8; 32]).expect("insert");
    /// tree.insert([2u8; 32]).expect("insert");
    /// assert_eq!(tree.len(), 2);
    /// ```
    pub fn len(&self) -> usize {
        self.leaves.len()
    }
    /// Report whether the tree holds no leaves.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::MerkleTree;
    ///
    /// let mut tree = MerkleTree::new();
    /// assert!(tree.is_empty());
    ///
    /// tree.insert([1u8; 32]).expect("insert");
    /// assert!(!tree.is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        matches!(self.leaves.as_slice(), [])
    }
    /// Borrow all leaf hashes, in the order they were inserted.
    ///
    /// The diff algorithm uses this to identify missing assertions.
    ///
    /// # Example
    ///
    /// ```
    /// use stemedb_merkle::MerkleTree;
    ///
    /// let mut tree = MerkleTree::new();
    /// tree.insert([1u8; 32]).expect("insert");
    /// tree.insert([2u8; 32]).expect("insert");
    ///
    /// let leaves = tree.leaves();
    /// assert_eq!(leaves.len(), 2);
    /// assert_eq!(leaves[0], [1u8; 32]);
    /// assert_eq!(leaves[1], [2u8; 32]);
    /// ```
    pub fn leaves(&self) -> &[Hash] {
        self.leaves.as_slice()
    }
    /// Recompute the root hash from current leaves.
    ///
    /// This builds the tree bottom-up using BLAKE3 hashing:
    /// 1. Start with leaf hashes
    /// 2. Pair adjacent nodes and hash them: BLAKE3(left || right)
    /// 3. Repeat until only root remains
    ///
    /// For odd number of nodes at any level, the last node is carried forward.
    #[instrument(skip(self), fields(leaf_count = self.leaves.len()))]
    fn recompute_root(&mut self) -> Result<(), TreeError> {
        if self.leaves.is_empty() {
            self.cached_root = None;
            return Ok(());
        }
        // Start with leaf level
        let mut current_level: Vec<Hash> = self.leaves.clone();
        // Build tree bottom-up until we reach the root
        while current_level.len() > 1 {
            let mut next_level = Vec::with_capacity(current_level.len().div_ceil(2));
            // Pair adjacent nodes and hash them
            let mut i = 0;
            while i < current_level.len() {
                if i + 1 < current_level.len() {
                    // Pair exists: hash left || right
                    let parent_hash = Self::hash_nodes(&current_level[i], &current_level[i + 1]);
                    next_level.push(parent_hash);
                    i += 2;
                } else {
                    // Odd node: carry forward to next level
                    next_level.push(current_level[i]);
                    i += 1;
                }
            }
            current_level = next_level;
        }
        // current_level now contains exactly one hash: the root
        self.cached_root = Some(current_level[0]);
        debug!(root_hash = ?self.cached_root, "Recomputed Merkle root");
        Ok(())
    }
    /// Derive a parent hash from its two children.
    ///
    /// Computes BLAKE3(left || right), feeding `left` then `right` into one hasher.
    fn hash_nodes(left: &Hash, right: &Hash) -> Hash {
        let digest = Hasher::new().update(left).update(right).finalize();
        digest.into()
    }
 }
 impl Default for MerkleTree {
    fn default() -> Self {
        Self::new()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Build a tree by inserting `leaves` from left to right.
    fn tree_of(leaves: &[Hash]) -> MerkleTree {
        let mut tree = MerkleTree::new();
        for leaf in leaves {
            tree.insert(*leaf).expect("insert");
        }
        tree
    }
    /// Root of a tree that is expected to be non-empty.
    fn root_of(tree: &MerkleTree) -> Hash {
        tree.root().expect("root")
    }
    #[test]
    fn test_empty_tree() {
        let tree = MerkleTree::new();
        assert_eq!(tree.len(), 0);
        assert!(tree.is_empty());
        assert!(tree.root().is_err());
    }
    #[test]
    fn test_single_leaf() {
        let only = [1u8; 32];
        let tree = tree_of(&[only]);
        assert_eq!(tree.len(), 1);
        assert!(!tree.is_empty());
        // A lone leaf is its own root.
        assert_eq!(root_of(&tree), only);
    }
    #[test]
    fn test_two_leaves() {
        let (h1, h2) = ([1u8; 32], [2u8; 32]);
        let tree = tree_of(&[h1, h2]);
        assert_eq!(tree.len(), 2);
        // Root should be BLAKE3(h1 || h2)
        assert_eq!(root_of(&tree), MerkleTree::hash_nodes(&h1, &h2));
    }
    #[test]
    fn test_three_leaves() {
        let (h1, h2, h3) = ([1u8; 32], [2u8; 32], [3u8; 32]);
        let tree = tree_of(&[h1, h2, h3]);
        assert_eq!(tree.len(), 3);
        // Expected shape (h3 is carried up unpaired):
        //       root
        //      /    \
        //    h12    h3
        //   /  \
        //  h1  h2
        let h12 = MerkleTree::hash_nodes(&h1, &h2);
        assert_eq!(root_of(&tree), MerkleTree::hash_nodes(&h12, &h3));
    }
    #[test]
    fn test_four_leaves() {
        let (h1, h2, h3, h4) = ([1u8; 32], [2u8; 32], [3u8; 32], [4u8; 32]);
        let tree = tree_of(&[h1, h2, h3, h4]);
        assert_eq!(tree.len(), 4);
        // Expected shape:
        //       root
        //      /    \
        //    h12    h34
        //   /  \   /  \
        //  h1  h2 h3  h4
        let h12 = MerkleTree::hash_nodes(&h1, &h2);
        let h34 = MerkleTree::hash_nodes(&h3, &h4);
        assert_eq!(root_of(&tree), MerkleTree::hash_nodes(&h12, &h34));
    }
    #[test]
    fn test_different_trees_different_roots() {
        let first = tree_of(&[[1u8; 32], [2u8; 32]]);
        let second = tree_of(&[[1u8; 32], [3u8; 32]]);
        assert_ne!(root_of(&first), root_of(&second));
    }
    #[test]
    fn test_identical_trees_same_root() {
        let content = [[1u8; 32], [2u8; 32]];
        let first = tree_of(&content);
        let second = tree_of(&content);
        assert_eq!(root_of(&first), root_of(&second));
    }
    #[test]
    fn test_leaves_accessor() {
        let inserted = [[1u8; 32], [2u8; 32], [3u8; 32]];
        let tree = tree_of(&inserted);
        let leaves = tree.leaves();
        assert_eq!(leaves.len(), 3);
        assert_eq!(leaves[0], inserted[0]);
        assert_eq!(leaves[1], inserted[1]);
        assert_eq!(leaves[2], inserted[2]);
    }
    #[test]
    fn test_order_matters() {
        let forward = tree_of(&[[1u8; 32], [2u8; 32]]);
        let reversed = tree_of(&[[2u8; 32], [1u8; 32]]);
        // Different insertion order produces different root
        assert_ne!(root_of(&forward), root_of(&reversed));
    }
    #[test]
    fn test_incremental_insert() {
        let mut tree = MerkleTree::new();
        for (already_inserted, fill) in (0u8..10).enumerate() {
            tree.insert([fill; 32]).expect("insert");
            assert_eq!(tree.len(), already_inserted + 1);
            assert!(tree.root().is_ok());
        }
        assert_eq!(tree.len(), 10);
    }
 }
--- a/crates/stemedb-query/Cargo.toml
+++ b/crates/stemedb-query/Cargo.toml
@ -24,6 +24,10 @@ blake3 = "1.5"
 tempfile = "3.10"
 stemedb-wal = { path = "../stemedb-wal" }
 stemedb-ingest = { path = "../stemedb-ingest" }
 stemedb-sync = { path = "../stemedb-sync" }
 stemedb-rpc = { path = "../stemedb-rpc" }
 stemedb-merkle = { path = "../stemedb-merkle" }
 ed25519-dalek = { version = "2.1", features = ["rand_core"] }
 rand = "0.8"
 hex = "0.4"
 tonic = "0.12"
--- a/crates/stemedb-query/tests/battery/battery11_replication.rs
+++ b/crates/stemedb-query/tests/battery/battery11_replication.rs
@ -0,0 +1,314 @@
 //! Battery 11: Two-Node Replication Tests
 //!
 //! Tests for gossip broadcast and anti-entropy sync between two nodes.
 //! Verifies that assertions replicate correctly and nodes converge.
 #![allow(clippy::expect_used)] // Test code uses expect() for clear failure messages
 use std::sync::Arc;
 use std::time::Duration;
 use ed25519_dalek::{Signer, SigningKey};
 use rand::rngs::OsRng;
 use stemedb_core::serde::serialize;
 use stemedb_core::testing::AssertionBuilder;
 use stemedb_core::types::{LifecycleStage, ObjectValue, SignatureEntry, SourceClass};
 use stemedb_ingest::GossipBroadcast; // Import trait for methods
 use stemedb_merkle::MerkleTree;
 use stemedb_storage::crdt::CrdtAssertionStore;
 use stemedb_storage::{key_codec, HybridStore, KVStore};
 use stemedb_sync::gossip::GossipBroadcaster;
 use stemedb_sync::merkle_manager::MerkleTreeManager;
 use stemedb_sync::SyncConfig;
 use tempfile::tempdir;
 /// Build and serialize a signed assertion for use in tests.
 ///
 /// A fresh signing key is generated on every call, so the embedded agent id
 /// and signature differ between calls even for identical arguments. The
 /// signature covers only the string `"{subject}:{predicate}"`.
 fn create_test_assertion(subject: &str, predicate: &str, value: i64, timestamp: u64) -> Vec<u8> {
    let signing_key = SigningKey::generate(&mut OsRng);
    let signed_text = format!("{}:{}", subject, predicate);
    let signer_entry = SignatureEntry {
        agent_id: signing_key.verifying_key().to_bytes(),
        signature: signing_key.sign(signed_text.as_bytes()).to_bytes(),
        timestamp,
        version: 1,
    };
    let assertion = AssertionBuilder::new()
        .subject(subject)
        .predicate(predicate)
        .object(ObjectValue::Number(value as f64))
        .source_class(SourceClass::Regulatory) // Using valid variant
        .confidence(0.9)
        .lifecycle(LifecycleStage::Proposed)
        .timestamp(timestamp)
        .signatures(vec![signer_entry])
        .build();
    serialize(&assertion).expect("serialize assertion")
 }
 /// Test node with storage and sync components.
 ///
 /// Each node is created over its own temporary directory.
 struct TestNode {
    /// Store opened on this node's temporary directory.
    store: Arc<HybridStore>,
    /// Merkle tree manager created over `store`.
    merkle_manager: Arc<MerkleTreeManager<HybridStore>>,
    /// CRDT assertion store created over `store` with this node's id.
    #[allow(dead_code)]
    crdt_store: Arc<CrdtAssertionStore<HybridStore>>,
    /// Identifier this node was created with.
    #[allow(dead_code)]
    node_id: [u8; 16],
    /// Held so the temporary directory lives as long as the node.
    _temp_dir: tempfile::TempDir,
 }
 impl TestNode {
    /// Open a store in a new temporary directory and build the sync components on top of it.
    async fn new(node_id: [u8; 16]) -> Self {
        let scratch = tempdir().expect("create temp dir");
        let store = HybridStore::open(scratch.path()).map(Arc::new).expect("open store");
        let merkle_manager = MerkleTreeManager::load_or_create(Arc::clone(&store))
            .await
            .map(Arc::new)
            .expect("create merkle manager");
        // The CRDT store shares the same underlying store handle.
        let crdt_store = Arc::new(CrdtAssertionStore::new(Arc::clone(&store), node_id));
        Self { store, merkle_manager, crdt_store, node_id, _temp_dir: scratch }
    }
    /// Persist `data` under its BLAKE3 hash and append that hash to the Merkle tree.
    async fn ingest_assertion(&self, data: &[u8]) {
        let digest = blake3::hash(data);
        // Every assertion is filed under the fixed subject "test_subject".
        let key = key_codec::assertion_key("test_subject", &digest.to_hex().to_string());
        self.store.put(&key, data).await.expect("put assertion");
        self.merkle_manager.insert(*digest.as_bytes()).await.expect("insert into merkle");
    }
    /// Report whether an assertion with the given hash is present in the store.
    #[allow(dead_code)]
    async fn has_assertion(&self, hash: &[u8; 32]) -> bool {
        let key = key_codec::assertion_key("test_subject", &hex::encode(hash));
        let stored = self.store.get(&key).await.expect("get assertion");
        stored.is_some()
    }
    /// Number of leaves tracked by the Merkle manager.
    #[allow(dead_code)]
    async fn assertion_count(&self) -> usize {
        self.merkle_manager.len().await
    }
    /// Current Merkle root as reported by the manager.
    async fn merkle_root(&self) -> Option<[u8; 32]> {
        let root = self.merkle_manager.root().await;
        root.expect("get root")
    }
 }
 /// Test 1: Merkle root comparison for identical trees.
 #[tokio::test]
 async fn test_identical_trees_same_root() {
    let node_a = TestNode::new([1u8; 16]).await;
    let node_b = TestNode::new([2u8; 16]).await;
    // Insert same assertions in same order
    let data1 = create_test_assertion("test_subject", "price", 100, 1000);
    let data2 = create_test_assertion("test_subject", "price", 200, 1001);
    node_a.ingest_assertion(&data1).await;
    node_a.ingest_assertion(&data2).await;
    node_b.ingest_assertion(&data1).await;
    node_b.ingest_assertion(&data2).await;
    // Merkle roots should match
    let root_a = node_a.merkle_root().await.expect("root A");
    let root_b = node_b.merkle_root().await.expect("root B");
    assert_eq!(root_a, root_b, "Identical trees should have same root");
 }
 /// Test 2: Merkle root comparison for different trees.
 #[tokio::test]
 async fn test_different_trees_different_roots() {
    let node_a = TestNode::new([1u8; 16]).await;
    let node_b = TestNode::new([2u8; 16]).await;
    // Each node ingests a different assertion.
    let payload_a = create_test_assertion("test_subject", "price", 100, 1000);
    let payload_b = create_test_assertion("test_subject", "price", 200, 1001);
    node_a.ingest_assertion(&payload_a).await;
    node_b.ingest_assertion(&payload_b).await;
    // Divergent content must yield divergent roots.
    let root_a = node_a.merkle_root().await.expect("root A");
    let root_b = node_b.merkle_root().await.expect("root B");
    assert_ne!(root_a, root_b, "Different trees should have different roots");
 }
 /// Test 3: Merkle diff finds missing assertions.
 #[tokio::test]
 async fn test_merkle_diff_finds_missing() {
    use stemedb_merkle::DiffResult;
    let node_a = TestNode::new([1u8; 16]).await;
    let node_b = TestNode::new([2u8; 16]).await;
    let shared_first = create_test_assertion("test_subject", "price", 100, 1000);
    let shared_second = create_test_assertion("test_subject", "price", 200, 1001);
    let only_on_b = create_test_assertion("test_subject", "price", 300, 1002);
    // Node A holds the two shared assertions.
    node_a.ingest_assertion(&shared_first).await;
    node_a.ingest_assertion(&shared_second).await;
    // Node B holds the same two plus one extra.
    node_b.ingest_assertion(&shared_first).await;
    node_b.ingest_assertion(&shared_second).await;
    node_b.ingest_assertion(&only_on_b).await;
    // Rebuild standalone trees from each manager's leaves.
    let mut tree_a = MerkleTree::new();
    let leaves_a = node_a.merkle_manager.leaves().await;
    for leaf in &leaves_a {
        tree_a.insert(*leaf).expect("insert");
    }
    let mut tree_b = MerkleTree::new();
    let leaves_b = node_b.merkle_manager.leaves().await;
    for leaf in &leaves_b {
        tree_b.insert(*leaf).expect("insert");
    }
    // Exactly one hash should be reported missing: the extra assertion on B.
    let diff = DiffResult::diff(&tree_a, &tree_b);
    assert_eq!(diff.missing_hashes.len(), 1, "Should find 1 missing hash");
    let extra_hash = *blake3::hash(&only_on_b).as_bytes();
    assert!(diff.missing_hashes.contains(&extra_hash), "Missing hash should be data3");
 }
 /// Test 4: Gossip broadcaster can be enabled/disabled.
 #[tokio::test]
 async fn test_gossip_enable_disable() {
    // An empty peer list is used so the broadcaster has nothing to connect to.
    let no_peers = Vec::new();
    let broadcaster = GossipBroadcaster::new(no_peers).await.expect("create broadcaster");
    assert!(broadcaster.is_enabled(), "Should be enabled by default");
    // Toggle off, then back on, checking the flag after each step.
    broadcaster.disable();
    assert!(!broadcaster.is_enabled(), "Should be disabled after disable()");
    broadcaster.enable();
    assert!(broadcaster.is_enabled(), "Should be enabled after enable()");
 }
 /// Test 5: Merkle tree checkpoint and restore.
 #[tokio::test]
 async fn test_merkle_checkpoint_restore() {
    let temp_dir = tempdir().expect("create temp dir");
    let store_path = temp_dir.path().to_path_buf();
    let [hash1, hash2, hash3] = [[1u8; 32], [2u8; 32], [3u8; 32]];
    // First session: insert three leaves and checkpoint; store and manager are dropped at scope end.
    {
        let store = HybridStore::open(&store_path).map(Arc::new).expect("open store");
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create manager");
        manager.insert(hash1).await.expect("insert 1");
        manager.insert(hash2).await.expect("insert 2");
        manager.insert(hash3).await.expect("insert 3");
        manager.checkpoint().await.expect("checkpoint");
    }
    // Second session: reopen the same path and check the leaves came back in order.
    {
        let store = HybridStore::open(&store_path).map(Arc::new).expect("open store");
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create manager");
        let restored_len = manager.len().await;
        assert_eq!(restored_len, 3, "Should have 3 leaves after restore");
        let leaves = manager.leaves().await;
        assert_eq!(leaves[0], hash1, "First leaf should match");
        assert_eq!(leaves[1], hash2, "Second leaf should match");
        assert_eq!(leaves[2], hash3, "Third leaf should match");
    }
 }
 /// Test 6: Content-addressed storage is idempotent.
 #[tokio::test]
 async fn test_content_addressed_idempotent() {
    let node = TestNode::new([1u8; 16]).await;
    // One assertion, keyed by the hex form of its BLAKE3 hash.
    let data = create_test_assertion("test_subject", "price", 100, 1000);
    let digest = *blake3::hash(&data).as_bytes();
    let key = key_codec::assertion_key("test_subject", &hex::encode(digest));
    // Write the same bytes to the same key three times, directly through the key-value store.
    node.store.put(&key, &data).await.expect("put 1");
    node.store.put(&key, &data).await.expect("put 2");
    node.store.put(&key, &data).await.expect("put 3");
    // Reading the key back must yield exactly the bytes that were written.
    let stored = node.store.get(&key).await.expect("get");
    let retrieved = stored.expect("should exist");
    assert_eq!(retrieved, data, "Should retrieve same data");
 }
 /// Test 7: CRDT assertion store merge with data.
 #[tokio::test]
 async fn test_crdt_merge_with_data() {
    use stemedb_storage::crdt::AssertionTransfer;
    let node = TestNode::new([1u8; 16]).await;
    // Two assertions that differ in predicate and value.
    let data1 = create_test_assertion("test_subject", "predA", 100, 1000);
    let data2 = create_test_assertion("test_subject", "predB", 200, 1001);
    let hash1 = *blake3::hash(&data1).as_bytes();
    let hash2 = *blake3::hash(&data2).as_bytes();
    // Hand both to the CRDT store as (hash, data) transfers.
    let transfers = vec![
        AssertionTransfer { hash: hash1, data: data1.clone() },
        AssertionTransfer { hash: hash2, data: data2.clone() },
    ];
    let merged = node.crdt_store.merge_with_data("test_subject", &transfers).await.expect("merge");
    assert_eq!(merged, 2, "Should have merged 2 assertions");
    // Both hashes must now be reported as present.
    let has_first = node.crdt_store.has_assertion("test_subject", &hash1).await.expect("has 1");
    assert!(has_first);
    let has_second = node.crdt_store.has_assertion("test_subject", &hash2).await.expect("has 2");
    assert!(has_second);
 }
 /// Test 8: SyncConfig builder pattern.
 #[tokio::test]
 async fn test_sync_config_builder() {
    let interval = Duration::from_secs(30);
    // Chain every builder method once (twice for peers).
    let config = SyncConfig::new()
        .with_peer("http://localhost:9090")
        .with_peer("http://localhost:9091")
        .with_gossip_enabled(true)
        .with_gossip_fanout(2)
        .with_anti_entropy_interval(interval);
    // Each setting must be reflected in the resulting config.
    assert_eq!(config.peers.len(), 2);
    assert!(config.gossip_enabled);
    assert_eq!(config.gossip_fanout, 2);
    assert_eq!(config.anti_entropy_interval, Duration::from_secs(30));
 }
--- a/crates/stemedb-query/tests/battery/mod.rs
+++ b/crates/stemedb-query/tests/battery/mod.rs
@ -6,6 +6,7 @@
 pub mod helpers;
 pub mod battery10_signature_advanced;
 pub mod battery11_replication;
 pub mod battery1_semaglutide;
 pub mod battery2_jwt_conflict;
 pub mod battery3_decay_math;
--- a/crates/stemedb-rpc/Cargo.toml
+++ b/crates/stemedb-rpc/Cargo.toml
@ -0,0 +1,40 @@
 [package]
 name = "stemedb-rpc"
 version = "0.1.0"
 edition = "2021"
 description = "gRPC layer for StemeDB node-to-node replication"
 # Inherit workspace lints
 [lints]
 workspace = true
 [dependencies]
 # Core types
 stemedb-core = { path = "../stemedb-core" }
 # gRPC
 tonic = "0.12"
 prost = "0.13"
 # Async runtime
 tokio = { version = "1", features = ["full"] }
 # Error handling
 thiserror = "1.0"
 # Retry with exponential backoff
 backoff = { version = "0.4", features = ["tokio"] }
 # Logging
 tracing = "0.1"
 # Utilities
 bytes = "1.5"
 hex = "0.4"
 async-trait = "0.1"
 [build-dependencies]
 tonic-build = "0.12"
 [dev-dependencies]
 tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
--- a/crates/stemedb-rpc/build.rs
+++ b/crates/stemedb-rpc/build.rs
@ -0,0 +1,9 @@
 //! Build script for stemedb-rpc that generates gRPC code from proto files.
 fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Proto sources to compile and the include root used to resolve them.
    let protos = ["proto/sync.proto"];
    let include_dirs = ["proto/"];
    // Generate both the server and the client side of the service.
    let builder = tonic_build::configure().build_server(true).build_client(true);
    builder.compile_protos(&protos, &include_dirs)?;
    Ok(())
 }
--- a/crates/stemedb-rpc/proto/sync.proto
+++ b/crates/stemedb-rpc/proto/sync.proto
@ -0,0 +1,100 @@
 syntax = "proto3";
 package stemedb.sync.v1;
 // SyncService enables node-to-node replication for StemeDB.
 //
 // The service supports two sync patterns:
 // 1. Gossip: Push new assertions to peers immediately after ingestion
 // 2. Anti-Entropy: Periodic Merkle root exchange and diff-based sync
 //
 // All RPCs are unary: one request message in, one response message out.
 service SyncService {
  // Gossip pushes a new assertion to a peer.
  // Called immediately after local ingestion to propagate data quickly.
  rpc Gossip(GossipRequest) returns (GossipResponse);
  // ExchangeRoots compares Merkle roots to detect divergence.
  // If roots differ, the caller should fetch missing assertions.
  rpc ExchangeRoots(RootExchangeRequest) returns (RootExchangeResponse);
  // FetchAssertions retrieves assertion data by hash.
  // Used after ExchangeRoots to pull missing assertions.
  rpc FetchAssertions(FetchRequest) returns (FetchResponse);
  // Ping checks if a peer is alive and returns basic metadata.
  rpc Ping(PingRequest) returns (PingResponse);
 }
 // GossipRequest pushes a single assertion to a peer.
 // Request message for the Gossip RPC.
 message GossipRequest {
  // BLAKE3 hash of the assertion (32 bytes)
  bytes assertion_hash = 1;
  // Serialized assertion data (rkyv format)
  bytes assertion_data = 2;
  // HLC timestamp components for causal ordering
  // NOTE(review): units of hlc_time are not stated here; confirm against the HLC implementation.
  uint64 hlc_time = 3;
  uint32 hlc_counter = 4;
  bytes hlc_node_id = 5;  // 16 bytes
 }
 // GossipResponse is the reply to a Gossip RPC.
 message GossipResponse {
  // True if the assertion was accepted (stored or already existed)
  bool accepted = 1;
  // Error message if rejected (e.g., validation failure)
  // NOTE(review): presumably empty when accepted is true; confirm with the server implementation.
  string error = 2;
 }
 // RootExchangeRequest initiates Merkle root comparison.
 // Request message for the ExchangeRoots RPC; carries the caller's view of its own tree.
 message RootExchangeRequest {
  // Local Merkle root hash (32 bytes)
  bytes merkle_root = 1;
  // Number of assertions in local tree
  uint64 assertion_count = 2;
 }
 // RootExchangeResponse is the reply to an ExchangeRoots RPC.
 // It carries the responder's view of its own tree plus the comparison result.
 message RootExchangeResponse {
  // Remote Merkle root hash (32 bytes)
  bytes merkle_root = 1;
  // Number of assertions in remote tree
  uint64 assertion_count = 2;
  // True if roots match (trees are identical)
  bool roots_match = 3;
 }
 // FetchRequest asks for assertion data by hash.
 // Request message for the FetchAssertions RPC.
 message FetchRequest {
  // List of assertion hashes to fetch (max 1000 per request)
  // The limit is a convention: the schema itself does not cap the list length.
  repeated bytes hashes = 1;
 }
 // FetchResponse is the reply to a FetchAssertions RPC.
 message FetchResponse {
  // Retrieved assertions (may be fewer than requested if not found)
  repeated AssertionData assertions = 1;
 }
 // AssertionData pairs a hash with its serialized data.
 // Element type of FetchResponse.assertions.
 message AssertionData {
  // BLAKE3 hash of the assertion (32 bytes)
  bytes hash = 1;
  // Serialized assertion data (rkyv format)
  bytes data = 2;
 }
 // PingRequest is a health check with node identity.
 // Request message for the Ping RPC.
 message PingRequest {
  // Sender's node ID (16 bytes)
  bytes node_id = 1;
 }
 // PingResponse is the reply to a Ping RPC and carries basic metadata about the responder.
 message PingResponse {
  // Responder's node ID (16 bytes)
  bytes node_id = 1;
  // Number of assertions on this node
  uint64 assertion_count = 2;
 }
--- a/crates/stemedb-rpc/src/client.rs
+++ b/crates/stemedb-rpc/src/client.rs
@ -0,0 +1,247 @@
 //! gRPC client for node-to-node sync operations.
 //!
 //! Provides a high-level client with exponential backoff retry for transient failures.
 //! All operations are async and safe to call concurrently.
 //!
 //! # Example
 //!
 //! ```ignore
 //! use stemedb_rpc::client::{SyncClient, RetryConfig};
 //!
 //! let client = SyncClient::connect("http://peer:9090").await?;
 //!
 //! // Gossip an assertion
 //! let resp = client.gossip(GossipRequest { ... }).await?;
 //!
 //! // Exchange Merkle roots
 //! let resp = client.exchange_roots(RootExchangeRequest { ... }).await?;
 //! ```
 use crate::error::{Result, RpcError};
 use crate::proto::sync_service_client::SyncServiceClient;
 use crate::proto::{
    FetchRequest, FetchResponse, GossipRequest, GossipResponse, PingRequest, PingResponse,
    RootExchangeRequest, RootExchangeResponse,
 };
 use backoff::backoff::Backoff;
 use backoff::ExponentialBackoff;
 use std::time::Duration;
 use tonic::transport::Channel;
 use tracing::{debug, instrument, warn};
 /// Configuration for retry behavior.
 ///
 /// Read by `SyncClient` to bound how many times a failing call is issued
 /// and how long it waits between tries.
 #[derive(Debug, Clone)]
 pub struct RetryConfig {
     /// Maximum number of attempts, counting the initial call (default: 5).
     ///
     /// The client's retry loop gives up once this many attempts have failed,
     /// so 5 means one initial call plus at most four retries. Values of 0
     /// and 1 both result in a single attempt.
     pub max_retries: u32,
     /// Initial backoff duration (default: 1 second).
     pub initial_backoff: Duration,
     /// Maximum backoff duration (default: 60 seconds).
     pub max_backoff: Duration,
 }
 impl Default for RetryConfig {
     /// Stock policy: 5 attempts, starting at a 1 second delay and capped at
     /// 60 seconds between tries.
     fn default() -> Self {
         let initial_backoff = Duration::from_secs(1);
         let max_backoff = Duration::from_secs(60);
         Self { max_retries: 5, initial_backoff, max_backoff }
     }
 }
 /// Client for sync operations with automatic retry.
 ///
 /// Thread-safe and cloneable - can be shared across tasks.
 #[derive(Clone)]
 pub struct SyncClient {
     // Generated gRPC client; a clone of it is handed to every call attempt.
     inner: SyncServiceClient<Channel>,
     // Retry policy consulted by the retry loop for every RPC.
     retry_config: RetryConfig,
     // Address string that was passed to `connect`, kept for reporting.
     peer_addr: String,
 }
 impl SyncClient {
    /// Connect to a sync service endpoint.
    ///
    /// # Arguments
    ///
    /// * `addr` - The endpoint address (e.g., "http://localhost:9090")
    ///
    /// # Errors
    ///
    /// Returns `RpcError::Connection` if the connection fails.
    #[instrument(skip_all, fields(addr = %addr))]
    pub async fn connect(addr: &str) -> Result<Self> {
        debug!("Connecting to sync service");
        let channel = Channel::from_shared(addr.to_string())
            .map_err(|e| RpcError::InvalidData(e.to_string()))?
            .connect()
            .await?;
        Ok(Self {
            inner: SyncServiceClient::new(channel),
            retry_config: RetryConfig::default(),
            peer_addr: addr.to_string(),
        })
    }
    /// Configure retry behavior.
    #[must_use]
    pub fn with_retry_config(mut self, config: RetryConfig) -> Self {
        self.retry_config = config;
        self
    }
    /// Returns the peer address this client is connected to.
    #[must_use]
    pub fn peer_addr(&self) -> &str {
        &self.peer_addr
    }
    /// Create an exponential backoff iterator from the config.
    fn create_backoff(&self) -> ExponentialBackoff {
        ExponentialBackoff {
            current_interval: self.retry_config.initial_backoff,
            initial_interval: self.retry_config.initial_backoff,
            max_interval: self.retry_config.max_backoff,
            max_elapsed_time: None, // We control max retries ourselves
            ..Default::default()
        }
    }
    /// Gossip an assertion to the peer.
    ///
    /// Pushes a new assertion immediately after local ingestion.
    /// Retries on transient failures with exponential backoff.
    #[instrument(skip(self, request), fields(hash_len = request.assertion_hash.len()))]
    pub async fn gossip(&self, request: GossipRequest) -> Result<GossipResponse> {
        self.with_retry(|mut client| {
            let req = request.clone();
            async move { client.gossip(tonic::Request::new(req)).await }
        })
        .await
    }
    /// Exchange Merkle roots with the peer.
    ///
    /// Used for anti-entropy sync to detect divergence.
    #[instrument(skip(self, request), fields(assertion_count = request.assertion_count))]
    pub async fn exchange_roots(
        &self,
        request: RootExchangeRequest,
    ) -> Result<RootExchangeResponse> {
        self.with_retry(|mut client| {
            let req = request.clone();
            async move { client.exchange_roots(tonic::Request::new(req)).await }
        })
        .await
    }
    /// Fetch assertions by hash from the peer.
    ///
    /// Used after ExchangeRoots to pull missing assertions.
    #[instrument(skip(self, request), fields(hash_count = request.hashes.len()))]
    pub async fn fetch_assertions(&self, request: FetchRequest) -> Result<FetchResponse> {
        self.with_retry(|mut client| {
            let req = request.clone();
            async move { client.fetch_assertions(tonic::Request::new(req)).await }
        })
        .await
    }
    /// Ping the peer for health check.
    #[instrument(skip(self, request))]
    pub async fn ping(&self, request: PingRequest) -> Result<PingResponse> {
        self.with_retry(|mut client| {
            let req = request.clone();
            async move { client.ping(tonic::Request::new(req)).await }
        })
        .await
    }
    /// Execute an operation with retry on transient failures.
    async fn with_retry<F, Fut, T>(&self, op: F) -> Result<T>
    where
        F: Fn(SyncServiceClient<Channel>) -> Fut,
        Fut: std::future::Future<Output = std::result::Result<tonic::Response<T>, tonic::Status>>,
    {
        let mut backoff = self.create_backoff();
        let mut attempts = 0u32;
        let mut last_error;
        loop {
            attempts += 1;
            let client = self.inner.clone();
            match op(client).await {
                Ok(response) => return Ok(response.into_inner()),
                Err(status) => {
                    last_error = status.message().to_string();
                    // Don't retry on permanent errors
                    if !Self::is_retryable(&status) {
                        return Err(RpcError::from(status));
                    }
                    // Check retry limit
                    if attempts >= self.retry_config.max_retries {
                        return Err(RpcError::RetryExhausted { attempts, last_error });
                    }
                    // Get next backoff duration
                    if let Some(duration) = backoff.next_backoff() {
                        warn!(
                            attempt = attempts,
                            max = self.retry_config.max_retries,
                            delay_ms = duration.as_millis(),
                            error = %last_error,
                            "Retrying after transient error"
                        );
                        tokio::time::sleep(duration).await;
                    } else {
                        return Err(RpcError::RetryExhausted { attempts, last_error });
                    }
                }
            }
        }
    }
    /// Determine if a status code is retryable.
    fn is_retryable(status: &tonic::Status) -> bool {
        matches!(
            status.code(),
            tonic::Code::Unavailable
                | tonic::Code::DeadlineExceeded
                | tonic::Code::Aborted
                | tonic::Code::ResourceExhausted
                | tonic::Code::Unknown
        )
    }
 }
 #[cfg(test)]
 mod tests {
     use super::*;
     /// The default policy must match the values documented on `RetryConfig`.
     #[test]
     fn test_retry_config_default() {
         let RetryConfig { max_retries, initial_backoff, max_backoff } = RetryConfig::default();
         assert_eq!(max_retries, 5);
         assert_eq!(initial_backoff, Duration::from_secs(1));
         assert_eq!(max_backoff, Duration::from_secs(60));
     }
     /// Transient codes are retried; client-side and lookup errors are not.
     #[test]
     fn test_is_retryable() {
         let transient = [
             tonic::Status::unavailable("test"),
             tonic::Status::deadline_exceeded("test"),
             tonic::Status::aborted("test"),
             tonic::Status::resource_exhausted("test"),
             tonic::Status::unknown("test"),
         ];
         for status in &transient {
             assert!(SyncClient::is_retryable(status));
         }
         // Non-retryable
         let permanent = [
             tonic::Status::invalid_argument("test"),
             tonic::Status::not_found("test"),
             tonic::Status::permission_denied("test"),
         ];
         for status in &permanent {
             assert!(!SyncClient::is_retryable(status));
         }
     }
 }
--- a/crates/stemedb-rpc/src/error.rs
+++ b/crates/stemedb-rpc/src/error.rs
@ -0,0 +1,65 @@
 //! Error types for the RPC layer.
 //!
 //! Provides a unified error type for client/server operations,
 //! with automatic conversions from underlying transport errors.
 use thiserror::Error;
 /// Errors that can occur during RPC operations.
 ///
 /// `From` conversions exist for `tonic::Status` and
 /// `tonic::transport::Error`, so the client can use `?` on transport calls.
 #[derive(Debug, Error)]
 pub enum RpcError {
     /// Connection failed or was refused.
     ///
     /// Produced from `Unavailable` and `Unknown` statuses and from
     /// transport-level errors.
     #[error("Connection error: {0}")]
     Connection(String),
     /// Request timed out.
     ///
     /// Produced from `DeadlineExceeded` statuses.
     #[error("Request timeout: {0}")]
     Timeout(String),
     /// Server returned an error status.
     ///
     /// Catch-all for status codes without a dedicated variant; the payload
     /// is formatted as `"<code>: <message>"`.
     #[error("Server error: {0}")]
     Server(String),
     /// Failed to serialize/deserialize data.
     #[error("Serialization error: {0}")]
     Serialization(String),
     /// Invalid request or response data.
     ///
     /// Produced from `InvalidArgument` statuses, and by the client when the
     /// endpoint address is rejected while building the channel.
     #[error("Invalid data: {0}")]
     InvalidData(String),
     /// Maximum retry attempts exceeded.
     ///
     /// Returned by the client's retry loop when every attempt failed with a
     /// retryable status.
     #[error("Retry limit exceeded after {attempts} attempts: {last_error}")]
     RetryExhausted {
         /// Number of attempts made.
         attempts: u32,
         /// The last error encountered.
         last_error: String,
     },
     /// Internal transport error.
     #[error("Transport error: {0}")]
     Transport(String),
 }
 impl From<tonic::Status> for RpcError {
    fn from(status: tonic::Status) -> Self {
        match status.code() {
            tonic::Code::Unavailable | tonic::Code::Unknown => {
                RpcError::Connection(status.message().to_string())
            }
            tonic::Code::DeadlineExceeded => RpcError::Timeout(status.message().to_string()),
            tonic::Code::InvalidArgument => RpcError::InvalidData(status.message().to_string()),
            _ => RpcError::Server(format!("{}: {}", status.code(), status.message())),
        }
    }
 }
 impl From<tonic::transport::Error> for RpcError {
     /// Reports any transport-level failure as a connection error, keeping
     /// the transport error's display text.
     fn from(err: tonic::transport::Error) -> Self {
         let reason = err.to_string();
         Self::Connection(reason)
     }
 }
 /// Result type for RPC operations.
 pub type Result<T> = std::result::Result<T, RpcError>;
--- a/crates/stemedb-rpc/src/lib.rs
+++ b/crates/stemedb-rpc/src/lib.rs
@ -0,0 +1,70 @@
 //! gRPC layer for StemeDB node-to-node replication.
 //!
 //! This crate provides the transport layer for two-node replication:
 //!
 //! - **Gossip**: Push new assertions to peers immediately after ingestion
 //! - **Anti-Entropy**: Periodic Merkle root exchange and diff-based sync
 //!
 //! # Architecture
 //!
 //! ```text
 //! [Node A]                    [Node B]
 //!    |                           |
 //!    |--- GossipRequest -------->|   (Push new assertion)
 //!    |<-- GossipResponse --------|
 //!    |                           |
 //!    |--- ExchangeRoots -------->|   (Compare Merkle roots)
 //!    |<-- RootExchangeResponse --|
 //!    |                           |
 //!    |--- FetchAssertions ------>|   (Pull missing data)
 //!    |<-- FetchResponse ---------|
 //! ```
 //!
 //! # Usage
 //!
 //! ## Client
 //!
 //! ```ignore
 //! use stemedb_rpc::client::SyncClient;
 //! use stemedb_rpc::proto::GossipRequest;
 //!
 //! let client = SyncClient::connect("http://peer:9090").await?;
 //! let resp = client.gossip(GossipRequest {
 //!     assertion_hash: hash.to_vec(),
 //!     assertion_data: data,
 //!     hlc_time: ts.time_ntp64,
 //!     hlc_counter: 0,
 //!     hlc_node_id: node_id.to_vec(),
 //! }).await?;
 //! ```
 //!
 //! ## Server
 //!
 //! ```ignore
 //! use stemedb_rpc::server::{SyncServiceHandler, SyncStorage};
 //! use stemedb_rpc::proto::sync_service_server::SyncServiceServer;
 //! use tonic::transport::Server;
 //!
 //! let handler = SyncServiceHandler::new(std::sync::Arc::new(my_storage));
 //! Server::builder()
 //!     .add_service(SyncServiceServer::new(handler))
 //!     .serve("[::1]:9090".parse()?)
 //!     .await?;
 //! ```
 #![forbid(unsafe_code)]
 #![warn(missing_docs)]
 pub mod client;
 pub mod error;
 pub mod server;
 /// Generated protobuf types and service definitions.
 #[allow(missing_docs)]
 pub mod proto {
    tonic::include_proto!("stemedb.sync.v1");
 }
 pub use client::{RetryConfig, SyncClient};
 pub use error::{Result, RpcError};
 pub use server::{SyncServiceHandler, SyncStorage};
--- a/crates/stemedb-rpc/src/server.rs
+++ b/crates/stemedb-rpc/src/server.rs
@ -0,0 +1,319 @@
 //! gRPC server implementation for the sync service.
 //!
 //! This module provides the server-side handlers for sync operations.
 //! The actual storage and sync logic is injected via traits to allow
 //! flexible deployment configurations.
 //!
 //! # Example
 //!
 //! ```ignore
 //! use stemedb_rpc::server::{SyncServiceHandler, SyncStorage};
 //! use tonic::transport::Server;
 //!
 //! let storage = std::sync::Arc::new(MyStorage::new(...));
 //! let handler = SyncServiceHandler::new(storage);
 //!
 //! Server::builder()
 //!     .add_service(SyncServiceServer::new(handler))
 //!     .serve(addr)
 //!     .await?;
 //! ```
 use crate::proto::sync_service_server::SyncService;
 use crate::proto::{
    AssertionData, FetchRequest, FetchResponse, GossipRequest, GossipResponse, PingRequest,
    PingResponse, RootExchangeRequest, RootExchangeResponse,
 };
 use async_trait::async_trait;
 use std::sync::Arc;
 use tonic::{Request, Response, Status};
 use tracing::{debug, info, instrument, warn};
 /// Backend storage interface for sync operations.
 ///
 /// Implement this trait to connect the sync service to your storage layer.
 ///
 /// Errors are plain strings. The handler forwards an error from
 /// `store_gossip_assertion` to the peer inside the response body and turns
 /// errors from the other methods into an `INTERNAL` gRPC status.
 #[async_trait]
 pub trait SyncStorage: Send + Sync + 'static {
     /// Store an assertion received via gossip.
     ///
     /// The handler has already checked that the hash is 32 bytes and the
     /// node ID is 16 bytes; `data` is passed through unchanged.
     ///
     /// Returns Ok(true) if stored, Ok(false) if already existed.
     async fn store_gossip_assertion(
         &self,
         hash: [u8; 32],
         data: Vec<u8>,
         hlc_time: u64,
         hlc_counter: u32,
         hlc_node_id: [u8; 16],
     ) -> Result<bool, String>;
     /// Get the current Merkle root and assertion count.
     ///
     /// A `None` root is sent to peers as an empty byte string.
     async fn get_merkle_state(&self) -> Result<(Option<[u8; 32]>, u64), String>;
     /// Fetch assertions by hash.
     ///
     /// Returns (hash, data) pairs for assertions that exist.
     async fn fetch_assertions(
         &self,
         hashes: Vec<[u8; 32]>,
     ) -> Result<Vec<([u8; 32], Vec<u8>)>, String>;
     /// Get this node's ID and assertion count for ping response.
     async fn get_node_info(&self) -> Result<([u8; 16], u64), String>;
 }
 /// gRPC service handler for sync operations.
 ///
 /// Validates incoming requests and delegates the actual work to the
 /// storage backend `S`.
 pub struct SyncServiceHandler<S> {
     // Shared storage backend every RPC delegates to.
     storage: Arc<S>,
 }
 impl<S: SyncStorage> SyncServiceHandler<S> {
    /// Create a new sync service handler with the given storage backend.
    pub fn new(storage: Arc<S>) -> Self {
        Self { storage }
    }
 }
 #[async_trait]
 impl<S: SyncStorage> SyncService for SyncServiceHandler<S> {
    #[instrument(skip(self, request), fields(hash_len = request.get_ref().assertion_hash.len()))]
    async fn gossip(
        &self,
        request: Request<GossipRequest>,
    ) -> Result<Response<GossipResponse>, Status> {
        let req = request.into_inner();
        // Validate hash length
        if req.assertion_hash.len() != 32 {
            return Err(Status::invalid_argument(format!(
                "assertion_hash must be 32 bytes, got {}",
                req.assertion_hash.len()
            )));
        }
        // Validate HLC node ID length
        if req.hlc_node_id.len() != 16 {
            return Err(Status::invalid_argument(format!(
                "hlc_node_id must be 16 bytes, got {}",
                req.hlc_node_id.len()
            )));
        }
        let mut hash = [0u8; 32];
        hash.copy_from_slice(&req.assertion_hash);
        let mut hlc_node_id = [0u8; 16];
        hlc_node_id.copy_from_slice(&req.hlc_node_id);
        debug!(hash = %hex::encode(&hash[..8]), "Received gossip");
        match self
            .storage
            .store_gossip_assertion(
                hash,
                req.assertion_data,
                req.hlc_time,
                req.hlc_counter,
                hlc_node_id,
            )
            .await
        {
            Ok(stored) => {
                if stored {
                    info!(hash = %hex::encode(&hash[..8]), "Stored gossiped assertion");
                } else {
                    debug!(hash = %hex::encode(&hash[..8]), "Duplicate gossip (already stored)");
                }
                Ok(Response::new(GossipResponse { accepted: true, error: String::new() }))
            }
            Err(e) => {
                warn!(error = %e, "Failed to store gossiped assertion");
                Ok(Response::new(GossipResponse { accepted: false, error: e }))
            }
        }
    }
    #[instrument(skip(self, request), fields(assertion_count = request.get_ref().assertion_count))]
    async fn exchange_roots(
        &self,
        request: Request<RootExchangeRequest>,
    ) -> Result<Response<RootExchangeResponse>, Status> {
        let req = request.into_inner();
        // Validate root length if provided
        if !req.merkle_root.is_empty() && req.merkle_root.len() != 32 {
            return Err(Status::invalid_argument(format!(
                "merkle_root must be 32 bytes if provided, got {}",
                req.merkle_root.len()
            )));
        }
        let (local_root, local_count) =
            self.storage.get_merkle_state().await.map_err(Status::internal)?;
        let remote_root: Option<[u8; 32]> = if req.merkle_root.len() == 32 {
            let mut root = [0u8; 32];
            root.copy_from_slice(&req.merkle_root);
            Some(root)
        } else {
            None
        };
        let roots_match = match (&local_root, &remote_root) {
            (Some(local), Some(remote)) => local == remote,
            (None, None) => true,
            _ => false,
        };
        debug!(
            local_count,
            remote_count = req.assertion_count,
            roots_match,
            "Exchanged Merkle roots"
        );
        Ok(Response::new(RootExchangeResponse {
            merkle_root: local_root.map(|r| r.to_vec()).unwrap_or_default(),
            assertion_count: local_count,
            roots_match,
        }))
    }
    #[instrument(skip(self, request), fields(hash_count = request.get_ref().hashes.len()))]
    async fn fetch_assertions(
        &self,
        request: Request<FetchRequest>,
    ) -> Result<Response<FetchResponse>, Status> {
        let req = request.into_inner();
        // Limit request size to prevent abuse
        const MAX_HASHES: usize = 1000;
        if req.hashes.len() > MAX_HASHES {
            return Err(Status::invalid_argument(format!(
                "Too many hashes requested: {} > {}",
                req.hashes.len(),
                MAX_HASHES
            )));
        }
        // Convert and validate hashes
        let mut hashes = Vec::with_capacity(req.hashes.len());
        for (i, hash_bytes) in req.hashes.iter().enumerate() {
            if hash_bytes.len() != 32 {
                return Err(Status::invalid_argument(format!(
                    "hash[{}] must be 32 bytes, got {}",
                    i,
                    hash_bytes.len()
                )));
            }
            let mut hash = [0u8; 32];
            hash.copy_from_slice(hash_bytes);
            hashes.push(hash);
        }
        let results = self.storage.fetch_assertions(hashes).await.map_err(Status::internal)?;
        debug!(requested = req.hashes.len(), found = results.len(), "Fetched assertions");
        let assertions = results
            .into_iter()
            .map(|(hash, data)| AssertionData { hash: hash.to_vec(), data })
            .collect();
        Ok(Response::new(FetchResponse { assertions }))
    }
    #[instrument(skip(self, _request))]
    async fn ping(&self, _request: Request<PingRequest>) -> Result<Response<PingResponse>, Status> {
        let (node_id, assertion_count) =
            self.storage.get_node_info().await.map_err(Status::internal)?;
        debug!(assertion_count, "Responding to ping");
        Ok(Response::new(PingResponse { node_id: node_id.to_vec(), assertion_count }))
    }
 }
 #[cfg(test)]
 mod tests {
     use super::*;
     /// Mock storage for testing: accepts every write and echoes fixed data.
     struct MockStorage {
         node_id: [u8; 16],
         assertion_count: u64,
     }
     #[async_trait]
     impl SyncStorage for MockStorage {
         async fn store_gossip_assertion(
             &self,
             _hash: [u8; 32],
             _data: Vec<u8>,
             _hlc_time: u64,
             _hlc_counter: u32,
             _hlc_node_id: [u8; 16],
         ) -> Result<bool, String> {
             Ok(true)
         }
         async fn get_merkle_state(&self) -> Result<(Option<[u8; 32]>, u64), String> {
             let root = Some([1u8; 32]);
             Ok((root, self.assertion_count))
         }
         async fn fetch_assertions(
             &self,
             hashes: Vec<[u8; 32]>,
         ) -> Result<Vec<([u8; 32], Vec<u8>)>, String> {
             // Return mock data for each hash
             let pairs = hashes.into_iter().map(|hash| (hash, vec![1, 2, 3])).collect();
             Ok(pairs)
         }
         async fn get_node_info(&self) -> Result<([u8; 16], u64), String> {
             Ok((self.node_id, self.assertion_count))
         }
     }
     /// Builds a handler backed by a `MockStorage` with the given identity.
     fn handler_with(node_id: [u8; 16], assertion_count: u64) -> SyncServiceHandler<MockStorage> {
         SyncServiceHandler::new(Arc::new(MockStorage { node_id, assertion_count }))
     }
     #[tokio::test]
     async fn test_ping() {
         let handler = handler_with([42u8; 16], 100);
         let request = Request::new(PingRequest { node_id: vec![1u8; 16] });
         let reply = handler.ping(request).await.expect("ping should succeed").into_inner();
         assert_eq!(reply.node_id, vec![42u8; 16]);
         assert_eq!(reply.assertion_count, 100);
     }
     #[tokio::test]
     async fn test_gossip_invalid_hash_length() {
         let handler = handler_with([1u8; 16], 0);
         let request = Request::new(GossipRequest {
             assertion_hash: vec![1u8; 16], // Wrong length
             assertion_data: vec![],
             hlc_time: 0,
             hlc_counter: 0,
             hlc_node_id: vec![1u8; 16],
         });
         let outcome = handler.gossip(request).await;
         assert!(outcome.is_err());
         let code = outcome.err().map(|status| status.code());
         assert_eq!(code, Some(tonic::Code::InvalidArgument));
     }
     #[tokio::test]
     async fn test_fetch_too_many_hashes() {
         let handler = handler_with([1u8; 16], 0);
         // More than MAX_HASHES
         let oversized = vec![vec![0u8; 32]; 1001];
         let request = Request::new(FetchRequest { hashes: oversized });
         let outcome = handler.fetch_assertions(request).await;
         assert!(outcome.is_err());
         let code = outcome.err().map(|status| status.code());
         assert_eq!(code, Some(tonic::Code::InvalidArgument));
     }
 }
--- a/crates/stemedb-storage/Cargo.toml
+++ b/crates/stemedb-storage/Cargo.toml
@ -36,6 +36,7 @@ byteorder = "1.5"
 [dev-dependencies]
 tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread"] }
 criterion = { version = "0.5", features = ["html_reports", "async_tokio"] }
 proptest = "1.4"
 [[bench]]
 name = "kv_store"
--- a/crates/stemedb-storage/src/crdt/assertion_store.rs
+++ b/crates/stemedb-storage/src/crdt/assertion_store.rs
@ -0,0 +1,485 @@
 //! CRDT wrapper for assertion storage implementing G-Set semantics.
 //!
 //! Assertions naturally form a G-Set (Grow-only Set):
 //! - Assertions are append-only (never deleted)
 //! - Content-addressed by BLAKE3 hash (idempotent inserts)
 //!
 //! This wrapper adds explicit merge operations for replication.
 use crate::error::{Result, StorageError};
 use crate::key_codec;
 use crate::traits::KVStore;
 use async_trait::async_trait;
 use rkyv::{Archive, Deserialize, Serialize};
 use std::sync::Arc;
 use stemedb_core::types::Hash;
 use tracing::{debug, instrument, warn};
 use super::traits::CrdtMerge;
 /// G-Set state for assertions under a subject.
 ///
 /// This is a set of assertion hashes - the actual assertion data
 /// is content-addressed and can be fetched separately.
 ///
 /// The hashes are kept in a `Vec`, so uniqueness is not enforced by the
 /// type; `merge` only avoids adding hashes that are already present.
 #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
 #[archive(check_bytes)]
 pub struct AssertionSetState {
     /// The subject this state covers.
     pub subject: String,
     /// Set of assertion hashes present on this node.
     pub assertion_hashes: Vec<Hash>,
     /// Source node ID.
     pub source_node: [u8; 16],
 }
 impl AssertionSetState {
     /// Creates a new assertion set state.
     pub fn new(subject: String, assertion_hashes: Vec<Hash>, source_node: [u8; 16]) -> Self {
         Self { subject, assertion_hashes, source_node }
     }
     /// Merges two assertion set states using set union.
     ///
     /// G-Set merge is simply the union of both sets. The result keeps
     /// `self`'s hashes in their existing order, followed by the hashes from
     /// `other` that were not already present, in `other`'s order. The subject
     /// and source node are taken from `self`.
     ///
     /// Both states are expected to cover the same subject; this is only
     /// checked in debug builds.
     pub fn merge(&self, other: &Self) -> Self {
         debug_assert_eq!(self.subject, other.subject, "Cannot merge states for different subjects");
         // Track membership in a hash set so the union costs O(n + m) instead
         // of a linear scan of the combined list for every remote hash.
         let mut seen: std::collections::HashSet<Hash> =
             self.assertion_hashes.iter().copied().collect();
         let mut combined: Vec<Hash> = self.assertion_hashes.clone();
         // `insert` returns false for hashes already seen, which also drops
         // duplicates inside `other`.
         combined.extend(other.assertion_hashes.iter().copied().filter(|hash| seen.insert(*hash)));
         Self {
             subject: self.subject.clone(),
             assertion_hashes: combined,
             source_node: self.source_node,
         }
     }
     /// Returns the number of assertions in this state.
     pub fn len(&self) -> usize {
         self.assertion_hashes.len()
     }
     /// Returns true if the state is empty.
     pub fn is_empty(&self) -> bool {
         self.assertion_hashes.is_empty()
     }
     /// Checks if an assertion hash is in this state.
     pub fn contains(&self, hash: &Hash) -> bool {
         self.assertion_hashes.contains(hash)
     }
 }
 /// Assertion data that may need to be transferred during sync.
 ///
 /// When merging G-Sets, we first compare hashes. If the remote has
 /// hashes we don't have, we request the full assertion data.
 ///
 /// On receipt, `CrdtAssertionStore::merge_with_data` recomputes the BLAKE3
 /// hash of `data` and skips the transfer if it does not equal `hash`.
 #[derive(Archive, Deserialize, Serialize, Debug, Clone)]
 #[archive(check_bytes)]
 pub struct AssertionTransfer {
     /// The assertion hash (for verification).
     pub hash: Hash,
     /// The raw serialized assertion bytes.
     pub data: Vec<u8>,
 }
 /// CRDT wrapper for assertion storage with G-Set merge semantics.
 ///
 /// Wraps a KVStore and adds merge operations for distributed replication.
 /// Assertions are content-addressed by their BLAKE3 hash, making inserts
 /// naturally idempotent.
 ///
 /// # Merge Semantics
 ///
 /// Assertion sets use G-Set (union) semantics:
 /// - `merge(A, B)` = `A ∪ B` (set union)
 /// - Missing assertions are requested and stored locally
 ///
 /// # Example
 ///
 /// ```ignore
 /// use stemedb_storage::crdt::CrdtAssertionStore;
 /// use std::sync::Arc;
 ///
 /// let crdt = CrdtAssertionStore::new(Arc::new(store), node_id);
 ///
 /// // Get set of assertion hashes for a subject
 /// let state = crdt.get_state("Tesla_Inc").await?;
 ///
 /// // Compare with remote state to find missing assertions
 /// let missing = crdt.find_missing("Tesla_Inc", &remote_state).await?;
 ///
 /// // Merge remote assertions (with their data)
 /// crdt.merge_with_data("Tesla_Inc", &remote_assertions).await?;
 /// ```
 pub struct CrdtAssertionStore<S: KVStore> {
     // Underlying key-value store holding the assertion bytes.
     store: Arc<S>,
     // ID of the local node; stamped on states produced by `get_state`.
     node_id: [u8; 16],
 }
 impl<S: KVStore + 'static> CrdtAssertionStore<S> {
    /// Creates a new CRDT assertion store with the given node ID.
    pub fn new(store: Arc<S>, node_id: [u8; 16]) -> Self {
        Self { store, node_id }
    }
    /// Gets an assertion by its hash.
    #[instrument(skip(self), fields(hash = %hex::encode(hash)))]
    pub async fn get_assertion(&self, subject: &str, hash: &Hash) -> Result<Option<Vec<u8>>> {
        let hash_hex = hex::encode(hash);
        let key = key_codec::assertion_key(subject, &hash_hex);
        self.store.get(&key).await
    }
    /// Puts an assertion (content-addressed, idempotent).
    ///
    /// The hash is computed from the data, so duplicate puts are safe.
    #[instrument(skip(self, data), fields(data_len = data.len()))]
    pub async fn put_assertion(&self, subject: &str, data: &[u8]) -> Result<Hash> {
        let hash_bytes = blake3::hash(data);
        let hash: Hash = *hash_bytes.as_bytes();
        let hash_hex = hex::encode(hash);
        let key = key_codec::assertion_key(subject, &hash_hex);
        self.store.put(&key, data).await?;
        debug!(hash = %hash_hex, "Stored assertion");
        Ok(hash)
    }
    /// Checks if an assertion exists locally.
    #[instrument(skip(self))]
    pub async fn has_assertion(&self, subject: &str, hash: &Hash) -> Result<bool> {
        let hash_hex = hex::encode(hash);
        let key = key_codec::assertion_key(subject, &hash_hex);
        Ok(self.store.get(&key).await?.is_some())
    }
    /// Finds assertion hashes present in remote state but missing locally.
    ///
    /// Returns hashes that need to be fetched from the remote node.
    #[instrument(skip(self, remote), fields(remote_count = remote.assertion_hashes.len()))]
    pub async fn find_missing(
        &self,
        subject: &str,
        remote: &AssertionSetState,
    ) -> Result<Vec<Hash>> {
        if remote.subject != subject {
            return Err(StorageError::InputValidation("Subject mismatch".to_string()));
        }
        let mut missing = Vec::new();
        for hash in &remote.assertion_hashes {
            if !self.has_assertion(subject, hash).await? {
                missing.push(*hash);
            }
        }
        debug!(missing_count = missing.len(), "Found missing assertions");
        Ok(missing)
    }
    /// Merges assertion data received from a remote node.
    ///
    /// Each assertion is verified by computing its hash and comparing
    /// to the expected hash before storing.
    #[instrument(skip(self, assertions), fields(count = assertions.len()))]
    pub async fn merge_with_data(
        &self,
        subject: &str,
        assertions: &[AssertionTransfer],
    ) -> Result<usize> {
        let mut merged_count = 0;
        for transfer in assertions {
            // Verify hash
            let computed_hash = blake3::hash(&transfer.data);
            if computed_hash.as_bytes() != &transfer.hash {
                warn!(
                    expected = %hex::encode(transfer.hash),
                    computed = %hex::encode(computed_hash.as_bytes()),
                    "Hash mismatch in assertion transfer, skipping"
                );
                continue;
            }
            // Store if not already present
            if !self.has_assertion(subject, &transfer.hash).await? {
                self.put_assertion(subject, &transfer.data).await?;
                merged_count += 1;
            }
        }
        debug!(merged_count, "Merged assertion data from remote");
        Ok(merged_count)
    }
 }
 #[async_trait]
 impl<S: KVStore + 'static> CrdtMerge for CrdtAssertionStore<S> {
    type State = AssertionSetState;
    /// Builds the set of assertion hashes stored under `subject`.
    ///
    /// Every key under the subject's assertion prefix is scanned. The text
    /// after the last `:` in the key is hex-decoded; keys whose suffix is not
    /// valid hex or does not decode to exactly 32 bytes are ignored.
    #[instrument(skip(self))]
    async fn get_state(&self, subject: &str) -> Result<Self::State> {
        let prefix = key_codec::assertion_prefix(subject);
        let entries = self.store.scan_prefix(&prefix).await?;
        let mut hashes = Vec::with_capacity(entries.len());
        for (key, _value) in entries {
            // Key format: {subject}\x00H:{hash_hex}
            let key_text = String::from_utf8_lossy(&key);
            let hash_hex = match key_text.split(':').next_back() {
                Some(segment) => segment,
                None => continue,
            };
            let raw = match hex::decode(hash_hex) {
                Ok(bytes) => bytes,
                Err(_) => continue,
            };
            if raw.len() != 32 {
                continue;
            }
            let hash: Hash = raw
                .try_into()
                .map_err(|_| StorageError::Serialization("Invalid hash bytes".to_string()))?;
            hashes.push(hash);
        }
        Ok(AssertionSetState::new(subject.to_string(), hashes, self.node_id))
    }
    /// Compares `remote` against local state and reports what is missing.
    ///
    /// This method transfers no assertion data itself: it calls
    /// `find_missing` and logs the number of missing hashes. The caller is
    /// expected to fetch the payloads and hand them to `merge_with_data()`.
    ///
    /// # Errors
    ///
    /// Returns `StorageError::InputValidation` when `remote.subject` differs
    /// from `subject`, and propagates any error from `find_missing`.
    #[instrument(skip(self, remote), fields(subject = %remote.subject, hash_count = remote.assertion_hashes.len()))]
    async fn merge(&self, subject: &str, remote: &Self::State) -> Result<()> {
        if remote.subject.as_str() != subject {
            warn!(
                expected = subject,
                actual = %remote.subject,
                "Subject mismatch in merge"
            );
            return Err(StorageError::InputValidation("Subject mismatch in merge".to_string()));
        }
        // G-Set merge: the union is complete once every remote hash exists locally.
        let missing = self.find_missing(subject, remote).await?;
        let missing_count = missing.len();
        if missing_count > 0 {
            debug!(missing_count, "Merge found missing assertions - data transfer required");
        }
        Ok(())
    }
    /// Returns the node ID this store was created with.
    fn node_id(&self) -> [u8; 16] {
        self.node_id
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::HybridStore;
    use tempfile::{tempdir, TempDir};
    /// Opens a `HybridStore` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the store because the
    /// directory is deleted when the guard is dropped. Callers bind the guard
    /// (`let (_dir, store) = ...`) so the directory outlives the store.
    async fn create_test_store() -> (TempDir, Arc<HybridStore>) {
        let dir = tempdir().expect("Failed to create temp dir");
        let store = Arc::new(HybridStore::open(dir.path()).expect("Failed to open store"));
        (dir, store)
    }
    #[tokio::test]
    async fn test_assertion_set_state_merge() {
        let hash1 = [1u8; 32];
        let hash2 = [2u8; 32];
        let hash3 = [3u8; 32];
        let node1 = [1u8; 16];
        let node2 = [2u8; 16];
        let state1 = AssertionSetState::new("test".to_string(), vec![hash1, hash2], node1);
        let state2 = AssertionSetState::new("test".to_string(), vec![hash2, hash3], node2);
        let merged = state1.merge(&state2);
        assert_eq!(merged.assertion_hashes.len(), 3);
        assert!(merged.contains(&hash1));
        assert!(merged.contains(&hash2));
        assert!(merged.contains(&hash3));
    }
    #[tokio::test]
    async fn test_put_and_get_assertion() {
        let (_dir, store) = create_test_store().await;
        let crdt = CrdtAssertionStore::new(store, [1u8; 16]);
        let data = b"test assertion data";
        let hash = crdt.put_assertion("test", data).await.expect("put");
        let retrieved = crdt.get_assertion("test", &hash).await.expect("get");
        assert!(retrieved.is_some());
        assert_eq!(retrieved.expect("should exist"), data.to_vec());
    }
    #[tokio::test]
    async fn test_put_is_idempotent() {
        let (_dir, store) = create_test_store().await;
        let crdt = CrdtAssertionStore::new(store, [1u8; 16]);
        let data = b"test assertion data";
        let hash1 = crdt.put_assertion("test", data).await.expect("put1");
        let hash2 = crdt.put_assertion("test", data).await.expect("put2");
        // Same data = same hash
        assert_eq!(hash1, hash2);
        // Only one entry
        let state = crdt.get_state("test").await.expect("state");
        assert_eq!(state.len(), 1);
    }
    #[tokio::test]
    async fn test_find_missing() {
        let (_dir, store) = create_test_store().await;
        let crdt = CrdtAssertionStore::new(store, [1u8; 16]);
        // Put one assertion locally
        let local_data = b"local assertion";
        let local_hash = crdt.put_assertion("test", local_data).await.expect("put");
        // Remote state has local + one more
        let remote_only_hash = [99u8; 32];
        let remote_state = AssertionSetState::new(
            "test".to_string(),
            vec![local_hash, remote_only_hash],
            [2u8; 16],
        );
        let missing = crdt.find_missing("test", &remote_state).await.expect("find");
        assert_eq!(missing.len(), 1);
        assert_eq!(missing[0], remote_only_hash);
    }
    #[tokio::test]
    async fn test_merge_with_data() {
        let (_dir, store) = create_test_store().await;
        let crdt = CrdtAssertionStore::new(store, [1u8; 16]);
        let data = b"transferred assertion";
        let hash_bytes = blake3::hash(data);
        let hash: Hash = *hash_bytes.as_bytes();
        let transfers = vec![AssertionTransfer { hash, data: data.to_vec() }];
        let merged = crdt.merge_with_data("test", &transfers).await.expect("merge");
        assert_eq!(merged, 1);
        // Should now exist
        let exists = crdt.has_assertion("test", &hash).await.expect("has");
        assert!(exists);
        // Merging again should be idempotent
        let merged2 = crdt.merge_with_data("test", &transfers).await.expect("merge2");
        assert_eq!(merged2, 0); // Already exists
    }
    #[tokio::test]
    async fn test_merge_with_data_rejects_bad_hash() {
        let (_dir, store) = create_test_store().await;
        let crdt = CrdtAssertionStore::new(store, [1u8; 16]);
        let data = b"some data";
        let wrong_hash = [0u8; 32]; // Doesn't match data
        let transfers = vec![AssertionTransfer { hash: wrong_hash, data: data.to_vec() }];
        let merged = crdt.merge_with_data("test", &transfers).await.expect("merge");
        assert_eq!(merged, 0); // Should reject due to hash mismatch
    }
 }
 #[cfg(test)]
 mod property_tests {
    use super::*;
    use proptest::prelude::*;
    // Commutativity: merging A with B and B with A yields the same set of hashes.
    proptest! {
        #[test]
        fn merge_commutative(
            hashes_a in prop::collection::vec(prop::array::uniform32(0u8..255), 0..10),
            hashes_b in prop::collection::vec(prop::array::uniform32(0u8..255), 0..10),
        ) {
            let left = AssertionSetState::new("test".to_string(), hashes_a, [1u8; 16]);
            let right = AssertionSetState::new("test".to_string(), hashes_b, [2u8; 16]);
            // Sort both results so the comparison ignores element order.
            let mut forward = left.merge(&right).assertion_hashes;
            let mut backward = right.merge(&left).assertion_hashes;
            forward.sort_unstable();
            backward.sort_unstable();
            prop_assert_eq!(forward, backward);
        }
    }
    // Associativity: (A merge B) merge C equals A merge (B merge C).
    proptest! {
        #[test]
        fn merge_associative(
            hashes_a in prop::collection::vec(prop::array::uniform32(0u8..255), 0..5),
            hashes_b in prop::collection::vec(prop::array::uniform32(0u8..255), 0..5),
            hashes_c in prop::collection::vec(prop::array::uniform32(0u8..255), 0..5),
        ) {
            let first = AssertionSetState::new("test".to_string(), hashes_a, [1u8; 16]);
            let second = AssertionSetState::new("test".to_string(), hashes_b, [2u8; 16]);
            let third = AssertionSetState::new("test".to_string(), hashes_c, [3u8; 16]);
            let mut left_grouped = first.merge(&second).merge(&third).assertion_hashes;
            let mut right_grouped = first.merge(&second.merge(&third)).assertion_hashes;
            left_grouped.sort_unstable();
            right_grouped.sort_unstable();
            prop_assert_eq!(left_grouped, right_grouped);
        }
    }
    // Idempotence: merging a state with itself adds nothing.
    proptest! {
        #[test]
        fn merge_idempotent(
            hashes in prop::collection::vec(prop::array::uniform32(0u8..255), 0..10),
        ) {
            let state = AssertionSetState::new("test".to_string(), hashes, [1u8; 16]);
            let mut after_merge = state.merge(&state).assertion_hashes;
            let mut before_merge = state.assertion_hashes.clone();
            before_merge.sort_unstable();
            after_merge.sort_unstable();
            prop_assert_eq!(before_merge, after_merge);
        }
    }
 }
--- a/crates/stemedb-storage/src/crdt/mod.rs
+++ b/crates/stemedb-storage/src/crdt/mod.rs
@ -0,0 +1,218 @@
 //! CRDT (Conflict-free Replicated Data Type) implementations for distributed StemeDB.
 //!
 //! This module provides CRDT wrappers around existing storage types to enable
 //! conflict-free replication across multiple nodes. The key insight is that
 //! StemeDB's existing storage operations already have CRDT semantics:
 //!
 //! - **Votes**: G-Counter semantics (counts only increase)
 //! - **Assertions**: G-Set semantics (append-only, never removed)
 //!
 //! These wrappers add explicit `merge()` operations for replication.
 //!
 //! # Design Principles
 //!
 //! 1. **Wrap, don't replace**: CRDT types wrap existing stores rather than
 //!    reimplementing them, preserving all existing functionality.
 //!
 //! 2. **Merge is idempotent**: `merge(A, A) == A` - safe to replay messages.
 //!
 //! 3. **Merge is commutative**: `merge(A, B) == merge(B, A)` - order doesn't matter.
 //!
 //! 4. **Merge is associative**: `merge(merge(A, B), C) == merge(A, merge(B, C))`.
 //!
 //! # State Types
 //!
 //! Each CRDT defines a state type that can be extracted, transferred over the
 //! network, and merged into another replica. States are designed for efficient
 //! delta synchronization.
 //!
 //! # Example
 //!
 //! ```ignore
 //! use stemedb_storage::crdt::{CrdtVoteStore, CrdtMerge, VoteCountState};
 //!
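 //! // Local node
 //! let local = CrdtVoteStore::new(store, node_id);
 //!
 //! // Remote node (a separate replica with its own store and node ID)
 //! let remote = CrdtVoteStore::new(remote_store, remote_node_id);
 //!
 //! // Get state to send to remote
 //! let state = local.get_state("Tesla_Inc").await?;
 //!
 //! // On remote node, merge received state
 //! remote.merge("Tesla_Inc", &state).await?;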
 //! ```
 mod assertion_store;
 mod traits;
 mod vote_store;
 #[cfg(test)]
 mod vote_store_props;
 pub use assertion_store::{AssertionSetState, AssertionTransfer, CrdtAssertionStore};
 pub use traits::CrdtMerge;
 // `SubjectVoteState` is the `CrdtMerge::State` of `CrdtVoteStore`, so it is
 // re-exported next to the per-assertion `VoteCountState`; callers can then
 // name the state they receive from `get_state` and pass to `merge`.
 pub use vote_store::{CrdtVoteStore, SubjectVoteState, VoteCountState};
 #[cfg(test)]
 mod tests {
    //! Placeholder describing the CRDT laws covered by property-based tests.
    //!
    //! This module itself contains no test functions. The laws in question
    //! are the fundamental CRDT properties:
    //! - Commutativity: merge(A, B) == merge(B, A)
    //! - Associativity: merge(merge(A, B), C) == merge(A, merge(B, C))
    //! - Idempotence: merge(A, A) == A
    // Property tests are in the submodules with proptest
 }
 /// Integration tests demonstrating end-to-end CRDT operations.
 #[cfg(test)]
 mod integration_tests {
    use super::*;
    use crate::vote_store::VoteStore;
    use crate::HybridStore;
    use std::sync::Arc;
    use stemedb_core::types::Vote;
    use tempfile::{tempdir, TempDir};
    /// Opens a `HybridStore` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the store because the
    /// directory is deleted when the guard is dropped. Callers bind the guard
    /// so the directory outlives the store.
    async fn create_test_store() -> (TempDir, Arc<HybridStore>) {
        let dir = tempdir().expect("Failed to create temp dir");
        let store = Arc::new(HybridStore::open(dir.path()).expect("Failed to open store"));
        (dir, store)
    }
    /// Tests concurrent vote ingestion across multiple nodes, then merge.
    ///
    /// Simulates:
    /// 1. Node A receives votes from agents 0, 1, 2
    /// 2. Node B receives votes from agents 3, 4
    /// 3. Nodes exchange state and merge
    /// 4. Both nodes should converge to the same final state
    ///
    /// Note that the merge takes the maximum of the two per-node totals, so
    /// the converged count is 3 rather than the 5 distinct votes cast.
    #[tokio::test]
    async fn test_multi_node_vote_convergence() {
        // Create two independent "nodes" with their own stores
        let (_dir_a, store_a) = create_test_store().await;
        let (_dir_b, store_b) = create_test_store().await;
        let node_a = CrdtVoteStore::new(store_a, [1u8; 16]);
        let node_b = CrdtVoteStore::new(store_b, [2u8; 16]);
        let assertion_hash = [42u8; 32];
        let subject = "test_subject";
        // Node A receives 3 votes
        for i in 0..3 {
            let vote = Vote {
                assertion_hash,
                agent_id: [i as u8; 32],
                weight: 0.5,
                signature: [0u8; 64],
                timestamp: 1000 + i as u64,
                source_url: None,
                observed_context: None,
            };
            node_a.put_vote(&vote, subject).await.expect("put vote");
        }
        // Node B receives 2 votes
        for i in 3..5 {
            let vote = Vote {
                assertion_hash,
                agent_id: [i as u8; 32],
                weight: 0.3,
                signature: [0u8; 64],
                timestamp: 1000 + i as u64,
                source_url: None,
                observed_context: None,
            };
            node_b.put_vote(&vote, subject).await.expect("put vote");
        }
        // Verify initial states differ
        let count_a = node_a.get_vote_count(&assertion_hash, subject).await.expect("count");
        let count_b = node_b.get_vote_count(&assertion_hash, subject).await.expect("count");
        assert_eq!(count_a, 3);
        assert_eq!(count_b, 2);
        // Exchange and merge state
        let state_a = node_a.get_state(subject).await.expect("get state");
        let state_b = node_b.get_state(subject).await.expect("get state");
        node_b.merge(subject, &state_a).await.expect("merge a->b");
        node_a.merge(subject, &state_b).await.expect("merge b->a");
        // Verify convergence: both should have max(3, 2) = 3 votes
        let final_count_a = node_a.get_vote_count(&assertion_hash, subject).await.expect("count");
        let final_count_b = node_b.get_vote_count(&assertion_hash, subject).await.expect("count");
        assert_eq!(final_count_a, 3, "Node A should converge to highest count");
        assert_eq!(final_count_b, 3, "Node B should converge to highest count");
    }
    /// Tests assertion set merge across nodes.
    ///
    /// Simulates:
    /// 1. Node A has assertions [A1, A2]
    /// 2. Node B has assertions [A2, A3]
    /// 3. After merge, both should have [A1, A2, A3]
    #[tokio::test]
    async fn test_assertion_set_merge() {
        let (_dir_a, store_a) = create_test_store().await;
        let (_dir_b, store_b) = create_test_store().await;
        let node_a = CrdtAssertionStore::new(store_a, [1u8; 16]);
        let node_b = CrdtAssertionStore::new(store_b, [2u8; 16]);
        let subject = "test_subject";
        // Node A: assertions 1 and 2
        let hash_a1 = node_a.put_assertion(subject, b"assertion 1").await.expect("put");
        let hash_a2 = node_a.put_assertion(subject, b"assertion 2").await.expect("put");
        // Node B: assertions 2 and 3 (2 is same content, so same hash)
        let hash_b2 = node_b.put_assertion(subject, b"assertion 2").await.expect("put");
        let hash_b3 = node_b.put_assertion(subject, b"assertion 3").await.expect("put");
        // A2 and B2 should have the same hash (content-addressed)
        assert_eq!(hash_a2, hash_b2, "Same content should produce same hash");
        // Get initial states
        let state_a = node_a.get_state(subject).await.expect("get state");
        let state_b = node_b.get_state(subject).await.expect("get state");
        assert_eq!(state_a.assertion_hashes.len(), 2);
        assert_eq!(state_b.assertion_hashes.len(), 2);
        // Find what B has that A doesn't (should be hash_b3 only)
        let missing_from_a = node_a.find_missing(subject, &state_b).await.expect("find missing");
        assert_eq!(missing_from_a.len(), 1);
        assert_eq!(missing_from_a[0], hash_b3);
        // Find what A has that B doesn't (should be hash_a1 only)
        let missing_from_b = node_b.find_missing(subject, &state_a).await.expect("find missing");
        assert_eq!(missing_from_b.len(), 1);
        assert_eq!(missing_from_b[0], hash_a1);
        // Simulate data transfer and merge
        let transfer_to_a = vec![assertion_store::AssertionTransfer {
            hash: hash_b3,
            data: b"assertion 3".to_vec(),
        }];
        node_a.merge_with_data(subject, &transfer_to_a).await.expect("merge");
        let transfer_to_b = vec![assertion_store::AssertionTransfer {
            hash: hash_a1,
            data: b"assertion 1".to_vec(),
        }];
        node_b.merge_with_data(subject, &transfer_to_b).await.expect("merge");
        // Verify both nodes now have 3 unique assertions
        let final_state_a = node_a.get_state(subject).await.expect("get state");
        let final_state_b = node_b.get_state(subject).await.expect("get state");
        assert_eq!(final_state_a.assertion_hashes.len(), 3);
        assert_eq!(final_state_b.assertion_hashes.len(), 3);
        // Both should have all three hashes
        assert!(final_state_a.contains(&hash_a1));
        assert!(final_state_a.contains(&hash_a2));
        assert!(final_state_a.contains(&hash_b3));
        assert!(final_state_b.contains(&hash_a1));
        assert!(final_state_b.contains(&hash_a2));
        assert!(final_state_b.contains(&hash_b3));
    }
 }
--- a/crates/stemedb-storage/src/crdt/traits.rs
+++ b/crates/stemedb-storage/src/crdt/traits.rs
@ -0,0 +1,68 @@
 //! Core CRDT traits for distributed merge operations.
 use crate::error::Result;
 use async_trait::async_trait;
 /// Trait for CRDT types that support merge operations.
 ///
 /// This trait defines the interface for extracting per-subject state and
 /// merging state received from remote replicas. Implementations are expected
 /// to satisfy the CRDT properties:
 ///
 /// - **Commutativity**: `merge(A, B)` produces the same result as `merge(B, A)`
 /// - **Associativity**: `merge(merge(A, B), C)` equals `merge(A, merge(B, C))`
 /// - **Idempotence**: `merge(A, A)` equals `A`
 ///
 /// The trait itself cannot enforce these laws; they are a contract on
 /// implementors.
 ///
 /// # Type Parameters
 ///
 /// The `State` associated type represents the serializable state that
 /// can be transferred between replicas. It should be designed for
 /// efficient delta synchronization when possible.
 #[async_trait]
 pub trait CrdtMerge: Send + Sync {
    /// The serializable state type for this CRDT.
    ///
    /// This type should implement rkyv serialization for efficient
    /// network transfer and storage. The only bounds required here are
    /// `Send + Sync`.
    type State: Send + Sync;
    /// Extracts the current state for a given subject.
    ///
    /// The returned state can be sent to remote replicas and merged
    /// using the `merge` method.
    ///
    /// # Arguments
    ///
    /// * `subject` - The subject identifier to get state for
    ///
    /// # Returns
    ///
    /// The current CRDT state for the subject.
    ///
    /// # Errors
    ///
    /// Returns the implementation's storage error if the state cannot be read.
    async fn get_state(&self, subject: &str) -> Result<Self::State>;
    /// Merges remote state into the local replica.
    ///
    /// This operation must be:
    /// - **Idempotent**: Merging the same state twice has no additional effect
    /// - **Commutative**: Order of merge operations doesn't matter
    /// - **Associative**: Grouping of merge operations doesn't matter
    ///
    /// # Arguments
    ///
    /// * `subject` - The subject identifier to merge state for
    /// * `remote` - The state received from a remote replica
    ///
    /// # Returns
    ///
    /// Ok(()) on success, or an error if the merge fails.
    async fn merge(&self, subject: &str, remote: &Self::State) -> Result<()>;
    /// Returns the node ID for this CRDT instance.
    ///
    /// The node ID is intended for tracking state provenance.
    /// NOTE(review): tiebreaking by node ID is mentioned as a use case but is
    /// not required by this trait — confirm against implementations.
    fn node_id(&self) -> [u8; 16];
 }
 // NOTE: CrdtStateCompare trait planned for Phase 6B (anti-entropy sync).
 // Removed to avoid dead code until implementation is needed.
--- a/crates/stemedb-storage/src/crdt/vote_store.rs
+++ b/crates/stemedb-storage/src/crdt/vote_store.rs
@ -0,0 +1,439 @@
 //! CRDT wrapper for VoteStore implementing G-Counter semantics.
 //!
 //! The vote store naturally implements G-Counter (Grow-only Counter) semantics:
 //! - Vote counts only increase
 //! - Aggregate weights only increase (assuming positive weights)
 //!
 //! This wrapper adds explicit merge operations for replication.
 use crate::error::Result;
 use crate::key_codec;
 use crate::traits::KVStore;
 use crate::vote_store::{GenericVoteStore, VoteStore};
 use async_trait::async_trait;
 use rkyv::{Archive, Deserialize, Serialize};
 use std::sync::Arc;
 use stemedb_core::types::Hash;
 use tracing::{debug, instrument, warn};
 use super::traits::CrdtMerge;
 /// Vote count and aggregate weight for one assertion, as seen by one node.
 ///
 /// This state captures the vote count and aggregate weight for a single
 /// assertion within a subject. It is the unit exchanged between replicas
 /// and is serializable with rkyv (`check_bytes` validation enabled).
 #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
 #[archive(check_bytes)]
 pub struct VoteCountState {
    /// The assertion this state applies to.
    pub assertion_hash: Hash,
    /// Vote count for the assertion as recorded by the producing node.
    pub count: u64,
    /// Aggregate vote weight for the assertion as recorded by the producing node.
    pub weight: f32,
    /// Node that produced this state (for provenance).
    pub source_node: [u8; 16],
 }
 impl VoteCountState {
    /// Builds a vote count state from its four components.
    pub fn new(assertion_hash: Hash, count: u64, weight: f32, source_node: [u8; 16]) -> Self {
        Self { assertion_hash, count, weight, source_node }
    }
    /// Combines two states for the same assertion by taking the larger
    /// `count` and the larger `weight`.
    ///
    /// The result keeps `self`'s `assertion_hash` and `source_node`, so the
    /// `source_node` field depends on which side `merge` is called on.
    ///
    /// The check that both states refer to the same assertion is a
    /// `debug_assert_eq!` and is therefore only active in debug builds.
    ///
    /// NOTE(review): this takes the maximum of two totals rather than summing
    /// per-node contributions, so votes recorded independently on two nodes
    /// are not added together — confirm this is the intended semantics.
    pub fn merge(&self, other: &Self) -> Self {
        debug_assert_eq!(
            self.assertion_hash, other.assertion_hash,
            "Cannot merge states for different assertions"
        );
        let count = u64::max(self.count, other.count);
        let weight = f32::max(self.weight, other.weight);
        // Keep the local node ID and hash; only the counters are merged.
        Self::new(self.assertion_hash, count, weight, self.source_node)
    }
 }
 /// CRDT wrapper for VoteStore with G-Counter merge semantics.
 ///
 /// Wraps a `GenericVoteStore` and adds merge operations for distributed
 /// replication. The underlying atomic operations (`fetch_and_add_u64`,
 /// `compare_and_swap_f32`) already provide local consistency; this wrapper
 /// adds cross-node consistency via explicit merge.
 ///
 /// # Merge Semantics
 ///
 /// Vote counts use G-Counter (max) semantics:
 /// - `merge(local, remote)` takes `max(local.count, remote.count)`
 /// - This ensures counts converge to the highest observed value
 ///
 /// # Example
 ///
 /// ```ignore
 /// use stemedb_storage::crdt::CrdtVoteStore;
 /// use std::sync::Arc;
 ///
 /// let crdt = CrdtVoteStore::new(Arc::new(store), node_id);
 ///
 /// // Local operations work as normal
 /// crdt.put_vote(&vote, "subject").await?;
 ///
 /// // Get state to send to remote node
 /// let state = crdt.get_state("subject").await?;
 ///
 /// // On receiving remote state, merge it
 /// crdt.merge("subject", &remote_state).await?;
 /// ```
 pub struct CrdtVoteStore<S: KVStore> {
    // Vote store used for all regular vote operations and reads.
    inner: GenericVoteStore<Arc<S>>,
    // Handle to the same underlying KV store, used for prefix scans and for
    // writing merged counters directly.
    store: Arc<S>,
    // Identifier of this node, stamped on every state it produces.
    node_id: [u8; 16],
 }
 impl<S: KVStore + 'static> CrdtVoteStore<S> {
    /// Creates a new CRDT vote store with the given node ID.
    ///
    /// # Arguments
    ///
    /// * `store` - The underlying KVStore (wrapped in Arc for sharing)
    /// * `node_id` - Unique identifier for this node (for provenance)
    pub fn new(store: Arc<S>, node_id: [u8; 16]) -> Self {
        Self { inner: GenericVoteStore::new(store.clone()), store, node_id }
    }
    /// Returns a reference to the underlying VoteStore.
    ///
    /// This allows using all standard VoteStore operations.
    pub fn inner(&self) -> &GenericVoteStore<Arc<S>> {
        &self.inner
    }
    /// Gets the vote count state for a specific assertion.
    #[instrument(skip(self))]
    pub async fn get_assertion_state(
        &self,
        assertion_hash: &Hash,
        subject: &str,
    ) -> Result<VoteCountState> {
        let count = self.inner.get_vote_count(assertion_hash, subject).await?;
        let weight = self.inner.get_aggregate_weight(assertion_hash, subject).await?;
        Ok(VoteCountState::new(*assertion_hash, count, weight, self.node_id))
    }
    /// Merges a single assertion's vote state from a remote node.
    ///
    /// Uses G-Counter semantics: takes the maximum of local and remote values.
    #[instrument(skip(self, remote), fields(
        assertion_hash = %hex::encode(remote.assertion_hash),
        remote_count = remote.count,
        remote_weight = remote.weight
    ))]
    pub async fn merge_assertion_state(
        &self,
        subject: &str,
        remote: &VoteCountState,
    ) -> Result<()> {
        let assertion_hex = hex::encode(remote.assertion_hash);
        // Get current local state
        let local_count = self.inner.get_vote_count(&remote.assertion_hash, subject).await?;
        let local_weight = self.inner.get_aggregate_weight(&remote.assertion_hash, subject).await?;
        // Apply G-Counter merge: take max
        if remote.count > local_count {
            let count_key = key_codec::vote_count_key(subject, &assertion_hex);
            // Set to the higher value
            // Note: This is safe because counts only grow in G-Counters
            self.store.put(&count_key, &remote.count.to_le_bytes()).await?;
            debug!(
                old_count = local_count,
                new_count = remote.count,
                "Merged vote count (remote was higher)"
            );
        }
        if remote.weight > local_weight {
            let weight_key = key_codec::vote_weight_key(subject, &assertion_hex);
            // Set to the higher value
            self.store.put(&weight_key, &remote.weight.to_le_bytes()).await?;
            debug!(
                old_weight = local_weight,
                new_weight = remote.weight,
                "Merged aggregate weight (remote was higher)"
            );
        }
        Ok(())
    }
 }
 /// Aggregate state for all votes under a subject.
 ///
 /// Bundles one `VoteCountState` per assertion under the subject.
 /// Used for bulk state transfer during initial sync or catch-up.
 #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
 #[archive(check_bytes)]
 pub struct SubjectVoteState {
    /// The subject this state covers.
    pub subject: String,
    /// Vote states for each assertion under this subject.
    pub assertions: Vec<VoteCountState>,
    /// ID of the node that produced this state.
    pub source_node: [u8; 16],
 }
 #[async_trait]
 impl<S: KVStore + 'static> CrdtMerge for CrdtVoteStore<S> {
    type State = SubjectVoteState;
    #[instrument(skip(self))]
    async fn get_state(&self, subject: &str) -> Result<Self::State> {
        // Scan all vote count keys for this subject
        let prefix = key_codec::vote_count_prefix(subject);
        let entries = self.store.scan_prefix(&prefix).await?;
        let mut assertions = Vec::with_capacity(entries.len());
        for (key, count_bytes) in entries {
            // Extract assertion hash from key
            // Key format: {subject}\x00VC:{assertion_hex}
            let key_str = String::from_utf8_lossy(&key);
            if let Some(assertion_hex) = key_str.split(':').next_back() {
                if let Ok(assertion_hash) = hex::decode(assertion_hex) {
                    if assertion_hash.len() == 32 {
                        let hash: Hash = assertion_hash.try_into().map_err(|_| {
                            crate::error::StorageError::Serialization(
                                "Invalid assertion hash".to_string(),
                            )
                        })?;
                        let count = if count_bytes.len() == 8 {
                            u64::from_le_bytes(count_bytes.try_into().map_err(|_| {
                                crate::error::StorageError::Serialization(
                                    "Invalid count bytes".to_string(),
                                )
                            })?)
                        } else {
                            0
                        };
                        // Weight may fail to fetch if store is corrupted; log and use 0.0
                        let weight = match self.inner.get_aggregate_weight(&hash, subject).await {
                            Ok(w) => w,
                            Err(e) => {
                                warn!(
                                    error = %e,
                                    hash = %hex::encode(hash),
                                    "Failed to get aggregate weight, using 0.0"
                                );
                                0.0
                            }
                        };
                        assertions.push(VoteCountState::new(hash, count, weight, self.node_id));
                    }
                }
            }
        }
        Ok(SubjectVoteState { subject: subject.to_string(), assertions, source_node: self.node_id })
    }
    #[instrument(skip(self, remote), fields(subject = %remote.subject, assertion_count = remote.assertions.len()))]
    async fn merge(&self, subject: &str, remote: &Self::State) -> Result<()> {
        if remote.subject != subject {
            warn!(
                expected = subject,
                actual = %remote.subject,
                "Subject mismatch in merge"
            );
            return Err(crate::error::StorageError::InputValidation(
                "Subject mismatch in merge".to_string(),
            ));
        }
        for assertion_state in &remote.assertions {
            self.merge_assertion_state(subject, assertion_state).await?;
        }
        debug!(merged_count = remote.assertions.len(), "Merged vote state from remote node");
        Ok(())
    }
    fn node_id(&self) -> [u8; 16] {
        self.node_id
    }
 }
 // Delegate VoteStore trait to inner: every method forwards its arguments
 // unchanged to the wrapped `GenericVoteStore` and returns its result as-is.
 #[async_trait]
 impl<S: KVStore + 'static> VoteStore for CrdtVoteStore<S> {
    /// Forwards to `inner.put_vote`.
    async fn put_vote(&self, vote: &stemedb_core::types::Vote, subject: &str) -> Result<Hash> {
        self.inner.put_vote(vote, subject).await
    }
    /// Forwards to `inner.get_vote`.
    async fn get_vote(
        &self,
        assertion_hash: &Hash,
        vote_hash: &Hash,
        subject: &str,
    ) -> Result<Option<stemedb_core::types::Vote>> {
        self.inner.get_vote(assertion_hash, vote_hash, subject).await
    }
    /// Forwards to `inner.get_votes_for_assertion`.
    async fn get_votes_for_assertion(
        &self,
        assertion_hash: &Hash,
        subject: &str,
    ) -> Result<Vec<stemedb_core::types::Vote>> {
        self.inner.get_votes_for_assertion(assertion_hash, subject).await
    }
    /// Forwards to `inner.get_vote_count`.
    async fn get_vote_count(&self, assertion_hash: &Hash, subject: &str) -> Result<u64> {
        self.inner.get_vote_count(assertion_hash, subject).await
    }
    /// Forwards to `inner.get_aggregate_weight`.
    async fn get_aggregate_weight(&self, assertion_hash: &Hash, subject: &str) -> Result<f32> {
        self.inner.get_aggregate_weight(assertion_hash, subject).await
    }
    /// Forwards to `inner.has_votes`.
    async fn has_votes(&self, assertion_hash: &Hash, subject: &str) -> Result<bool> {
        self.inner.has_votes(assertion_hash, subject).await
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::HybridStore;
    use stemedb_core::types::Vote;
    use tempfile::{tempdir, TempDir};
    /// Opens a `HybridStore` inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned together with the store because the
    /// directory is deleted when the guard is dropped. Callers bind the guard
    /// so the directory outlives the store.
    async fn create_test_store() -> (TempDir, Arc<HybridStore>) {
        let dir = tempdir().expect("Failed to create temp dir");
        let store = Arc::new(HybridStore::open(dir.path()).expect("Failed to open store"));
        (dir, store)
    }
    #[tokio::test]
    async fn test_crdt_vote_store_basic() {
        let (_dir, store) = create_test_store().await;
        let node_id = [1u8; 16];
        let crdt = CrdtVoteStore::new(store, node_id);
        let vote = Vote {
            assertion_hash: [1u8; 32],
            agent_id: [2u8; 32],
            weight: 0.8,
            signature: [0u8; 64],
            timestamp: 12345,
            source_url: None,
            observed_context: None,
        };
        // Put a vote
        crdt.put_vote(&vote, "test_subject").await.expect("put_vote");
        // Check count
        let count = crdt.get_vote_count(&[1u8; 32], "test_subject").await.expect("get_count");
        assert_eq!(count, 1);
        // Check weight
        let weight =
            crdt.get_aggregate_weight(&[1u8; 32], "test_subject").await.expect("get_weight");
        assert!((weight - 0.8).abs() < 0.001);
    }
    #[tokio::test]
    async fn test_vote_count_state_merge() {
        let hash = [1u8; 32];
        let node1 = [1u8; 16];
        let node2 = [2u8; 16];
        let state1 = VoteCountState::new(hash, 10, 5.0, node1);
        let state2 = VoteCountState::new(hash, 15, 3.0, node2);
        // Merge: take max of each field
        let merged = state1.merge(&state2);
        assert_eq!(merged.count, 15); // max(10, 15)
        assert_eq!(merged.weight, 5.0); // max(5.0, 3.0)
    }
    #[tokio::test]
    async fn test_crdt_merge_higher_remote() {
        let (_dir1, store1) = create_test_store().await;
        let (_dir2, store2) = create_test_store().await;
        let node1 = [1u8; 16];
        let node2 = [2u8; 16];
        let crdt1 = CrdtVoteStore::new(store1, node1);
        let crdt2 = CrdtVoteStore::new(store2, node2);
        // Add votes to node1
        let vote1 = Vote {
            assertion_hash: [1u8; 32],
            agent_id: [2u8; 32],
            weight: 0.5,
            signature: [0u8; 64],
            timestamp: 12345,
            source_url: None,
            observed_context: None,
        };
        crdt1.put_vote(&vote1, "subject").await.expect("put");
        // Add more votes to node2
        for i in 0..5 {
            let vote = Vote {
                assertion_hash: [1u8; 32],
                agent_id: [(i + 10) as u8; 32],
                weight: 0.3,
                signature: [0u8; 64],
                timestamp: 12345 + i as u64,
                source_url: None,
                observed_context: None,
            };
            crdt2.put_vote(&vote, "subject").await.expect("put");
        }
        // Get state from node2
        let state2 = crdt2.get_state("subject").await.expect("get_state");
        assert_eq!(state2.assertions.len(), 1);
        assert_eq!(state2.assertions[0].count, 5);
        // Merge into node1
        crdt1.merge("subject", &state2).await.expect("merge");
        // Node1 should now have higher count
        let count = crdt1.get_vote_count(&[1u8; 32], "subject").await.expect("get");
        assert_eq!(count, 5); // Merged from node2
    }
    #[tokio::test]
    async fn test_crdt_merge_idempotent() {
        let (_dir, store) = create_test_store().await;
        let node_id = [1u8; 16];
        let crdt = CrdtVoteStore::new(store, node_id);
        // Create a state to merge
        let remote_state = SubjectVoteState {
            subject: "test".to_string(),
            assertions: vec![VoteCountState::new([1u8; 32], 10, 5.0, [2u8; 16])],
            source_node: [2u8; 16],
        };
        // Merge once
        crdt.merge("test", &remote_state).await.expect("merge1");
        let count1 = crdt.get_vote_count(&[1u8; 32], "test").await.expect("get");
        // The first merge must actually apply the remote count
        assert_eq!(count1, 10);
        // Merge again (should be idempotent)
        crdt.merge("test", &remote_state).await.expect("merge2");
        let count2 = crdt.get_vote_count(&[1u8; 32], "test").await.expect("get");
        assert_eq!(count1, count2);
    }
 }
--- a/crates/stemedb-storage/src/crdt/vote_store_props.rs
+++ b/crates/stemedb-storage/src/crdt/vote_store_props.rs
@ -0,0 +1,77 @@
 //! Property-based tests for CRDT vote store.
 use super::vote_store::VoteCountState;
 use proptest::prelude::*;
 // Property: merging two VoteCountStates yields the same count and weight in either order.
 proptest! {
    #[test]
    fn merge_commutative(
        count_a in 0u64..1000,
        count_b in 0u64..1000,
        weight_a in 0.0f32..100.0,
        weight_b in 0.0f32..100.0,
    ) {
        // One assertion hash, reported by two distinct nodes.
        let assertion = [1u8; 32];
        let lhs = VoteCountState::new(assertion, count_a, weight_a, [1u8; 16]);
        let rhs = VoteCountState::new(assertion, count_b, weight_b, [2u8; 16]);
        let (merged_ab, merged_ba) = (lhs.merge(&rhs), rhs.merge(&lhs));
        // Counts must agree exactly; weights are floats, so allow a small tolerance.
        prop_assert_eq!(merged_ab.count, merged_ba.count);
        prop_assert!((merged_ab.weight - merged_ba.weight).abs() < 0.0001);
    }
 }
 // Property: the grouping of three VoteCountState merges does not affect the result.
 proptest! {
    #[test]
    fn merge_associative(
        count_a in 0u64..1000,
        count_b in 0u64..1000,
        count_c in 0u64..1000,
        weight_a in 0.0f32..100.0,
        weight_b in 0.0f32..100.0,
        weight_c in 0.0f32..100.0,
    ) {
        // One assertion hash, reported by three distinct nodes.
        let assertion = [1u8; 32];
        let first = VoteCountState::new(assertion, count_a, weight_a, [1u8; 16]);
        let second = VoteCountState::new(assertion, count_b, weight_b, [2u8; 16]);
        let third = VoteCountState::new(assertion, count_c, weight_c, [3u8; 16]);
        // Left-nested grouping: (A merge B) merge C
        let merged_ab_c = first.merge(&second).merge(&third);
        // Right-nested grouping: A merge (B merge C)
        let inner_bc = second.merge(&third);
        let merged_a_bc = first.merge(&inner_bc);
        // Counts must agree exactly; weights are floats, so allow a small tolerance.
        prop_assert_eq!(merged_ab_c.count, merged_a_bc.count);
        prop_assert!((merged_ab_c.weight - merged_a_bc.weight).abs() < 0.0001);
    }
 }
 // Property: merging a VoteCountState with itself leaves count and weight unchanged.
 proptest! {
    #[test]
    fn merge_idempotent(
        count in 0u64..1000,
        weight in 0.0f32..100.0,
    ) {
        // A single state, merged against a borrow of itself.
        let state = VoteCountState::new([1u8; 32], count, weight, [1u8; 16]);
        let merged = state.merge(&state);
        // Count must be exactly preserved; weight is compared within a tolerance.
        prop_assert_eq!(state.count, merged.count);
        prop_assert!((state.weight - merged.weight).abs() < 0.0001);
    }
 }
--- a/crates/stemedb-storage/src/key_codec/mod.rs
+++ b/crates/stemedb-storage/src/key_codec/mod.rs
@ -103,6 +103,16 @@ pub fn vote_weight_key(subject: &str, assertion_hex: &str) -> Vec<u8> {
    subject_key(subject, b"VW:", assertion_hex.as_bytes())
 }
 /// Vote count scan prefix: `{subject}\x00VC:` - for scanning all vote counts under a subject.
 ///
 /// Built by calling `subject_key` with the `VC:` tag and an empty suffix, so the
 /// result carries no assertion-specific part.
 ///
 /// NOTE(review): the exact byte layout (including the `\x00` separator shown
 /// above) is owned by `subject_key`; confirm against that helper.
 pub fn vote_count_prefix(subject: &str) -> Vec<u8> {
    subject_key(subject, b"VC:", b"")
 }
 /// Assertion scan prefix: `{subject}\x00H:` - for scanning all assertions under a subject.
 ///
 /// Built by calling `subject_key` with the `H:` tag and an empty suffix, so the
 /// result carries no assertion-specific part.
 ///
 /// NOTE(review): the exact byte layout (including the `\x00` separator shown
 /// above) is owned by `subject_key`; confirm against that helper.
 pub fn assertion_prefix(subject: &str) -> Vec<u8> {
    subject_key(subject, b"H:", b"")
 }
 /// Gold standard key: `{subject}\x00GS:{predicate}`
 pub fn gold_standard_key(subject: &str, predicate: &str) -> Vec<u8> {
    subject_key(subject, b"GS:", predicate.as_bytes())
--- a/crates/stemedb-storage/src/lib.rs
+++ b/crates/stemedb-storage/src/lib.rs
@ -141,6 +141,8 @@
 //! }
 //! ```
 /// CRDT (Conflict-free Replicated Data Type) implementations for distributed StemeDB.
 pub mod crdt;
 /// Central key encoding/decoding for subject-prefix range sharding.
 pub mod key_codec;
@ -208,3 +210,9 @@ pub use visual_index::{
    PersistentVisualIndexConfig, VisualIndex,
 };
 pub use vote_store::{GenericVoteStore, VoteStore};
 // CRDT exports
 pub use crdt::{
    AssertionSetState, AssertionTransfer, CrdtAssertionStore, CrdtMerge, CrdtVoteStore,
    VoteCountState,
 };
--- a/crates/stemedb-storage/src/supersession_store.rs
+++ b/crates/stemedb-storage/src/supersession_store.rs
@ -203,8 +203,10 @@ impl<S: KVStore + Send + Sync> SupersessionStore for GenericSupersessionStore<S>
            }
        }
-        // Sort by timestamp descending (most recent first)
+        // Sort by temporal ordering descending (most recent first)
-        supersessions.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
+        // Uses HLC comparison when available for causal ordering across
        // distributed nodes, falling back to Unix timestamp for legacy data
        supersessions.sort_by(|a, b| b.temporal_cmp(a));
        Ok(supersessions)
    }
@ -233,6 +235,7 @@ mod tests {
            reason: "Test invalidation".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None,
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };
@ -262,6 +265,7 @@ mod tests {
            reason: "Outdated".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None,
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };
@ -289,6 +293,7 @@ mod tests {
                reason: format!("Supersession {}", i),
                new_hash: None,
                timestamp: 1704067200 + (i as u64 * 100),
                hlc_timestamp: None,
                agent_id,
                signature: [0u8; 64],
            };
--- a/crates/stemedb-sync/Cargo.toml
+++ b/crates/stemedb-sync/Cargo.toml
@ -0,0 +1,42 @@
 [package]
 name = "stemedb-sync"
 version = "0.1.0"
 edition = "2021"
 description = "Replication and sync for StemeDB two-node clusters"
 # Inherit workspace lints
 [lints]
 workspace = true
 [dependencies]
 # Core types
 stemedb-core = { path = "../stemedb-core" }
 stemedb-storage = { path = "../stemedb-storage" }
 stemedb-merkle = { path = "../stemedb-merkle" }
 stemedb-rpc = { path = "../stemedb-rpc" }
 stemedb-ingest = { path = "../stemedb-ingest" }
 # Async runtime
 tokio = { version = "1", features = ["full"] }
 # Error handling
 thiserror = "1.0"
 # Logging
 tracing = "0.1"
 # Metrics
 metrics = "0.23"
 # HLC timestamps
 uhlc = "0.7"
 # Async traits
 async-trait = "0.1"
 # Utilities
 hex = "0.4"
 blake3 = "1.5"
 [dev-dependencies]
 tempfile = "3.10"
--- a/crates/stemedb-sync/src/anti_entropy.rs
+++ b/crates/stemedb-sync/src/anti_entropy.rs
@ -0,0 +1,301 @@
 //! Anti-entropy synchronization worker.
 //!
 //! Periodically compares Merkle roots with peers and fetches missing assertions.
 //! This provides eventual consistency even when gossip messages are lost.
 //!
 //! # Algorithm
 //!
 //! 1. Exchange Merkle roots with peer (O(1) comparison)
 //! 2. If roots match → trees are identical, done
 //! 3. If roots differ → compute diff to find missing hashes
 //! 4. Fetch missing assertions by hash
 //! 5. Merge via CrdtAssertionStore
 //! 6. Update local Merkle tree
 use crate::error::Result;
 use crate::merkle_manager::MerkleTreeManager;
 use crate::SyncConfig;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
 use std::sync::Arc;
 use std::time::Duration;
 use stemedb_rpc::proto::{FetchRequest, RootExchangeRequest};
 use stemedb_rpc::SyncClient;
 use stemedb_storage::crdt::{AssertionTransfer, CrdtAssertionStore};
 use stemedb_storage::KVStore;
 use tokio::time::interval;
 use tracing::{debug, error, info, instrument, warn};
 /// Result of a sync operation.
 ///
 /// Returned by `AntiEntropyWorker::sync_once` and inspected by the worker loop
 /// in `AntiEntropyWorker::run` to update its counters. Derives `PartialEq`/`Eq`
 /// so results can be compared directly.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum SyncResult {
    /// Trees are already in sync.
    InSync,
    /// Synced some assertions.
    Synced {
        /// Number of assertions fetched and merged.
        count: usize,
    },
    /// Sync failed.
    ///
    /// The worker loop counts this variant as a sync failure. Nothing in this
    /// module constructs it outside of tests; `sync_once` reports failures
    /// through its `Err` return instead.
    Failed {
        /// Error message.
        error: String,
    },
 }
 /// Anti-entropy sync worker.
 ///
 /// Runs a background loop that periodically syncs with a peer.
 ///
 /// The worker talks to exactly one peer (the one `rpc_client` is connected to)
 /// and keeps simple atomic counters that can be read through the accessor methods.
 pub struct AntiEntropyWorker<S: KVStore + 'static> {
    // Local Merkle tree; supplies the root, leaf count and leaves, and receives new hashes.
    merkle_manager: Arc<MerkleTreeManager<S>>,
    // Stored but not read anywhere in this module yet (hence the lint allowance).
    #[allow(dead_code)] // Used in full implementation
    crdt_store: Arc<CrdtAssertionStore<Arc<S>>>,
    // RPC client used for root exchange and assertion fetches.
    rpc_client: Arc<SyncClient>,
    // Peer address copied from `rpc_client` at construction; used as a tracing field.
    peer_addr: String,
    // Period between sync cycles, taken from `SyncConfig::anti_entropy_interval`.
    interval: Duration,
    // Shutdown flag; can be replaced with a shared one via `with_shutdown`.
    shutdown: Arc<AtomicBool>,
    // Metrics
    // Number of `sync_once` calls that returned `Ok`.
    sync_cycles: AtomicU64,
    // Number of cycles that returned `Err` or `SyncResult::Failed`.
    sync_failures: AtomicU64,
    // Running total of the `count` values reported by `SyncResult::Synced`.
    assertions_synced: AtomicU64,
 }
 impl<S: KVStore + 'static> AntiEntropyWorker<S> {
    /// Build a worker that syncs with the peer behind `rpc_client`.
    ///
    /// The worker starts with its own, unset shutdown flag and all counters at zero.
    ///
    /// # Arguments
    ///
    /// * `merkle_manager` - Manager for the local Merkle tree
    /// * `crdt_store` - CRDT store for merging assertions
    /// * `rpc_client` - Client for communicating with the peer
    /// * `config` - Sync configuration; only `anti_entropy_interval` is read here
    pub fn new(
        merkle_manager: Arc<MerkleTreeManager<S>>,
        crdt_store: Arc<CrdtAssertionStore<Arc<S>>>,
        rpc_client: Arc<SyncClient>,
        config: &SyncConfig,
    ) -> Self {
        // Capture the address first, while `rpc_client` can still be borrowed.
        let peer_addr = rpc_client.peer_addr().to_string();
        let zeroed = || AtomicU64::new(0);
        Self {
            merkle_manager,
            crdt_store,
            rpc_client,
            peer_addr,
            interval: config.anti_entropy_interval,
            shutdown: Arc::new(AtomicBool::new(false)),
            sync_cycles: zeroed(),
            sync_failures: zeroed(),
            assertions_synced: zeroed(),
        }
    }
    /// Create with a shared shutdown signal.
    pub fn with_shutdown(mut self, shutdown: Arc<AtomicBool>) -> Self {
        self.shutdown = shutdown;
        self
    }
    /// Check if shutdown has been requested.
    ///
    /// Reads the shutdown flag with relaxed ordering. The flag may be shared
    /// with other holders if it was installed through `with_shutdown`.
    pub fn is_shutdown(&self) -> bool {
        self.shutdown.load(Ordering::Relaxed)
    }
    /// Request shutdown.
    ///
    /// Only sets the flag; the `run` loop observes it after its next tick, so
    /// the worker may take up to one interval to stop.
    pub fn shutdown(&self) {
        self.shutdown.store(true, Ordering::Relaxed);
    }
    /// Get the number of sync cycles completed.
    ///
    /// Counts cycles in which `sync_once` returned `Ok`, whatever the
    /// `SyncResult` variant was.
    pub fn sync_cycles(&self) -> u64 {
        self.sync_cycles.load(Ordering::Relaxed)
    }
    /// Get the number of sync failures.
    ///
    /// Incremented by the `run` loop when a cycle returns `Err` or
    /// `SyncResult::Failed`.
    pub fn sync_failures(&self) -> u64 {
        self.sync_failures.load(Ordering::Relaxed)
    }
    /// Get the total number of assertions synced.
    ///
    /// Sum of the `count` values from every `SyncResult::Synced` seen by the
    /// `run` loop.
    pub fn assertions_synced(&self) -> u64 {
        self.assertions_synced.load(Ordering::Relaxed)
    }
    /// Run the anti-entropy loop.
    ///
    /// This runs forever (or until shutdown) and syncs periodically.
    #[instrument(skip(self), fields(peer = %self.peer_addr))]
    pub async fn run(&self) {
        info!(interval_secs = self.interval.as_secs(), "Starting anti-entropy worker");
        let mut ticker = interval(self.interval);
        loop {
            ticker.tick().await;
            if self.is_shutdown() {
                info!("Anti-entropy worker shutting down");
                break;
            }
            match self.sync_once().await {
                Ok(result) => {
                    self.sync_cycles.fetch_add(1, Ordering::Relaxed);
                    match result {
                        SyncResult::InSync => {
                            debug!("Anti-entropy: already in sync");
                        }
                        SyncResult::Synced { count } => {
                            self.assertions_synced.fetch_add(count as u64, Ordering::Relaxed);
                            info!(count, "Anti-entropy: synced assertions");
                        }
                        SyncResult::Failed { error } => {
                            self.sync_failures.fetch_add(1, Ordering::Relaxed);
                            warn!(error, "Anti-entropy sync failed");
                        }
                    }
                }
                Err(e) => {
                    self.sync_failures.fetch_add(1, Ordering::Relaxed);
                    error!(error = %e, "Anti-entropy error");
                }
            }
        }
    }
    /// Perform a single sync cycle.
    ///
    /// This is the core sync algorithm:
    /// 1. Exchange Merkle roots
    /// 2. If roots match, done
    /// 3. If roots differ, compute diff and fetch missing
    /// 4. Verify each fetched payload against its BLAKE3 hash and insert the
    ///    verified hashes into the local Merkle tree
    ///
    /// # Returns
    ///
    /// * `SyncResult::InSync` - the roots match, no missing hashes were
    ///   identified, or the peer returned no assertions.
    /// * `SyncResult::Synced { count }` - `count` is the number of assertions
    ///   that passed verification and were inserted into the Merkle tree.
    ///   Entries rejected for a bad hash length or a hash mismatch are not counted.
    ///
    /// # Errors
    ///
    /// Propagates errors from reading the local root, from the two RPC calls,
    /// and from inserting into the Merkle tree.
    ///
    /// # Limitations
    ///
    /// `compute_missing_hashes` is currently a stub that returns no hashes, so
    /// a root mismatch ends at the "no missing assertions" early return. Fetched
    /// payloads are only verified and recorded in the Merkle tree here; they are
    /// not written through `crdt_store` yet.
    #[instrument(skip(self), fields(peer = %self.peer_addr))]
    pub async fn sync_once(&self) -> Result<SyncResult> {
        // Step 1: Get local Merkle state
        let local_root = self.merkle_manager.root().await?;
        let local_count = self.merkle_manager.len().await;
        // Step 2: Exchange roots with peer (an absent local root is sent as empty bytes)
        let exchange_response = self
            .rpc_client
            .exchange_roots(RootExchangeRequest {
                merkle_root: local_root.map(|r| r.to_vec()).unwrap_or_default(),
                assertion_count: local_count as u64,
            })
            .await?;
        // Step 3: Check if in sync
        if exchange_response.roots_match {
            debug!("Merkle roots match, trees are identical");
            return Ok(SyncResult::InSync);
        }
        debug!(
            local_count,
            remote_count = exchange_response.assertion_count,
            "Merkle roots differ, computing diff"
        );
        // Step 4: Work out which hashes to request from the peer.
        // A proper Merkle diff protocol is not implemented yet; see
        // `compute_missing_hashes`.
        let local_leaves = self.merkle_manager.leaves().await;
        let missing_hashes = self.compute_missing_hashes(&local_leaves).await?;
        if missing_hashes.is_empty() {
            debug!("No missing assertions found");
            return Ok(SyncResult::InSync);
        }
        debug!(missing_count = missing_hashes.len(), "Fetching missing assertions");
        // Step 5: Fetch missing assertions
        let fetch_response = self
            .rpc_client
            .fetch_assertions(FetchRequest {
                hashes: missing_hashes.iter().map(|h| h.to_vec()).collect(),
            })
            .await?;
        if fetch_response.assertions.is_empty() {
            debug!("Peer returned no assertions");
            return Ok(SyncResult::InSync);
        }
        // Step 6: Validate fetched assertions and record them locally
        let received = fetch_response.assertions.len();
        let transfers: Vec<AssertionTransfer> = fetch_response
            .assertions
            .into_iter()
            .filter_map(|a| {
                if a.hash.len() != 32 {
                    warn!(len = a.hash.len(), "Invalid hash length in fetch response");
                    return None;
                }
                let mut hash = [0u8; 32];
                hash.copy_from_slice(&a.hash);
                Some(AssertionTransfer { hash, data: a.data })
            })
            .collect();
        // Count only entries that pass verification and are inserted, so the
        // reported total never includes rejected payloads.
        let mut merged_count = 0usize;
        for transfer in &transfers {
            // Verify hash matches data
            let computed = blake3::hash(&transfer.data);
            if computed.as_bytes() != &transfer.hash {
                warn!(
                    expected = %hex::encode(&transfer.hash[..8]),
                    computed = %hex::encode(&computed.as_bytes()[..8]),
                    "Hash mismatch, skipping"
                );
                continue;
            }
            // Update Merkle tree
            self.merkle_manager.insert(transfer.hash).await?;
            merged_count += 1;
        }
        let skipped = received - merged_count;
        info!(count = merged_count, skipped, "Merged assertions from peer");
        Ok(SyncResult::Synced { count: merged_count })
    }
    /// Compute hashes we're missing compared to the peer.
    ///
    /// For a minimal implementation, we just return an empty vec.
    /// A full implementation would use a proper Merkle diff protocol.
    ///
    /// `_local_leaves` is accepted but not read. Because the result is always
    /// empty, `sync_once` never reaches its fetch step when roots differ.
    async fn compute_missing_hashes(&self, _local_leaves: &[[u8; 32]]) -> Result<Vec<[u8; 32]>> {
        // In a full implementation, we would:
        // 1. Exchange tree structures with peer
        // 2. Use DiffResult::diff() to compute missing hashes
        //
        // For the MVP, we rely on the peer sending us what we need
        // based on the root exchange.
        Ok(Vec::new())
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Each `SyncResult` variant compares equal to an identically built value.
    #[test]
    fn test_sync_result_variants() {
        let samples = [
            (SyncResult::InSync, SyncResult::InSync),
            (SyncResult::Synced { count: 5 }, SyncResult::Synced { count: 5 }),
            (
                SyncResult::Failed { error: "test".into() },
                SyncResult::Failed { error: "test".into() },
            ),
        ];
        for (actual, expected) in samples {
            assert_eq!(actual, expected);
        }
    }
 }
--- a/crates/stemedb-sync/src/config.rs
+++ b/crates/stemedb-sync/src/config.rs
@ -0,0 +1,129 @@
 //! Configuration for the sync layer.
 use std::time::Duration;
 /// Configuration for sync operations.
 ///
 /// Build one with [`SyncConfig::new`] (or [`Default::default`]) and refine it
 /// with the `with_*` builder methods. Every field is also public, so values
 /// can be read or set directly.
 #[derive(Debug, Clone)]
 pub struct SyncConfig {
    /// List of peer addresses to sync with (e.g., "http://peer:9090").
    pub peers: Vec<String>,
    /// Enable gossip broadcast to peers.
    pub gossip_enabled: bool,
    /// Timeout for gossip operations.
    pub gossip_timeout: Duration,
    /// Interval between anti-entropy sync cycles.
    pub anti_entropy_interval: Duration,
    /// Interval between Merkle tree checkpoints.
    pub checkpoint_interval: Duration,
    /// Maximum concurrent connections per peer.
    pub max_connections_per_peer: usize,
    /// Maximum hashes to fetch in a single request.
    pub max_fetch_batch_size: usize,
    /// Fanout for gossip (number of peers to send to).
    pub gossip_fanout: usize,
 }
 impl Default for SyncConfig {
    /// Defaults: no peers, gossip enabled with a 5 s timeout and fanout 3,
    /// anti-entropy every 60 s, checkpoints every 5 minutes, 4 connections per
    /// peer, and fetch batches of up to 1000 hashes.
    fn default() -> Self {
        Self {
            peers: Vec::new(),
            gossip_enabled: true,
            gossip_timeout: Duration::from_secs(5),
            anti_entropy_interval: Duration::from_secs(60),
            checkpoint_interval: Duration::from_secs(300), // 5 minutes
            max_connections_per_peer: 4,
            max_fetch_batch_size: 1000,
            gossip_fanout: 3,
        }
    }
 }
 impl SyncConfig {
    /// Create a new default configuration.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }
    /// Add a peer address.
    ///
    /// Appends to the existing list; use [`SyncConfig::with_peers`] to replace it.
    #[must_use]
    pub fn with_peer(mut self, addr: impl Into<String>) -> Self {
        self.peers.push(addr.into());
        self
    }
    /// Set multiple peer addresses.
    ///
    /// Replaces any peers added so far.
    #[must_use]
    pub fn with_peers(mut self, addrs: Vec<String>) -> Self {
        self.peers = addrs;
        self
    }
    /// Enable or disable gossip.
    #[must_use]
    pub fn with_gossip_enabled(mut self, enabled: bool) -> Self {
        self.gossip_enabled = enabled;
        self
    }
    /// Set the gossip timeout.
    #[must_use]
    pub fn with_gossip_timeout(mut self, timeout: Duration) -> Self {
        self.gossip_timeout = timeout;
        self
    }
    /// Set the anti-entropy interval.
    #[must_use]
    pub fn with_anti_entropy_interval(mut self, interval: Duration) -> Self {
        self.anti_entropy_interval = interval;
        self
    }
    /// Set the checkpoint interval.
    #[must_use]
    pub fn with_checkpoint_interval(mut self, interval: Duration) -> Self {
        self.checkpoint_interval = interval;
        self
    }
    /// Set the maximum number of concurrent connections per peer.
    #[must_use]
    pub fn with_max_connections_per_peer(mut self, max_connections: usize) -> Self {
        self.max_connections_per_peer = max_connections;
        self
    }
    /// Set the maximum number of hashes fetched in a single request.
    #[must_use]
    pub fn with_max_fetch_batch_size(mut self, batch_size: usize) -> Self {
        self.max_fetch_batch_size = batch_size;
        self
    }
    /// Set the gossip fanout.
    #[must_use]
    pub fn with_gossip_fanout(mut self, fanout: usize) -> Self {
        self.gossip_fanout = fanout;
        self
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// The default configuration has no peers, gossip enabled, a 5 s gossip
    /// timeout and a 60 s anti-entropy interval.
    #[test]
    fn test_default_config() {
        let config = SyncConfig::default();
        assert!(config.peers.is_empty());
        assert!(config.gossip_enabled);
        assert_eq!(config.gossip_timeout, Duration::from_secs(5));
        assert_eq!(config.anti_entropy_interval, Duration::from_secs(60));
    }
    /// Builder calls accumulate peers and override individual settings.
    #[test]
    fn test_builder() {
        let config = SyncConfig::new()
            .with_peer("http://peer1:9090")
            .with_peer("http://peer2:9090")
            .with_gossip_enabled(false)
            .with_gossip_fanout(2);
        // `with_peer` appends, so both addresses are present.
        assert_eq!(config.peers.len(), 2);
        assert!(!config.gossip_enabled);
        assert_eq!(config.gossip_fanout, 2);
    }
 }
--- a/crates/stemedb-sync/src/error.rs
+++ b/crates/stemedb-sync/src/error.rs
@ -0,0 +1,52 @@
 //! Error types for the sync layer.
 use thiserror::Error;
 /// Errors that can occur during sync operations.
 ///
 /// Only the `Rpc` variant keeps its source error (via `#[from]`); the other
 /// variants carry a message string.
 #[derive(Debug, Error)]
 pub enum SyncError {
    /// Storage operation failed.
    ///
    /// Produced from `stemedb_storage::error::StorageError` by the `From` impl below.
    #[error("Storage error: {0}")]
    Storage(String),
    /// RPC communication failed.
    #[error("RPC error: {0}")]
    Rpc(#[from] stemedb_rpc::RpcError),
    /// Merkle tree operation failed.
    ///
    /// Produced from `stemedb_merkle::TreeError` by the `From` impl below.
    #[error("Merkle error: {0}")]
    Merkle(String),
    /// Serialization/deserialization failed.
    ///
    /// Produced from `stemedb_merkle::SerializeError` by the `From` impl below.
    #[error("Serialization error: {0}")]
    Serialization(String),
    /// Configuration error.
    #[error("Configuration error: {0}")]
    Config(String),
    /// Internal consistency error.
    #[error("Internal error: {0}")]
    Internal(String),
 }
 impl From<stemedb_storage::error::StorageError> for SyncError {
    fn from(err: stemedb_storage::error::StorageError) -> Self {
        SyncError::Storage(err.to_string())
    }
 }
 impl From<stemedb_merkle::TreeError> for SyncError {
    /// Wrap a Merkle tree failure, keeping only its rendered message.
    fn from(source: stemedb_merkle::TreeError) -> Self {
        let message = source.to_string();
        Self::Merkle(message)
    }
 }
 impl From<stemedb_merkle::SerializeError> for SyncError {
    /// Wrap a Merkle (de)serialization failure, keeping only its rendered message.
    fn from(source: stemedb_merkle::SerializeError) -> Self {
        let message = source.to_string();
        Self::Serialization(message)
    }
 }
 /// Result type for sync operations.
 ///
 /// Shorthand for `std::result::Result` with [`SyncError`] as the error type.
 pub type Result<T> = std::result::Result<T, SyncError>;
--- a/crates/stemedb-sync/src/gossip.rs
+++ b/crates/stemedb-sync/src/gossip.rs
@ -0,0 +1,249 @@
 //! Gossip broadcast implementation.
 //!
 //! The gossip layer pushes new assertions to peers immediately after
 //! local ingestion, providing low-latency replication.
 //!
 //! # Design
 //!
 //! - **Fanout**: Each assertion is sent to N peers (configurable)
 //! - **Best-effort**: Failures are logged but don't block ingestion
 //! - **Idempotent**: Receivers handle duplicates gracefully
 //!
 //! # Example
 //!
 //! ```ignore
 //! let broadcaster = GossipBroadcaster::new(vec!["http://peer:9090".into()]).await?;
 //!
 //! // Called after each successful ingestion
 //! broadcaster.broadcast(&hash, &data, &hlc).await?;
 //! ```
 use crate::error::Result;
 use async_trait::async_trait;
 use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
 use std::sync::Arc;
 use stemedb_core::types::HlcTimestamp;
 use stemedb_rpc::proto::GossipRequest;
 use stemedb_rpc::SyncClient;
 use tracing::{debug, info, instrument, warn};
 // Re-export the trait and error from stemedb-ingest for convenience
 pub use stemedb_ingest::gossip::{GossipBroadcast, GossipError};
 /// Gossip broadcaster that sends assertions to peer nodes.
 ///
 /// Holds one client per peer that was reachable at construction time; peers
 /// that failed to connect then are not retried by this type.
 pub struct GossipBroadcaster {
    // Connected peer clients, in the order the addresses were supplied.
    clients: Vec<Arc<SyncClient>>,
    // Upper bound on how many clients each broadcast is sent to.
    fanout: usize,
    // Runtime on/off switch; starts enabled.
    enabled: AtomicBool,
    // Metrics
    // Number of gossip sends a peer accepted.
    messages_sent: AtomicU64,
    // Number of sends that were rejected, failed at the RPC layer, or whose task failed.
    send_failures: AtomicU64,
 }
 impl GossipBroadcaster {
    /// Create a new gossip broadcaster.
    ///
    /// Delegates to `with_fanout` with a fixed fanout of 3.
    ///
    /// # Arguments
    ///
    /// * `peer_addrs` - List of peer addresses to connect to
    ///
    /// # Returns
    ///
    /// A broadcaster connected to all reachable peers.
    pub async fn new(peer_addrs: Vec<String>) -> Result<Self> {
        Self::with_fanout(peer_addrs, 3).await
    }
    /// Build a gossip broadcaster with an explicit fanout.
    ///
    /// Tries to connect to every address in order. Unreachable peers are
    /// logged and left out rather than treated as an error, so this returns
    /// `Ok` even when no peer could be reached.
    ///
    /// # Arguments
    ///
    /// * `peer_addrs` - List of peer addresses
    /// * `fanout` - Number of peers to send each message to
    pub async fn with_fanout(peer_addrs: Vec<String>, fanout: usize) -> Result<Self> {
        let mut clients = Vec::with_capacity(peer_addrs.len());
        for addr in &peer_addrs {
            let client = match SyncClient::connect(addr).await {
                Ok(client) => client,
                Err(e) => {
                    // Log but don't fail - peer may come online later
                    warn!(peer = %addr, error = %e, "Failed to connect to peer");
                    continue;
                }
            };
            info!(peer = %addr, "Connected to peer for gossip");
            clients.push(Arc::new(client));
        }
        // Peers were configured, yet none of them accepted a connection.
        let none_reachable = !peer_addrs.is_empty() && clients.is_empty();
        if none_reachable {
            warn!("No peers reachable for gossip broadcast");
        }
        let broadcaster = Self {
            clients,
            fanout,
            enabled: AtomicBool::new(true),
            messages_sent: AtomicU64::new(0),
            send_failures: AtomicU64::new(0),
        };
        Ok(broadcaster)
    }
    /// Get the number of messages sent.
    ///
    /// Counts per-peer sends that the peer accepted, so one broadcast can add
    /// more than one.
    pub fn messages_sent(&self) -> u64 {
        self.messages_sent.load(Ordering::Relaxed)
    }
    /// Get the number of send failures.
    ///
    /// Counts per-peer sends that were rejected by the peer, failed at the RPC
    /// layer, or whose spawned task did not complete.
    pub fn send_failures(&self) -> u64 {
        self.send_failures.load(Ordering::Relaxed)
    }
    /// Get the number of connected clients.
    ///
    /// This is the number of peers that were reachable when the broadcaster
    /// was created.
    pub fn client_count(&self) -> usize {
        self.clients.len()
    }
 }
 #[async_trait]
 impl GossipBroadcast for GossipBroadcaster {
    #[instrument(skip(self, hash, data, hlc), fields(hash = %hex::encode(&hash[..8])))]
    async fn broadcast(
        &self,
        hash: &[u8; 32],
        data: &[u8],
        hlc: &HlcTimestamp,
    ) -> std::result::Result<(), GossipError> {
        if !self.enabled.load(Ordering::Relaxed) {
            debug!("Gossip disabled, skipping broadcast");
            return Ok(());
        }
        if self.clients.is_empty() {
            debug!("No peers connected, skipping gossip");
            return Ok(());
        }
        let request = GossipRequest {
            assertion_hash: hash.to_vec(),
            assertion_data: data.to_vec(),
            hlc_time: hlc.time_ntp64,
            hlc_counter: 0, // Counter is embedded in time_ntp64
            hlc_node_id: hlc.node_id.to_vec(),
        };
        // Select peers for fanout (round-robin or random in future)
        let targets: Vec<_> = self.clients.iter().take(self.fanout).collect();
        if targets.is_empty() {
            return Ok(());
        }
        debug!(peer_count = targets.len(), "Broadcasting to peers");
        // Send to all target peers concurrently
        let mut handles = Vec::with_capacity(targets.len());
        for client in targets {
            let client = client.clone();
            let req = request.clone();
            handles.push(tokio::spawn(async move { client.gossip(req).await }));
        }
        // Collect results
        let mut success_count = 0u32;
        let mut failure_count = 0u32;
        for handle in handles {
            match handle.await {
                Ok(Ok(response)) => {
                    if response.accepted {
                        success_count += 1;
                    } else {
                        warn!(error = %response.error, "Peer rejected gossip");
                        failure_count += 1;
                    }
                }
                Ok(Err(e)) => {
                    warn!(error = %e, "Gossip RPC failed");
                    failure_count += 1;
                }
                Err(e) => {
                    warn!(error = %e, "Gossip task panicked");
                    failure_count += 1;
                }
            }
        }
        // Update metrics
        self.messages_sent.fetch_add(u64::from(success_count), Ordering::Relaxed);
        self.send_failures.fetch_add(u64::from(failure_count), Ordering::Relaxed);
        // Best-effort: success if at least one peer accepted
        if success_count > 0 {
            debug!(success = success_count, failures = failure_count, "Gossip broadcast complete");
            Ok(())
        } else if failure_count > 0 {
            // All peers failed, but don't block the caller
            warn!(failures = failure_count, "All gossip targets failed");
            Ok(())
        } else {
            Ok(())
        }
    }
    // Reports the current state of the runtime on/off switch.
    fn is_enabled(&self) -> bool {
        self.enabled.load(Ordering::Relaxed)
    }
    // Turns broadcasting on and logs the change.
    fn enable(&self) {
        self.enabled.store(true, Ordering::Relaxed);
        info!("Gossip broadcast enabled");
    }
    // Turns broadcasting off and logs the change; `broadcast` then returns early.
    fn disable(&self) {
        self.enabled.store(false, Ordering::Relaxed);
        info!("Gossip broadcast disabled");
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use stemedb_ingest::NoOpGossipBroadcast;
    /// The no-op broadcaster accepts a broadcast and reports itself as disabled.
    #[tokio::test]
    async fn test_noop_broadcaster() {
        let broadcaster = NoOpGossipBroadcast;
        let hash = [1u8; 32];
        let data = vec![1, 2, 3];
        let hlc = HlcTimestamp::new(1000, [1u8; 16]);
        broadcaster.broadcast(&hash, &data, &hlc).await.expect("should succeed");
        assert!(!broadcaster.is_enabled());
    }
    /// A broadcaster built with no peer addresses has no clients, is enabled,
    /// and still returns `Ok` from `broadcast`.
    #[tokio::test]
    async fn test_broadcaster_no_peers() {
        let broadcaster = GossipBroadcaster::new(vec![]).await.expect("create");
        assert_eq!(broadcaster.client_count(), 0);
        assert!(broadcaster.is_enabled());
        let hash = [1u8; 32];
        let data = vec![1, 2, 3];
        let hlc = HlcTimestamp::new(1000, [1u8; 16]);
        // Should succeed even with no peers
        broadcaster.broadcast(&hash, &data, &hlc).await.expect("should succeed");
    }
    /// `disable` and `enable` toggle the state reported by `is_enabled`.
    #[tokio::test]
    async fn test_enable_disable() {
        let broadcaster = GossipBroadcaster::new(vec![]).await.expect("create");
        assert!(broadcaster.is_enabled());
        broadcaster.disable();
        assert!(!broadcaster.is_enabled());
        broadcaster.enable();
        assert!(broadcaster.is_enabled());
    }
 }
--- a/crates/stemedb-sync/src/lib.rs
+++ b/crates/stemedb-sync/src/lib.rs
@ -0,0 +1,51 @@
 //! Replication and sync for StemeDB two-node clusters.
 //!
 //! This crate implements the sync layer for StemeDB replication:
 //!
 //! - **Gossip**: Push new assertions to peers immediately after ingestion
 //! - **Anti-Entropy**: Periodic Merkle root exchange and diff-based sync
 //!
 //! # Architecture
 //!
 //! ```text
 //! [IngestWorker]
 //!      |
 //!      v
 //! [GossipBroadcaster] ---> [Peer Nodes]
 //!      |
 //!      v
 //! [MerkleTreeManager] <--> [AntiEntropyWorker]
 //! ```
 //!
 //! # Usage
 //!
 //! ```ignore
 //! use stemedb_sync::{SyncConfig, GossipBroadcaster, AntiEntropyWorker};
 //!
 //! // Configure sync
 //! let config = SyncConfig::new()
 //!     .with_peer("http://peer1:9090")
 //!     .with_peer("http://peer2:9090");
 //!
 //! // Create gossip broadcaster
 //! let broadcaster = GossipBroadcaster::new(config.peers.clone()).await?;
 //!
 //! // Start anti-entropy worker
 //! let worker = AntiEntropyWorker::new(merkle_manager, crdt_store, client, &config);
 //! tokio::spawn(async move { worker.run().await });
 //! ```
 #![forbid(unsafe_code)]
 #![warn(missing_docs)]
 pub mod anti_entropy;
 pub mod config;
 pub mod error;
 pub mod gossip;
 pub mod merkle_manager;
 pub use anti_entropy::{AntiEntropyWorker, SyncResult};
 pub use config::SyncConfig;
 pub use error::{Result, SyncError};
 pub use gossip::{GossipBroadcast, GossipBroadcaster};
 pub use merkle_manager::MerkleTreeManager;
--- a/crates/stemedb-sync/src/merkle_manager.rs
+++ b/crates/stemedb-sync/src/merkle_manager.rs
@ -0,0 +1,214 @@
 //! Merkle tree manager with persistence.
 //!
 //! Manages the Merkle tree for assertion hashes with periodic checkpointing
 //! to the KV store for crash recovery.
 //!
 //! # Persistence
 //!
 //! The tree is serialized and stored at key `\x00MERKLE_CHECKPOINT`.
 //! On startup, the manager attempts to load from this checkpoint.
 //! If it is missing or corrupt, the manager starts with an empty tree;
 //! the caller can repopulate it with `rebuild_from_hashes`.
 //!
 //! # Thread Safety
 //!
 //! All operations are protected by an RwLock, allowing concurrent reads
 //! but exclusive writes.
 use crate::error::{Result, SyncError};
 use std::sync::Arc;
 use stemedb_merkle::serialize::{deserialize_tree, serialize_tree};
 use stemedb_merkle::{Hash, MerkleTree};
 use stemedb_storage::KVStore;
 use tokio::sync::RwLock;
 use tracing::{debug, info, instrument, warn};
 /// KV-store key under which the serialized Merkle tree checkpoint is written
 /// and from which it is read back on load.
 ///
 /// The key starts with a NUL byte (`\x00`).
 // NOTE(review): the NUL prefix presumably keeps this key out of the regular
 // data key space — confirm against the storage key layout.
 const MERKLE_CHECKPOINT_KEY: &[u8] = b"\x00MERKLE_CHECKPOINT";
 /// Manages a Merkle tree with persistence.
 ///
 /// The tree lives in memory behind an async `RwLock`; the store is only
 /// touched when loading or writing a checkpoint.
 pub struct MerkleTreeManager<S> {
    /// In-memory tree; readers share the lock, writers take it exclusively.
    tree: RwLock<MerkleTree>,
    /// Backing KV store that holds the serialized checkpoint.
    store: Arc<S>,
 }
 impl<S: KVStore> MerkleTreeManager<S> {
    /// Load the Merkle tree from its checkpoint, falling back to an empty tree.
    ///
    /// A missing checkpoint, or one that fails to deserialize, is not treated
    /// as an error: the condition is logged and the manager starts with an
    /// empty tree. Use [`Self::rebuild_from_hashes`] to repopulate it.
    ///
    /// # Arguments
    ///
    /// * `store` - KV store used to read (and later write) the checkpoint
    ///
    /// # Returns
    ///
    /// A manager with the tree loaded from checkpoint if available.
    ///
    /// # Errors
    ///
    /// Returns an error if reading the checkpoint key from `store` fails.
    #[instrument(skip(store))]
    pub async fn load_or_create(store: Arc<S>) -> Result<Self> {
        let tree = match store.get(MERKLE_CHECKPOINT_KEY).await? {
            Some(data) => match deserialize_tree(&data) {
                Ok(tree) => {
                    info!(leaf_count = tree.len(), "Loaded Merkle tree from checkpoint");
                    tree
                }
                // A corrupt checkpoint is discarded rather than failing startup.
                Err(e) => {
                    warn!(error = %e, "Failed to deserialize Merkle checkpoint, starting fresh");
                    MerkleTree::new()
                }
            },
            None => {
                debug!("No Merkle checkpoint found, starting with empty tree");
                MerkleTree::new()
            }
        };
        Ok(Self { tree: RwLock::new(tree), store })
    }
    /// Insert a hash into the Merkle tree.
    ///
    /// This operation does NOT automatically checkpoint. Call `checkpoint()`
    /// periodically to persist the tree.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying tree insert.
    #[instrument(skip(self, hash), fields(hash = %hex::encode(&hash[..8])))]
    pub async fn insert(&self, hash: Hash) -> Result<()> {
        let mut tree = self.tree.write().await;
        tree.insert(hash)?;
        debug!(leaf_count = tree.len(), "Inserted hash into Merkle tree");
        Ok(())
    }
    /// Get the current Merkle root.
    ///
    /// Returns `None` if the tree is empty.
    ///
    /// # Errors
    ///
    /// Any tree error other than "empty tree" is reported as
    /// `SyncError::Merkle` carrying the error's message.
    pub async fn root(&self) -> Result<Option<Hash>> {
        let tree = self.tree.read().await;
        match tree.root() {
            Ok(root) => Ok(Some(root)),
            // An empty tree has no root; surface that as `None`, not an error.
            Err(stemedb_merkle::TreeError::EmptyTree) => Ok(None),
            Err(e) => Err(SyncError::Merkle(e.to_string())),
        }
    }
    /// Get the number of leaves in the tree.
    pub async fn len(&self) -> usize {
        self.tree.read().await.len()
    }
    /// Check if the tree is empty.
    pub async fn is_empty(&self) -> bool {
        self.tree.read().await.is_empty()
    }
    /// Get a copy of all leaf hashes.
    ///
    /// Used for diff operations during anti-entropy sync.
    pub async fn leaves(&self) -> Vec<Hash> {
        self.tree.read().await.leaves().to_vec()
    }
    /// Checkpoint the tree to persistent storage.
    ///
    /// Should be called periodically (e.g., every 5 minutes) to ensure
    /// fast recovery after crash.
    ///
    /// The read guard is held until the store write has completed, so
    /// writers (`insert`, `rebuild_from_hashes`) wait for the checkpoint to
    /// finish.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization or the store write fails.
    #[instrument(skip(self))]
    pub async fn checkpoint(&self) -> Result<()> {
        let tree = self.tree.read().await;
        let data = serialize_tree(&tree)?;
        self.store.put(MERKLE_CHECKPOINT_KEY, &data).await?;
        info!(leaf_count = tree.len(), bytes = data.len(), "Checkpointed Merkle tree");
        Ok(())
    }
    /// Rebuild the tree from a list of hashes.
    ///
    /// Used during recovery if the checkpoint is corrupt or missing.
    ///
    /// The replacement is all-or-nothing: the new tree is assembled on the
    /// side and only swapped in once every hash has been inserted. If any
    /// insert fails, the existing tree is left exactly as it was.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by a tree insert.
    #[instrument(skip(self, hashes), fields(hash_count = hashes.len()))]
    pub async fn rebuild_from_hashes(&self, hashes: Vec<Hash>) -> Result<()> {
        // Take the write lock up front so concurrent inserts are ordered
        // after the rebuild, exactly as before.
        let mut tree = self.tree.write().await;
        // Build into a scratch tree: an early `?` return must not leave the
        // shared tree half-populated.
        let mut rebuilt = MerkleTree::new();
        for hash in hashes {
            rebuilt.insert(hash)?;
        }
        *tree = rebuilt;
        info!(leaf_count = tree.len(), "Rebuilt Merkle tree from hashes");
        Ok(())
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use stemedb_storage::HybridStore;
    use tempfile::{tempdir, TempDir};
    /// Open a store in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the store: dropping it
    /// deletes the directory, so it must stay alive for as long as the store
    /// is in use.
    async fn create_test_store() -> (Arc<HybridStore>, TempDir) {
        let dir = tempdir().expect("create temp dir");
        let store = Arc::new(HybridStore::open(dir.path()).expect("open store"));
        (store, dir)
    }
    /// A freshly created manager reports an empty tree with no root.
    #[tokio::test]
    async fn test_empty_tree() {
        // `_dir` (not `_`) keeps the temp directory alive until the test ends.
        let (store, _dir) = create_test_store().await;
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");
        assert!(manager.is_empty().await);
        assert_eq!(manager.len().await, 0);
        assert!(manager.root().await.expect("root").is_none());
    }
    /// Inserting hashes grows the tree and yields a root.
    #[tokio::test]
    async fn test_insert_and_root() {
        let (store, _dir) = create_test_store().await;
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");
        manager.insert([1u8; 32]).await.expect("insert");
        manager.insert([2u8; 32]).await.expect("insert");
        assert_eq!(manager.len().await, 2);
        assert!(!manager.is_empty().await);
        assert!(manager.root().await.expect("root").is_some());
    }
    /// A checkpoint written by one manager is loaded by the next one opened
    /// on the same path, with leaves in insertion order.
    #[tokio::test]
    async fn test_checkpoint_and_restore() {
        let dir = tempdir().expect("create temp dir");
        let path = dir.path().to_path_buf();
        // Create and populate
        {
            let store = Arc::new(HybridStore::open(&path).expect("open store"));
            let manager = MerkleTreeManager::load_or_create(store).await.expect("create");
            manager.insert([1u8; 32]).await.expect("insert");
            manager.insert([2u8; 32]).await.expect("insert");
            manager.insert([3u8; 32]).await.expect("insert");
            manager.checkpoint().await.expect("checkpoint");
        }
        // Reopen and verify
        {
            let store = Arc::new(HybridStore::open(&path).expect("open store"));
            let manager = MerkleTreeManager::load_or_create(store).await.expect("create");
            assert_eq!(manager.len().await, 3);
            let leaves = manager.leaves().await;
            assert_eq!(leaves.len(), 3);
            assert_eq!(leaves[0], [1u8; 32]);
            assert_eq!(leaves[1], [2u8; 32]);
            assert_eq!(leaves[2], [3u8; 32]);
        }
    }
    /// Rebuilding from a list of hashes produces a tree with one leaf per hash.
    #[tokio::test]
    async fn test_rebuild_from_hashes() {
        let (store, _dir) = create_test_store().await;
        let manager = MerkleTreeManager::load_or_create(store).await.expect("create");
        let hashes = vec![[1u8; 32], [2u8; 32], [3u8; 32]];
        manager.rebuild_from_hashes(hashes).await.expect("rebuild");
        assert_eq!(manager.len().await, 3);
    }
 }