stemedb/crates/stemedb-storage/src/lib.rs
jordan 42d4e09508 feat: Index persistence (Phase 5C) - vector hot/cold, visual checkpoint
Phase 5C (Index Persistence) implementation:
- PersistentVectorIndex with hot/cold architecture
  - Hot: in-memory HNSW for recent vectors
  - Cold: memory-mapped HNSW loaded from disk
  - Background builder for WAL replay and atomic swap
  - BLAKE3 integrity verification
- PersistentVisualIndex with checkpoint persistence
  - BkTreeSnapshot with rkyv serialization
  - CRC32C corruption detection
  - Atomic write pattern (temp → fsync → rename)
- Key codec additions for vector index metadata
- Split large files into modules (<500 lines each)
  - battery_pre_sentinel.rs → battery/ directory
  - visual_index.rs → visual_index/ directory
  - persistent.rs → persistent/ directory
- Refactored ingest worker tests for clarity
- Updated roadmap to mark Phase 5 complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 15:43:18 -07:00

208 lines
7.5 KiB
Rust

//! Storage engine abstractions and implementations for Episteme.
//!
//! This crate provides the [`KVStore`] trait for pluggable storage backends
//! and a concrete [`HybridStore`] that routes keys to fjall (write-heavy) or redb (read-heavy).
//!
//! # The Ballot Box
//!
//! The [`VoteStore`] trait provides specialized operations for high-velocity
//! vote ingestion. This is the core of the Ballot Box pattern - separating
//! votes from assertions to enable thousands of agents to vote simultaneously.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericVoteStore, VoteStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let vote_store = GenericVoteStore::new(kv_store);
//!
//! // High-velocity vote ingestion
//! let vote_hash = vote_store.put_vote(&vote, "subject").await?;
//!
//! // O(1) aggregation via caches
//! let count = vote_store.get_vote_count(&assertion_hash).await?;
//! let weight = vote_store.get_aggregate_weight(&assertion_hash).await?;
//! ```
//!
//! # TrustRank
//!
//! The [`TrustRankStore`] trait provides reputation tracking for The Hive
//! learning loop. Agent reputation scores affect how their assertions are
//! weighted in the Authority lens.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericTrustRankStore, TrustRankStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let trust_store = GenericTrustRankStore::new(kv_store);
//!
//! // Get agent's current reputation
//! let trust = trust_store.get_trust_rank(&agent_id).await?;
//!
//! // Update reputation based on accuracy
//! trust_store.record_outcome(&agent_id, was_accurate, timestamp).await?;
//!
//! // Apply confidence half-life decay
//! trust_store.decay_trust_ranks(current_timestamp, None).await?;
//! ```
//!
//! # Query Audit Trail
//!
//! The [`AuditStore`] trait provides query audit logging for incident investigation.
//! Every query is logged with provenance to enable "Why did you think that?" debugging.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericAuditStore, AuditStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let audit_store = GenericAuditStore::new(kv_store);
//!
//! // Log a query audit
//! audit_store.put_audit(&audit).await?;
//!
//! // Retrieve a specific audit
//! let audit = audit_store.get_audit(&query_id).await?;
//!
//! // Find all audits for an agent in a time range
//! let audits = audit_store.get_audits_for_agent(&agent_id, from, to).await?;
//! ```
//!
//! # TrustPack ("App Store for Trust")
//!
//! The [`TrustPackStore`] trait provides curated agent lists for filtering consensus.
//! Users subscribe to domain expert packs to see reality through trusted lenses.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericTrustPackStore, TrustPackStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let pack_store = GenericTrustPackStore::new(kv_store);
//!
//! // Create and store a pack
//! let pack = TrustPack::new(pack_id, "Mayo_Clinic_Experts".to_string(), maintainer_key);
//! pack_store.put_pack(&pack).await?;
//!
//! // Add trusted agents to the pack
//! pack_store.add_agent_to_pack(&pack_id, &agent_id).await?;
//!
//! // Check membership
//! let is_trusted = pack_store.is_agent_in_pack(&pack_id, &agent_id).await?;
//! ```
//!
//! # The Meter (Economic Throttling)
//!
//! The [`QuotaStore`] trait provides token bucket quota enforcement to prevent
//! runaway agents from exhausting system resources.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericQuotaStore, QuotaStore, OperationType};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let quota_store = GenericQuotaStore::new(kv_store);
//!
//! // Check and record cost for an operation
//! let result = quota_store.check_and_record(
//!     &agent_id,
//!     OperationType::Assert,
//!     payload_bytes,
//!     timestamp,
//! ).await?;
//!
//! if result.allowed {
//!     // Process the operation
//!     println!("Remaining quota: {}", result.remaining);
//! } else {
//!     // Reject: quota exceeded
//!     println!("Quota exceeded, resets at {}", result.reset_at);
//! }
//! ```
//!
//! # Gold Standard Verification
//!
//! The [`GoldStandardStore`] trait provides ground truth assertions for agent
//! verification. New agents must demonstrate knowledge of verified facts to
//! earn TrustRank and unlock premium features.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericGoldStandardStore, GoldStandardStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let gs_store = GenericGoldStandardStore::new(kv_store);
//!
//! // Create and store a gold standard
//! let gs = GoldStandard::new(assertion_hash, "Earth".into(), "has_shape".into(),
//!     "oblate_spheroid".into(), timestamp, "admin".into());
//! gs_store.set_gold_standard(&gs).await?;
//!
//! // Verify agent answer against gold standard
//! let gs = gs_store.get_gold_standard("Earth", "has_shape").await?;
//! if let Some(gs) = gs {
//!     let is_correct = gs.matches(agent_answer);
//!     // Update TrustRank based on correctness
//! }
//! ```
/// Central key encoding/decoding for subject-prefix range sharding.
pub mod key_codec;
/// Shared checkpoint file format for index persistence.
pub mod checkpoint_format;
/// Query audit trail storage for incident investigation.
///
/// Source of [`AuditStore`] and [`GenericAuditStore`], re-exported at the crate root.
pub mod audit_store;
/// Error types and Result wrapper for storage operations.
///
/// Source of [`StorageError`] and [`Result`], re-exported at the crate root.
pub mod error;
/// Escalation event storage for high-conflict assertions.
///
/// Source of [`EscalationStore`] and [`GenericEscalationStore`], re-exported at the crate root.
pub mod escalation_store;
/// Fjall (LSM-tree) backend for write-heavy key prefixes.
pub mod fjall_backend;
/// Gold standard assertions for agent verification.
///
/// Source of [`GoldStandardStore`] and [`GenericGoldStandardStore`], re-exported at the crate
/// root.
pub mod gold_standard_store;
/// Hybrid storage backend: routes keys to fjall (write-heavy) or redb (read-heavy).
///
/// Source of [`HybridStore`], re-exported at the crate root.
pub mod hybrid_backend;
/// Specialized storage for assertion indexes.
///
/// Source of [`IndexStore`] and [`GenericIndexStore`], re-exported at the crate root.
pub mod index_store;
/// Economic throttling via Token Bucket quotas (The Meter).
///
/// Source of [`QuotaStore`], [`GenericQuotaStore`], and the supporting items [`CostConfig`],
/// [`OperationType`], [`QuotaCheckResult`], [`QuotaRecord`] and [`DEFAULT_QUOTA_LIMIT`],
/// all re-exported at the crate root.
pub mod quota_store;
/// Redb (B-tree) backend for read-heavy key prefixes.
pub mod redb_backend;
/// Storage-layer serialization helpers.
///
/// Crate-private: this module is not part of the public API.
pub(crate) mod serde_helpers;
/// Assertion supersession storage (Error Correction).
///
/// Source of [`SupersessionStore`] and [`GenericSupersessionStore`], re-exported at the crate
/// root.
pub mod supersession_store;
/// Core traits for key-value storage.
///
/// Source of [`KVStore`], re-exported at the crate root.
pub mod traits;
/// TrustPack curation lists (App Store for Trust).
///
/// Source of [`TrustPackStore`] and [`GenericTrustPackStore`], re-exported at the crate root.
pub mod trust_pack_store;
/// TrustRank reputation storage (The Hive).
///
/// Source of [`TrustRankStore`], [`GenericTrustRankStore`] and [`TrustRank`], re-exported at
/// the crate root.
pub mod trust_rank_store;
/// HNSW-based vector similarity index for semantic k-NN queries.
///
/// Source of [`VectorIndex`], [`HnswVectorIndex`], [`PersistentVectorIndex`],
/// [`PersistentVectorIndexConfig`] and [`merge_search_results`], re-exported at the crate root.
pub mod vector_index;
/// BK-tree based visual similarity index for perceptual hash matching.
///
/// Source of [`VisualIndex`], [`BkTreeVisualIndex`], [`BkTreeSnapshot`],
/// [`PersistentVisualIndex`], [`PersistentVisualIndexConfig`] and [`hamming_distance`],
/// re-exported at the crate root.
pub mod visual_index;
/// High-velocity vote storage (The Ballot Box).
///
/// Source of [`VoteStore`] and [`GenericVoteStore`], re-exported at the crate root.
pub mod vote_store;
// Crate-root re-exports, so callers can write `stemedb_storage::Name` instead of
// spelling out the defining module.

// Errors, the key-value trait, and the hybrid backend.
pub use error::{Result, StorageError};
pub use hybrid_backend::HybridStore;
pub use traits::KVStore;

// Specialized stores and their supporting types.
pub use audit_store::{AuditStore, GenericAuditStore};
pub use escalation_store::{EscalationStore, GenericEscalationStore};
pub use gold_standard_store::{GenericGoldStandardStore, GoldStandardStore};
pub use index_store::{GenericIndexStore, IndexStore};
pub use quota_store::{
    CostConfig, GenericQuotaStore, OperationType, QuotaCheckResult, QuotaRecord, QuotaStore,
    DEFAULT_QUOTA_LIMIT,
};
pub use supersession_store::{GenericSupersessionStore, SupersessionStore};
pub use trust_pack_store::{GenericTrustPackStore, TrustPackStore};
pub use trust_rank_store::{GenericTrustRankStore, TrustRank, TrustRankStore};
pub use vote_store::{GenericVoteStore, VoteStore};

// Similarity indexes (vector and visual).
pub use vector_index::{
    merge_search_results, HnswVectorIndex, PersistentVectorIndex, PersistentVectorIndexConfig,
    VectorIndex,
};
pub use visual_index::{
    hamming_distance, BkTreeSnapshot, BkTreeVisualIndex, PersistentVisualIndex,
    PersistentVisualIndexConfig, VisualIndex,
};