Phase 5C (Index Persistence) implementation:
- PersistentVectorIndex with hot/cold architecture
  - Hot: in-memory HNSW for recent vectors
  - Cold: memory-mapped HNSW loaded from disk
  - Background builder for WAL replay and atomic swap
  - BLAKE3 integrity verification
- PersistentVisualIndex with checkpoint persistence
  - BkTreeSnapshot with rkyv serialization
  - CRC32C corruption detection
  - Atomic write pattern (temp → fsync → rename)
- Key codec additions for vector index metadata
- Split large files into modules (<500 lines each)
  - battery_pre_sentinel.rs → battery/ directory
  - visual_index.rs → visual_index/ directory
  - persistent.rs → persistent/ directory
- Refactored ingest worker tests for clarity
- Updated roadmap to mark Phase 5 complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
208 lines
7.5 KiB
Rust
208 lines
7.5 KiB
Rust
//! Storage engine abstractions and implementations for Episteme.
|
|
//!
|
|
//! This crate provides the `KVStore` trait for pluggable storage backends
|
|
//! and a concrete `HybridStore` that routes keys to fjall (write-heavy) or redb (read-heavy).
|
|
//!
|
|
//! # The Ballot Box
|
|
//!
|
|
//! The [`VoteStore`] trait provides specialized operations for high-velocity
|
|
//! vote ingestion. This is the core of the Ballot Box pattern - separating
|
|
//! votes from assertions to enable thousands of agents to vote simultaneously.
|
|
//!
|
|
//! ```ignore
|
|
//! use stemedb_storage::{HybridStore, GenericVoteStore, VoteStore};
|
|
//!
|
|
//! let kv_store = HybridStore::open("./data")?;
|
|
//! let vote_store = GenericVoteStore::new(kv_store);
|
|
//!
|
|
//! // High-velocity vote ingestion
|
|
//! let vote_hash = vote_store.put_vote(&vote, "subject").await?;
|
|
//!
|
|
//! // O(1) aggregation via caches
|
|
//! let count = vote_store.get_vote_count(&assertion_hash).await?;
|
|
//! let weight = vote_store.get_aggregate_weight(&assertion_hash).await?;
|
|
//! ```
|
|
//!
|
|
//! # TrustRank
|
|
//!
|
|
//! The [`TrustRankStore`] trait provides reputation tracking for The Hive
|
|
//! learning loop. Agent reputation scores affect how their assertions are
|
|
//! weighted in the Authority lens.
|
|
//!
|
|
//! ```ignore
|
|
//! use stemedb_storage::{HybridStore, GenericTrustRankStore, TrustRankStore};
|
|
//!
|
|
//! let kv_store = HybridStore::open("./data")?;
|
|
//! let trust_store = GenericTrustRankStore::new(kv_store);
|
|
//!
|
|
//! // Get agent's current reputation
|
|
//! let trust = trust_store.get_trust_rank(&agent_id).await?;
|
|
//!
|
|
//! // Update reputation based on accuracy
|
|
//! trust_store.record_outcome(&agent_id, was_accurate, timestamp).await?;
|
|
//!
|
|
//! // Apply confidence half-life decay
|
|
//! trust_store.decay_trust_ranks(current_timestamp, None).await?;
|
|
//! ```
|
|
//!
|
|
//! # Query Audit Trail
|
|
//!
|
|
//! The [`AuditStore`] trait provides query audit logging for incident investigation.
|
|
//! Every query is logged with provenance to enable "Why did you think that?" debugging.
|
|
//!
|
|
//! ```ignore
|
|
//! use stemedb_storage::{HybridStore, GenericAuditStore, AuditStore};
|
|
//!
|
|
//! let kv_store = HybridStore::open("./data")?;
|
|
//! let audit_store = GenericAuditStore::new(kv_store);
|
|
//!
|
|
//! // Log a query audit
|
|
//! audit_store.put_audit(&audit).await?;
|
|
//!
|
|
//! // Retrieve a specific audit
|
|
//! let audit = audit_store.get_audit(&query_id).await?;
|
|
//!
|
|
//! // Find all audits for an agent in a time range
|
|
//! let audits = audit_store.get_audits_for_agent(&agent_id, from, to).await?;
|
|
//! ```
|
|
//!
|
|
//! # TrustPack ("App Store for Trust")
|
|
//!
|
|
//! The [`TrustPackStore`] trait provides curated agent lists for filtering consensus.
|
|
//! Users subscribe to domain expert packs to see reality through trusted lenses.
|
|
//!
|
|
//! ```ignore
|
|
//! use stemedb_storage::{HybridStore, GenericTrustPackStore, TrustPackStore};
|
|
//!
|
|
//! let kv_store = HybridStore::open("./data")?;
|
|
//! let pack_store = GenericTrustPackStore::new(kv_store);
|
|
//!
|
|
//! // Create and store a pack
|
|
//! let pack = TrustPack::new(pack_id, "Mayo_Clinic_Experts".to_string(), maintainer_key);
|
|
//! pack_store.put_pack(&pack).await?;
|
|
//!
|
|
//! // Add trusted agents to the pack
|
|
//! pack_store.add_agent_to_pack(&pack_id, &agent_id).await?;
|
|
//!
|
|
//! // Check membership
|
|
//! let is_trusted = pack_store.is_agent_in_pack(&pack_id, &agent_id).await?;
|
|
//! ```
|
|
//!
|
|
//! # The Meter (Economic Throttling)
|
|
//!
|
|
//! The [`QuotaStore`] trait provides token bucket quota enforcement to prevent
|
|
//! runaway agents from exhausting system resources.
|
|
//!
|
|
//! ```ignore
|
|
//! use stemedb_storage::{HybridStore, GenericQuotaStore, QuotaStore, OperationType};
|
|
//!
|
|
//! let kv_store = HybridStore::open("./data")?;
|
|
//! let quota_store = GenericQuotaStore::new(kv_store);
|
|
//!
|
|
//! // Check and record cost for an operation
|
|
//! let result = quota_store.check_and_record(
|
|
//!     &agent_id,
|
|
//!     OperationType::Assert,
|
|
//!     payload_bytes,
|
|
//!     timestamp,
|
|
//! ).await?;
|
|
//!
|
|
//! if result.allowed {
|
|
//!     // Process the operation
|
|
//!     println!("Remaining quota: {}", result.remaining);
|
|
//! } else {
|
|
//!     // Reject: quota exceeded
|
|
//!     println!("Quota exceeded, resets at {}", result.reset_at);
|
|
//! }
|
|
//! ```
|
|
//!
|
|
//! # Gold Standard Verification
|
|
//!
|
|
//! The [`GoldStandardStore`] trait provides ground truth assertions for agent
|
|
//! verification. New agents must demonstrate knowledge of verified facts to
|
|
//! earn TrustRank and unlock premium features.
|
|
//!
|
|
//! ```ignore
|
|
//! use stemedb_storage::{HybridStore, GenericGoldStandardStore, GoldStandardStore};
|
|
//!
|
|
//! let kv_store = HybridStore::open("./data")?;
|
|
//! let gs_store = GenericGoldStandardStore::new(kv_store);
|
|
//!
|
|
//! // Create and store a gold standard
|
|
//! let gs = GoldStandard::new(assertion_hash, "Earth".into(), "has_shape".into(),
|
|
//!     "oblate_spheroid".into(), timestamp, "admin".into());
|
|
//! gs_store.set_gold_standard(&gs).await?;
|
|
//!
|
|
//! // Verify agent answer against gold standard
|
|
//! let gs = gs_store.get_gold_standard("Earth", "has_shape").await?;
|
|
//! if let Some(gs) = gs {
|
|
//!     let is_correct = gs.matches(agent_answer);
|
|
//!     // Update TrustRank based on correctness
|
|
//! }
|
|
//! ```
|
|
|
|
// Module declarations for the storage crate. Every module is `pub` except
// `serde_helpers`, which is `pub(crate)`. The items most callers need are
// also re-exported from the crate root by the `pub use` lines later in this file.

/// Central key encoding/decoding for subject-prefix range sharding.
|
|
pub mod key_codec;
|
|
|
|
/// Shared checkpoint file format for index persistence.
|
|
pub mod checkpoint_format;
|
|
|
|
/// Query audit trail storage for incident investigation.
///
/// Provides [`AuditStore`] and [`GenericAuditStore`], both re-exported at the
/// crate root.
|
|
pub mod audit_store;
|
|
/// Error types and Result wrapper for storage operations.
///
/// Provides [`StorageError`] and the crate's [`Result`] wrapper, both
/// re-exported at the crate root.
|
|
pub mod error;
|
|
/// Escalation event storage for high-conflict assertions.
///
/// Provides [`EscalationStore`] and [`GenericEscalationStore`], both
/// re-exported at the crate root.
|
|
pub mod escalation_store;
|
|
/// Fjall (LSM-tree) backend for write-heavy key prefixes.
|
|
pub mod fjall_backend;
|
|
/// Gold standard assertions for agent verification.
///
/// Provides [`GoldStandardStore`] and [`GenericGoldStandardStore`], both
/// re-exported at the crate root.
|
|
pub mod gold_standard_store;
|
|
/// Hybrid storage backend: routes keys to fjall (write-heavy) or redb (read-heavy).
///
/// Provides [`HybridStore`], re-exported at the crate root.
|
|
pub mod hybrid_backend;
|
|
/// Specialized storage for assertion indexes.
///
/// Provides [`IndexStore`] and [`GenericIndexStore`], both re-exported at the
/// crate root.
|
|
pub mod index_store;
|
|
/// Economic throttling via Token Bucket quotas (The Meter).
///
/// Provides [`QuotaStore`], [`GenericQuotaStore`], [`CostConfig`],
/// [`OperationType`], [`QuotaCheckResult`], [`QuotaRecord`] and
/// [`DEFAULT_QUOTA_LIMIT`], all re-exported at the crate root.
|
|
pub mod quota_store;
|
|
/// Redb (B-tree) backend for read-heavy key prefixes.
|
|
pub mod redb_backend;
|
|
/// Storage-layer serialization helpers.
///
/// Declared `pub(crate)`: visible inside this crate only and not part of the
/// public API.
|
|
pub(crate) mod serde_helpers;
|
|
/// Assertion supersession storage (Error Correction).
///
/// Provides [`SupersessionStore`] and [`GenericSupersessionStore`], both
/// re-exported at the crate root.
|
|
pub mod supersession_store;
|
|
/// Core traits for key-value storage.
///
/// Provides [`KVStore`], re-exported at the crate root.
|
|
pub mod traits;
|
|
/// TrustPack curation lists (App Store for Trust).
///
/// Provides [`TrustPackStore`] and [`GenericTrustPackStore`], both re-exported
/// at the crate root.
|
|
pub mod trust_pack_store;
|
|
/// TrustRank reputation storage (The Hive).
///
/// Provides [`TrustRankStore`], [`GenericTrustRankStore`] and [`TrustRank`],
/// all re-exported at the crate root.
|
|
pub mod trust_rank_store;
|
|
/// HNSW-based vector similarity index for semantic k-NN queries.
///
/// Provides [`VectorIndex`], [`HnswVectorIndex`], [`PersistentVectorIndex`],
/// [`PersistentVectorIndexConfig`] and [`merge_search_results`], all
/// re-exported at the crate root.
|
|
pub mod vector_index;
|
|
/// BK-tree based visual similarity index for perceptual hash matching.
///
/// Provides [`VisualIndex`], [`BkTreeVisualIndex`], [`BkTreeSnapshot`],
/// [`PersistentVisualIndex`], [`PersistentVisualIndexConfig`] and
/// [`hamming_distance`], all re-exported at the crate root.
|
|
pub mod visual_index;
|
|
/// High-velocity vote storage (The Ballot Box).
///
/// Provides [`VoteStore`] and [`GenericVoteStore`], both re-exported at the
/// crate root.
|
|
pub mod vote_store;
|
|
|
|
pub use audit_store::{AuditStore, GenericAuditStore};
|
|
pub use error::{Result, StorageError};
|
|
pub use escalation_store::{EscalationStore, GenericEscalationStore};
|
|
pub use gold_standard_store::{GenericGoldStandardStore, GoldStandardStore};
|
|
pub use hybrid_backend::HybridStore;
|
|
pub use index_store::{GenericIndexStore, IndexStore};
|
|
pub use quota_store::{
|
|
CostConfig, GenericQuotaStore, OperationType, QuotaCheckResult, QuotaRecord, QuotaStore,
|
|
DEFAULT_QUOTA_LIMIT,
|
|
};
|
|
pub use supersession_store::{GenericSupersessionStore, SupersessionStore};
|
|
pub use traits::KVStore;
|
|
pub use trust_pack_store::{GenericTrustPackStore, TrustPackStore};
|
|
pub use trust_rank_store::{GenericTrustRankStore, TrustRank, TrustRankStore};
|
|
pub use vector_index::{
|
|
merge_search_results, HnswVectorIndex, PersistentVectorIndex, PersistentVectorIndexConfig,
|
|
VectorIndex,
|
|
};
|
|
pub use visual_index::{
|
|
hamming_distance, BkTreeSnapshot, BkTreeVisualIndex, PersistentVisualIndex,
|
|
PersistentVisualIndexConfig, VisualIndex,
|
|
};
|
|
pub use vote_store::{GenericVoteStore, VoteStore};
|