stemedb/crates/stemedb-storage/src/lib.rs
jordan 157dbbb9eb feat: Complete Aphoria Phase 8-9 + UAT suite (90/90 tests passing)
## Phase 8: Enterprise Extractor Improvements 
- 14 security extractors (TLS, JWT, SQL injection, XSS, etc.)
- 10 framework-specific extractors (Spring, Django, Rails, etc.)
- Config file security detection (YAML, TOML)

## Phase 9: Autonomous Extractor Generation 
- Shadow mode executor with TP/FP tracking
- Graduation pipeline with confidence thresholds
- Auto-rollback on regression detection
- Cross-project pattern syncing

## UAT Suite Complete (14 scripts, 90 tests)
- test-core-detection.sh (6 tests)
- test-declarative-extractors.sh (5 tests)
- test-domain-frameworks.sh (5 tests)
- test-domain-unreal.sh (3 tests)
- test-llm-extraction.sh (6 tests)
- test-eval-harness.sh (5 tests)
- test-cross-language.sh (3 tests)
- test-precommit-performance.sh (4 tests)
- test-output-formats.sh (8 tests)
- test-drift-detection.sh (6 tests)
- test-exit-codes.sh (12 tests)
+ 3 more scripts

## Other Changes
- Updated roadmap to mark Phase 8-9 complete
- Added .gitignore entries for build artifacts
- Updated pre-commit: 800 line limit, exclude tests/data/cmd

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 22:50:55 -07:00

282 lines
11 KiB
Rust

//! Storage engine abstractions and implementations for Episteme.
//!
//! This crate provides the `KVStore` trait for pluggable storage backends
//! and a concrete `HybridStore` that routes keys to fjall (write-heavy) or redb (read-heavy).
//!
//! # The Ballot Box
//!
//! The [`VoteStore`] trait provides specialized operations for high-velocity
//! vote ingestion. This is the core of the Ballot Box pattern - separating
//! votes from assertions to enable thousands of agents to vote simultaneously.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericVoteStore, VoteStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let vote_store = GenericVoteStore::new(kv_store);
//!
//! // High-velocity vote ingestion
//! let vote_hash = vote_store.put_vote(&vote, "subject").await?;
//!
//! // O(1) aggregation via caches
//! let count = vote_store.get_vote_count(&assertion_hash).await?;
//! let weight = vote_store.get_aggregate_weight(&assertion_hash).await?;
//! ```
//!
//! # TrustRank
//!
//! The [`TrustRankStore`] trait provides reputation tracking for The Hive
//! learning loop. Agent reputation scores affect how their assertions are
//! weighted in the Authority lens.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericTrustRankStore, TrustRankStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let trust_store = GenericTrustRankStore::new(kv_store);
//!
//! // Get agent's current reputation
//! let trust = trust_store.get_trust_rank(&agent_id).await?;
//!
//! // Update reputation based on accuracy
//! trust_store.record_outcome(&agent_id, was_accurate, timestamp).await?;
//!
//! // Apply confidence half-life decay
//! trust_store.decay_trust_ranks(current_timestamp, None).await?;
//! ```
//!
//! # Query Audit Trail
//!
//! The [`AuditStore`] trait provides query audit logging for incident investigation.
//! Every query is logged with provenance to enable "Why did you think that?" debugging.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericAuditStore, AuditStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let audit_store = GenericAuditStore::new(kv_store);
//!
//! // Log a query audit
//! audit_store.put_audit(&audit).await?;
//!
//! // Retrieve a specific audit
//! let audit = audit_store.get_audit(&query_id).await?;
//!
//! // Find all audits for an agent in a time range
//! let audits = audit_store.get_audits_for_agent(&agent_id, from, to).await?;
//! ```
//!
//! # TrustPack ("App Store for Trust")
//!
//! The [`TrustPackStore`] trait provides curated agent lists for filtering consensus.
//! Users subscribe to domain expert packs to see reality through trusted lenses.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericTrustPackStore, TrustPackStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let pack_store = GenericTrustPackStore::new(kv_store);
//!
//! // Create and store a pack
//! let pack = TrustPack::new(pack_id, "Mayo_Clinic_Experts".to_string(), maintainer_key);
//! pack_store.put_pack(&pack).await?;
//!
//! // Add trusted agents to the pack
//! pack_store.add_agent_to_pack(&pack_id, &agent_id).await?;
//!
//! // Check membership
//! let is_trusted = pack_store.is_agent_in_pack(&pack_id, &agent_id).await?;
//! ```
//!
//! # The Meter (Economic Throttling)
//!
//! The [`QuotaStore`] trait provides token bucket quota enforcement to prevent
//! runaway agents from exhausting system resources.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericQuotaStore, QuotaStore, OperationType};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let quota_store = GenericQuotaStore::new(kv_store);
//!
//! // Check and record cost for an operation
//! let result = quota_store.check_and_record(
//!     &agent_id,
//!     OperationType::Assert,
//!     payload_bytes,
//!     timestamp,
//! ).await?;
//!
//! if result.allowed {
//!     // Process the operation
//!     println!("Remaining quota: {}", result.remaining);
//! } else {
//!     // Reject: quota exceeded
//!     println!("Quota exceeded, resets at {}", result.reset_at);
//! }
//! ```
//!
//! # Gold Standard Verification
//!
//! The [`GoldStandardStore`] trait provides ground truth assertions for agent
//! verification. New agents must demonstrate knowledge of verified facts to
//! earn TrustRank and unlock premium features.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericGoldStandardStore, GoldStandardStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let gs_store = GenericGoldStandardStore::new(kv_store);
//!
//! // Create and store a gold standard
//! let gs = GoldStandard::new(assertion_hash, "Earth".into(), "has_shape".into(),
//!     "oblate_spheroid".into(), timestamp, "admin".into());
//! gs_store.set_gold_standard(&gs).await?;
//!
//! // Verify agent answer against gold standard
//! let gs = gs_store.get_gold_standard("Earth", "has_shape").await?;
//! if let Some(gs) = gs {
//!     let is_correct = gs.matches(agent_answer);
//!     // Update TrustRank based on correctness
//! }
//! ```
/// Admission control storage for graduated PoW and trust tiers.
pub mod admission_store;
/// API key storage for authentication (P4.2).
pub mod api_key_store;
/// Per-agent circuit breaker storage for misbehavior isolation.
pub mod circuit_breaker_store;
/// Content quality scoring for spam detection (Content Defense Phase 7C).
pub mod content_defense;
/// CRDT (Conflict-free Replicated Data Type) implementations for distributed StemeDB.
pub mod crdt;
/// Domain-specific trust tracking for per-domain expertise.
pub mod domain_trust_store;
/// Central key encoding/decoding for subject-prefix range sharding.
pub mod key_codec;
/// Pack source tracking for policy attribution.
pub mod pack_source_store;
/// Pattern aggregate storage for community corpus contributions.
pub mod pattern_aggregate_store;
/// Predicate alias storage for semantic predicate matching.
pub mod predicate_alias_store;
/// Global predicate index for querying assertions by predicate (Federated Policy).
pub mod predicate_index_store;
/// Quarantine storage for flagged assertions (Content Defense Phase 7C).
pub mod quarantine_store;
/// Near-duplicate detection via MinHash + LSH (Content Defense Phase 7C).
pub mod similarity_index;
/// Source Registry for human-readable source metadata.
pub mod source_registry;
/// EigenTrust trust graph for Sybil-resistant reputation.
pub mod trust_graph_store;
/// Shared checkpoint file format for index persistence.
pub mod checkpoint_format;
/// Cross-scheme alias storage for concept hierarchy.
pub mod alias_store;
/// Query audit trail storage for incident investigation.
pub mod audit_store;
/// Error types and Result wrapper for storage operations.
pub mod error;
/// Escalation event storage for high-conflict assertions.
pub mod escalation_store;
/// Fjall (LSM-tree) backend for write-heavy key prefixes.
pub mod fjall_backend;
/// Gold standard assertions for agent verification.
pub mod gold_standard_store;
/// Hybrid storage backend: routes keys to fjall (write-heavy) or redb (read-heavy).
pub mod hybrid_backend;
/// Specialized storage for assertion indexes.
pub mod index_store;
/// Economic throttling via Token Bucket quotas (The Meter).
pub mod quota_store;
/// Redb (B-tree) backend for read-heavy key prefixes.
pub mod redb_backend;
/// Storage-layer serialization helpers.
pub(crate) mod serde_helpers;
/// Assertion supersession storage (Error Correction).
pub mod supersession_store;
/// Core traits for key-value storage.
pub mod traits;
/// TrustPack curation lists (App Store for Trust).
pub mod trust_pack_store;
/// TrustRank reputation storage (The Hive).
pub mod trust_rank_store;
/// HNSW-based vector similarity index for semantic k-NN queries.
pub mod vector_index;
/// BK-tree based visual similarity index for perceptual hash matching.
pub mod visual_index;
/// High-velocity vote storage (The Ballot Box).
pub mod vote_store;
pub use admission_store::{
AdmissionCheck, AdmissionStatus, AdmissionStatusResult, AdmissionStore, GenericAdmissionStore,
};
pub use alias_store::{AliasStore, GenericAliasStore};
pub use api_key_store::{
ApiKeyRecord, ApiKeyRole, ApiKeyStore, GenericApiKeyStore, ParseApiKeyRoleError,
RateLimitResult, DEFAULT_API_KEY_RATE_LIMIT,
};
pub use audit_store::{AuditStore, GenericAuditStore};
pub use circuit_breaker_store::{
CircuitBreakerConfig, CircuitBreakerRecord, CircuitBreakerStore, CircuitState, FailureType,
GenericCircuitBreakerStore,
};
pub use domain_trust_store::{
domain_factor, extract_domain, DomainTrust, DomainTrustStore, GenericDomainTrustStore,
};
pub use error::{Result, StorageError};
pub use escalation_store::{EscalationStore, GenericEscalationStore};
pub use gold_standard_store::{GenericGoldStandardStore, GoldStandardStore};
pub use hybrid_backend::HybridStore;
pub use index_store::{GenericIndexStore, IndexStore};
pub use quota_store::{
CostConfig, GenericQuotaStore, OperationType, QuotaCheckResult, QuotaRecord, QuotaStore,
DEFAULT_QUOTA_LIMIT,
};
pub use supersession_store::{GenericSupersessionStore, SupersessionStore};
pub use traits::KVStore;
pub use trust_graph_store::{
compute_eigentrust_scores, EigenTrustConfig, EigenTrustResult, EigenTrustState,
GenericTrustGraphStore, TrustEdge, TrustGraphStore,
};
pub use trust_pack_store::{GenericTrustPackStore, TrustPackStore};
pub use trust_rank_store::{GenericTrustRankStore, TrustRank, TrustRankStore};
pub use vector_index::{
merge_search_results, HnswVectorIndex, PersistentVectorIndex, PersistentVectorIndexConfig,
VectorIndex,
};
pub use visual_index::{
hamming_distance, BkTreeSnapshot, BkTreeVisualIndex, PersistentVisualIndex,
PersistentVisualIndexConfig, VisualIndex,
};
pub use vote_store::{GenericVoteStore, VoteStore};
// Pattern aggregate store exports (Community Corpus)
pub use pattern_aggregate_store::{
GenericPatternAggregateStore, PatternAggregate, PatternAggregateStore,
};
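// Content Defense Phase 7C exports (content_defense, quarantine_store, similarity_index),
// alongside pack source, predicate alias/index, and source registry exports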
pub use content_defense::{ContentQualityScorer, QualityScoringConfig};
pub use pack_source_store::{GenericPackSourceStore, PackSourceInfo, PackSourceStore};
pub use predicate_alias_store::{
GenericPredicateAliasStore, PredicateAliasStore, StoredPredicateAliasSet,
};
pub use predicate_index_store::{GenericPredicateIndexStore, PredicateIndexStore};
pub use quarantine_store::{GenericQuarantineStore, QuarantineStore};
pub use similarity_index::{
GenericSimilarityIndex, LshBucket, MinHashSignature, SimilarityCheckResult, SimilarityIndex,
SimilarityIndexConfig,
};
pub use source_registry::{GenericSourceRegistry, SourceRegistry};
// CRDT exports
pub use crdt::{
AssertionSetState, AssertionTransfer, CrdtAssertionStore, CrdtMerge, CrdtVoteStore,
VoteCountState,
};