## Phase 8: Enterprise Extractor Improvements ✅

- 14 security extractors (TLS, JWT, SQL injection, XSS, etc.)
- 10 framework-specific extractors (Spring, Django, Rails, etc.)
- Config file security detection (YAML, TOML)

## Phase 9: Autonomous Extractor Generation ✅

- Shadow mode executor with TP/FP tracking
- Graduation pipeline with confidence thresholds
- Auto-rollback on regression detection
- Cross-project pattern syncing

## UAT Suite Complete (14 scripts, 90 tests)

- test-core-detection.sh (6 tests)
- test-declarative-extractors.sh (5 tests)
- test-domain-frameworks.sh (5 tests)
- test-domain-unreal.sh (3 tests)
- test-llm-extraction.sh (6 tests)
- test-eval-harness.sh (5 tests)
- test-cross-language.sh (3 tests)
- test-precommit-performance.sh (4 tests)
- test-output-formats.sh (8 tests)
- test-drift-detection.sh (6 tests)
- test-exit-codes.sh (12 tests)

+ 3 more scripts

## Other Changes

- Updated roadmap to mark Phases 8-9 complete
- Added .gitignore entries for build artifacts
- Updated pre-commit: 800 line limit, exclude tests/data/cmd

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
282 lines
11 KiB
Rust
282 lines
11 KiB
Rust
//! Storage engine abstractions and implementations for Episteme.
//!
//! This crate provides the [`KVStore`] trait for pluggable storage backends
//! and a concrete [`HybridStore`] that routes keys to fjall (write-heavy) or redb (read-heavy).
//!
//! # The Ballot Box
//!
//! The [`VoteStore`] trait provides specialized operations for high-velocity
//! vote ingestion. This is the core of the Ballot Box pattern - separating
//! votes from assertions to enable thousands of agents to vote simultaneously.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericVoteStore, VoteStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let vote_store = GenericVoteStore::new(kv_store);
//!
//! // High-velocity vote ingestion
//! let vote_hash = vote_store.put_vote(&vote, "subject").await?;
//!
//! // O(1) aggregation via caches
//! let count = vote_store.get_vote_count(&assertion_hash).await?;
//! let weight = vote_store.get_aggregate_weight(&assertion_hash).await?;
//! ```
//!
//! # TrustRank
//!
//! The [`TrustRankStore`] trait provides reputation tracking for The Hive
//! learning loop. Agent reputation scores affect how their assertions are
//! weighted in the Authority lens.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericTrustRankStore, TrustRankStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let trust_store = GenericTrustRankStore::new(kv_store);
//!
//! // Get agent's current reputation
//! let trust = trust_store.get_trust_rank(&agent_id).await?;
//!
//! // Update reputation based on accuracy
//! trust_store.record_outcome(&agent_id, was_accurate, timestamp).await?;
//!
//! // Apply confidence half-life decay
//! trust_store.decay_trust_ranks(current_timestamp, None).await?;
//! ```
//!
//! # Query Audit Trail
//!
//! The [`AuditStore`] trait provides query audit logging for incident investigation.
//! Every query is logged with provenance to enable "Why did you think that?" debugging.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericAuditStore, AuditStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let audit_store = GenericAuditStore::new(kv_store);
//!
//! // Log a query audit
//! audit_store.put_audit(&audit).await?;
//!
//! // Retrieve a specific audit
//! let audit = audit_store.get_audit(&query_id).await?;
//!
//! // Find all audits for an agent in a time range
//! let audits = audit_store.get_audits_for_agent(&agent_id, from, to).await?;
//! ```
//!
//! # TrustPack ("App Store for Trust")
//!
//! The [`TrustPackStore`] trait provides curated agent lists for filtering consensus.
//! Users subscribe to domain expert packs to see reality through trusted lenses.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericTrustPackStore, TrustPackStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let pack_store = GenericTrustPackStore::new(kv_store);
//!
//! // Create and store a pack
//! let pack = TrustPack::new(pack_id, "Mayo_Clinic_Experts".to_string(), maintainer_key);
//! pack_store.put_pack(&pack).await?;
//!
//! // Add trusted agents to the pack
//! pack_store.add_agent_to_pack(&pack_id, &agent_id).await?;
//!
//! // Check membership
//! let is_trusted = pack_store.is_agent_in_pack(&pack_id, &agent_id).await?;
//! ```
//!
//! # The Meter (Economic Throttling)
//!
//! The [`QuotaStore`] trait provides token bucket quota enforcement to prevent
//! runaway agents from exhausting system resources.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericQuotaStore, QuotaStore, OperationType};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let quota_store = GenericQuotaStore::new(kv_store);
//!
//! // Check and record cost for an operation
//! let result = quota_store.check_and_record(
//!     &agent_id,
//!     OperationType::Assert,
//!     payload_bytes,
//!     timestamp,
//! ).await?;
//!
//! if result.allowed {
//!     // Process the operation
//!     println!("Remaining quota: {}", result.remaining);
//! } else {
//!     // Reject: quota exceeded
//!     println!("Quota exceeded, resets at {}", result.reset_at);
//! }
//! ```
//!
//! # Gold Standard Verification
//!
//! The [`GoldStandardStore`] trait provides ground truth assertions for agent
//! verification. New agents must demonstrate knowledge of verified facts to
//! earn TrustRank and unlock premium features.
//!
//! ```ignore
//! use stemedb_storage::{HybridStore, GenericGoldStandardStore, GoldStandardStore};
//!
//! let kv_store = HybridStore::open("./data")?;
//! let gs_store = GenericGoldStandardStore::new(kv_store);
//!
//! // Create and store a gold standard
//! let gs = GoldStandard::new(assertion_hash, "Earth".into(), "has_shape".into(),
//!     "oblate_spheroid".into(), timestamp, "admin".into());
//! gs_store.set_gold_standard(&gs).await?;
//!
//! // Verify agent answer against gold standard
//! let gs = gs_store.get_gold_standard("Earth", "has_shape").await?;
//! if let Some(gs) = gs {
//!     let is_correct = gs.matches(agent_answer);
//!     // Update TrustRank based on correctness
//! }
//! ```

/// Admission control storage for graduated PoW and trust tiers.
|
|
pub mod admission_store;
|
|
/// API key storage for authentication (P4.2).
|
|
pub mod api_key_store;
|
|
/// Per-agent circuit breaker storage for misbehavior isolation.
|
|
pub mod circuit_breaker_store;
|
|
/// Content quality scoring for spam detection (Content Defense Phase 7C).
|
|
pub mod content_defense;
|
|
/// CRDT (Conflict-free Replicated Data Type) implementations for distributed StemeDB.
|
|
pub mod crdt;
|
|
/// Domain-specific trust tracking for per-domain expertise.
|
|
pub mod domain_trust_store;
|
|
/// Central key encoding/decoding for subject-prefix range sharding.
|
|
pub mod key_codec;
|
|
/// Pack source tracking for policy attribution.
|
|
pub mod pack_source_store;
|
|
/// Pattern aggregate storage for community corpus contributions.
|
|
pub mod pattern_aggregate_store;
|
|
/// Predicate alias storage for semantic predicate matching.
|
|
pub mod predicate_alias_store;
|
|
/// Global predicate index for querying assertions by predicate (Federated Policy).
|
|
pub mod predicate_index_store;
|
|
/// Quarantine storage for flagged assertions (Content Defense Phase 7C).
|
|
pub mod quarantine_store;
|
|
/// Near-duplicate detection via MinHash + LSH (Content Defense Phase 7C).
|
|
pub mod similarity_index;
|
|
/// Source Registry for human-readable source metadata.
|
|
pub mod source_registry;
|
|
/// EigenTrust trust graph for Sybil-resistant reputation.
|
|
pub mod trust_graph_store;
|
|
|
|
/// Shared checkpoint file format for index persistence.
|
|
pub mod checkpoint_format;
|
|
|
|
/// Cross-scheme alias storage for concept hierarchy.
|
|
pub mod alias_store;
|
|
/// Query audit trail storage for incident investigation.
|
|
pub mod audit_store;
|
|
/// Error types and Result wrapper for storage operations.
|
|
pub mod error;
|
|
/// Escalation event storage for high-conflict assertions.
|
|
pub mod escalation_store;
|
|
/// Fjall (LSM-tree) backend for write-heavy key prefixes.
|
|
pub mod fjall_backend;
|
|
/// Gold standard assertions for agent verification.
|
|
pub mod gold_standard_store;
|
|
/// Hybrid storage backend: routes keys to fjall (write-heavy) or redb (read-heavy).
|
|
pub mod hybrid_backend;
|
|
/// Specialized storage for assertion indexes.
|
|
pub mod index_store;
|
|
/// Economic throttling via Token Bucket quotas (The Meter).
|
|
pub mod quota_store;
|
|
/// Redb (B-tree) backend for read-heavy key prefixes.
|
|
pub mod redb_backend;
|
|
/// Storage-layer serialization helpers.
|
|
pub(crate) mod serde_helpers;
|
|
/// Assertion supersession storage (Error Correction).
|
|
pub mod supersession_store;
|
|
/// Core traits for key-value storage.
|
|
pub mod traits;
|
|
/// TrustPack curation lists (App Store for Trust).
|
|
pub mod trust_pack_store;
|
|
/// TrustRank reputation storage (The Hive).
|
|
pub mod trust_rank_store;
|
|
/// HNSW-based vector similarity index for semantic k-NN queries.
|
|
pub mod vector_index;
|
|
/// BK-tree based visual similarity index for perceptual hash matching.
|
|
pub mod visual_index;
|
|
/// High-velocity vote storage (The Ballot Box).
|
|
pub mod vote_store;
|
|
|
|
pub use admission_store::{
|
|
AdmissionCheck, AdmissionStatus, AdmissionStatusResult, AdmissionStore, GenericAdmissionStore,
|
|
};
|
|
pub use alias_store::{AliasStore, GenericAliasStore};
|
|
pub use api_key_store::{
|
|
ApiKeyRecord, ApiKeyRole, ApiKeyStore, GenericApiKeyStore, ParseApiKeyRoleError,
|
|
RateLimitResult, DEFAULT_API_KEY_RATE_LIMIT,
|
|
};
|
|
pub use audit_store::{AuditStore, GenericAuditStore};
|
|
pub use circuit_breaker_store::{
|
|
CircuitBreakerConfig, CircuitBreakerRecord, CircuitBreakerStore, CircuitState, FailureType,
|
|
GenericCircuitBreakerStore,
|
|
};
|
|
pub use domain_trust_store::{
|
|
domain_factor, extract_domain, DomainTrust, DomainTrustStore, GenericDomainTrustStore,
|
|
};
|
|
pub use error::{Result, StorageError};
|
|
pub use escalation_store::{EscalationStore, GenericEscalationStore};
|
|
pub use gold_standard_store::{GenericGoldStandardStore, GoldStandardStore};
|
|
pub use hybrid_backend::HybridStore;
|
|
pub use index_store::{GenericIndexStore, IndexStore};
|
|
pub use quota_store::{
|
|
CostConfig, GenericQuotaStore, OperationType, QuotaCheckResult, QuotaRecord, QuotaStore,
|
|
DEFAULT_QUOTA_LIMIT,
|
|
};
|
|
pub use supersession_store::{GenericSupersessionStore, SupersessionStore};
|
|
pub use traits::KVStore;
|
|
pub use trust_graph_store::{
|
|
compute_eigentrust_scores, EigenTrustConfig, EigenTrustResult, EigenTrustState,
|
|
GenericTrustGraphStore, TrustEdge, TrustGraphStore,
|
|
};
|
|
pub use trust_pack_store::{GenericTrustPackStore, TrustPackStore};
|
|
pub use trust_rank_store::{GenericTrustRankStore, TrustRank, TrustRankStore};
|
|
pub use vector_index::{
|
|
merge_search_results, HnswVectorIndex, PersistentVectorIndex, PersistentVectorIndexConfig,
|
|
VectorIndex,
|
|
};
|
|
pub use visual_index::{
|
|
hamming_distance, BkTreeSnapshot, BkTreeVisualIndex, PersistentVisualIndex,
|
|
PersistentVisualIndexConfig, VisualIndex,
|
|
};
|
|
pub use vote_store::{GenericVoteStore, VoteStore};
|
|
|
|
// Pattern aggregate store exports (Community Corpus)
|
|
pub use pattern_aggregate_store::{
|
|
GenericPatternAggregateStore, PatternAggregate, PatternAggregateStore,
|
|
};
|
|
|
|
// Content Defense Phase 7C exports
|
|
pub use content_defense::{ContentQualityScorer, QualityScoringConfig};
|
|
pub use pack_source_store::{GenericPackSourceStore, PackSourceInfo, PackSourceStore};
|
|
pub use predicate_alias_store::{
|
|
GenericPredicateAliasStore, PredicateAliasStore, StoredPredicateAliasSet,
|
|
};
|
|
pub use predicate_index_store::{GenericPredicateIndexStore, PredicateIndexStore};
|
|
pub use quarantine_store::{GenericQuarantineStore, QuarantineStore};
|
|
pub use similarity_index::{
|
|
GenericSimilarityIndex, LshBucket, MinHashSignature, SimilarityCheckResult, SimilarityIndex,
|
|
SimilarityIndexConfig,
|
|
};
|
|
pub use source_registry::{GenericSourceRegistry, SourceRegistry};
|
|
|
|
// CRDT exports
|
|
pub use crdt::{
|
|
AssertionSetState, AssertionTransfer, CrdtAssertionStore, CrdtMerge, CrdtVoteStore,
|
|
VoteCountState,
|
|
};
|