stemedb/applications/aphoria/src/episteme/local/mod.rs
jordan 41c676a78e feat: Aphoria enterprise features + ontology SDK + file length compliance
Enterprise Features:
- Hosted mode with remote sync for team pattern aggregation
- Community sharing with privacy-preserving anonymization
- LLM-based semantic claim extraction with Gemini integration
- Pattern learning with promotion to declarative extractors
- High-entropy secrets extractor with configurable thresholds
- Auth bypass and insecure cookies extractors

Module Refactoring:
- Split oversized files to comply with 500-line limit
- Config split: types/core.rs, types/extractors.rs, types/hosted.rs, etc.
- Handlers split: scan.rs, policy.rs, report.rs modules
- Extractors split: declarative/, high_entropy_secrets/, insecure_cookies/
- Learning split: store modules with metrics and persistence

SDK & Ontology:
- stemedb-ontology SDK with fluent builders and StemeDB client
- Pharma domain extractors for FDA Orange Book data
- Consumer health UAT test infrastructure

Code Quality:
- Fixed clippy warnings (needless_borrows_for_generic_args)
- Added KVStore trait imports where needed
- Fixed utoipa path re-exports for OpenAPI docs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 12:55:29 -07:00

132 lines
4.5 KiB
Rust

//! Local Episteme instance for persistent storage and alias management.
//!
//! Provides ingestion, conflict checking, and auto-alias creation, backed by
//! a write-ahead log and a KV store.
mod queries;
mod store;
use std::path::Path;
use std::sync::Arc;
use ed25519_dalek::SigningKey;
use stemedb_ingest::Ingestor;
use stemedb_storage::{
GenericAliasStore, GenericPackSourceStore, GenericPredicateIndexStore, HybridStore, KVStore,
};
use stemedb_wal::Journal;
use tokio::sync::Mutex;
use tracing::{info, instrument};
use crate::bridge::load_or_generate_key;
use crate::config::AphoriaConfig;
use crate::AphoriaError;
/// Local Episteme instance for Aphoria.
///
/// Bundles the write-ahead journal, the hybrid KV store, the ingestor that is
/// built on top of both, the signing key, and three helper stores that all
/// share the same underlying `HybridStore` handle. Instances are created by
/// `LocalEpisteme::open`.
pub struct LocalEpisteme {
/// Write-ahead journal, opened from the `wal` subdirectory of the data
/// directory and shared behind an async (`tokio`) mutex.
pub(super) journal: Arc<Mutex<Journal>>,
/// Shared handle to the hybrid store opened from the `store` subdirectory;
/// the alias, predicate-index and pack-source stores wrap clones of it.
pub(super) store: Arc<HybridStore>, // KV store for assertions
/// Ingestor constructed from the journal and the store; `open` starts it
/// and `shutdown` stops it.
pub(super) ingestor: Ingestor<HybridStore>,
/// Ed25519 signing key obtained from `load_or_generate_key`; its verifying
/// (public) key bytes are what `agent_id` returns.
pub(super) signing_key: SigningKey,
/// Alias store used for auto-alias persistence.
pub(super) alias_store: GenericAliasStore<Arc<HybridStore>>,
/// Predicate index store used for predicate-based queries.
pub(super) predicate_index_store: GenericPredicateIndexStore<Arc<HybridStore>>,
/// Pack source store used for policy attribution.
pub(super) pack_source_store: GenericPackSourceStore<Arc<HybridStore>>,
}
impl LocalEpisteme {
    /// Opens the local Episteme stored under `config.episteme.data_dir`,
    /// creating the on-disk layout first if it is missing.
    ///
    /// The data directory is created and canonicalized, then given a `wal` and
    /// a `store` subdirectory. The journal and the hybrid store are opened from
    /// those, an ingestor is built over both and started, and the signing key
    /// is obtained through `load_or_generate_key(project_root)`.
    ///
    /// # Errors
    ///
    /// Directory-creation failures are propagated with `?`. Failures while
    /// canonicalizing the data directory, opening the journal or the store,
    /// constructing the ingestor, or loading the signing key are returned as
    /// [`AphoriaError::Storage`] carrying the underlying error's message.
    #[instrument(skip(config), fields(data_dir = %config.episteme.data_dir.display()))]
    pub async fn open(config: &AphoriaConfig, project_root: &Path) -> Result<Self, AphoriaError> {
        /// Wraps any stringifiable error into `AphoriaError::Storage`.
        fn storage_err<E: ToString>(err: E) -> AphoriaError {
            AphoriaError::Storage(err.to_string())
        }

        let configured_dir = &config.episteme.data_dir;

        // The directory has to exist before it can be canonicalized.
        std::fs::create_dir_all(configured_dir)?;

        // fjall/lsm-tree requires canonical paths.
        let root = match configured_dir.canonicalize() {
            Ok(canonical) => canonical,
            Err(e) => {
                return Err(AphoriaError::Storage(format!(
                    "Failed to canonicalize data_dir: {}",
                    e
                )));
            }
        };

        // On-disk layout: <root>/wal for the journal, <root>/store for the KV store.
        let wal_path = root.join("wal");
        let store_path = root.join("store");
        std::fs::create_dir_all(&wal_path)?;
        std::fs::create_dir_all(&store_path)?;

        info!("Opening local Episteme at {}", root.display());

        // Write-ahead log.
        let wal = Journal::open(&wal_path).map_err(storage_err)?;
        let journal = Arc::new(Mutex::new(wal));

        // Key-value store.
        let kv = HybridStore::open(&store_path).map_err(storage_err)?;
        let store = Arc::new(kv);

        // The ingestor shares both handles and is started right away.
        let mut ingestor = Ingestor::new(Arc::clone(&journal), Arc::clone(&store))
            .await
            .map_err(storage_err)?;
        ingestor.start();

        let signing_key = load_or_generate_key(project_root).map_err(storage_err)?;

        // The alias, predicate-index and pack-source stores each wrap their own
        // clone of the store handle; they are built in that order before the
        // handle itself is moved into the struct.
        Ok(Self {
            alias_store: GenericAliasStore::new(Arc::clone(&store)),
            predicate_index_store: GenericPredicateIndexStore::new(Arc::clone(&store)),
            pack_source_store: GenericPackSourceStore::new(Arc::clone(&store)),
            journal,
            store,
            ingestor,
            signing_key,
        })
    }

    /// Shuts the instance down: stops the ingestor, then flushes the store.
    ///
    /// A flush failure is logged as a warning and otherwise ignored; this
    /// method never returns an error.
    pub async fn shutdown(&mut self) {
        info!("Shutting down local Episteme");

        // Two seconds are handed to the ingestor's shutdown call
        // (presumably a drain timeout; confirm against `Ingestor::shutdown`).
        let shutdown_budget = std::time::Duration::from_secs(2);
        self.ingestor.shutdown(shutdown_budget).await;

        // Flushing makes sure everything reaches disk, which is critical for
        // pack_source data written during policy import.
        let flush_outcome = self.store.as_ref().flush().await;
        if let Err(e) = flush_outcome {
            tracing::warn!(error = %e, "Failed to flush store during shutdown");
        }
    }

    /// Returns the public (verifying) key bytes of the signing key, used as
    /// the agent id for alias creation.
    pub fn agent_id(&self) -> [u8; 32] {
        let public_key = self.signing_key.verifying_key();
        public_key.to_bytes()
    }

    /// Borrows the alias store so callers can query created aliases.
    #[allow(dead_code)]
    pub fn alias_store(&self) -> &GenericAliasStore<Arc<HybridStore>> {
        &self.alias_store
    }

    /// Borrows the shared handle to the underlying KV store.
    ///
    /// Intended for direct storage operations such as importing policies.
    pub fn store(&self) -> &Arc<HybridStore> {
        &self.store
    }

    /// Borrows the pack source store used for policy attribution.
    pub fn pack_source_store(&self) -> &GenericPackSourceStore<Arc<HybridStore>> {
        &self.pack_source_store
    }
}