//! Local Episteme integration for Aphoria. //! //! Provides a simplified interface to the local Episteme instance for: //! - Ingesting assertions from extracted claims //! - Querying for conflicts with authoritative sources //! - Managing the authoritative corpus //! - Auto-creating aliases when conflicts are detected (Phase 2A.3) mod corpus; #[cfg(test)] mod tests; use std::collections::HashMap; use std::path::Path; use std::sync::Arc; use ed25519_dalek::SigningKey; use stemedb_core::types::{AliasOrigin, Assertion, ConceptAlias, ConceptPath, SourceClass}; use stemedb_ingest::{serialize_assertion, Ingestor}; use stemedb_storage::{AliasStore, GenericAliasStore, HybridStore}; use stemedb_wal::Journal; use tokio::sync::Mutex; use tracing::{debug, info, instrument, warn}; use crate::bridge::{claim_to_assertion, load_or_generate_key}; use crate::config::AphoriaConfig; use crate::types::{ConflictResult, ConflictingSource, ExtractedClaim, Verdict}; use crate::AphoriaError; use corpus::current_timestamp; pub use corpus::{create_authoritative_assertion, create_authoritative_corpus}; /// In-memory index for concept matching by tail path segments. /// /// Maps `{tail_seg1}/{tail_seg2}::{predicate}` → `Vec`. /// This enables matching claims across different URI schemes by their /// trailing path components. /// /// # Example /// /// Both of these subjects produce the same key `"tls/cert_verification::enabled"`: /// - `rfc://5246/tls/cert_verification` /// - `code://rust/myapp/client/tls/cert_verification` pub struct ConceptIndex { entries: HashMap>, } impl ConceptIndex { /// Build a ConceptIndex from a slice of assertions. pub fn build(assertions: &[Assertion]) -> Self { // Pre-allocate based on expected unique keys let mut entries: HashMap> = HashMap::with_capacity(assertions.len()); for assertion in assertions { if let Some(key) = Self::make_key(&assertion.subject, &assertion.predicate) { entries.entry(key).or_default().push(assertion.clone()); } } Self { entries } } /// Look up assertions matching the tail segments of a subject and predicate. pub fn lookup(&self, subject: &str, predicate: &str) -> Option<&Vec> { let key = Self::make_key(subject, predicate)?; self.entries.get(&key) } /// Create a lookup key from subject and predicate. /// /// Algorithm: /// 1. Split subject on `"://"`, take path part /// 2. Split path on `"/"` in reverse, get last 2 non-empty segments /// 3. If < 2 segments, return None /// 4. Return `"{seg[-2]}/{seg[-1]}::{predicate}"` pub fn make_key(subject: &str, predicate: &str) -> Option { // Split on "://" to separate scheme from path let path = subject.find("://").map(|i| &subject[i + 3..]).unwrap_or(subject); // Get last two non-empty segments using rsplit (avoids Vec allocation) let mut segments = path.rsplit('/').filter(|s| !s.is_empty()); let tail2 = segments.next()?; let tail1 = segments.next()?; Some(format!("{}/{}::{}", tail1, tail2, predicate)) } } /// Local Episteme instance for Aphoria. pub struct LocalEpisteme { journal: Arc>, /// Store is owned by this struct but accessed via the Ingestor and AliasStore. /// Keeping a reference ensures the store outlives dependent structs. #[allow(dead_code)] store: Arc, ingestor: Ingestor, signing_key: SigningKey, /// AliasStore for persisting cross-scheme aliases discovered during conflict detection. alias_store: GenericAliasStore>, } impl LocalEpisteme { /// Open or create a local Episteme instance. #[instrument(skip(config), fields(data_dir = %config.episteme.data_dir.display()))] pub async fn open(config: &AphoriaConfig, project_root: &Path) -> Result { let data_dir = &config.episteme.data_dir; // Create directories if needed std::fs::create_dir_all(data_dir)?; // Canonicalize paths (required by fjall/lsm-tree) let data_dir = data_dir.canonicalize().map_err(|e| { AphoriaError::Storage(format!("Failed to canonicalize data_dir: {}", e)) })?; let wal_dir = data_dir.join("wal"); let store_dir = data_dir.join("store"); std::fs::create_dir_all(&wal_dir)?; std::fs::create_dir_all(&store_dir)?; info!("Opening local Episteme at {}", data_dir.display()); // Open WAL let journal = Arc::new(Mutex::new( Journal::open(&wal_dir).map_err(|e| AphoriaError::Storage(e.to_string()))?, )); // Open store let store = Arc::new( HybridStore::open(&store_dir).map_err(|e| AphoriaError::Storage(e.to_string()))?, ); // Create ingestor let mut ingestor = Ingestor::new(journal.clone(), store.clone()) .await .map_err(|e| AphoriaError::Storage(e.to_string()))?; ingestor.start(); // Load or generate signing key let signing_key = load_or_generate_key(project_root).map_err(|e| AphoriaError::Storage(e.to_string()))?; // Create alias store for auto-alias persistence let alias_store = GenericAliasStore::new(store.clone()); Ok(Self { journal, store, ingestor, signing_key, alias_store }) } /// Ingest a batch of extracted claims into Episteme. #[instrument(skip(self, claims), fields(claim_count = claims.len()))] pub async fn ingest_claims(&self, claims: &[ExtractedClaim]) -> Result { let timestamp = current_timestamp(); let mut ingested = 0; for claim in claims { let assertion = claim_to_assertion(claim, &self.signing_key, timestamp); // Serialize and write to WAL let record_bytes = serialize_assertion(&assertion) .map_err(|e| AphoriaError::Storage(e.to_string()))?; let mut journal = self.journal.lock().await; journal.append(record_bytes).map_err(|e| AphoriaError::Storage(e.to_string()))?; debug!( concept_path = %claim.concept_path, predicate = %claim.predicate, "Ingested claim" ); ingested += 1; } // Sync WAL { let mut journal = self.journal.lock().await; journal.force_sync().map_err(|e| AphoriaError::Storage(e.to_string()))?; } // Wait for ingestion to process self.ingestor.process_pending().await.map_err(|e| AphoriaError::Storage(e.to_string()))?; info!(ingested, "Ingested claims into Episteme"); Ok(ingested) } /// Check for conflicts between extracted claims and authoritative sources. /// /// Uses tail-path matching via `ConceptIndex` to find conflicts across different /// URI schemes. For example, a code claim at `code://rust/myapp/tls/cert_verification` /// will match authoritative assertions at `rfc://5246/tls/cert_verification`. /// /// When `config.aliases.auto_create_aliases` is enabled, this method will /// automatically persist aliases for matched concepts, enabling faster future /// queries via `QueryEngine` with `resolve_aliases: true`. #[instrument(skip(self, claims, config, index), fields(claim_count = claims.len()))] pub async fn check_conflicts( &self, claims: &[ExtractedClaim], config: &AphoriaConfig, index: &ConceptIndex, ) -> Result, AphoriaError> { let mut results = Vec::new(); let mut aliases_created = 0usize; let timestamp = current_timestamp(); let agent_id = self.agent_id(); for claim in claims { // Look up authoritative assertions matching this claim's tail path let auth_assertions = match index.lookup(&claim.concept_path, &claim.predicate) { Some(assertions) => assertions, None => continue, // No authoritative coverage for this concept }; // Find conflicting authoritative sources let mut conflicts = Vec::new(); for assertion in auth_assertions { // Skip if it's our own assertion (same source class) if assertion.source_class == SourceClass::Expert { continue; } // Auto-create alias if enabled (regardless of value conflict) // This bridges the code path to the authoritative path for future queries if config.aliases.auto_create_aliases { if let Err(e) = self .create_alias_if_new( &claim.concept_path, &assertion.subject, agent_id, timestamp, ) .await { warn!( code_path = %claim.concept_path, auth_path = %assertion.subject, error = %e, "Failed to create alias" ); } else { aliases_created += 1; } } // Check if value differs (for conflict reporting) if assertion.object != claim.value { // Only consider Tier 0-2 as authoritative if assertion.source_class.tier() <= 2 { conflicts.push(ConflictingSource { path: assertion.subject.clone(), source_class: assertion.source_class, value: assertion.object.clone(), confidence: assertion.confidence, }); } } } if conflicts.is_empty() { continue; } // Compute conflict score let conflict_score = compute_conflict_score(&conflicts, claim.confidence); // Determine verdict let verdict = if conflict_score >= config.thresholds.block { Verdict::Block } else if conflict_score >= config.thresholds.flag { Verdict::Flag } else { Verdict::Pass }; results.push(ConflictResult { claim: claim.clone(), conflicts, conflict_score, verdict, acknowledged: None, }); } info!( conflicts = results.len(), blocks = results.iter().filter(|r| r.verdict == Verdict::Block).count(), flags = results.iter().filter(|r| r.verdict == Verdict::Flag).count(), aliases_created, "Conflict check complete" ); Ok(results) } /// Ingest authoritative assertions (RFC, OWASP, etc.). #[instrument(skip(self, assertions), fields(count = assertions.len()))] pub async fn ingest_authoritative( &self, assertions: &[Assertion], ) -> Result { let mut ingested = 0; for assertion in assertions { let record_bytes = serialize_assertion(assertion).map_err(|e| AphoriaError::Storage(e.to_string()))?; let mut journal = self.journal.lock().await; journal.append(record_bytes).map_err(|e| AphoriaError::Storage(e.to_string()))?; ingested += 1; } // Sync and process { let mut journal = self.journal.lock().await; journal.force_sync().map_err(|e| AphoriaError::Storage(e.to_string()))?; } self.ingestor.process_pending().await.map_err(|e| AphoriaError::Storage(e.to_string()))?; info!(ingested, "Ingested authoritative assertions"); Ok(ingested) } /// Shut down the Episteme instance gracefully. pub async fn shutdown(&mut self) { info!("Shutting down local Episteme"); self.ingestor.shutdown(std::time::Duration::from_secs(2)).await; } /// Get the signing key's public key bytes for alias creation. pub fn agent_id(&self) -> [u8; 32] { self.signing_key.verifying_key().to_bytes() } /// Create an alias from a code path to an authoritative path, if it doesn't already exist. /// /// This is used during conflict detection to persist the relationship between /// code concepts and their authoritative counterparts. #[instrument(skip(self), fields(code_path = %code_path, auth_path = %auth_path))] async fn create_alias_if_new( &self, code_path: &str, auth_path: &str, agent_id: [u8; 32], timestamp: u64, ) -> Result<(), AphoriaError> { // Check if alias already exists let existing = self .alias_store .get_canonical(code_path) .await .map_err(|e| AphoriaError::Storage(e.to_string()))?; if existing.is_some() { debug!("Alias already exists, skipping"); return Ok(()); } // Parse paths let alias_path = ConceptPath::parse(code_path) .map_err(|e| AphoriaError::Storage(format!("Invalid code path: {}", e)))?; let canonical_path = ConceptPath::parse(auth_path) .map_err(|e| AphoriaError::Storage(format!("Invalid auth path: {}", e)))?; // Create and persist alias let alias = ConceptAlias::new( alias_path, canonical_path, agent_id, timestamp, AliasOrigin::AutoDetected, ); self.alias_store .set_alias(&alias) .await .map_err(|e| AphoriaError::Storage(e.to_string()))?; debug!("Created auto-detected alias"); Ok(()) } /// Get a reference to the alias store for querying created aliases. #[allow(dead_code)] pub fn alias_store(&self) -> &GenericAliasStore> { &self.alias_store } } /// Compute conflict score based on authoritative sources and claim confidence. /// /// The score uses two approaches and takes the maximum: /// /// 1. **Boosted score**: `max_tier_weight * (1.0 - code_weight) * max_confidence` /// where code_weight = Expert (Tier 3) = 0.5. This is low unless the /// authoritative source has very high authority weight. /// /// 2. **Normalized score**: Linear mapping from tier distance to score: /// - Tier 0 (Regulatory) vs code → 0.95 (above BLOCK threshold 0.7) /// - Tier 1 (Clinical) vs code → 0.77 (above BLOCK threshold 0.7) /// - Tier 2 (Observational) vs code → 0.58 (above FLAG threshold 0.4) /// - Tier 3 (same tier) vs code → 0.40 (at FLAG threshold) /// /// The final score is capped at 1.0. fn compute_conflict_score(conflicts: &[ConflictingSource], _claim_confidence: f32) -> f32 { if conflicts.is_empty() { return 0.0; } // Get max tier weight from conflicting sources let max_tier_weight = conflicts .iter() .map(|c| c.source_class.authority_weight()) .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) .unwrap_or(0.0); // Code claims are Expert (Tier 3) = 0.5 weight let code_weight = SourceClass::Expert.authority_weight(); // Base conflict score from tier spread let base_score = max_tier_weight * (1.0 - code_weight); // Boost by authoritative source confidence let max_confidence = conflicts .iter() .map(|c| c.confidence) .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)) .unwrap_or(1.0); let boosted_score = base_score * max_confidence; // Normalize: tier spread 0→3 maps to 0.4→0.95 let min_tier = conflicts.iter().map(|c| c.source_class.tier()).min().unwrap_or(3) as f32; let normalized = 0.4 + (3.0 - min_tier) / 3.0 * 0.55; normalized.max(boosted_score).min(1.0) }