//! Self-audit meta-extractor for dogfooding Aphoria on its own codebase. //! //! Produces observations about Aphoria's own code patterns: //! - Bridge tier assignments //! - Parent hash usage //! - Lifecycle stage skipping //! - `.unwrap()` / `.expect()` usage count use regex::Regex; use stemedb_core::types::ObjectValue; use super::Extractor; use crate::types::{Language, Observation}; /// Meta-extractor that audits Aphoria's own code patterns. /// /// Opt-in only (like `dep_versions`). Registered with the name `self_audit`. pub struct SelfAuditExtractor { /// Matches: .unwrap() or .expect() calls unwrap_pattern: Regex, /// Matches: SourceClass:: usage for tier assignment source_class_pattern: Regex, /// Matches: parent_hash: None parent_hash_none: Regex, /// Matches: LifecycleStage::Approved lifecycle_approved: Regex, } impl Default for SelfAuditExtractor { fn default() -> Self { Self::new() } } impl SelfAuditExtractor { /// Create a new self-audit extractor. /// /// # Panics /// Panics if any regex pattern is invalid (programmer error). #[allow(clippy::expect_used)] pub fn new() -> Self { Self { unwrap_pattern: Regex::new(r"\.(unwrap|expect)\(").expect("valid regex"), source_class_pattern: Regex::new(r"SourceClass::\w+").expect("valid regex"), parent_hash_none: Regex::new(r"parent_hash:\s*None").expect("valid regex"), lifecycle_approved: Regex::new(r"LifecycleStage::Approved").expect("valid regex"), } } } impl Extractor for SelfAuditExtractor { fn name(&self) -> &str { "self_audit" } fn languages(&self) -> &[Language] { &[Language::Rust] } fn extract( &self, path_segments: &[String], content: &str, _language: Language, file: &str, ) -> Vec { let mut observations = Vec::new(); // Count unwrap/expect usage let mut unwrap_count: usize = 0; let lines: Vec<&str> = content.lines().collect(); let mut in_test_module = false; for (line_num, line) in lines.iter().enumerate() { let line_number = line_num + 1; // Track #[cfg(test)] module boundaries if line.contains("#[cfg(test)]") { in_test_module = true; } // Skip test modules entirely if in_test_module { // Still check for bridge patterns below, but don't count unwraps } else if self.unwrap_pattern.is_match(line) { // Check if the enclosing function has #[allow(clippy::unwrap_used)] // or #[allow(clippy::expect_used)]. // Scan backwards to the fn boundary, then check attributes above it. let mut allowed = false; let mut found_fn = false; for prev in (0..line_num).rev() { let prev_line = lines[prev].trim(); if prev_line.is_empty() { if found_fn { break; // blank line above fn means attributes are done } continue; } if prev_line.contains("#[allow(clippy::unwrap_used)]") || prev_line.contains("#[allow(clippy::expect_used)]") { allowed = true; break; } // Mark that we found the fn boundary if !found_fn && (prev_line.starts_with("fn ") || prev_line.starts_with("pub fn ") || prev_line.contains(" fn ")) { found_fn = true; continue; // check attributes above fn } // If we're past the fn and hit non-attribute lines, stop if found_fn && !prev_line.starts_with('#') { break; } } if !allowed { unwrap_count += 1; } } // Detect SourceClass assignments in bridge code if file.contains("bridge") { if let Some(m) = self.source_class_pattern.find(line) { observations.push(super::traits::build_claim( path_segments, &["bridge", "tier_assignment"], "default_tier", ObjectValue::Text(m.as_str().to_string()), file, line_number, m.as_str(), 0.9, "Bridge tier assignment pattern", )); } } // Detect parent_hash: None patterns in bridge code if file.contains("bridge") && self.parent_hash_none.is_match(line) { observations.push(super::traits::build_claim( path_segments, &["bridge", "parent_hash"], "always_none", ObjectValue::Boolean(true), file, line_number, "parent_hash: None", 0.9, "Parent hash always set to None", )); } // Detect LifecycleStage::Approved skipping Pending if file.contains("bridge") && self.lifecycle_approved.is_match(line) { observations.push(super::traits::build_claim( path_segments, &["bridge", "lifecycle"], "skips_pending", ObjectValue::Boolean(true), file, line_number, "LifecycleStage::Approved", 0.9, "Lifecycle stage skips Pending, goes directly to Approved", )); } } // Emit a single summary observation for unwrap count if !file.contains("test") { #[allow(clippy::cast_precision_loss)] observations.push(super::traits::build_claim( path_segments, &["production", "error_handling"], "unwrap_count", ObjectValue::Number(unwrap_count as f64), file, 1, &format!("{unwrap_count} unwrap/expect calls"), 1.0, "Count of .unwrap()/.expect() calls in production code", )); } observations } fn verifiable_predicates(&self) -> Vec<(&str, &str)> { vec![ ("bridge/tier_assignment", "default_tier"), ("bridge/parent_hash", "always_none"), ("bridge/lifecycle", "skips_pending"), ("production/error_handling", "unwrap_count"), ] } } #[cfg(test)] mod tests { use super::*; #[test] fn test_detects_unwrap() { let ext = SelfAuditExtractor::new(); let content = r#" fn main() { let x = foo().unwrap(); let y = bar().expect("should work"); } "#; let obs = ext.extract( &["rust".to_string(), "aphoria".to_string()], content, Language::Rust, "src/main.rs", ); let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect(); assert_eq!(unwrap_obs.len(), 1); assert_eq!(unwrap_obs[0].value, ObjectValue::Number(2.0)); } #[test] fn test_skips_allowed_unwrap() { let ext = SelfAuditExtractor::new(); let content = r#" #[allow(clippy::unwrap_used)] fn allowed() { let x = foo().unwrap(); } fn not_allowed() { let y = bar().unwrap(); } "#; let obs = ext.extract( &["rust".to_string(), "aphoria".to_string()], content, Language::Rust, "src/main.rs", ); let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect(); assert_eq!(unwrap_obs.len(), 1); // The allowed one should be skipped, only the non-allowed one counted assert_eq!(unwrap_obs[0].value, ObjectValue::Number(1.0)); } #[test] fn test_bridge_detection() { let ext = SelfAuditExtractor::new(); let content = r#" fn build_assertion() { let source_class = SourceClass::Community; let parent_hash: None; let lifecycle = LifecycleStage::Approved; } "#; let obs = ext.extract( &["rust".to_string(), "aphoria".to_string()], content, Language::Rust, "src/bridge.rs", ); assert!(obs.iter().any(|o| o.predicate == "default_tier")); assert!(obs.iter().any(|o| o.predicate == "skips_pending")); } #[test] fn test_no_bridge_obs_for_non_bridge() { let ext = SelfAuditExtractor::new(); let content = "let source_class = SourceClass::Community;\n"; let obs = ext.extract(&["rust".to_string()], content, Language::Rust, "src/other.rs"); assert!(!obs.iter().any(|o| o.predicate == "default_tier")); } #[test] fn test_skips_test_files_for_unwrap() { let ext = SelfAuditExtractor::new(); let content = "let x = foo().unwrap();\n"; let obs = ext.extract(&["rust".to_string()], content, Language::Rust, "src/tests/verify.rs"); // Test files should not produce unwrap_count observations let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect(); assert!(unwrap_obs.is_empty()); } }