stemedb/applications/aphoria/src/extractors/self_audit.rs
jml 3b5f88b4f0 feat(aphoria): implement claims architecture (A1-A5) with verify engine, corpus, coverage, and explain
Complete Aphoria claims system overhaul:
- A1: Rename ExtractedClaim to Observation (extractors produce observations, not claims)
- A2: Add AuthoredClaim with full provenance, invariants, and authority tiers
- A3: Verify engine comparing observations against authored claims, CLI + formatters
- A4: Corpus as first-class assertions with predicate indexing, authority lens, trust packs
- A5: Coverage analysis, explain/docs generation, self-audit extractor, claim suggester skill

Also includes: 42 extractors updated for Observation type, verifiable_predicates trait,
conflict detection with comparison modes, claims TOML persistence, Grafana dashboard,
backup/restore scripts, and comprehensive test coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 09:11:47 +00:00

303 lines
9.8 KiB
Rust

//! Self-audit meta-extractor for dogfooding Aphoria on its own codebase.
//!
//! Produces observations about Aphoria's own code patterns:
//! - Bridge tier assignments
//! - Parent hash usage
//! - Lifecycle stage skipping
//! - `.unwrap()` / `.expect()` usage count
use regex::Regex;
use stemedb_core::types::ObjectValue;
use super::Extractor;
use crate::types::{Language, Observation};
/// Meta-extractor that audits Aphoria's own code patterns.
///
/// Opt-in only (like `dep_versions`). Registered with the name `self_audit`.
///
/// Every field is a regex compiled once by [`SelfAuditExtractor::new`] and
/// then applied to each source line during extraction.
#[derive(Debug, Clone)]
pub struct SelfAuditExtractor {
    /// Matches: .unwrap() or .expect() calls
    unwrap_pattern: Regex,
    /// Matches: `SourceClass::<Variant>` usage, reported as a tier assignment
    source_class_pattern: Regex,
    /// Matches: `parent_hash: None` (any whitespace after the colon)
    parent_hash_none: Regex,
    /// Matches: `LifecycleStage::Approved`
    lifecycle_approved: Regex,
}
impl Default for SelfAuditExtractor {
fn default() -> Self {
Self::new()
}
}
impl SelfAuditExtractor {
    /// Build a self-audit extractor with all of its line patterns compiled.
    ///
    /// # Panics
    /// Panics if one of the hard-coded regex patterns fails to compile,
    /// which can only happen through a programmer error in this function.
    // The patterns are literals, so a compile failure is a bug rather than a
    // recoverable condition; hence the targeted lint exemption below.
    #[allow(clippy::expect_used)]
    pub fn new() -> Self {
        let unwrap_pattern = Regex::new(r"\.(unwrap|expect)\(").expect("valid regex");
        let source_class_pattern = Regex::new(r"SourceClass::\w+").expect("valid regex");
        let parent_hash_none = Regex::new(r"parent_hash:\s*None").expect("valid regex");
        let lifecycle_approved = Regex::new(r"LifecycleStage::Approved").expect("valid regex");

        Self {
            unwrap_pattern,
            source_class_pattern,
            parent_hash_none,
            lifecycle_approved,
        }
    }
}
impl Extractor for SelfAuditExtractor {
    /// Name under which this extractor is registered.
    fn name(&self) -> &str {
        "self_audit"
    }

    /// Only Rust sources are inspected.
    fn languages(&self) -> &[Language] {
        &[Language::Rust]
    }

    /// Scan `content` line by line and produce self-audit observations.
    ///
    /// Observations emitted:
    /// - For files whose path contains `"bridge"`: one observation per line for
    ///   each of a `SourceClass::…` usage (`default_tier`), a `parent_hash: None`
    ///   (`always_none`) and a `LifecycleStage::Approved` (`skips_pending`).
    /// - For files whose path does not contain `"test"`: a single summary
    ///   observation (`unwrap_count`) attributed to line 1.
    ///
    /// The unwrap count covers every `.unwrap(` / `.expect(` occurrence, so a
    /// line with two calls contributes two. Lines are excluded when they
    /// - come at or after the first `#[cfg(test)]` in the file,
    /// - are comment-only (`//`, `///`, `//!`), so doc examples are not counted, or
    /// - are covered by a clippy allow (see `unwrap_is_allowed`).
    ///
    /// Known limitations of this line-based heuristic: occurrences inside
    /// string literals are still counted, the `#[cfg(test)]` flag is never
    /// reset once set, and both path checks are plain substring tests.
    ///
    /// # Parameters
    /// - `path_segments`: forwarded unchanged to every built observation.
    /// - `content`: full text of the file being audited.
    /// - `_language`: unused; `languages()` already restricts this to Rust.
    /// - `file`: path of the file, used for the `"bridge"` / `"test"` checks
    ///   and forwarded to every built observation.
    fn extract(
        &self,
        path_segments: &[String],
        content: &str,
        _language: Language,
        file: &str,
    ) -> Vec<Observation> {
        let mut observations = Vec::new();
        let lines: Vec<&str> = content.lines().collect();
        // The path does not change while scanning, so test it once up front.
        let is_bridge_file = file.contains("bridge");
        let mut unwrap_count: usize = 0;
        let mut in_test_module = false;

        for (idx, line) in lines.iter().enumerate() {
            let line_number = idx + 1;

            // Everything from the first #[cfg(test)] onwards is treated as test code.
            if line.contains("#[cfg(test)]") {
                in_test_module = true;
            }

            // Count unwrap/expect calls in production code only. Comment-only
            // lines are skipped so that doc examples do not inflate the count.
            if !in_test_module && !line.trim_start().starts_with("//") {
                let calls = self.unwrap_pattern.find_iter(line).count();
                if calls > 0 && !unwrap_is_allowed(&lines, idx) {
                    unwrap_count += calls;
                }
            }

            // The remaining checks apply to bridge code only (test modules included).
            if !is_bridge_file {
                continue;
            }

            // Detect SourceClass assignments in bridge code
            if let Some(m) = self.source_class_pattern.find(line) {
                observations.push(super::traits::build_claim(
                    path_segments,
                    &["bridge", "tier_assignment"],
                    "default_tier",
                    ObjectValue::Text(m.as_str().to_string()),
                    file,
                    line_number,
                    m.as_str(),
                    0.9,
                    "Bridge tier assignment pattern",
                ));
            }

            // Detect parent_hash: None patterns in bridge code
            if self.parent_hash_none.is_match(line) {
                observations.push(super::traits::build_claim(
                    path_segments,
                    &["bridge", "parent_hash"],
                    "always_none",
                    ObjectValue::Boolean(true),
                    file,
                    line_number,
                    "parent_hash: None",
                    0.9,
                    "Parent hash always set to None",
                ));
            }

            // Detect LifecycleStage::Approved skipping Pending
            if self.lifecycle_approved.is_match(line) {
                observations.push(super::traits::build_claim(
                    path_segments,
                    &["bridge", "lifecycle"],
                    "skips_pending",
                    ObjectValue::Boolean(true),
                    file,
                    line_number,
                    "LifecycleStage::Approved",
                    0.9,
                    "Lifecycle stage skips Pending, goes directly to Approved",
                ));
            }
        }

        // Emit a single summary observation for unwrap count
        if !file.contains("test") {
            #[allow(clippy::cast_precision_loss)]
            observations.push(super::traits::build_claim(
                path_segments,
                &["production", "error_handling"],
                "unwrap_count",
                ObjectValue::Number(unwrap_count as f64),
                file,
                1,
                &format!("{unwrap_count} unwrap/expect calls"),
                1.0,
                "Count of .unwrap()/.expect() calls in production code",
            ));
        }

        observations
    }

    /// The `(concept path, predicate)` pairs this extractor can produce.
    fn verifiable_predicates(&self) -> Vec<(&str, &str)> {
        vec![
            ("bridge/tier_assignment", "default_tier"),
            ("bridge/parent_hash", "always_none"),
            ("bridge/lifecycle", "skips_pending"),
            ("production/error_handling", "unwrap_count"),
        ]
    }
}

/// Decide whether an unwrap/expect found on `lines[idx]` is covered by
/// `#[allow(clippy::unwrap_used)]` or `#[allow(clippy::expect_used)]`.
///
/// Walks backwards from the line above `idx`:
/// - any line containing one of the two allow attributes means "allowed";
/// - the first line that looks like a `fn` signature marks the function
///   boundary, after which only the attribute lines directly above it are
///   examined (a blank or non-`#` line ends the search).
///
/// Blank lines before the function boundary is reached are skipped.
fn unwrap_is_allowed(lines: &[&str], idx: usize) -> bool {
    let mut found_fn = false;

    for prev_line in lines.iter().take(idx).rev().map(|l| l.trim()) {
        if prev_line.is_empty() {
            if found_fn {
                // A blank line above the fn means its attributes are done.
                break;
            }
            continue;
        }

        if prev_line.contains("#[allow(clippy::unwrap_used)]")
            || prev_line.contains("#[allow(clippy::expect_used)]")
        {
            return true;
        }

        // First fn-looking line: switch to inspecting the attributes above it.
        if !found_fn
            && (prev_line.starts_with("fn ")
                || prev_line.starts_with("pub fn ")
                || prev_line.contains(" fn "))
        {
            found_fn = true;
            continue;
        }

        // Past the fn boundary, anything that is not an attribute ends the scan.
        if found_fn && !prev_line.starts_with('#') {
            break;
        }
    }

    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain unwrap/expect calls in a production file are both counted.
    #[test]
    fn test_detects_unwrap() {
        let ext = SelfAuditExtractor::new();
        let content = r#"
fn main() {
let x = foo().unwrap();
let y = bar().expect("should work");
}
"#;
        let obs = ext.extract(
            &["rust".to_string(), "aphoria".to_string()],
            content,
            Language::Rust,
            "src/main.rs",
        );
        let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect();
        assert_eq!(unwrap_obs.len(), 1);
        assert_eq!(unwrap_obs[0].value, ObjectValue::Number(2.0));
    }

    /// An unwrap inside a function carrying the clippy allow is not counted.
    #[test]
    fn test_skips_allowed_unwrap() {
        let ext = SelfAuditExtractor::new();
        let content = r#"
#[allow(clippy::unwrap_used)]
fn allowed() {
let x = foo().unwrap();
}
fn not_allowed() {
let y = bar().unwrap();
}
"#;
        let obs = ext.extract(
            &["rust".to_string(), "aphoria".to_string()],
            content,
            Language::Rust,
            "src/main.rs",
        );
        let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect();
        assert_eq!(unwrap_obs.len(), 1);
        // The allowed one should be skipped, only the non-allowed one counted
        assert_eq!(unwrap_obs[0].value, ObjectValue::Number(1.0));
    }

    /// All three bridge patterns are reported for a file under a bridge path.
    #[test]
    fn test_bridge_detection() {
        let ext = SelfAuditExtractor::new();
        let content = r#"
fn build_assertion() {
let source_class = SourceClass::Community;
let parent_hash: None;
let lifecycle = LifecycleStage::Approved;
}
"#;
        let obs = ext.extract(
            &["rust".to_string(), "aphoria".to_string()],
            content,
            Language::Rust,
            "src/bridge.rs",
        );
        assert!(obs.iter().any(|o| o.predicate == "default_tier"));
        // The fixture contains `parent_hash: None`, so this must be reported too.
        assert!(obs.iter().any(|o| o.predicate == "always_none"));
        assert!(obs.iter().any(|o| o.predicate == "skips_pending"));
    }

    /// Bridge patterns outside a bridge path produce no bridge observations.
    #[test]
    fn test_no_bridge_obs_for_non_bridge() {
        let ext = SelfAuditExtractor::new();
        let content = "let source_class = SourceClass::Community;\n";
        let obs = ext.extract(
            &["rust".to_string()],
            content,
            Language::Rust,
            "src/other.rs",
        );
        assert!(!obs.iter().any(|o| o.predicate == "default_tier"));
    }

    /// Files whose path contains "test" never get an unwrap_count summary.
    #[test]
    fn test_skips_test_files_for_unwrap() {
        let ext = SelfAuditExtractor::new();
        let content = "let x = foo().unwrap();\n";
        let obs = ext.extract(
            &["rust".to_string()],
            content,
            Language::Rust,
            "src/tests/verify.rs",
        );
        // Test files should not produce unwrap_count observations
        let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect();
        assert!(unwrap_obs.is_empty());
    }
}