// stemedb/applications/aphoria/src/extractors/self_audit.rs

//! Self-audit meta-extractor for dogfooding Aphoria on its own codebase.
//!
//! Produces observations about Aphoria's own code patterns:
//! - Bridge tier assignments
//! - Parent hash usage
//! - Lifecycle stage skipping
//! - `.unwrap()` / `.expect()` usage count

use regex::Regex;
use stemedb_core::types::ObjectValue;

use super::Extractor;
use crate::types::{Language, Observation};

/// Meta-extractor that audits Aphoria's own code patterns.
///
/// Opt-in only (like `dep_versions`). Registered with the name `self_audit`.
///
/// Each field is a regex compiled once by [`SelfAuditExtractor::new`] and then
/// applied line by line during extraction. `Debug` and `Clone` are derived so
/// the extractor can be logged and duplicated like any other value type
/// (`regex::Regex` implements both).
#[derive(Debug, Clone)]
pub struct SelfAuditExtractor {
    /// Matches the opening of a `.unwrap(` or `.expect(` method call.
    unwrap_pattern: Regex,
    /// Matches a `SourceClass::<Variant>` path, used to report bridge tier assignments.
    source_class_pattern: Regex,
    /// Matches `parent_hash: None` (any amount of whitespace after the colon).
    parent_hash_none: Regex,
    /// Matches the literal path `LifecycleStage::Approved`.
    lifecycle_approved: Regex,
}

impl Default for SelfAuditExtractor {
    fn default() -> Self {
        Self::new()
    }
}

impl SelfAuditExtractor {
    /// Build a self-audit extractor with all four detection patterns compiled.
    ///
    /// # Panics
    /// Panics if any of the hard-coded regex patterns fails to compile. The
    /// patterns are literals in this function, so a failure is a programmer
    /// error rather than a runtime condition.
    // `expect` is acceptable here: the patterns are compile-time constants.
    #[allow(clippy::expect_used)]
    pub fn new() -> Self {
        // Single place where a pattern literal is turned into a `Regex`.
        let compile = |pattern: &str| Regex::new(pattern).expect("valid regex");

        let unwrap_pattern = compile(r"\.(unwrap|expect)\(");
        let source_class_pattern = compile(r"SourceClass::\w+");
        let parent_hash_none = compile(r"parent_hash:\s*None");
        let lifecycle_approved = compile(r"LifecycleStage::Approved");

        Self {
            unwrap_pattern,
            source_class_pattern,
            parent_hash_none,
            lifecycle_approved,
        }
    }
}

impl Extractor for SelfAuditExtractor {
    /// Name under which this extractor is registered.
    fn name(&self) -> &str {
        "self_audit"
    }

    /// This extractor only applies to Rust sources.
    fn languages(&self) -> &[Language] {
        &[Language::Rust]
    }

    /// Scan one file and emit self-audit observations.
    ///
    /// A single walk over the lines of `content` feeds two independent checks:
    ///
    /// * **Unwrap counting** — every `.unwrap(` / `.expect(` call is counted,
    ///   including several on the same line, except when:
    ///   - the line is at or after the first line that starts with
    ///     `#[cfg(test)]` (the rest of the file is treated as test code; the
    ///     end of the test module is not tracked),
    ///   - the line is a `//` comment (doc examples are not production calls),
    ///   - the call is covered by `#[allow(clippy::unwrap_used)]` or
    ///     `#[allow(clippy::expect_used)]` (see `unwrap_allowed_by_attribute`).
    ///
    ///   The total is emitted as one `unwrap_count` observation anchored at
    ///   line 1, unless the `file` path contains `"test"`.
    ///
    /// * **Bridge patterns** — only when the `file` path contains `"bridge"`:
    ///   one observation per line for the first `SourceClass::…` match, for
    ///   `parent_hash: None`, and for `LifecycleStage::Approved`. These are
    ///   reported even inside test modules.
    ///
    /// `path_segments` and `file` are forwarded unchanged to `build_claim`;
    /// `_language` is unused.
    fn extract(
        &self,
        path_segments: &[String],
        content: &str,
        _language: Language,
        file: &str,
    ) -> Vec<Observation> {
        let mut observations = Vec::new();

        // The lines are collected up front because the allow-attribute check
        // needs random access to the lines *above* the current one.
        let lines: Vec<&str> = content.lines().collect();

        // Loop-invariant: bridge checks depend only on the file path.
        let is_bridge_file = file.contains("bridge");

        let mut unwrap_count: usize = 0;
        let mut in_test_module = false;

        for (line_num, &line) in lines.iter().enumerate() {
            let line_number = line_num + 1;
            let trimmed = line.trim_start();

            // The attribute must start the line so that mentions of it inside
            // comments or string literals do not flip the flag.
            if trimmed.starts_with("#[cfg(test)]") {
                in_test_module = true;
            }

            // Count production unwrap/expect calls; comment-only lines are ignored.
            if !in_test_module && !trimmed.starts_with("//") {
                let calls = self.unwrap_pattern.find_iter(line).count();
                if calls > 0 && !unwrap_allowed_by_attribute(&lines, line_num) {
                    unwrap_count += calls;
                }
            }

            if !is_bridge_file {
                continue;
            }

            // SourceClass assignment in bridge code (first match on the line).
            if let Some(m) = self.source_class_pattern.find(line) {
                observations.push(super::traits::build_claim(
                    path_segments,
                    &["bridge", "tier_assignment"],
                    "default_tier",
                    ObjectValue::Text(m.as_str().to_string()),
                    file,
                    line_number,
                    m.as_str(),
                    0.9,
                    "Bridge tier assignment pattern",
                ));
            }

            // `parent_hash: None` in bridge code.
            if self.parent_hash_none.is_match(line) {
                observations.push(super::traits::build_claim(
                    path_segments,
                    &["bridge", "parent_hash"],
                    "always_none",
                    ObjectValue::Boolean(true),
                    file,
                    line_number,
                    "parent_hash: None",
                    0.9,
                    "Parent hash always set to None",
                ));
            }

            // `LifecycleStage::Approved` used directly, skipping Pending.
            if self.lifecycle_approved.is_match(line) {
                observations.push(super::traits::build_claim(
                    path_segments,
                    &["bridge", "lifecycle"],
                    "skips_pending",
                    ObjectValue::Boolean(true),
                    file,
                    line_number,
                    "LifecycleStage::Approved",
                    0.9,
                    "Lifecycle stage skips Pending, goes directly to Approved",
                ));
            }
        }

        // Emit a single summary observation for the unwrap count. Files whose
        // path mentions "test" get no summary at all.
        if !file.contains("test") {
            #[allow(clippy::cast_precision_loss)]
            observations.push(super::traits::build_claim(
                path_segments,
                &["production", "error_handling"],
                "unwrap_count",
                ObjectValue::Number(unwrap_count as f64),
                file,
                1,
                &format!("{unwrap_count} unwrap/expect calls"),
                1.0,
                "Count of .unwrap()/.expect() calls in production code",
            ));
        }

        observations
    }

    /// Pairs this extractor can produce; each mirrors the concept segments
    /// (joined with `/`) and the predicate passed to `build_claim` in `extract`.
    fn verifiable_predicates(&self) -> Vec<(&str, &str)> {
        vec![
            ("bridge/tier_assignment", "default_tier"),
            ("bridge/parent_hash", "always_none"),
            ("bridge/lifecycle", "skips_pending"),
            ("production/error_handling", "unwrap_count"),
        ]
    }
}

/// Decide whether the call on `lines[line_num]` is covered by an
/// `#[allow(clippy::unwrap_used)]` or `#[allow(clippy::expect_used)]` attribute.
///
/// Walks upwards from the line just above `line_num`:
/// * any line containing one of the two allow attributes means "allowed",
///   whether it sits inside the function body or in the attribute list above
///   the `fn` line;
/// * the first line that looks like a function header (`fn `, `pub fn `, or
///   containing ` fn `) marks the function boundary;
/// * above that boundary, a blank line or any line not starting with `#`
///   ends the attribute list, and the call is "not allowed".
///
/// This is a textual heuristic, not a parse: it does not understand nesting,
/// multi-line signatures, or doc comments placed between attributes and `fn`.
fn unwrap_allowed_by_attribute(lines: &[&str], line_num: usize) -> bool {
    let mut found_fn = false;

    for prev_line in lines.iter().take(line_num).rev().map(|l| l.trim()) {
        if prev_line.is_empty() {
            if found_fn {
                // Blank line above the fn: the attribute list is finished.
                return false;
            }
            continue;
        }

        if prev_line.contains("#[allow(clippy::unwrap_used)]")
            || prev_line.contains("#[allow(clippy::expect_used)]")
        {
            return true;
        }

        if !found_fn
            && (prev_line.starts_with("fn ")
                || prev_line.starts_with("pub fn ")
                || prev_line.contains(" fn "))
        {
            // From here on only the attributes directly above the fn matter.
            found_fn = true;
            continue;
        }

        if found_fn && !prev_line.starts_with('#') {
            // Reached something that is not an attribute: stop looking.
            return false;
        }
    }

    false
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Two production calls (one `unwrap`, one `expect`) yield a count of 2.
    #[test]
    fn test_detects_unwrap() {
        let ext = SelfAuditExtractor::new();
        let content = r#"
fn main() {
    let x = foo().unwrap();
    let y = bar().expect("should work");
}
"#;
        let obs = ext.extract(
            &["rust".to_string(), "aphoria".to_string()],
            content,
            Language::Rust,
            "src/main.rs",
        );

        let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect();
        assert_eq!(unwrap_obs.len(), 1);
        assert_eq!(unwrap_obs[0].value, ObjectValue::Number(2.0));
    }

    /// A call inside a function annotated with the clippy allow is not counted.
    #[test]
    fn test_skips_allowed_unwrap() {
        let ext = SelfAuditExtractor::new();
        let content = r#"
#[allow(clippy::unwrap_used)]
fn allowed() {
    let x = foo().unwrap();
}

fn not_allowed() {
    let y = bar().unwrap();
}
"#;
        let obs = ext.extract(
            &["rust".to_string(), "aphoria".to_string()],
            content,
            Language::Rust,
            "src/main.rs",
        );

        let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect();
        assert_eq!(unwrap_obs.len(), 1);
        // The allowed one should be skipped, only the non-allowed one counted
        assert_eq!(unwrap_obs[0].value, ObjectValue::Number(1.0));
    }

    /// All three bridge patterns are reported for a file whose path mentions "bridge".
    #[test]
    fn test_bridge_detection() {
        let ext = SelfAuditExtractor::new();
        let content = r#"
fn build_assertion() {
    let source_class = SourceClass::Community;
    let parent_hash: None;
    let lifecycle = LifecycleStage::Approved;
}
"#;
        let obs = ext.extract(
            &["rust".to_string(), "aphoria".to_string()],
            content,
            Language::Rust,
            "src/bridge.rs",
        );

        assert!(obs.iter().any(|o| o.predicate == "default_tier"));
        // The fixture contains `parent_hash: None`, so that pattern must fire too.
        assert!(obs.iter().any(|o| o.predicate == "always_none"));
        assert!(obs.iter().any(|o| o.predicate == "skips_pending"));
    }

    /// Bridge patterns are ignored when the file path does not mention "bridge".
    #[test]
    fn test_no_bridge_obs_for_non_bridge() {
        let ext = SelfAuditExtractor::new();
        let content = "let source_class = SourceClass::Community;\n";
        let obs = ext.extract(
            &["rust".to_string()],
            content,
            Language::Rust,
            "src/other.rs",
        );

        assert!(!obs.iter().any(|o| o.predicate == "default_tier"));
    }

    /// Files whose path mentions "test" get no `unwrap_count` summary at all.
    #[test]
    fn test_skips_test_files_for_unwrap() {
        let ext = SelfAuditExtractor::new();
        let content = "let x = foo().unwrap();\n";
        let obs = ext.extract(
            &["rust".to_string()],
            content,
            Language::Rust,
            "src/tests/verify.rs",
        );

        // Test files should not produce unwrap_count observations
        let unwrap_obs: Vec<_> = obs.iter().filter(|o| o.predicate == "unwrap_count").collect();
        assert!(unwrap_obs.is_empty());
    }
}