// stemedb/applications/aphoria/src/convergence.rs

//! Convergence engine: compare local observations against remote org patterns.
//!
//! This module answers one question at read time: "Does this project's code
//! agree with what the rest of the org has decided?"
//!
//! The engine is pure — no I/O, no mutation. Feed it a slice of `Observation`s
//! produced by local extractors and a slice of `AuthoredClaim`s fetched from a
//! remote StemeDB instance, and it returns `ConvergenceSuggestion`s wherever the
//! two disagree.
//!
//! # Data flow
//!
//! ```text
//! local scan → [Observation, ...]
//!                       │
//!                       ▼
//!             compute_convergence_suggestions()
//!                       │
//! remote fetch → [AuthoredClaim, ...]
//!                       │
//!                       ▼
//!             [ConvergenceSuggestion, ...]  (sorted: Authoritative → Advisory → Informational)
//! ```

use std::collections::HashMap;

use stemedb_core::types::ObjectValue;

use crate::types::authored_claim::{AuthoredClaim, AuthoredValue};
use crate::types::convergence::{ConvergenceSeverity, ConvergenceSuggestion, DriveClaimSummary};
use crate::types::Observation;

// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------

/// Render a locally observed `ObjectValue` as display text.
///
/// Booleans and numbers are formatted via `to_string`; text and reference
/// payloads are copied out unchanged.
fn object_value_to_string(val: &ObjectValue) -> String {
    match val {
        ObjectValue::Text(text) | ObjectValue::Reference(text) => text.clone(),
        ObjectValue::Number(number) => number.to_string(),
        ObjectValue::Boolean(flag) => flag.to_string(),
    }
}

/// Render the expected value of an org claim (`AuthoredValue`) as display text.
///
/// Booleans and numbers are formatted via `to_string`; text is copied out
/// unchanged.
fn authored_value_to_string(val: &AuthoredValue) -> String {
    match val {
        AuthoredValue::Text(text) => text.to_owned(),
        AuthoredValue::Number(number) => number.to_string(),
        AuthoredValue::Bool(flag) => flag.to_string(),
    }
}

/// Translate an authority tier label into its numeric rank (0–5).
///
/// The label is lowercased before lookup, so matching ignores letter case.
/// Lower numbers are more authoritative. Any label that is not listed below
/// (including `anecdotal`) gets the least authoritative rank, `5`.
///
/// The mapping is duplicated here so that `convergence` does not need to reach
/// into the private `types::promotion` module.
///
/// | Tier name      | Number |
/// |----------------|--------|
/// | regulatory     | 0      |
/// | clinical       | 1      |
/// | observational  | 2      |
/// | team_policy    | 2      |
/// | expert         | 3      |
/// | community      | 4      |
/// | anecdotal / *  | 5      |
fn tier_to_number(tier: &str) -> u8 {
    // Rank assigned to every label that is not in `KNOWN_TIERS`.
    const FALLBACK_RANK: u8 = 5;
    const KNOWN_TIERS: [(&str, u8); 6] = [
        ("regulatory", 0),
        ("clinical", 1),
        ("observational", 2),
        ("team_policy", 2),
        ("expert", 3),
        ("community", 4),
    ];

    let normalized = tier.to_lowercase();
    KNOWN_TIERS
        .iter()
        .find(|(label, _)| *label == normalized)
        .map_or(FALLBACK_RANK, |&(_, rank)| rank)
}

/// Return the display name for an authority tier number.
///
/// Tiers `0` through `4` have dedicated names; every other number is reported
/// as `"Anecdotal"`.
fn tier_number_to_name(tier: u8) -> &'static str {
    // Indexed by tier number.
    const TIER_NAMES: [&str; 5] = ["Regulatory", "Clinical", "Observational", "Expert", "Community"];

    TIER_NAMES.get(usize::from(tier)).copied().unwrap_or("Anecdotal")
}

/// Returns `true` when an `ObjectValue` and an `AuthoredValue` represent
/// different logical values.
///
/// * Booleans and text are compared for exact equality.
/// * Numbers are considered equal when they compare equal or when their
///   absolute difference is at most `f64::EPSILON`.
/// * A NaN on either side always counts as a difference: NaN can never be
///   shown to agree with the org value, so it must not be silently accepted.
/// * Cross-type comparisons (e.g. `Boolean` vs `Text`, or a local
///   `Reference` against anything) always differ.
fn values_differ(local: &ObjectValue, remote: &AuthoredValue) -> bool {
    match (local, remote) {
        (ObjectValue::Boolean(b), AuthoredValue::Bool(expected)) => b != expected,
        (ObjectValue::Number(n), AuthoredValue::Number(expected)) => {
            if n == expected {
                // Covers identical finite values and same-signed infinities
                // (whose difference would otherwise be NaN).
                false
            } else {
                let gap = (n - expected).abs();
                // `NaN > EPSILON` is false, so without the explicit check a NaN
                // operand would be reported as agreeing with any org value.
                gap.is_nan() || gap > f64::EPSILON
            }
        }
        (ObjectValue::Text(s), AuthoredValue::Text(expected)) => s != expected,
        // Cross-type: always differ
        _ => true,
    }
}

/// Rank a severity for sorting: the smaller the number, the more
/// authoritative the severity (`Authoritative` < `Advisory` < `Informational`).
fn severity_order(s: &ConvergenceSeverity) -> u8 {
    match *s {
        ConvergenceSeverity::Informational => 2,
        ConvergenceSeverity::Advisory => 1,
        ConvergenceSeverity::Authoritative => 0,
    }
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/// Compare a slice of local observations against a slice of remote org claims.
///
/// Returns convergence suggestions wherever the local code differs from the
/// org pattern, sorted by severity (Authoritative first, then Advisory, then
/// Informational).
///
/// A suggestion is generated when **all** of the following hold:
/// 1. A remote claim shares the same `concept_path` **and** `predicate` as the
///    local observation.
/// 2. The local observed value differs from the remote claim's expected value.
/// 3. The remote claim's tier number is `<= max_suggestion_tier` (defaults to
///    `5`, meaning all tiers are included).
///
/// When multiple claims match the same `(concept_path, predicate)` in the
/// remote, only the most authoritative (lowest tier number) claim drives the
/// suggestion. A single suggestion is emitted per
/// `(concept_path, predicate, file, line)` tuple.
///
/// # Arguments
///
/// * `local_observations` – observations produced by local extractors.
/// * `remote_claims` – org claims fetched from the remote StemeDB instance.
/// * `max_suggestion_tier` – optional upper bound on the tier of claims that
///   generate suggestions. `None` is equivalent to `Some(5)` (all tiers).
///
/// # Returns
///
/// A `Vec<ConvergenceSuggestion>` sorted Authoritative → Advisory →
/// Informational.
pub fn compute_convergence_suggestions(
    local_observations: &[Observation],
    remote_claims: &[AuthoredClaim],
    max_suggestion_tier: Option<u8>,
) -> Vec<ConvergenceSuggestion> {
    let max_tier = max_suggestion_tier.unwrap_or(5);

    // Pre-compute the tier number for every remote claim once.
    let claim_tiers: Vec<u8> =
        remote_claims.iter().map(|c| tier_to_number(&c.authority_tier)).collect();

    // Deduplication key: (concept_path, predicate, file, line) → suggestion.
    // We keep only the suggestion driven by the most authoritative claim.
    let mut dedup: HashMap<(String, String, String, usize), ConvergenceSuggestion> = HashMap::new();

    for obs in local_observations {
        // Count all remote claims that share this (concept_path, predicate) —
        // used for `matching_claims_count` regardless of whether they differ.
        let matching_count = remote_claims
            .iter()
            .filter(|c| c.concept_path == obs.concept_path && c.predicate == obs.predicate)
            .count();

        if matching_count == 0 {
            continue;
        }

        // Among matching claims, find the most authoritative one that differs
        // and is within the tier limit.
        let best_match = remote_claims
            .iter()
            .zip(claim_tiers.iter())
            .filter(|(c, tier)| {
                c.concept_path == obs.concept_path
                    && c.predicate == obs.predicate
                    && **tier <= max_tier
                    && values_differ(&obs.value, &c.value)
            })
            .min_by_key(|(_, tier)| **tier);

        let (driving_claim, org_tier) = match best_match {
            Some((claim, tier)) => (claim, *tier),
            None => continue, // no differing claim within tier limit
        };

        let severity = ConvergenceSeverity::from_tier(org_tier);
        let suggestion = ConvergenceSuggestion {
            concept_path: obs.concept_path.clone(),
            predicate: obs.predicate.clone(),
            local_value: object_value_to_string(&obs.value),
            org_value: authored_value_to_string(&driving_claim.value),
            org_tier,
            org_tier_name: tier_number_to_name(org_tier).to_string(),
            matching_claims_count: matching_count,
            driving_claim: Some(DriveClaimSummary {
                claim_id: driving_claim.id.clone(),
                invariant: driving_claim.invariant.clone(),
                consequence: driving_claim.consequence.clone(),
                provenance: driving_claim.provenance.clone(),
                evidence: driving_claim.evidence.clone(),
            }),
            severity,
            file: obs.file.clone(),
            line: obs.line,
        };

        let key = (obs.concept_path.clone(), obs.predicate.clone(), obs.file.clone(), obs.line);

        // Keep only the most authoritative suggestion (lowest tier = lower
        // severity_order value).
        let replace = dedup
            .get(&key)
            .map(|existing| {
                severity_order(&suggestion.severity) < severity_order(&existing.severity)
            })
            .unwrap_or(true);

        if replace {
            dedup.insert(key, suggestion);
        }
    }

    let mut results: Vec<ConvergenceSuggestion> = dedup.into_values().collect();

    // Sort: Authoritative first, then Advisory, then Informational.
    // Within the same severity bucket, sort by (concept_path, predicate, file,
    // line) for deterministic output.
    results.sort_by(|a, b| {
        severity_order(&a.severity)
            .cmp(&severity_order(&b.severity))
            .then_with(|| a.concept_path.cmp(&b.concept_path))
            .then_with(|| a.predicate.cmp(&b.predicate))
            .then_with(|| a.file.cmp(&b.file))
            .then_with(|| a.line.cmp(&b.line))
    });

    results
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::authored_claim::{ClaimStatus, ComparisonMode};

    /// Build an active, equality-mode claim with the given subject, expected
    /// value and authority tier label. All other fields get fixed test values.
    fn make_claim(
        id: &str,
        concept_path: &str,
        predicate: &str,
        value: AuthoredValue,
        tier: &str,
    ) -> AuthoredClaim {
        AuthoredClaim {
            id: id.to_string(),
            concept_path: concept_path.to_string(),
            predicate: predicate.to_string(),
            value,
            comparison: ComparisonMode::Equals,
            provenance: format!("test provenance for {id}"),
            invariant: format!("invariant for {id}"),
            consequence: format!("consequence for {id}"),
            authority_tier: tier.to_string(),
            evidence: vec!["test-evidence".to_string()],
            category: "test".to_string(),
            status: ClaimStatus::Active,
            supersedes: None,
            created_by: "test".to_string(),
            created_at: "2025-01-01T00:00:00Z".to_string(),
            updated_at: None,
        }
    }

    /// Build an observation with the given subject, value and source location.
    /// All other fields get fixed test values.
    fn make_observation(
        concept_path: &str,
        predicate: &str,
        value: ObjectValue,
        file: &str,
        line: usize,
    ) -> Observation {
        Observation {
            concept_path: concept_path.to_string(),
            predicate: predicate.to_string(),
            value,
            file: file.to_string(),
            line,
            matched_text: "test match".to_string(),
            confidence: 1.0,
            description: "test observation".to_string(),
        }
    }

    // -----------------------------------------------------------------------
    // Test: boolean divergence is detected
    // -----------------------------------------------------------------------

    #[test]
    fn test_boolean_divergence_produces_suggestion() {
        let observations = vec![make_observation(
            "code://rust/tls/cert_verification",
            "enabled",
            ObjectValue::Boolean(false),
            "src/client.rs",
            42,
        )];

        let claims = vec![make_claim(
            "tls-cert-verify-001",
            "code://rust/tls/cert_verification",
            "enabled",
            AuthoredValue::Bool(true),
            "expert",
        )];

        let suggestions = compute_convergence_suggestions(&observations, &claims, None);

        assert_eq!(suggestions.len(), 1);
        let s = &suggestions[0];
        assert_eq!(s.concept_path, "code://rust/tls/cert_verification");
        assert_eq!(s.predicate, "enabled");
        assert_eq!(s.local_value, "false");
        assert_eq!(s.org_value, "true");
        assert_eq!(s.org_tier, 3);
        assert_eq!(s.org_tier_name, "Expert");
        assert_eq!(s.severity, ConvergenceSeverity::Advisory);
        assert_eq!(s.file, "src/client.rs");
        assert_eq!(s.line, 42);

        let dc = s.driving_claim.as_ref().expect("driving claim should be present");
        assert_eq!(dc.claim_id, "tls-cert-verify-001");
    }

    // -----------------------------------------------------------------------
    // Test: no suggestion when values agree
    // -----------------------------------------------------------------------

    #[test]
    fn test_matching_values_produce_no_suggestion() {
        let observations = vec![make_observation(
            "code://rust/tls/cert_verification",
            "enabled",
            ObjectValue::Boolean(true),
            "src/client.rs",
            10,
        )];

        let claims = vec![make_claim(
            "tls-cert-verify-001",
            "code://rust/tls/cert_verification",
            "enabled",
            AuthoredValue::Bool(true),
            "expert",
        )];

        let suggestions = compute_convergence_suggestions(&observations, &claims, None);
        assert!(suggestions.is_empty(), "no suggestion when values agree");
    }

    // -----------------------------------------------------------------------
    // Test: max_suggestion_tier filters out claims above the limit
    // -----------------------------------------------------------------------

    #[test]
    fn test_max_suggestion_tier_filters_high_tier_claims() {
        let observations = vec![make_observation(
            "code://go/http/timeout",
            "set",
            ObjectValue::Boolean(false),
            "main.go",
            7,
        )];

        // Community claim (tier 4) — should be suppressed when max_tier is 3.
        let claims = vec![make_claim(
            "http-timeout-001",
            "code://go/http/timeout",
            "set",
            AuthoredValue::Bool(true),
            "community",
        )];

        let suggestions = compute_convergence_suggestions(&observations, &claims, Some(3));
        assert!(
            suggestions.is_empty(),
            "community-tier claim should be suppressed when max_tier=3"
        );

        // With no tier limit, the suggestion should appear.
        let suggestions_all = compute_convergence_suggestions(&observations, &claims, None);
        assert_eq!(suggestions_all.len(), 1);
        assert_eq!(suggestions_all[0].severity, ConvergenceSeverity::Informational);
    }

    // -----------------------------------------------------------------------
    // Test: the most authoritative differing claim drives the suggestion
    // -----------------------------------------------------------------------

    #[test]
    fn test_deduplication_keeps_highest_authority() {
        // Same observation targeted by two conflicting remote claims at different
        // tiers. Only the most authoritative (lowest tier) should survive.
        let observations = vec![make_observation(
            "code://rust/crypto/hash_algorithm",
            "value",
            ObjectValue::Text("md5".to_string()),
            "src/crypto.rs",
            5,
        )];

        let claims = vec![
            make_claim(
                "crypto-hash-community-001",
                "code://rust/crypto/hash_algorithm",
                "value",
                AuthoredValue::Text("sha256".to_string()),
                "community", // tier 4
            ),
            make_claim(
                "crypto-hash-regulatory-001",
                "code://rust/crypto/hash_algorithm",
                "value",
                AuthoredValue::Text("sha256".to_string()),
                "regulatory", // tier 0 — should win
            ),
        ];

        let suggestions = compute_convergence_suggestions(&observations, &claims, None);

        assert_eq!(suggestions.len(), 1, "should be deduplicated to one suggestion");
        let s = &suggestions[0];
        assert_eq!(s.org_tier, 0, "most authoritative (regulatory, tier 0) should drive");
        assert_eq!(s.severity, ConvergenceSeverity::Authoritative);

        let dc = s.driving_claim.as_ref().expect("driving claim present");
        assert_eq!(dc.claim_id, "crypto-hash-regulatory-001");

        // matching_claims_count reflects ALL claims with this concept_path+predicate.
        assert_eq!(s.matching_claims_count, 2);
    }

    // -----------------------------------------------------------------------
    // Test: observations sharing (concept_path, predicate, file, line)
    // collapse to a single suggestion, the most authoritative one
    // -----------------------------------------------------------------------

    #[test]
    fn test_duplicate_observations_collapse_to_most_authoritative() {
        let claims = vec![
            make_claim(
                "crypto-hash-regulatory-001",
                "code://rust/crypto/hash_algorithm",
                "value",
                AuthoredValue::Text("sha256".to_string()),
                "regulatory", // tier 0
            ),
            make_claim(
                "crypto-hash-community-001",
                "code://rust/crypto/hash_algorithm",
                "value",
                AuthoredValue::Text("sha512".to_string()),
                "community", // tier 4
            ),
        ];

        // Agrees with the regulatory claim, so only the community claim differs.
        let weak = make_observation(
            "code://rust/crypto/hash_algorithm",
            "value",
            ObjectValue::Text("sha256".to_string()),
            "src/crypto.rs",
            5,
        );
        // Differs from both claims, so the regulatory claim drives.
        let strong = make_observation(
            "code://rust/crypto/hash_algorithm",
            "value",
            ObjectValue::Text("md5".to_string()),
            "src/crypto.rs",
            5,
        );

        // The outcome must not depend on the order of the observations.
        for observations in [vec![weak.clone(), strong.clone()], vec![strong, weak]] {
            let suggestions = compute_convergence_suggestions(&observations, &claims, None);

            assert_eq!(suggestions.len(), 1, "same location must yield one suggestion");
            let s = &suggestions[0];
            assert_eq!(s.org_tier, 0);
            assert_eq!(s.severity, ConvergenceSeverity::Authoritative);
            assert_eq!(s.local_value, "md5");
            assert_eq!(s.org_value, "sha256");
            assert_eq!(s.matching_claims_count, 2);
        }
    }

    #[test]
    fn test_duplicate_observations_with_equal_tier_keep_first() {
        let claims = vec![make_claim(
            "crypto-hash-regulatory-001",
            "code://rust/crypto/hash_algorithm",
            "value",
            AuthoredValue::Text("sha256".to_string()),
            "regulatory",
        )];

        let observations = vec![
            make_observation(
                "code://rust/crypto/hash_algorithm",
                "value",
                ObjectValue::Text("md5".to_string()),
                "src/crypto.rs",
                5,
            ),
            make_observation(
                "code://rust/crypto/hash_algorithm",
                "value",
                ObjectValue::Text("sha1".to_string()),
                "src/crypto.rs",
                5,
            ),
        ];

        let suggestions = compute_convergence_suggestions(&observations, &claims, None);

        assert_eq!(suggestions.len(), 1);
        assert_eq!(suggestions[0].local_value, "md5", "earliest observation wins a tie");
    }

    // -----------------------------------------------------------------------
    // Test: sort order — Authoritative before Advisory before Informational
    // -----------------------------------------------------------------------

    #[test]
    fn test_sort_order_authoritative_first() {
        let observations = vec![
            make_observation(
                "code://go/http/timeout",
                "set",
                ObjectValue::Boolean(false),
                "main.go",
                1,
            ),
            make_observation(
                "code://rust/tls/version",
                "min_version",
                ObjectValue::Text("tls1.0".to_string()),
                "src/tls.rs",
                10,
            ),
            make_observation(
                "code://python/logging/level",
                "value",
                ObjectValue::Text("DEBUG".to_string()),
                "app.py",
                3,
            ),
        ];

        let claims = vec![
            // community → Informational
            make_claim(
                "http-timeout-001",
                "code://go/http/timeout",
                "set",
                AuthoredValue::Bool(true),
                "community",
            ),
            // clinical → Authoritative
            make_claim(
                "tls-version-001",
                "code://rust/tls/version",
                "min_version",
                AuthoredValue::Text("tls1.2".to_string()),
                "clinical",
            ),
            // expert → Advisory
            make_claim(
                "logging-level-001",
                "code://python/logging/level",
                "value",
                AuthoredValue::Text("INFO".to_string()),
                "expert",
            ),
        ];

        let suggestions = compute_convergence_suggestions(&observations, &claims, None);

        assert_eq!(suggestions.len(), 3);
        assert_eq!(
            suggestions[0].severity,
            ConvergenceSeverity::Authoritative,
            "first item must be Authoritative"
        );
        assert_eq!(
            suggestions[1].severity,
            ConvergenceSeverity::Advisory,
            "second item must be Advisory"
        );
        assert_eq!(
            suggestions[2].severity,
            ConvergenceSeverity::Informational,
            "third item must be Informational"
        );
    }

    // -----------------------------------------------------------------------
    // Test: number comparison uses epsilon, not exact equality
    // -----------------------------------------------------------------------

    #[test]
    fn test_number_comparison_with_epsilon() {
        // Identical values should not trigger a suggestion.
        let observations_same = vec![make_observation(
            "code://rust/pool/max_size",
            "value",
            ObjectValue::Number(50.0),
            "src/pool.rs",
            1,
        )];
        let claims = vec![make_claim(
            "pool-max-001",
            "code://rust/pool/max_size",
            "value",
            AuthoredValue::Number(50.0),
            "expert",
        )];
        let suggestions = compute_convergence_suggestions(&observations_same, &claims, None);
        assert!(suggestions.is_empty(), "identical numbers must not diverge");

        // Different values (beyond epsilon) should trigger a suggestion.
        let observations_diff = vec![make_observation(
            "code://rust/pool/max_size",
            "value",
            ObjectValue::Number(25.0),
            "src/pool.rs",
            1,
        )];
        let suggestions_diff = compute_convergence_suggestions(&observations_diff, &claims, None);
        assert_eq!(suggestions_diff.len(), 1);
        assert_eq!(suggestions_diff[0].local_value, "25");
        assert_eq!(suggestions_diff[0].org_value, "50");
    }

    #[test]
    fn test_number_comparison_tolerates_rounding_noise() {
        // 0.1 + 0.2 is not bit-identical to 0.3, but the gap is below
        // f64::EPSILON, so the two must be treated as the same value.
        let noisy = 0.1_f64 + 0.2_f64;
        assert!(noisy != 0.3, "precondition: the inputs are not exactly equal");

        let observations = vec![make_observation(
            "code://rust/retry/backoff",
            "value",
            ObjectValue::Number(noisy),
            "src/retry.rs",
            1,
        )];
        let claims = vec![make_claim(
            "retry-backoff-001",
            "code://rust/retry/backoff",
            "value",
            AuthoredValue::Number(0.3),
            "expert",
        )];

        let suggestions = compute_convergence_suggestions(&observations, &claims, None);
        assert!(suggestions.is_empty(), "sub-epsilon difference must not diverge");
    }

    // -----------------------------------------------------------------------
    // Test: cross-type comparison always differs
    // -----------------------------------------------------------------------

    #[test]
    fn test_cross_type_comparison_always_differs() {
        let observations = vec![make_observation(
            "code://rust/flag",
            "enabled",
            ObjectValue::Text("true".to_string()), // text, not bool
            "src/lib.rs",
            1,
        )];
        let claims = vec![make_claim(
            "flag-001",
            "code://rust/flag",
            "enabled",
            AuthoredValue::Bool(true), // bool
            "expert",
        )];

        // Text "true" vs Bool(true) are cross-type — should differ.
        let suggestions = compute_convergence_suggestions(&observations, &claims, None);
        assert_eq!(suggestions.len(), 1, "cross-type should always differ");
    }

    // -----------------------------------------------------------------------
    // Test: tier labels are case-insensitive; unknown labels rank last
    // -----------------------------------------------------------------------

    #[test]
    fn test_tier_label_normalisation() {
        let observations = vec![make_observation(
            "code://rust/tls/cert_verification",
            "enabled",
            ObjectValue::Boolean(false),
            "src/client.rs",
            1,
        )];
        let claim_with_tier = |tier: &str| {
            vec![make_claim(
                "tls-cert-verify-001",
                "code://rust/tls/cert_verification",
                "enabled",
                AuthoredValue::Bool(true),
                tier,
            )]
        };

        // Upper-case label is recognised.
        let upper = compute_convergence_suggestions(&observations, &claim_with_tier("REGULATORY"), None);
        assert_eq!(upper.len(), 1);
        assert_eq!(upper[0].org_tier, 0);
        assert_eq!(upper[0].org_tier_name, "Regulatory");
        assert_eq!(upper[0].severity, ConvergenceSeverity::Authoritative);

        // team_policy shares tier 2 with observational.
        let policy = compute_convergence_suggestions(&observations, &claim_with_tier("team_policy"), None);
        assert_eq!(policy.len(), 1);
        assert_eq!(policy[0].org_tier, 2);
        assert_eq!(policy[0].org_tier_name, "Observational");

        // Unknown label falls back to tier 5 and is included by default...
        let unknown_claims = claim_with_tier("folklore");
        let unknown = compute_convergence_suggestions(&observations, &unknown_claims, None);
        assert_eq!(unknown.len(), 1);
        assert_eq!(unknown[0].org_tier, 5);
        assert_eq!(unknown[0].org_tier_name, "Anecdotal");

        // ...but is filtered out by any stricter tier limit.
        let limited = compute_convergence_suggestions(&observations, &unknown_claims, Some(4));
        assert!(limited.is_empty(), "tier-5 claim must be suppressed when max_tier=4");
    }

    // -----------------------------------------------------------------------
    // Test: empty inputs produce no suggestions
    // -----------------------------------------------------------------------

    #[test]
    fn test_empty_inputs() {
        let suggestions = compute_convergence_suggestions(&[], &[], None);
        assert!(suggestions.is_empty());

        let obs = vec![make_observation(
            "code://rust/flag",
            "enabled",
            ObjectValue::Boolean(true),
            "src/lib.rs",
            1,
        )];
        let suggestions = compute_convergence_suggestions(&obs, &[], None);
        assert!(suggestions.is_empty(), "no claims means no suggestions");

        let claims = vec![make_claim(
            "flag-001",
            "code://rust/flag",
            "enabled",
            AuthoredValue::Bool(true),
            "expert",
        )];
        let suggestions = compute_convergence_suggestions(&[], &claims, None);
        assert!(suggestions.is_empty(), "no observations means no suggestions");
    }
}