//! Convergence engine: compare local observations against remote org patterns. //! //! This module answers one question at read time: "Does this project's code //! agree with what the rest of the org has decided?" //! //! The engine is pure — no I/O, no mutation. Feed it a slice of `Observation`s //! produced by local extractors and a slice of `AuthoredClaim`s fetched from a //! remote StemeDB instance, and it returns `ConvergenceSuggestion`s wherever the //! two disagree. //! //! # Data flow //! //! ```text //! local scan → [Observation, ...] //! │ //! ▼ //! compute_convergence_suggestions() //! │ //! remote fetch → [AuthoredClaim, ...] //! │ //! ▼ //! [ConvergenceSuggestion, ...] (sorted: Authoritative → Advisory → Informational) //! ``` use std::collections::HashMap; use stemedb_core::types::ObjectValue; use crate::types::authored_claim::{AuthoredClaim, AuthoredValue}; use crate::types::convergence::{ConvergenceSeverity, ConvergenceSuggestion, DriveClaimSummary}; use crate::types::Observation; // --------------------------------------------------------------------------- // Private helpers // --------------------------------------------------------------------------- /// Convert an `ObjectValue` to a human-readable string. fn object_value_to_string(val: &ObjectValue) -> String { match val { ObjectValue::Boolean(b) => b.to_string(), ObjectValue::Number(n) => n.to_string(), ObjectValue::Text(s) => s.clone(), ObjectValue::Reference(r) => r.clone(), } } /// Convert an `AuthoredValue` to a human-readable string. fn authored_value_to_string(val: &AuthoredValue) -> String { match val { AuthoredValue::Bool(b) => b.to_string(), AuthoredValue::Number(n) => n.to_string(), AuthoredValue::Text(s) => s.clone(), } } /// Map an authority tier name to its integer number (0–5). /// /// This is a local copy so that `convergence` does not need to reach into the /// private `types::promotion` module. /// /// | Tier name | Number | /// |----------------|--------| /// | regulatory | 0 | /// | clinical | 1 | /// | observational | 2 | /// | team_policy | 2 | /// | expert | 3 | /// | community | 4 | /// | anecdotal / * | 5 | fn tier_to_number(tier: &str) -> u8 { match tier.to_lowercase().as_str() { "regulatory" => 0, "clinical" => 1, "observational" | "team_policy" => 2, "expert" => 3, "community" => 4, _ => 5, } } /// Format an authority tier number as a human-readable name. fn tier_number_to_name(tier: u8) -> &'static str { match tier { 0 => "Regulatory", 1 => "Clinical", 2 => "Observational", 3 => "Expert", 4 => "Community", _ => "Anecdotal", } } /// Returns `true` when an `ObjectValue` and an `AuthoredValue` represent /// different logical values. /// /// Cross-type comparisons (e.g. `Boolean` vs `Text`) always differ. fn values_differ(local: &ObjectValue, remote: &AuthoredValue) -> bool { match (local, remote) { (ObjectValue::Boolean(b), AuthoredValue::Bool(expected)) => b != expected, (ObjectValue::Number(n), AuthoredValue::Number(expected)) => { (n - expected).abs() > f64::EPSILON } (ObjectValue::Text(s), AuthoredValue::Text(expected)) => s != expected, // Cross-type: always differ _ => true, } } /// Ordering index for severity — lower is more authoritative. fn severity_order(s: &ConvergenceSeverity) -> u8 { match s { ConvergenceSeverity::Authoritative => 0, ConvergenceSeverity::Advisory => 1, ConvergenceSeverity::Informational => 2, } } // --------------------------------------------------------------------------- // Public API // --------------------------------------------------------------------------- /// Compare a slice of local observations against a slice of remote org claims. /// /// Returns convergence suggestions wherever the local code differs from the /// org pattern, sorted by severity (Authoritative first, then Advisory, then /// Informational). /// /// A suggestion is generated when **all** of the following hold: /// 1. A remote claim shares the same `concept_path` **and** `predicate` as the /// local observation. /// 2. The local observed value differs from the remote claim's expected value. /// 3. The remote claim's tier number is `<= max_suggestion_tier` (defaults to /// `5`, meaning all tiers are included). /// /// When multiple claims match the same `(concept_path, predicate)` in the /// remote, only the most authoritative (lowest tier number) claim drives the /// suggestion. A single suggestion is emitted per /// `(concept_path, predicate, file, line)` tuple. /// /// # Arguments /// /// * `local_observations` – observations produced by local extractors. /// * `remote_claims` – org claims fetched from the remote StemeDB instance. /// * `max_suggestion_tier` – optional upper bound on the tier of claims that /// generate suggestions. `None` is equivalent to `Some(5)` (all tiers). /// /// # Returns /// /// A `Vec` sorted Authoritative → Advisory → /// Informational. pub fn compute_convergence_suggestions( local_observations: &[Observation], remote_claims: &[AuthoredClaim], max_suggestion_tier: Option, ) -> Vec { let max_tier = max_suggestion_tier.unwrap_or(5); // Pre-compute the tier number for every remote claim once. let claim_tiers: Vec = remote_claims.iter().map(|c| tier_to_number(&c.authority_tier)).collect(); // Deduplication key: (concept_path, predicate, file, line) → suggestion. // We keep only the suggestion driven by the most authoritative claim. let mut dedup: HashMap<(String, String, String, usize), ConvergenceSuggestion> = HashMap::new(); for obs in local_observations { // Count all remote claims that share this (concept_path, predicate) — // used for `matching_claims_count` regardless of whether they differ. let matching_count = remote_claims .iter() .filter(|c| c.concept_path == obs.concept_path && c.predicate == obs.predicate) .count(); if matching_count == 0 { continue; } // Among matching claims, find the most authoritative one that differs // and is within the tier limit. let best_match = remote_claims .iter() .zip(claim_tiers.iter()) .filter(|(c, tier)| { c.concept_path == obs.concept_path && c.predicate == obs.predicate && **tier <= max_tier && values_differ(&obs.value, &c.value) }) .min_by_key(|(_, tier)| **tier); let (driving_claim, org_tier) = match best_match { Some((claim, tier)) => (claim, *tier), None => continue, // no differing claim within tier limit }; let severity = ConvergenceSeverity::from_tier(org_tier); let suggestion = ConvergenceSuggestion { concept_path: obs.concept_path.clone(), predicate: obs.predicate.clone(), local_value: object_value_to_string(&obs.value), org_value: authored_value_to_string(&driving_claim.value), org_tier, org_tier_name: tier_number_to_name(org_tier).to_string(), matching_claims_count: matching_count, driving_claim: Some(DriveClaimSummary { claim_id: driving_claim.id.clone(), invariant: driving_claim.invariant.clone(), consequence: driving_claim.consequence.clone(), provenance: driving_claim.provenance.clone(), evidence: driving_claim.evidence.clone(), }), severity, file: obs.file.clone(), line: obs.line, }; let key = (obs.concept_path.clone(), obs.predicate.clone(), obs.file.clone(), obs.line); // Keep only the most authoritative suggestion (lowest tier = lower // severity_order value). let replace = dedup .get(&key) .map(|existing| { severity_order(&suggestion.severity) < severity_order(&existing.severity) }) .unwrap_or(true); if replace { dedup.insert(key, suggestion); } } let mut results: Vec = dedup.into_values().collect(); // Sort: Authoritative first, then Advisory, then Informational. // Within the same severity bucket, sort by (concept_path, predicate, file, // line) for deterministic output. results.sort_by(|a, b| { severity_order(&a.severity) .cmp(&severity_order(&b.severity)) .then_with(|| a.concept_path.cmp(&b.concept_path)) .then_with(|| a.predicate.cmp(&b.predicate)) .then_with(|| a.file.cmp(&b.file)) .then_with(|| a.line.cmp(&b.line)) }); results } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; use crate::types::authored_claim::{ClaimStatus, ComparisonMode}; fn make_claim( id: &str, concept_path: &str, predicate: &str, value: AuthoredValue, tier: &str, ) -> AuthoredClaim { AuthoredClaim { id: id.to_string(), concept_path: concept_path.to_string(), predicate: predicate.to_string(), value, comparison: ComparisonMode::Equals, provenance: format!("test provenance for {id}"), invariant: format!("invariant for {id}"), consequence: format!("consequence for {id}"), authority_tier: tier.to_string(), evidence: vec!["test-evidence".to_string()], category: "test".to_string(), status: ClaimStatus::Active, supersedes: None, created_by: "test".to_string(), created_at: "2025-01-01T00:00:00Z".to_string(), updated_at: None, } } fn make_observation( concept_path: &str, predicate: &str, value: ObjectValue, file: &str, line: usize, ) -> Observation { Observation { concept_path: concept_path.to_string(), predicate: predicate.to_string(), value, file: file.to_string(), line, matched_text: "test match".to_string(), confidence: 1.0, description: "test observation".to_string(), } } // ----------------------------------------------------------------------- // Test: boolean divergence is detected // ----------------------------------------------------------------------- #[test] fn test_boolean_divergence_produces_suggestion() { let observations = vec![make_observation( "code://rust/tls/cert_verification", "enabled", ObjectValue::Boolean(false), "src/client.rs", 42, )]; let claims = vec![make_claim( "tls-cert-verify-001", "code://rust/tls/cert_verification", "enabled", AuthoredValue::Bool(true), "expert", )]; let suggestions = compute_convergence_suggestions(&observations, &claims, None); assert_eq!(suggestions.len(), 1); let s = &suggestions[0]; assert_eq!(s.concept_path, "code://rust/tls/cert_verification"); assert_eq!(s.predicate, "enabled"); assert_eq!(s.local_value, "false"); assert_eq!(s.org_value, "true"); assert_eq!(s.org_tier, 3); assert_eq!(s.org_tier_name, "Expert"); assert_eq!(s.severity, ConvergenceSeverity::Advisory); assert_eq!(s.file, "src/client.rs"); assert_eq!(s.line, 42); let dc = s.driving_claim.as_ref().expect("driving claim should be present"); assert_eq!(dc.claim_id, "tls-cert-verify-001"); } // ----------------------------------------------------------------------- // Test: no suggestion when values agree // ----------------------------------------------------------------------- #[test] fn test_matching_values_produce_no_suggestion() { let observations = vec![make_observation( "code://rust/tls/cert_verification", "enabled", ObjectValue::Boolean(true), "src/client.rs", 10, )]; let claims = vec![make_claim( "tls-cert-verify-001", "code://rust/tls/cert_verification", "enabled", AuthoredValue::Bool(true), "expert", )]; let suggestions = compute_convergence_suggestions(&observations, &claims, None); assert!(suggestions.is_empty(), "no suggestion when values agree"); } // ----------------------------------------------------------------------- // Test: max_suggestion_tier filters out claims above the limit // ----------------------------------------------------------------------- #[test] fn test_max_suggestion_tier_filters_high_tier_claims() { let observations = vec![make_observation( "code://go/http/timeout", "set", ObjectValue::Boolean(false), "main.go", 7, )]; // Community claim (tier 4) — should be suppressed when max_tier is 3. let claims = vec![make_claim( "http-timeout-001", "code://go/http/timeout", "set", AuthoredValue::Bool(true), "community", )]; let suggestions = compute_convergence_suggestions(&observations, &claims, Some(3)); assert!( suggestions.is_empty(), "community-tier claim should be suppressed when max_tier=3" ); // With no tier limit, the suggestion should appear. let suggestions_all = compute_convergence_suggestions(&observations, &claims, None); assert_eq!(suggestions_all.len(), 1); assert_eq!(suggestions_all[0].severity, ConvergenceSeverity::Informational); } // ----------------------------------------------------------------------- // Test: deduplication keeps the most authoritative suggestion // ----------------------------------------------------------------------- #[test] fn test_deduplication_keeps_highest_authority() { // Same observation targeted by two conflicting remote claims at different // tiers. Only the most authoritative (lowest tier) should survive. let observations = vec![make_observation( "code://rust/crypto/hash_algorithm", "value", ObjectValue::Text("md5".to_string()), "src/crypto.rs", 5, )]; let claims = vec![ make_claim( "crypto-hash-community-001", "code://rust/crypto/hash_algorithm", "value", AuthoredValue::Text("sha256".to_string()), "community", // tier 4 ), make_claim( "crypto-hash-regulatory-001", "code://rust/crypto/hash_algorithm", "value", AuthoredValue::Text("sha256".to_string()), "regulatory", // tier 0 — should win ), ]; let suggestions = compute_convergence_suggestions(&observations, &claims, None); assert_eq!(suggestions.len(), 1, "should be deduplicated to one suggestion"); let s = &suggestions[0]; assert_eq!(s.org_tier, 0, "most authoritative (regulatory, tier 0) should drive"); assert_eq!(s.severity, ConvergenceSeverity::Authoritative); let dc = s.driving_claim.as_ref().expect("driving claim present"); assert_eq!(dc.claim_id, "crypto-hash-regulatory-001"); // matching_claims_count reflects ALL claims with this concept_path+predicate. assert_eq!(s.matching_claims_count, 2); } // ----------------------------------------------------------------------- // Test: sort order — Authoritative before Advisory before Informational // ----------------------------------------------------------------------- #[test] fn test_sort_order_authoritative_first() { let observations = vec![ make_observation( "code://go/http/timeout", "set", ObjectValue::Boolean(false), "main.go", 1, ), make_observation( "code://rust/tls/version", "min_version", ObjectValue::Text("tls1.0".to_string()), "src/tls.rs", 10, ), make_observation( "code://python/logging/level", "value", ObjectValue::Text("DEBUG".to_string()), "app.py", 3, ), ]; let claims = vec![ // community → Informational make_claim( "http-timeout-001", "code://go/http/timeout", "set", AuthoredValue::Bool(true), "community", ), // clinical → Authoritative make_claim( "tls-version-001", "code://rust/tls/version", "min_version", AuthoredValue::Text("tls1.2".to_string()), "clinical", ), // expert → Advisory make_claim( "logging-level-001", "code://python/logging/level", "value", AuthoredValue::Text("INFO".to_string()), "expert", ), ]; let suggestions = compute_convergence_suggestions(&observations, &claims, None); assert_eq!(suggestions.len(), 3); assert_eq!( suggestions[0].severity, ConvergenceSeverity::Authoritative, "first item must be Authoritative" ); assert_eq!( suggestions[1].severity, ConvergenceSeverity::Advisory, "second item must be Advisory" ); assert_eq!( suggestions[2].severity, ConvergenceSeverity::Informational, "third item must be Informational" ); } // ----------------------------------------------------------------------- // Test: number comparison uses epsilon, not exact equality // ----------------------------------------------------------------------- #[test] fn test_number_comparison_with_epsilon() { // Identical values should not trigger a suggestion. let observations_same = vec![make_observation( "code://rust/pool/max_size", "value", ObjectValue::Number(50.0), "src/pool.rs", 1, )]; let claims = vec![make_claim( "pool-max-001", "code://rust/pool/max_size", "value", AuthoredValue::Number(50.0), "expert", )]; let suggestions = compute_convergence_suggestions(&observations_same, &claims, None); assert!(suggestions.is_empty(), "identical numbers must not diverge"); // Different values (beyond epsilon) should trigger a suggestion. let observations_diff = vec![make_observation( "code://rust/pool/max_size", "value", ObjectValue::Number(25.0), "src/pool.rs", 1, )]; let suggestions_diff = compute_convergence_suggestions(&observations_diff, &claims, None); assert_eq!(suggestions_diff.len(), 1); assert_eq!(suggestions_diff[0].local_value, "25"); assert_eq!(suggestions_diff[0].org_value, "50"); } // ----------------------------------------------------------------------- // Test: cross-type comparison always differs // ----------------------------------------------------------------------- #[test] fn test_cross_type_comparison_always_differs() { let observations = vec![make_observation( "code://rust/flag", "enabled", ObjectValue::Text("true".to_string()), // text, not bool "src/lib.rs", 1, )]; let claims = vec![make_claim( "flag-001", "code://rust/flag", "enabled", AuthoredValue::Bool(true), // bool "expert", )]; // Text "true" vs Bool(true) are cross-type — should differ. let suggestions = compute_convergence_suggestions(&observations, &claims, None); assert_eq!(suggestions.len(), 1, "cross-type should always differ"); } // ----------------------------------------------------------------------- // Test: empty inputs produce no suggestions // ----------------------------------------------------------------------- #[test] fn test_empty_inputs() { let suggestions = compute_convergence_suggestions(&[], &[], None); assert!(suggestions.is_empty()); let obs = vec![make_observation( "code://rust/flag", "enabled", ObjectValue::Boolean(true), "src/lib.rs", 1, )]; let suggestions = compute_convergence_suggestions(&obs, &[], None); assert!(suggestions.is_empty(), "no claims means no suggestions"); let claims = vec![make_claim( "flag-001", "code://rust/flag", "enabled", AuthoredValue::Bool(true), "expert", )]; let suggestions = compute_convergence_suggestions(&[], &claims, None); assert!(suggestions.is_empty(), "no observations means no suggestions"); } }