stemedb/crates/stemedb-lens/src/traits.rs

//! Core Lens trait and types.
//!
//! The Lens trait is the fundamental abstraction for resolving conflicting
//! assertions into a deterministic answer.
//!
//! # Two Kinds of Lenses
//!
//! | Trait | Purpose | Returns |
//! |-------|---------|---------|
//! | `Lens` / `AsyncLens` | Pick a winner | `Resolution` (single assertion) |
//! | `AnalysisLens` | Map conflict landscape | `ConflictAnalysis` (all claims) |
//!
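//! Use `Lens` when you need "the answer". Use `AnalysisLens` when you need
//! "Trust but Verify" - showing users what's contested.
//!
//! This module also defines `LayeredLens`, which reports per-tier consensus
//! plus an overall winner as a `LayeredResolution`.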

use async_trait::async_trait;
use stemedb_core::types::{Assertion, ConflictAnalysis, SourceClass};

/// The result of a Lens resolution.
///
/// Bundles the selected assertion (if any) with metadata describing how many
/// candidates were considered and how decisive the selection was. The struct
/// itself performs no validation: the numeric fields hold whatever the
/// producing lens supplied.
#[derive(Debug, Clone)]
pub struct Resolution {
    /// The winning assertion, if any candidates were provided.
    /// `None` for the empty resolution (see [`Resolution::empty`]).
    pub winner: Option<Assertion>,

    /// Number of candidates that were considered.
    pub candidates_count: usize,

    /// Confidence in the resolution (0.0 to 1.0).
    /// Higher values indicate stronger consensus or more decisive selection.
    pub resolution_confidence: f32,

    /// Degree of disagreement among candidates (0.0 = full agreement, 1.0 = max conflict).
    /// Computed as normalized variance of candidate confidence values
    /// (see [`compute_conflict_score`] for the canonical definition).
    /// This is the numeric basis for the "disagreement is the information" thesis.
    pub conflict_score: f32,
}

impl Resolution {
    /// Create an empty resolution (no candidates).
    pub fn empty() -> Self {
        Self { winner: None, candidates_count: 0, resolution_confidence: 0.0, conflict_score: 0.0 }
    }

    /// Create a resolution with a single winner.
    pub fn with_winner(
        winner: Assertion,
        candidates_count: usize,
        confidence: f32,
        conflict_score: f32,
    ) -> Self {
        Self {
            winner: Some(winner),
            candidates_count,
            resolution_confidence: confidence,
            conflict_score,
        }
    }
}

/// Compute conflict score from candidate assertion confidences.
///
/// This is the **canonical definition** of conflict score in Episteme.
/// Other modules should reference this documentation.
///
/// # Algorithm
///
/// The score is the population variance of the candidates' `confidence`
/// values, scaled into the [0.0, 1.0] range:
/// - 0 or 1 candidates: 0.0 (no conflict possible)
/// - All same confidence: 0.0 (unanimous agreement)
/// - Max variance (e.g., 0.0 vs 1.0): 1.0 (maximum disagreement)
///
/// # Normalization
///
/// For values in [0, 1] the variance peaks at 0.25 (half the values at 0 and
/// half at 1), so the variance is multiplied by 4. The result is additionally
/// capped at 1.0, which only matters when confidences fall outside [0, 1].
///
/// # Edge Cases
///
/// - NaN confidences: Treated defensively as 0.0 conflict (fail-safe). A
///   single NaN makes the whole computation NaN, so it also yields 0.0.
/// - Empty candidates: Returns 0.0
/// - Single candidate: Returns 0.0 (no disagreement possible)
///
/// # Vision Alignment
///
/// This score enables the "disagreement is the information" thesis:
/// high conflict scores surface uncertainty for user review rather than
/// hiding it behind a confident-looking single answer.
pub fn compute_conflict_score(candidates: &[Assertion]) -> f32 {
    // Fewer than two candidates cannot disagree with each other.
    if candidates.len() < 2 {
        return 0.0;
    }

    let count = candidates.len() as f32;
    // Fresh iterator over the raw confidence values; used for both passes.
    let confidences = || candidates.iter().map(|candidate| candidate.confidence);

    // Pass 1: arithmetic mean.
    let mean = confidences().sum::<f32>() / count;

    // Pass 2: population variance (divide by n, not n - 1).
    let squared_deviations: f32 = confidences().map(|value| (value - mean).powi(2)).sum();
    let variance = squared_deviations / count;

    // Scale so that the [0,1] maximum variance of 0.25 maps to 1.0.
    let normalized = 4.0 * variance;

    // `f32::min` would silently turn NaN into 1.0, so NaN must be handled
    // first: malformed confidences are reported as "no meaningful conflict".
    if normalized.is_nan() {
        0.0
    } else {
        normalized.min(1.0)
    }
}

// ============================================================================
// Layered Resolution Types
// ============================================================================

/// Per-tier resolution result from `LayeredLens`.
///
/// Represents the consensus within a single source class tier.
/// Tiers range from 0 (Regulatory, highest authority) to 5 (Anecdotal, lowest).
///
/// This is a plain data carrier: none of the documented ranges are enforced
/// by the type itself.
#[derive(Debug, Clone)]
pub struct TierResolution {
    /// The tier number (0-5). Lower = higher authority.
    pub tier: u8,

    /// The source class for this tier.
    pub source_class: SourceClass,

    /// The winning assertion from within-tier consensus, if any candidates.
    /// `None` when the tier has no candidates (see [`TierResolution::empty`]).
    pub winner: Option<Assertion>,

    /// Number of candidates in this tier.
    pub candidates_count: usize,

    /// Within-tier conflict score (0.0 = unanimous, 1.0 = max conflict).
    pub conflict_score: f32,

    /// Within-tier resolution confidence (0.0 to 1.0).
    pub resolution_confidence: f32,
}

impl TierResolution {
    /// Create a tier resolution with no candidates.
    pub fn empty(tier: u8, source_class: SourceClass) -> Self {
        Self {
            tier,
            source_class,
            winner: None,
            candidates_count: 0,
            conflict_score: 0.0,
            resolution_confidence: 0.0,
        }
    }
}

/// Multi-tier resolution result from `LayeredLens`.
///
/// Contains per-tier consensus results plus an overall winner.
/// Enables "What does Tier 0 say? What does Tier 5 say?" queries.
///
/// # Cross-Tier Conflict
///
/// The `overall_conflict_score` measures disagreement between tiers:
/// - 0.0: All tiers with winners agree on the same object value
/// - 1.0: Tiers disagree on what the answer should be
///
/// This is different from within-tier conflict (measured in each `TierResolution`).
///
/// The invariants described on the fields below are expectations on the
/// producing lens; this type does not check them.
#[derive(Debug, Clone)]
pub struct LayeredResolution {
    /// Per-tier consensus results, ordered by tier (0 = highest authority first).
    /// Only tiers with at least one candidate are included.
    pub tiers: Vec<TierResolution>,

    /// Overall winner: winner from the highest-authority tier that has candidates.
    /// This is the answer from the most authoritative source class present.
    /// `None` when no tier has candidates (see [`LayeredResolution::empty`]).
    pub overall_winner: Option<Assertion>,

    /// Cross-tier disagreement score (0.0 = tiers agree, 1.0 = tiers disagree).
    /// Measures whether tier winners agree on the same object value.
    pub overall_conflict_score: f32,

    /// Total candidates considered across all tiers.
    pub total_candidates: usize,
}

impl LayeredResolution {
    /// Create an empty layered resolution (no candidates in any tier).
    pub fn empty() -> Self {
        Self {
            tiers: Vec::new(),
            overall_winner: None,
            overall_conflict_score: 0.0,
            total_candidates: 0,
        }
    }
}

/// A LayeredLens resolves conflicts with per-tier consensus.
///
/// Unlike a standard `Lens` which returns a single winner, a `LayeredLens`
/// provides visibility into what each source class tier says.
///
/// # Use Case: Consumer Health
///
/// Query "semaglutide muscle_loss" and see:
/// - Tier 0 (FDA): [no data]
/// - Tier 1 (Clinical): "Significant loss" (12 sources, 0.85 confidence)
/// - Tier 5 (Anecdotal): "Minimal loss" (200 sources, 0.45 confidence)
/// - Overall winner: "Significant loss" (from Tier 1)
/// - Cross-tier conflict: 0.8 (clinical and anecdotal disagree)
///
/// # Contract
///
/// - **Stateless:** LayeredLenses must not maintain internal state.
/// - **Deterministic:** Same input must produce same output.
/// - **Tier-Ordered:** Results are always ordered by tier (0 first).
///
/// Implementors must also be `Send + Sync` (required by the supertrait bounds).
pub trait LayeredLens: Send + Sync {
    /// Resolve candidates with per-tier consensus.
    ///
    /// # Arguments
    /// * `candidates` - All assertions matching the query filters
    ///
    /// # Returns
    /// A `LayeredResolution` with per-tier results and overall winner.
    /// The method is infallible by signature; for empty input,
    /// `LayeredResolution::empty()` is the available "no candidates" value.
    fn resolve_layered(&self, candidates: &[Assertion]) -> LayeredResolution;

    /// Human-readable name of this lens for logging/debugging.
    ///
    /// The return type is `&'static str`, so the name cannot borrow from `self`.
    fn name(&self) -> &'static str;
}

// ============================================================================
// Standard Resolution Types
// ============================================================================

/// A Lens resolves conflicting assertions into a deterministic answer.
///
/// # Contract
///
/// - **Stateless:** Lenses must not maintain internal state.
/// - **Deterministic:** Same input must produce same output.
/// - **Fast:** Runs on every read, avoid allocations where possible.
///
/// Implementors must also be `Send + Sync` (required by the supertrait bounds).
///
/// # Implementation Notes
///
/// Lenses should handle edge cases gracefully:
/// - Empty input: Return `Resolution::empty()`
/// - Single candidate: Return that candidate (trivial resolution)
/// - Ties: Define a consistent tiebreaker (e.g., lowest hash)
pub trait Lens: Send + Sync {
    /// Resolve a set of candidate assertions into a single answer.
    ///
    /// # Arguments
    /// * `candidates` - All assertions matching the query filters
    ///
    /// # Returns
    /// A resolution containing the winning assertion (if any) and metadata.
    /// The method is infallible by signature: it always yields a `Resolution`,
    /// never an error.
    fn resolve(&self, candidates: &[Assertion]) -> Resolution;

    /// Human-readable name of this lens for logging/debugging.
    ///
    /// The return type is `&'static str`, so the name cannot borrow from `self`.
    fn name(&self) -> &'static str;
}

/// An AnalysisLens maps the conflict landscape instead of picking a winner.
///
/// Unlike `Lens` which collapses uncertainty into a single answer, `AnalysisLens`
/// surfaces all competing claims with their relative support. This enables
/// "Trust but Verify" UX where users see disagreement explicitly.
///
/// # Contract
///
/// - **Stateless:** AnalysisLenses must not maintain internal state.
/// - **Deterministic:** Same input and storage state produces same output.
/// - **Complete:** Returns ALL distinct claims, not just the top one.
///
/// Implementors must also be `Send + Sync` (required by the supertrait bounds).
///
/// # When to Use
///
/// Use `AnalysisLens` when:
/// - User needs to see "who disagrees and why"
/// - Fact is contested and hiding conflict would be misleading
/// - Building a "Living Review" dashboard for research
///
/// Use regular `Lens` when:
/// - You need a definitive answer for action
/// - Conflict is resolved elsewhere
/// - Performance is critical (Analysis is more expensive)
// `#[async_trait]` is what allows the `async fn` in this trait definition.
#[async_trait]
pub trait AnalysisLens: Send + Sync {
    /// Analyze a set of candidate assertions and return the conflict landscape.
    ///
    /// Unlike `Lens::resolve`, this method is `async` and must be awaited.
    ///
    /// # Arguments
    /// * `candidates` - All assertions matching the query filters
    ///
    /// # Returns
    /// A `ConflictAnalysis` containing:
    /// - `status`: Unanimous, Agreed, or Contested
    /// - `conflict_score`: 0.0 (unanimous) to 1.0 (chaos)
    /// - `claims`: All distinct claims ranked by support
    async fn analyze(&self, candidates: &[Assertion]) -> ConflictAnalysis;

    /// Human-readable name of this lens for logging/debugging.
    ///
    /// The return type is `&'static str`, so the name cannot borrow from `self`.
    fn name(&self) -> &'static str;
}

#[cfg(test)]
mod tests {
    use super::*;
    use stemedb_core::testing::AssertionBuilder;

    /// Build two candidates, then overwrite their `confidence` fields directly.
    ///
    /// Writing the field after construction lets tests feed unusual values
    /// (NaN, out-of-range) without depending on how the builder treats them.
    fn pair_with_raw_confidences(first: f32, second: f32) -> Vec<Assertion> {
        let mut assertions = vec![
            AssertionBuilder::new().confidence(0.5).build(),
            AssertionBuilder::new().confidence(0.5).build(),
        ];
        assertions[0].confidence = first;
        assertions[1].confidence = second;
        assertions
    }

    /// `Resolution::empty()` has no winner and all-zero metadata.
    #[test]
    fn test_empty_resolution() {
        let resolution = Resolution::empty();
        assert!(resolution.winner.is_none());
        assert_eq!(resolution.candidates_count, 0);
        assert!((resolution.resolution_confidence - 0.0).abs() < f32::EPSILON);
        assert!((resolution.conflict_score - 0.0).abs() < f32::EPSILON);
    }

    /// `Resolution::with_winner` stores every argument in the matching field.
    #[test]
    fn test_with_winner_stores_all_fields() {
        let winner = AssertionBuilder::new().confidence(0.7).build();
        let expected_winner_confidence = winner.confidence;

        let resolution = Resolution::with_winner(winner, 3, 0.8, 0.25);

        let kept = resolution.winner.expect("with_winner must set a winner");
        assert!((kept.confidence - expected_winner_confidence).abs() < f32::EPSILON);
        assert_eq!(resolution.candidates_count, 3);
        assert!((resolution.resolution_confidence - 0.8).abs() < f32::EPSILON);
        assert!((resolution.conflict_score - 0.25).abs() < f32::EPSILON);
    }

    /// `LayeredResolution::empty()` has no tiers, no winner and zeroed metadata.
    #[test]
    fn test_layered_resolution_empty() {
        let layered = LayeredResolution::empty();
        assert!(layered.tiers.is_empty());
        assert!(layered.overall_winner.is_none());
        assert_eq!(layered.total_candidates, 0);
        assert!(layered.overall_conflict_score.abs() < f32::EPSILON);
    }

    #[test]
    fn test_conflict_score_zero_for_empty() {
        let score = compute_conflict_score(&[]);
        assert!((score - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_conflict_score_zero_for_single() {
        let assertion = AssertionBuilder::new().confidence(0.9).build();
        let score = compute_conflict_score(&[assertion]);
        assert!((score - 0.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_conflict_score_zero_for_agreement() {
        // All same confidence = no conflict
        let assertions = vec![
            AssertionBuilder::new().confidence(0.9).build(),
            AssertionBuilder::new().confidence(0.9).build(),
            AssertionBuilder::new().confidence(0.9).build(),
        ];
        let score = compute_conflict_score(&assertions);
        assert!(score < 0.01, "Expected near-zero, got {}", score);
    }

    #[test]
    fn test_conflict_score_high_for_disagreement() {
        // Candidates at 0.1, 0.5, 0.9 = high variance
        let assertions = vec![
            AssertionBuilder::new().confidence(0.1).build(),
            AssertionBuilder::new().confidence(0.5).build(),
            AssertionBuilder::new().confidence(0.9).build(),
        ];
        let score = compute_conflict_score(&assertions);
        assert!(score > 0.3, "Expected high conflict, got {}", score);
    }

    #[test]
    fn test_conflict_score_max_for_extremes() {
        // 0.0 vs 1.0 = maximum disagreement
        let assertions = vec![
            AssertionBuilder::new().confidence(0.0).build(),
            AssertionBuilder::new().confidence(1.0).build(),
        ];
        let score = compute_conflict_score(&assertions);
        assert!((score - 1.0).abs() < 0.01, "Expected ~1.0, got {}", score);
    }

    #[test]
    fn test_conflict_score_handles_nan_defensively() {
        // NaN confidences should result in 0.0 (fail-safe)
        let assertions = pair_with_raw_confidences(f32::NAN, f32::NAN);

        let score = compute_conflict_score(&assertions);
        assert!((score - 0.0).abs() < f32::EPSILON, "Expected 0.0 for NaN, got {}", score);
    }

    /// A single NaN poisons the mean and variance, so the fail-safe applies
    /// even when the other candidate is well-formed.
    #[test]
    fn test_conflict_score_zero_when_any_confidence_is_nan() {
        let assertions = pair_with_raw_confidences(f32::NAN, 0.9);

        let score = compute_conflict_score(&assertions);
        assert!((score - 0.0).abs() < f32::EPSILON, "Expected 0.0 for partial NaN, got {}", score);
    }

    /// Confidences outside [0, 1] can push 4x variance above 1.0; the score
    /// must still be capped at 1.0.
    #[test]
    fn test_conflict_score_clamped_for_out_of_range_confidences() {
        // Mean 0.5, deviations of 1.5 each => variance 2.25 => 4x = 9.0 before the cap.
        let assertions = pair_with_raw_confidences(-1.0, 2.0);

        let score = compute_conflict_score(&assertions);
        assert!((score - 1.0).abs() < f32::EPSILON, "Expected clamp to 1.0, got {}", score);
    }
}