stemedb/crates/stemedb-lens/src/traits.rs
jordan c59066949a feat: Add quickstart "Beyond Hello World" sections with Skeptic and Layered endpoints
- Add Layered() method to Go SDK for per-source-class consensus queries
- Add LayeredQueryParams, LayeredResult, TierResolution types to Go SDK
- Create conflict example demonstrating Skeptic and Layered endpoints
- Update quickstart.md with sections 6 (conflict detection) and 7 (authority tiers)
- Remove tracked Go binary and add data/ to .gitignore

The new quickstart sections demonstrate Episteme's differentiating features:
- Skeptic endpoint shows "Trust but Verify" conflict analysis
- Layered endpoint shows per-tier resolution (Clinical vs Anecdotal)

Note: Pre-existing large files flagged by pre-commit hook (technical debt from prior sessions)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 21:00:59 -07:00

378 lines
13 KiB
Rust

//! Core Lens trait and types.
//!
//! The Lens trait is the fundamental abstraction for resolving conflicting
//! assertions into a deterministic answer.
//!
//! # Three Kinds of Lenses
//!
//! | Trait | Purpose | Returns |
//! |-------|---------|---------|
//! | `Lens` / `AsyncLens` | Pick a winner | `Resolution` (single assertion) |
//! | `LayeredLens` | Per-tier consensus | `LayeredResolution` (per-tier results plus overall winner) |
//! | `AnalysisLens` | Map conflict landscape | `ConflictAnalysis` (all claims) |
//!
//! Use `Lens` when you need "the answer". Use `LayeredLens` when you need to
//! see what each source-class tier says. Use `AnalysisLens` when you need
//! "Trust but Verify" - showing users what's contested.
use async_trait::async_trait;
use stemedb_core::types::{Assertion, ConflictAnalysis, SourceClass};
/// Outcome of running a [`Lens`] over a set of candidate assertions.
///
/// Pairs the selected assertion (if any) with metadata describing how
/// decisive the selection was and how strongly the candidates disagreed.
#[derive(Clone, Debug)]
pub struct Resolution {
    /// The assertion chosen as the answer, provided there were candidates to
    /// choose from; `None` otherwise.
    pub winner: Option<Assertion>,

    /// Number of candidate assertions that were considered.
    pub candidates_count: usize,

    /// Confidence in this resolution, from 0.0 to 1.0.
    ///
    /// A higher value signals stronger consensus or a more decisive selection.
    pub resolution_confidence: f32,

    /// How much the candidates disagree: 0.0 means full agreement, 1.0 means
    /// maximum conflict.
    ///
    /// Computed as the normalized variance of the candidates' confidence
    /// values. This is the numeric basis for the "disagreement is the
    /// information" thesis.
    pub conflict_score: f32,
}
impl Resolution {
/// Create an empty resolution (no candidates).
pub fn empty() -> Self {
Self { winner: None, candidates_count: 0, resolution_confidence: 0.0, conflict_score: 0.0 }
}
/// Create a resolution with a single winner.
pub fn with_winner(
winner: Assertion,
candidates_count: usize,
confidence: f32,
conflict_score: f32,
) -> Self {
Self {
winner: Some(winner),
candidates_count,
resolution_confidence: confidence,
conflict_score,
}
}
}
/// Measure how strongly a set of candidate assertions disagree, based on
/// their confidence values.
///
/// This is the **canonical definition** of conflict score in Episteme.
/// Other modules should reference this documentation.
///
/// # Algorithm
///
/// The score is the population variance of the candidates' `confidence`
/// values, scaled into the `[0.0, 1.0]` range:
/// - 0 or 1 candidates: 0.0 (no conflict possible)
/// - Identical confidences: 0.0 (unanimous agreement)
/// - Maximally spread confidences (e.g., 0.0 vs 1.0): 1.0 (maximum disagreement)
///
/// # Normalization
///
/// Values confined to `[0, 1]` have a variance of at most 0.25 (reached when
/// the values are 0 and 1), so the variance is multiplied by 4. The result is
/// additionally capped at 1.0, which covers confidences outside `[0, 1]`.
///
/// # Edge Cases
///
/// - Empty candidates: Returns 0.0
/// - Single candidate: Returns 0.0 (no disagreement possible)
/// - NaN result: Returns 0.0 (fail-safe). Any NaN confidence makes the mean,
///   and therefore the whole score, NaN, so it lands in this case.
///
/// # Vision Alignment
///
/// This score enables the "disagreement is the information" thesis:
/// high conflict scores surface uncertainty for user review rather than
/// hiding it behind a confident-looking single answer.
pub fn compute_conflict_score(candidates: &[Assertion]) -> f32 {
    // Fewer than two candidates cannot disagree with each other.
    if candidates.len() < 2 {
        return 0.0;
    }

    let count = candidates.len() as f32;
    let confidences = || candidates.iter().map(|candidate| candidate.confidence);

    let mean = confidences().sum::<f32>() / count;
    let squared_deviations = confidences().map(|confidence| (confidence - mean).powi(2));
    let variance = squared_deviations.sum::<f32>() / count;

    // Variance of [0,1] values peaks at 0.25, so a factor of 4 maps it onto [0,1].
    match 4.0 * variance {
        // Defensive: NaN from malformed confidences counts as no meaningful conflict.
        normalized if normalized.is_nan() => 0.0,
        normalized => normalized.min(1.0),
    }
}
// ============================================================================
// Layered Resolution Types
// ============================================================================
/// Consensus result for one source-class tier, as produced by a `LayeredLens`.
///
/// Each value describes a single tier. Tiers run from 0 (Regulatory, highest
/// authority) to 5 (Anecdotal, lowest authority).
#[derive(Clone, Debug)]
pub struct TierResolution {
    /// Tier number (0-5); a lower number means higher authority.
    pub tier: u8,

    /// Source class that this tier corresponds to.
    pub source_class: SourceClass,

    /// Winner of the within-tier consensus, if the tier had any candidates.
    pub winner: Option<Assertion>,

    /// How many candidates fell into this tier.
    pub candidates_count: usize,

    /// Conflict score within this tier (0.0 = unanimous, 1.0 = max conflict).
    pub conflict_score: f32,

    /// Resolution confidence within this tier (0.0 to 1.0).
    pub resolution_confidence: f32,
}
impl TierResolution {
/// Create a tier resolution with no candidates.
pub fn empty(tier: u8, source_class: SourceClass) -> Self {
Self {
tier,
source_class,
winner: None,
candidates_count: 0,
conflict_score: 0.0,
resolution_confidence: 0.0,
}
}
}
/// Result of a `LayeredLens`: one consensus per tier plus an overall winner.
///
/// Supports queries of the form "What does Tier 0 say? What does Tier 5 say?".
///
/// # Cross-Tier Conflict
///
/// `overall_conflict_score` captures disagreement *between* tiers:
/// - 0.0: All tiers with winners agree on the same object value
/// - 1.0: Tiers disagree on what the answer should be
///
/// Disagreement *within* a tier is a separate measure, reported on each
/// `TierResolution`.
#[derive(Clone, Debug)]
pub struct LayeredResolution {
    /// Consensus per tier, ordered by tier with 0 (highest authority) first.
    ///
    /// Tiers without any candidate are left out.
    pub tiers: Vec<TierResolution>,

    /// The winner from the highest-authority tier that has candidates, i.e.
    /// the answer from the most authoritative source class present.
    pub overall_winner: Option<Assertion>,

    /// Disagreement across tiers (0.0 = tiers agree, 1.0 = tiers disagree).
    ///
    /// Reflects whether the tier winners agree on the same object value.
    pub overall_conflict_score: f32,

    /// Number of candidates considered, summed over all tiers.
    pub total_candidates: usize,
}
impl LayeredResolution {
/// Create an empty layered resolution (no candidates in any tier).
pub fn empty() -> Self {
Self {
tiers: Vec::new(),
overall_winner: None,
overall_conflict_score: 0.0,
total_candidates: 0,
}
}
}
/// Resolves conflicts tier by tier instead of collapsing them into one answer.
///
/// Where a standard `Lens` hands back a single winner, a `LayeredLens` keeps
/// the view of what each source class tier says.
///
/// # Use Case: Consumer Health
///
/// Query "semaglutide muscle_loss" and see:
/// - Tier 0 (FDA): [no data]
/// - Tier 1 (Clinical): "Significant loss" (12 sources, 0.85 confidence)
/// - Tier 5 (Anecdotal): "Minimal loss" (200 sources, 0.45 confidence)
/// - Overall winner: "Significant loss" (from Tier 1)
/// - Cross-tier conflict: 0.8 (clinical and anecdotal disagree)
///
/// # Contract
///
/// Implementations are expected to be:
/// - **Stateless:** no internal state is kept between calls.
/// - **Deterministic:** the same input yields the same output.
/// - **Tier-Ordered:** results are always ordered by tier, tier 0 first.
pub trait LayeredLens: Send + Sync {
    /// Resolve the candidates into one consensus per tier.
    ///
    /// # Arguments
    /// * `candidates` - All assertions matching the query filters
    ///
    /// # Returns
    /// A `LayeredResolution` holding the per-tier results and the overall winner.
    fn resolve_layered(&self, candidates: &[Assertion]) -> LayeredResolution;

    /// Human-readable lens name, intended for logging and debugging.
    fn name(&self) -> &'static str;
}
// ============================================================================
// Standard Resolution Types
// ============================================================================
/// Turns a set of conflicting assertions into one deterministic answer.
///
/// # Contract
///
/// Implementations are expected to be:
/// - **Stateless:** no internal state is kept between calls.
/// - **Deterministic:** the same input yields the same output.
/// - **Fast:** a lens runs on every read, so avoid allocations where possible.
///
/// # Implementation Notes
///
/// Edge cases should be handled gracefully:
/// - Empty input: Return `Resolution::empty()`
/// - Single candidate: Return that candidate (trivial resolution)
/// - Ties: Define a consistent tiebreaker (e.g., lowest hash)
pub trait Lens: Send + Sync {
    /// Resolve the candidate assertions into a single answer.
    ///
    /// # Arguments
    /// * `candidates` - All assertions matching the query filters
    ///
    /// # Returns
    /// A resolution holding the winning assertion (if any) along with metadata.
    fn resolve(&self, candidates: &[Assertion]) -> Resolution;

    /// Human-readable lens name, intended for logging and debugging.
    fn name(&self) -> &'static str;
}
/// Maps out the conflict landscape rather than choosing a winner.
///
/// A `Lens` collapses uncertainty into a single answer; an `AnalysisLens`
/// instead surfaces every competing claim together with its relative support.
/// That makes a "Trust but Verify" UX possible, where disagreement is shown
/// to users explicitly.
///
/// # Contract
///
/// Implementations are expected to be:
/// - **Stateless:** no internal state is kept between calls.
/// - **Deterministic:** the same input and storage state produce the same output.
/// - **Complete:** ALL distinct claims are returned, not just the top one.
///
/// # When to Use
///
/// Reach for `AnalysisLens` when:
/// - User needs to see "who disagrees and why"
/// - Fact is contested and hiding conflict would be misleading
/// - Building a "Living Review" dashboard for research
///
/// Reach for a regular `Lens` when:
/// - You need a definitive answer for action
/// - Conflict is resolved elsewhere
/// - Performance is critical (Analysis is more expensive)
#[async_trait]
pub trait AnalysisLens: Send + Sync {
    /// Analyze the candidate assertions and report the conflict landscape.
    ///
    /// # Arguments
    /// * `candidates` - All assertions matching the query filters
    ///
    /// # Returns
    /// A `ConflictAnalysis` containing:
    /// - `status`: Unanimous, Agreed, or Contested
    /// - `conflict_score`: 0.0 (unanimous) to 1.0 (chaos)
    /// - `claims`: All distinct claims ranked by support
    async fn analyze(&self, candidates: &[Assertion]) -> ConflictAnalysis;

    /// Human-readable lens name, intended for logging and debugging.
    fn name(&self) -> &'static str;
}
#[cfg(test)]
mod tests {
    use super::*;
    use stemedb_core::testing::AssertionBuilder;

    /// `Resolution::empty()` has no winner and zeroed metadata.
    #[test]
    fn test_empty_resolution() {
        let Resolution { winner, candidates_count, resolution_confidence, conflict_score } =
            Resolution::empty();
        assert!(winner.is_none());
        assert_eq!(candidates_count, 0);
        assert!(resolution_confidence.abs() < f32::EPSILON);
        assert!(conflict_score.abs() < f32::EPSILON);
    }

    /// An empty candidate slice cannot contain any conflict.
    #[test]
    fn test_conflict_score_zero_for_empty() {
        let no_candidates: &[Assertion] = &[];
        let conflict = compute_conflict_score(no_candidates);
        assert!(conflict.abs() < f32::EPSILON);
    }

    /// A lone candidate has nothing to disagree with.
    #[test]
    fn test_conflict_score_zero_for_single() {
        let lone = AssertionBuilder::new().confidence(0.9).build();
        let conflict = compute_conflict_score(std::slice::from_ref(&lone));
        assert!(conflict.abs() < f32::EPSILON);
    }

    /// Candidates sharing one confidence value are in full agreement.
    #[test]
    fn test_conflict_score_zero_for_agreement() {
        let unanimous = [
            AssertionBuilder::new().confidence(0.9).build(),
            AssertionBuilder::new().confidence(0.9).build(),
            AssertionBuilder::new().confidence(0.9).build(),
        ];
        let score = compute_conflict_score(&unanimous);
        assert!(score < 0.01, "Expected near-zero, got {}", score);
    }

    /// Confidences spread over 0.1, 0.5 and 0.9 have high variance.
    #[test]
    fn test_conflict_score_high_for_disagreement() {
        let spread = [
            AssertionBuilder::new().confidence(0.1).build(),
            AssertionBuilder::new().confidence(0.5).build(),
            AssertionBuilder::new().confidence(0.9).build(),
        ];
        let score = compute_conflict_score(&spread);
        assert!(score > 0.3, "Expected high conflict, got {}", score);
    }

    /// 0.0 against 1.0 is the maximum possible disagreement.
    #[test]
    fn test_conflict_score_max_for_extremes() {
        let extremes = [
            AssertionBuilder::new().confidence(0.0).build(),
            AssertionBuilder::new().confidence(1.0).build(),
        ];
        let score = compute_conflict_score(&extremes);
        assert!((score - 1.0).abs() < 0.01, "Expected ~1.0, got {}", score);
    }

    /// NaN confidences must fall back to 0.0 (fail-safe).
    #[test]
    fn test_conflict_score_handles_nan_defensively() {
        // Build with a regular confidence first, then overwrite the field with NaN.
        let mut malformed = [
            AssertionBuilder::new().confidence(0.5).build(),
            AssertionBuilder::new().confidence(0.5).build(),
        ];
        for assertion in malformed.iter_mut() {
            assertion.confidence = f32::NAN;
        }
        let score = compute_conflict_score(&malformed);
        assert!(score.abs() < f32::EPSILON, "Expected 0.0 for NaN, got {}", score);
    }
}