stemedb/crates/stemedb-query/src/decay.rs
jordan c59066949a feat: Add quickstart "Beyond Hello World" sections with Skeptic and Layered endpoints
- Add Layered() method to Go SDK for per-source-class consensus queries
- Add LayeredQueryParams, LayeredResult, TierResolution types to Go SDK
- Create conflict example demonstrating Skeptic and Layered endpoints
- Update quickstart.md with sections 6 (conflict detection) and 7 (authority tiers)
- Remove tracked Go binary and add data/ to .gitignore

The new quickstart sections demonstrate Episteme's differentiating features:
- Skeptic endpoint shows "Trust but Verify" conflict analysis
- Layered endpoint shows per-tier resolution (Clinical vs Anecdotal)

Note: Pre-existing large files flagged by pre-commit hook (technical debt from prior sessions)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 21:00:59 -07:00

502 lines
17 KiB
Rust

//! Semantic decay for assertion confidence.
//!
//! This module implements time-based confidence decay for assertions.
//! Older assertions have their effective confidence reduced based on age,
//! allowing recent evidence to outweigh stale claims.
//!
//! # The Problem
//!
//! Medical knowledge decays at different rates. A Reddit post from 2022
//! shouldn't compete equally with a 2024 RCT. Without decay, old assertions
//! with high confidence can dominate over recent, more relevant evidence.
//!
//! # Formula
//!
//! ```text
//! effective_confidence = original_confidence * 2^(-(age / halflife))
//! ```
//!
//! Where:
//! - `age` = now - assertion.timestamp (in seconds)
//! - `halflife` = decay half-life (in seconds)
//!
//! # Example
//!
//! With a 1-year half-life (31,536,000 seconds):
//! - 0 years old: 100% of original confidence
//! - 1 year old: 50% of original confidence
//! - 2 years old: 25% of original confidence
//! - 3 years old: 12.5% of original confidence
use stemedb_core::types::Assertion;
/// Number of seconds in one day (24 h x 60 min x 60 s = 86,400).
///
/// Used to convert day-based half-lives into the second-based half-lives
/// that the decay arithmetic works with.
const SECONDS_PER_DAY: u64 = 24 * 60 * 60;
/// Age-decay the confidence of every assertion using one shared half-life.
///
/// The input slice is left untouched; the result is a freshly cloned vector
/// in the same order, where only the `confidence` field may differ.
///
/// # Arguments
///
/// * `assertions` - Assertions whose confidence should be decayed
/// * `halflife` - Half-life in seconds; confidence halves each time this much time passes
/// * `now` - Reference timestamp in Unix seconds (current time, or `as_of` for time-travel queries)
///
/// # Returns
///
/// A new `Vec<Assertion>` with decayed confidence values.
///
/// # Formula
///
/// ```text
/// age = now - assertion.timestamp
/// decay_factor = 2^(-(age / halflife))
/// effective_confidence = confidence * decay_factor
/// ```
///
/// # Edge Cases
///
/// - `halflife == 0` disables decay: the assertions are returned as plain clones
/// - Assertions with `timestamp >= now` (not yet aged) keep their confidence
/// - Decayed confidence is clamped to [0.0, 1.0]
pub fn apply_decay(assertions: &[Assertion], halflife: u64, now: u64) -> Vec<Assertion> {
    // Clone once up front, then adjust confidence in place on the copies.
    let mut decayed = assertions.to_vec();

    // A zero half-life means "no decay", so the clones are returned as-is.
    if halflife != 0 {
        for item in &mut decayed {
            item.confidence =
                compute_decayed_confidence(item.confidence, item.timestamp, halflife, now);
        }
    }

    decayed
}
/// Apply source-class-aware decay to assertions.
///
/// Each assertion's decay half-life comes from its `source_class` via
/// `default_decay_days()`. The documented tier defaults are:
/// - Tier 0 (Regulatory): No decay
/// - Tier 1 (Clinical): 2-year half-life
/// - Tier 2 (Observational): 1-year half-life
/// - Tier 3 (Expert): 6-month half-life
/// - Tier 4 (Community): 3-month half-life
/// - Tier 5 (Anecdotal): 1-month half-life
///
/// # Arguments
///
/// * `assertions` - The assertions to decay
/// * `fallback_halflife` - Half-life in seconds used when the source class reports a
///   zero-day half-life. A value of 0 means "no decay", matching `apply_decay`.
/// * `now` - Reference timestamp (Unix seconds)
///
/// # Returns
///
/// A new vector of assertions with tier-appropriate decay applied. The input
/// slice is not modified.
///
/// # Edge Cases
///
/// - Source classes whose `default_decay_days()` is `None` are returned unchanged
/// - If the resolved half-life is 0 (zero-day tier default *and* zero fallback),
///   the assertion is returned unchanged instead of being decayed to zero
pub fn apply_source_class_decay(
    assertions: &[Assertion],
    fallback_halflife: u64,
    now: u64,
) -> Vec<Assertion> {
    assertions
        .iter()
        .map(|assertion| {
            // Tier-specific half-life, converted from days to seconds.
            let halflife_opt = assertion
                .source_class
                .default_decay_days()
                .map(|days| u64::from(days) * SECONDS_PER_DAY);

            let halflife = match halflife_opt {
                // Source class explicitly has no decay (e.g., Regulatory).
                None => return assertion.clone(),
                // A zero-day default is not expected; defer to the caller's fallback.
                Some(0) => fallback_halflife,
                Some(h) => h,
            };

            // The fallback may itself be zero. Dividing the age by a zero
            // half-life would drive the decay factor to 0 and wipe out the
            // confidence, so treat it as "decay disabled" like `apply_decay` does.
            if halflife == 0 {
                return assertion.clone();
            }

            let mut decayed = assertion.clone();
            decayed.confidence = compute_decayed_confidence(
                assertion.confidence,
                assertion.timestamp,
                halflife,
                now,
            );
            decayed
        })
        .collect()
}
/// Compute the decayed confidence for a single assertion.
///
/// # Formula
///
/// ```text
/// age = now - timestamp
/// decay_factor = 2^(-(age / halflife))
/// decayed_confidence = confidence * decay_factor
/// ```
///
/// # Arguments
///
/// * `confidence` - Original confidence score (0.0 to 1.0)
/// * `timestamp` - When the assertion was created (Unix seconds)
/// * `halflife` - Decay half-life in seconds; 0 disables decay
/// * `now` - Reference timestamp (Unix seconds)
///
/// # Returns
///
/// The decayed confidence, clamped to [0.0, 1.0]. When no decay applies
/// (`halflife == 0`, or `timestamp >= now`), `confidence` is returned
/// unchanged and unclamped.
fn compute_decayed_confidence(confidence: f32, timestamp: u64, halflife: u64, now: u64) -> f32 {
    // A zero half-life means "decay disabled" (the same convention as
    // `apply_decay`). Without this guard, age / 0 is +inf and the decay
    // factor collapses to 0, silently zeroing the confidence.
    // Assertions at or after `now` have not aged yet, so they do not decay either.
    if halflife == 0 || timestamp >= now {
        return confidence;
    }

    // Do the arithmetic in f64: an f32 has a 24-bit mantissa, so second-based
    // ages and half-lives above ~16.7M seconds (~194 days) would already be
    // rounded by the cast before the division.
    let elapsed_half_lives = (now - timestamp) as f64 / halflife as f64;

    // decay_factor = 2^(-(age / halflife))
    let decay_factor = (-elapsed_half_lives).exp2();

    // Narrow back to f32 only at the end, then clamp to the valid confidence range.
    let decayed = (f64::from(confidence) * decay_factor) as f32;
    decayed.clamp(0.0, 1.0)
}
// Unit tests for `apply_decay` and `apply_source_class_decay`.
// All timestamps are plain Unix-second integers measured back from a fixed `now`.
#[cfg(test)]
mod tests {
use super::*;
use stemedb_core::testing::AssertionBuilder;
use stemedb_core::types::SourceClass;
/// One year in seconds (365 days).
const ONE_YEAR_SECONDS: u64 = 365 * 24 * 60 * 60;
/// One hour in seconds.
const ONE_HOUR_SECONDS: u64 = 60 * 60;
// ========================================================================
// Core Decay Tests
// ========================================================================
#[test]
fn test_decay_reduces_old_assertion_confidence() {
// Assertion is 1 year old with 1-year half-life
// Expected: ~50% of original confidence
let now = 1_000_000_000_u64; // Arbitrary fixed reference time
let one_year_ago = now - ONE_YEAR_SECONDS;
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.9)
.timestamp(one_year_ago)
.build();
let decayed = apply_decay(&[assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
let decayed_conf = decayed[0].confidence;
// Should be approximately 0.45 (0.9 * 0.5)
// Allow an absolute tolerance of 0.01 for floating point
assert!((decayed_conf - 0.45).abs() < 0.01, "Expected ~0.45, got {}", decayed_conf);
}
#[test]
fn test_decay_preserves_fresh_assertions() {
// Assertion is 1 hour old with 1-year half-life
// Expected: ~100% of original confidence (minimal decay)
let now = 1_000_000_000_u64;
let one_hour_ago = now - ONE_HOUR_SECONDS;
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.9)
.timestamp(one_hour_ago)
.build();
let decayed = apply_decay(&[assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
let decayed_conf = decayed[0].confidence;
// Should be very close to original (99.99%+)
assert!((decayed_conf - 0.9).abs() < 0.001, "Expected ~0.9, got {}", decayed_conf);
}
#[test]
fn test_decay_interacts_with_lens() {
// Two assertions: older has higher base confidence but should lose after decay
// NOTE(review): no lens is invoked in this test; it only checks the relative
// ordering of the decayed confidences.
let now = 1_000_000_000_u64;
let two_years_ago = now - (2 * ONE_YEAR_SECONDS);
let one_week_ago = now - (7 * 24 * 60 * 60);
// Old assertion: high confidence (0.9), but 2 years old
let old_assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.9)
.timestamp(two_years_ago)
.build();
// New assertion: lower confidence (0.6), but only 1 week old
let new_assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.6)
.timestamp(one_week_ago)
.build();
let decayed = apply_decay(&[old_assertion, new_assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 2);
// Old assertion: 0.9 * 0.25 (2 half-lives) = ~0.225
let old_decayed = decayed[0].confidence;
assert!(
(old_decayed - 0.225).abs() < 0.02,
"Old assertion expected ~0.225, got {}",
old_decayed
);
// New assertion: 0.6 * ~1.0 = ~0.6 (negligible decay)
// 1 week = 604800 seconds, 1 year = 31536000 seconds
// decay factor = 2^(-(604800/31536000)) = 2^(-0.0192) ≈ 0.9868
// 0.6 * 0.9868 ≈ 0.592
let new_decayed = decayed[1].confidence;
assert!(
(new_decayed - 0.6).abs() < 0.02, // Absolute tolerance of 0.02 covers the ~0.008 of real decay
"New assertion expected ~0.6, got {}",
new_decayed
);
// The newer assertion should now have higher effective confidence
assert!(
new_decayed > old_decayed,
"Newer assertion ({}) should beat older ({}) after decay",
new_decayed,
old_decayed
);
}
// ========================================================================
// Source-Class-Aware Decay Tests
// ========================================================================
#[test]
fn test_source_aware_decay_tier0_no_decay() {
// Regulatory (Tier 0) sources should never decay
let now = 1_000_000_000_u64;
let five_years_ago = now - (5 * ONE_YEAR_SECONDS);
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.95)
.timestamp(five_years_ago)
.source_class(SourceClass::Regulatory)
.build();
let decayed = apply_source_class_decay(&[assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
// Regulatory is expected to report no half-life (`default_decay_days()` is `None`).
// `apply_source_class_decay` returns such assertions unchanged, so the fallback
// half-life passed above is never consulted and the confidence must match exactly.
assert_eq!(decayed[0].confidence, 0.95, "Regulatory sources should not decay");
}
#[test]
fn test_source_aware_decay_tier5_rapid_decay() {
// Anecdotal (Tier 5) sources decay rapidly (30-day half-life)
let now = 1_000_000_000_u64;
let sixty_days_ago = now - (60 * SECONDS_PER_DAY);
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.8)
.timestamp(sixty_days_ago)
.source_class(SourceClass::Anecdotal)
.build();
let decayed = apply_source_class_decay(&[assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
// 60 days = 2 half-lives for Anecdotal (30-day half-life)
// Expected: 0.8 * 0.25 = 0.2
let decayed_conf = decayed[0].confidence;
assert!(
(decayed_conf - 0.2).abs() < 0.02,
"Anecdotal (60 days, 30-day halflife) expected ~0.2, got {}",
decayed_conf
);
}
#[test]
fn test_source_aware_decay_mixed_tiers() {
// Compare Clinical (2yr halflife) vs Anecdotal (30-day halflife)
let now = 1_000_000_000_u64;
let one_year_ago = now - ONE_YEAR_SECONDS;
// Clinical: 1 year = 0.5 half-lives → decay factor of ~0.707 (~71% retained)
let clinical = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.8)
.timestamp(one_year_ago)
.source_class(SourceClass::Clinical)
.build();
// Anecdotal: 1 year = 12+ half-lives → decay factor near 0 (almost nothing retained)
let anecdotal = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.8)
.timestamp(one_year_ago)
.source_class(SourceClass::Anecdotal)
.build();
let decayed = apply_source_class_decay(&[clinical, anecdotal], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 2);
// Clinical (2yr halflife): 1yr = 0.5 halflife → 2^(-0.5) ≈ 0.707
// 0.8 * 0.707 ≈ 0.566
let clinical_decayed = decayed[0].confidence;
assert!(
(clinical_decayed - 0.566).abs() < 0.02,
"Clinical expected ~0.566, got {}",
clinical_decayed
);
// Anecdotal (30-day halflife): 365 days = ~12.2 half-lives → 2^(-12.2) ≈ 0.0002
// Should be near zero
let anecdotal_decayed = decayed[1].confidence;
assert!(
anecdotal_decayed < 0.01,
"Anecdotal expected near zero, got {}",
anecdotal_decayed
);
// Clinical should be much higher than Anecdotal after tier-aware decay
assert!(
clinical_decayed > anecdotal_decayed * 10.0,
"Clinical ({}) should be much higher than Anecdotal ({})",
clinical_decayed,
anecdotal_decayed
);
}
// ========================================================================
// Edge Case Tests
// ========================================================================
#[test]
fn test_decay_zero_halflife_no_change() {
// `apply_decay` short-circuits on a zero half-life, so even a year-old
// assertion must come back with its confidence bit-for-bit unchanged.
let now = 1_000_000_000_u64;
let one_year_ago = now - ONE_YEAR_SECONDS;
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.9)
.timestamp(one_year_ago)
.build();
// Zero half-life means no decay
let decayed = apply_decay(&[assertion], 0, now);
assert_eq!(decayed.len(), 1);
assert_eq!(decayed[0].confidence, 0.9, "Zero halflife should skip decay");
}
#[test]
fn test_decay_future_assertion_no_change() {
// An assertion timestamped after `now` has no age yet, so it must not decay.
let now = 1_000_000_000_u64;
let future = now + ONE_YEAR_SECONDS;
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.9)
.timestamp(future)
.build();
let decayed = apply_decay(&[assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
assert_eq!(decayed[0].confidence, 0.9, "Future assertions should not decay");
}
#[test]
fn test_decay_empty_assertions() {
// An empty input slice yields an empty result.
let decayed = apply_decay(&[], ONE_YEAR_SECONDS, 1_000_000_000);
assert!(decayed.is_empty());
}
#[test]
fn test_decay_confidence_clamps_to_valid_range() {
// Very old assertion should decay to near-zero but never negative
// `now` must exceed 100 years of seconds (~3.15e9) so the subtraction
// below does not underflow the u64.
let now = 5_000_000_000_u64; // ~2128 in Unix time
let ancient = now - (100 * ONE_YEAR_SECONDS); // 100 years ago
let assertion = AssertionBuilder::new()
.subject("test")
.predicate("value")
.confidence(0.9)
.timestamp(ancient)
.build();
let decayed = apply_decay(&[assertion], ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
assert!(decayed[0].confidence >= 0.0, "Confidence should not be negative");
assert!(decayed[0].confidence <= 1.0, "Confidence should not exceed 1.0");
}
#[test]
fn test_decay_preserves_other_fields() {
// Decay must touch only `confidence`; the other compared fields must
// match the original assertion.
let now = 1_000_000_000_u64;
let one_year_ago = now - ONE_YEAR_SECONDS;
let assertion = AssertionBuilder::new()
.subject("Tesla")
.predicate("revenue")
.object_number(96.7)
.confidence(0.9)
.timestamp(one_year_ago)
.build();
// `from_ref` borrows the assertion as a one-element slice so the original
// stays available for the field comparisons below.
let decayed = apply_decay(std::slice::from_ref(&assertion), ONE_YEAR_SECONDS, now);
assert_eq!(decayed.len(), 1);
assert_eq!(decayed[0].subject, assertion.subject);
assert_eq!(decayed[0].predicate, assertion.predicate);
assert_eq!(decayed[0].object, assertion.object);
assert_eq!(decayed[0].timestamp, assertion.timestamp);
// Only confidence should change
assert_ne!(decayed[0].confidence, assertion.confidence);
}
}