Phase 5C (Index Persistence) implementation: - PersistentVectorIndex with hot/cold architecture - Hot: in-memory HNSW for recent vectors - Cold: memory-mapped HNSW loaded from disk - Background builder for WAL replay and atomic swap - BLAKE3 integrity verification - PersistentVisualIndex with checkpoint persistence - BkTreeSnapshot with rkyv serialization - CRC32C corruption detection - Atomic write pattern (temp → fsync → rename) - Key codec additions for vector index metadata - Split large files into modules (<500 lines each) - battery_pre_sentinel.rs → battery/ directory - visual_index.rs → visual_index/ directory - persistent.rs → persistent/ directory - Refactored ingest worker tests for clarity - Updated roadmap to mark Phase 5 complete Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
496 lines
19 KiB
Rust
496 lines
19 KiB
Rust
//! Battery 1: The Semaglutide Scenario.
//!
//! Validates the scenario from `what-is-episteme.md`:
//! four sources, one subject, conflicting claims. The fixtures in this file
//! span three source classes: Regulatory (Tier 0), Clinical (Tier 1, two
//! sources), and Anecdotal (Tier 5).
//!
//! # Test Coverage
//!
//! | Test | Pipeline Stage | Validates |
//! |------|---------------|-----------|
//! | `test_semaglutide_four_sources_ingest_and_query` | Full pipeline | Multi-lens resolution |
//! | `test_semaglutide_skeptic_analysis` | Skeptic | Conflict landscape grouping |
//! | `test_semaglutide_source_class_decay` | Decay | Tier-specific confidence decay |
//! | `test_semaglutide_time_travel` | Query | as_of temporal filtering |
|
|
|
|
#![allow(clippy::expect_used)] // Test code uses expect() for clear failure messages
|
|
|
|
use super::helpers::*;
|
|
|
|
/// Test 1.1: Full pipeline with 4 sources, verified through multiple lenses.
|
|
///
|
|
/// Setup:
|
|
/// - Agent A: FDA regulatory warning (Tier 0, confidence 1.0)
|
|
/// - Agent B: Clinical trial no-signal (Tier 1, confidence 0.9)
|
|
/// - Agent C: Patient report gastroparesis (Tier 5, confidence 0.2)
|
|
/// - Agent D: Another clinical no-signal (Tier 1, confidence 0.9)
|
|
///
|
|
/// Proves:
|
|
/// 1. Raw query returns all 4 assertions
|
|
/// 2. TrustAwareAuthority picks Regulatory (highest confidence * default trust)
|
|
/// 3. RecencyLens picks Agent D (most recent timestamp)
|
|
/// 4. All 4 assertions persist in store
|
|
#[tokio::test]
|
|
async fn test_semaglutide_four_sources_ingest_and_query() {
|
|
let dir = tempdir().expect("create temp dir");
|
|
let wal_dir = dir.path().join("wal");
|
|
let db_dir = dir.path().join("db");
|
|
|
|
let base_ts: u64 = 1_000_000;
|
|
|
|
// === Setup: Create 4 conflicting assertions ===
|
|
|
|
// Agent A: FDA regulatory warning (Tier 0, confidence 1.0)
|
|
let agent_a = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("gastroparesis_warning".to_string()),
|
|
SourceClass::Regulatory,
|
|
1.0,
|
|
base_ts,
|
|
);
|
|
|
|
// Agent B: Clinical trial - no signal (Tier 1, confidence 0.9)
|
|
let agent_b = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("no_gastroparesis_signal".to_string()),
|
|
SourceClass::Clinical,
|
|
0.9,
|
|
base_ts + 1,
|
|
);
|
|
|
|
// Agent C: Patient report - gastroparesis (Tier 5, confidence 0.2)
|
|
let agent_c = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("gastroparesis".to_string()),
|
|
SourceClass::Anecdotal,
|
|
0.2,
|
|
base_ts + 2,
|
|
);
|
|
|
|
// Agent D: Another clinical trial - no signal (Tier 1, confidence 0.9)
|
|
let agent_d = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("no_gastroparesis_signal".to_string()),
|
|
SourceClass::Clinical,
|
|
0.9,
|
|
base_ts + 3,
|
|
);
|
|
|
|
// === Step 1: Write all 4 to WAL ===
|
|
let mut journal = Journal::open(&wal_dir).expect("open journal");
|
|
journal.append(serialize_assertion(&agent_a).expect("ser")).expect("append a");
|
|
journal.append(serialize_assertion(&agent_b).expect("ser")).expect("append b");
|
|
journal.append(serialize_assertion(&agent_c).expect("ser")).expect("append c");
|
|
journal.append(serialize_assertion(&agent_d).expect("ser")).expect("append d");
|
|
|
|
// === Step 2: Ingest all 4 via IngestWorker ===
|
|
let journal = Arc::new(Mutex::new(journal));
|
|
let store = Arc::new(HybridStore::open(&db_dir).expect("open store"));
|
|
|
|
let mut worker =
|
|
IngestWorker::new(journal.clone(), store.clone()).await.expect("create worker");
|
|
|
|
for _ in 0..4 {
|
|
let bytes = worker.step().await.expect("ingest step");
|
|
assert!(bytes > 0, "should process data from WAL");
|
|
}
|
|
|
|
// Verify H: keys exist (subject-prefixed: Semaglutide\x00H:{hash})
|
|
let h_prefix = key_codec::assertion_key("Semaglutide", "");
|
|
let h_entries = store.scan_prefix(&h_prefix).await.expect("scan H:");
|
|
assert_eq!(h_entries.len(), 4, "should have 4 assertions stored");
|
|
|
|
// Verify SP: index created (subject-prefixed: Semaglutide\x00SP:{predicate})
|
|
let sp_prefix = key_codec::subject_predicate_scan_prefix("Semaglutide");
|
|
let sp_entries = store.scan_prefix(&sp_prefix).await.expect("scan SP:");
|
|
assert_eq!(sp_entries.len(), 1, "should have one SP: index entry");
|
|
|
|
// === Assert 1: Raw query (no materialization) returns all 4 ===
|
|
let engine = QueryEngine::new(store.clone());
|
|
let query = Query::builder().subject("Semaglutide").predicate("has_side_effect").build();
|
|
|
|
let result = engine.execute(&query).await.expect("raw query");
|
|
assert_eq!(result.assertions.len(), 4, "raw query should return all 4 assertions");
|
|
|
|
// === Assert 2: TrustAwareAuthority picks Regulatory (Agent A) ===
|
|
// With default trust (0.5 for all agents):
|
|
// Agent A: 1.0 * 0.5 = 0.50 (winner)
|
|
// Agent B: 0.9 * 0.5 = 0.45
|
|
// Agent C: 0.2 * 0.5 = 0.10
|
|
// Agent D: 0.9 * 0.5 = 0.45
|
|
let trust_store = Arc::new(GenericTrustRankStore::new(store.clone()));
|
|
let authority_lens = TrustAwareAuthorityLens::new(trust_store);
|
|
let materializer = Materializer::new(store.clone(), Box::new(authority_lens));
|
|
|
|
let report = materializer.step().await.expect("materialize authority");
|
|
assert_eq!(report.views_updated, 1, "should update one view");
|
|
|
|
let authority_result = engine.execute(&query).await.expect("authority query");
|
|
assert_eq!(authority_result.assertions.len(), 1, "materialized query returns winner");
|
|
assert_eq!(
|
|
authority_result.assertions[0].object,
|
|
ObjectValue::Text("gastroparesis_warning".to_string()),
|
|
"Authority lens should pick Regulatory assertion (highest confidence * default trust)"
|
|
);
|
|
|
|
// === Assert 3: RecencyLens picks Agent D (most recent timestamp) ===
|
|
let recency_lens = SyncLensWrapper(RecencyLens);
|
|
let materializer2 = Materializer::new(store.clone(), Box::new(recency_lens));
|
|
|
|
let report2 = materializer2.step().await.expect("materialize recency");
|
|
assert_eq!(report2.views_updated, 1, "should update view with recency winner");
|
|
|
|
let recency_result = engine.execute(&query).await.expect("recency query");
|
|
assert_eq!(recency_result.assertions.len(), 1, "materialized query returns winner");
|
|
assert_eq!(
|
|
recency_result.assertions[0].object,
|
|
ObjectValue::Text("no_gastroparesis_signal".to_string()),
|
|
"Recency lens should pick Agent D (most recent timestamp)"
|
|
);
|
|
assert_eq!(
|
|
recency_result.assertions[0].timestamp,
|
|
base_ts + 3,
|
|
"Winner should have the latest timestamp"
|
|
);
|
|
|
|
// === Assert 4: All 4 assertions still persist in store ===
|
|
let all_h = store.scan_prefix(&h_prefix).await.expect("final scan H:");
|
|
assert_eq!(all_h.len(), 4, "all 4 assertions should persist in store");
|
|
}
|
|
|
|
/// Test 1.2: Skeptic analysis surfaces the conflict landscape.
|
|
///
|
|
/// With 4 assertions across 3 distinct object values:
|
|
/// - "gastroparesis_warning" (1 assertion, Regulatory)
|
|
/// - "no_gastroparesis_signal" (2 assertions, Clinical)
|
|
/// - "gastroparesis" (1 assertion, Anecdotal)
|
|
///
|
|
/// Proves the Skeptic lens correctly groups claims, counts assertions,
|
|
/// and identifies the conflict as Contested.
|
|
#[tokio::test]
|
|
async fn test_semaglutide_skeptic_analysis() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("store"));
|
|
let index_store = GenericIndexStore::new(store.clone());
|
|
|
|
let base_ts: u64 = 1_000_000;
|
|
|
|
// === Setup: Store 4 assertions directly ===
|
|
|
|
let agent_a = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("gastroparesis_warning")
|
|
.source_class(SourceClass::Regulatory)
|
|
.confidence(1.0)
|
|
.agent_id([1u8; 32])
|
|
.timestamp(base_ts)
|
|
.build();
|
|
|
|
let agent_b = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("no_gastroparesis_signal")
|
|
.source_class(SourceClass::Clinical)
|
|
.confidence(0.9)
|
|
.agent_id([2u8; 32])
|
|
.timestamp(base_ts + 1)
|
|
.build();
|
|
|
|
let agent_c = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("gastroparesis")
|
|
.source_class(SourceClass::Anecdotal)
|
|
.confidence(0.2)
|
|
.agent_id([3u8; 32])
|
|
.timestamp(base_ts + 2)
|
|
.build();
|
|
|
|
let agent_d = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("no_gastroparesis_signal")
|
|
.source_class(SourceClass::Clinical)
|
|
.confidence(0.9)
|
|
.agent_id([4u8; 32])
|
|
.timestamp(base_ts + 3)
|
|
.build();
|
|
|
|
store_assertion_direct(&store, &index_store, &agent_a).await;
|
|
store_assertion_direct(&store, &index_store, &agent_b).await;
|
|
store_assertion_direct(&store, &index_store, &agent_c).await;
|
|
store_assertion_direct(&store, &index_store, &agent_d).await;
|
|
|
|
// === Run SkepticResolver ===
|
|
let vote_store = Arc::new(GenericVoteStore::new(store.clone()));
|
|
let trust_store = Arc::new(GenericTrustRankStore::new(store.clone()));
|
|
let resolver = SkepticResolver::new(store.clone(), vote_store, trust_store);
|
|
|
|
let result = resolver.resolve("Semaglutide", "has_side_effect").await.expect("resolve");
|
|
let view = result.expect("should have a SkepticView");
|
|
let analysis = &view.analysis;
|
|
|
|
// === Asserts ===
|
|
|
|
// Total candidates
|
|
assert_eq!(analysis.candidates_count, 4, "should consider all 4 assertions");
|
|
|
|
// 3 distinct groups: "gastroparesis_warning" (1), "no_gastroparesis_signal" (2), "gastroparesis" (1)
|
|
assert_eq!(analysis.claims.len(), 3, "should have 3 distinct claim groups");
|
|
|
|
// Status should be Contested (significant disagreement across 3 groups)
|
|
assert_eq!(
|
|
analysis.status,
|
|
ResolutionStatus::Contested,
|
|
"4 assertions across 3 groups should be contested"
|
|
);
|
|
|
|
// Conflict score should be meaningful (Shannon entropy of 3-way split)
|
|
assert!(
|
|
analysis.conflict_score > 0.3,
|
|
"conflict score {} should be > 0.3 for 3-way split",
|
|
analysis.conflict_score
|
|
);
|
|
|
|
// Find the "no_gastroparesis_signal" group - should have 2 assertions
|
|
let no_signal_claim = analysis
|
|
.claims
|
|
.iter()
|
|
.find(|c| matches!(&c.value, ObjectValue::Text(t) if t == "no_gastroparesis_signal"))
|
|
.expect("should have no_gastroparesis_signal claim");
|
|
assert_eq!(
|
|
no_signal_claim.assertion_count, 2,
|
|
"no_gastroparesis_signal should have 2 supporting assertions"
|
|
);
|
|
|
|
// Claims should be sorted descending by weight_share
|
|
for window in analysis.claims.windows(2) {
|
|
assert!(
|
|
window[0].weight_share >= window[1].weight_share,
|
|
"claims should be sorted descending by weight_share: {} >= {}",
|
|
window[0].weight_share,
|
|
window[1].weight_share
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Test 1.3: Source-class-aware decay at 180 days.
|
|
///
|
|
/// With all 4 assertions timestamped 180 days ago:
|
|
/// - Regulatory (Tier 0): No decay, confidence stays 1.0
|
|
/// - Clinical (Tier 1, 730-day half-life): 0.9 * 2^(-180/730) ~ 0.759
|
|
/// - Anecdotal (Tier 5, 30-day half-life): 0.2 * 2^(-6) ~ 0.003
|
|
///
|
|
/// After decay, Authority lens with default trust still picks Regulatory.
|
|
#[tokio::test]
|
|
async fn test_semaglutide_source_class_decay() {
|
|
let now: u64 = 1_000_000_000;
|
|
let days_180: u64 = 180 * 86_400;
|
|
let past = now - days_180;
|
|
|
|
// All assertions at 180 days ago
|
|
let regulatory = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("gastroparesis_warning")
|
|
.source_class(SourceClass::Regulatory)
|
|
.confidence(1.0)
|
|
.agent_id([1u8; 32])
|
|
.timestamp(past)
|
|
.build();
|
|
|
|
let clinical_b = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("no_gastroparesis_signal")
|
|
.source_class(SourceClass::Clinical)
|
|
.confidence(0.9)
|
|
.agent_id([2u8; 32])
|
|
.timestamp(past)
|
|
.build();
|
|
|
|
let anecdotal = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("gastroparesis")
|
|
.source_class(SourceClass::Anecdotal)
|
|
.confidence(0.2)
|
|
.agent_id([3u8; 32])
|
|
.timestamp(past)
|
|
.build();
|
|
|
|
let clinical_d = AssertionBuilder::new()
|
|
.subject("Semaglutide")
|
|
.predicate("has_side_effect")
|
|
.object_text("no_gastroparesis_signal")
|
|
.source_class(SourceClass::Clinical)
|
|
.confidence(0.9)
|
|
.agent_id([4u8; 32])
|
|
.timestamp(past)
|
|
.build();
|
|
|
|
let assertions = vec![regulatory, clinical_b, anecdotal, clinical_d];
|
|
|
|
// Apply tier-specific decay
|
|
let fallback_halflife: u64 = 365 * 86_400; // 1 year fallback
|
|
let decayed = apply_source_class_decay(&assertions, fallback_halflife, now);
|
|
|
|
assert_eq!(decayed.len(), 4);
|
|
|
|
// Regulatory (Tier 0): No decay, stays at 1.0
|
|
assert_eq!(decayed[0].confidence, 1.0, "Regulatory should not decay");
|
|
|
|
// Clinical (Tier 1, 730-day half-life): 0.9 * 2^(-180/730) ~ 0.759
|
|
let clinical_conf = decayed[1].confidence;
|
|
assert!(
|
|
clinical_conf > 0.7 && clinical_conf < 0.85,
|
|
"Clinical should decay to ~0.759, got {}",
|
|
clinical_conf
|
|
);
|
|
|
|
// Anecdotal (Tier 5, 30-day half-life): 0.2 * 2^(-180/30) = 0.2 * 2^(-6) ~ 0.003
|
|
let anecdotal_conf = decayed[2].confidence;
|
|
assert!(anecdotal_conf < 0.01, "Anecdotal should decay to near zero, got {}", anecdotal_conf);
|
|
|
|
// Second clinical should match first clinical's decay
|
|
let clinical2_conf = decayed[3].confidence;
|
|
assert!(
|
|
(clinical2_conf - clinical_conf).abs() < 0.001,
|
|
"Both clinical assertions should decay identically: {} vs {}",
|
|
clinical_conf,
|
|
clinical2_conf
|
|
);
|
|
|
|
// After decay, Authority lens with default trust still picks Regulatory
|
|
// Weighted scores (default trust 0.5):
|
|
// Regulatory: 1.0 * 0.5 = 0.50
|
|
// Clinical: ~0.759 * 0.5 = ~0.38
|
|
// Anecdotal: ~0.003 * 0.5 = ~0.001
|
|
let store = HybridStore::open_temp().expect("store");
|
|
let trust_store = Arc::new(GenericTrustRankStore::new(store));
|
|
let lens = TrustAwareAuthorityLens::new(trust_store);
|
|
|
|
let resolution = lens.resolve_async(&decayed).await;
|
|
|
|
assert!(resolution.winner.is_some(), "should have a winner");
|
|
assert_eq!(
|
|
resolution.winner.as_ref().expect("winner").object,
|
|
ObjectValue::Text("gastroparesis_warning".to_string()),
|
|
"After decay, Regulatory assertion should still win with Authority lens"
|
|
);
|
|
}
|
|
|
|
/// Test 1.4: Time-travel query filters by timestamp.
|
|
///
|
|
/// With 4 assertions at timestamps 1000, 1100, 1200, 1300:
|
|
/// - Query with `as_of: 1150` returns only assertions at T=1000 and T=1100
|
|
/// - Assertions at T=1200 and T=1300 are excluded
|
|
/// - The conflict landscape is reduced to a 2-way split
|
|
#[tokio::test]
|
|
async fn test_semaglutide_time_travel() {
|
|
let dir = tempdir().expect("create temp dir");
|
|
let wal_dir = dir.path().join("wal");
|
|
let db_dir = dir.path().join("db");
|
|
|
|
// Agent A: T=1000 (Regulatory)
|
|
let agent_a = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("gastroparesis_warning".to_string()),
|
|
SourceClass::Regulatory,
|
|
1.0,
|
|
1000,
|
|
);
|
|
|
|
// Agent B: T=1100 (Clinical)
|
|
let agent_b = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("no_gastroparesis_signal".to_string()),
|
|
SourceClass::Clinical,
|
|
0.9,
|
|
1100,
|
|
);
|
|
|
|
// Agent C: T=1200 (Anecdotal)
|
|
let agent_c = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("gastroparesis".to_string()),
|
|
SourceClass::Anecdotal,
|
|
0.2,
|
|
1200,
|
|
);
|
|
|
|
// Agent D: T=1300 (Clinical)
|
|
let agent_d = create_signed_assertion_with_source(
|
|
"Semaglutide",
|
|
"has_side_effect",
|
|
ObjectValue::Text("no_gastroparesis_signal".to_string()),
|
|
SourceClass::Clinical,
|
|
0.9,
|
|
1300,
|
|
);
|
|
|
|
// === Write to WAL and ingest ===
|
|
let mut journal = Journal::open(&wal_dir).expect("open journal");
|
|
journal.append(serialize_assertion(&agent_a).expect("ser")).expect("append a");
|
|
journal.append(serialize_assertion(&agent_b).expect("ser")).expect("append b");
|
|
journal.append(serialize_assertion(&agent_c).expect("ser")).expect("append c");
|
|
journal.append(serialize_assertion(&agent_d).expect("ser")).expect("append d");
|
|
|
|
let journal = Arc::new(Mutex::new(journal));
|
|
let store = Arc::new(HybridStore::open(&db_dir).expect("open store"));
|
|
|
|
let mut worker =
|
|
IngestWorker::new(journal.clone(), store.clone()).await.expect("create worker");
|
|
|
|
for _ in 0..4 {
|
|
let bytes = worker.step().await.expect("ingest step");
|
|
assert!(bytes > 0, "should process data from WAL");
|
|
}
|
|
|
|
// Verify all 4 ingested (subject-prefixed: Semaglutide\x00H:{hash})
|
|
let h_prefix = key_codec::assertion_key("Semaglutide", "");
|
|
let h_entries = store.scan_prefix(&h_prefix).await.expect("scan H:");
|
|
assert_eq!(h_entries.len(), 4, "should have 4 assertions stored");
|
|
|
|
// === Query with as_of: 1150 (between B at 1100 and C at 1200) ===
|
|
let engine = QueryEngine::new(store.clone());
|
|
let query =
|
|
Query::builder().subject("Semaglutide").predicate("has_side_effect").as_of(1150).build();
|
|
|
|
let result = engine.execute(&query).await.expect("time-travel query");
|
|
|
|
// Only 2 assertions should pass the timestamp filter
|
|
assert_eq!(
|
|
result.assertions.len(),
|
|
2,
|
|
"as_of=1150 should return only A (T=1000) and B (T=1100), got {}",
|
|
result.assertions.len()
|
|
);
|
|
|
|
// Verify the correct assertions are returned
|
|
let timestamps: Vec<u64> = result.assertions.iter().map(|a| a.timestamp).collect();
|
|
assert!(timestamps.contains(&1000), "should include Agent A (T=1000)");
|
|
assert!(timestamps.contains(&1100), "should include Agent B (T=1100)");
|
|
|
|
// Verify the excluded assertions are NOT returned
|
|
assert!(!timestamps.contains(&1200), "should exclude Agent C (T=1200)");
|
|
assert!(!timestamps.contains(&1300), "should exclude Agent D (T=1300)");
|
|
|
|
// The conflict landscape is now a 2-way split (regulatory vs clinical)
|
|
let objects: Vec<&ObjectValue> = result.assertions.iter().map(|a| &a.object).collect();
|
|
assert!(
|
|
objects.contains(&&ObjectValue::Text("gastroparesis_warning".to_string())),
|
|
"should include regulatory warning"
|
|
);
|
|
assert!(
|
|
objects.contains(&&ObjectValue::Text("no_gastroparesis_signal".to_string())),
|
|
"should include clinical no-signal"
|
|
);
|
|
}
|