stemedb/crates/stemedb-query/tests/e2e_decay.rs
jordan 137a588ed0 feat: Concept hierarchy (Phase 5D) - ConceptPath, source schemes, AliasStore
Implements hierarchical subject identifiers with scheme-based source tier inference:

- ConceptPath type with parse/wire_format, leaf/parent, prefix matching
- SourceScheme registry mapping schemes to default SourceClass tiers:
  - rfc://, fda://, ietf:// → Regulatory (Tier 0)
  - peer://, pubmed:// → PeerReviewed (Tier 1)
  - code://, wiki:// → Expert (Tier 3)
  - blog://, anon:// → Anecdotal (Tier 5)
- AliasStore for cross-scheme entity resolution (bidirectional indexing)
- API endpoints for concept operations
- Battery tests 8, 9 & 10 for concepts, aliases, and advanced signatures
- Go SDK updates for concept types and signing

Completes Phase 5, advancing to Phase 6 (Distributed Writes).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:44:54 -07:00

196 lines
7.1 KiB
Rust

//! E2E decay integration tests.
//!
//! Tests that verify time-based confidence decay in queries.
//!
//! # Test Coverage
//!
//! | Test | Validates |
//! |------|-----------|
//! | `test_e2e_decay_reduces_old_confidence` | Decay reduces effective confidence of old assertions |
#![allow(clippy::expect_used)] // Test code uses expect() for clear failure messages
use ed25519_dalek::{Signer, SigningKey};
use rand::rngs::OsRng;
use std::sync::Arc;
use stemedb_core::testing::AssertionBuilder;
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
use stemedb_ingest::worker::{serialize_assertion, IngestWorker};
use stemedb_query::{Query, QueryEngine};
use stemedb_storage::{key_codec, HybridStore, KVStore};
use stemedb_wal::Journal;
use tempfile::tempdir;
use tokio::sync::Mutex;
// ============================================================================
// TEST HELPERS
// ============================================================================
/// Build an [`Assertion`] that carries one Ed25519 signature from a throwaway key.
///
/// A brand-new signing key is generated on every call, so no two assertions
/// produced by this helper share an `agent_id`. The signed payload is the string
/// `"{subject}:{predicate}"`, which is intended to match IngestWorker's
/// verification logic.
///
/// The resulting assertion has a numeric object of `value`, a confidence of
/// `0.95`, lifecycle `Proposed`, and uses `timestamp` for both the assertion
/// itself and its single signature entry.
fn create_signed_assertion(
    subject: &str,
    predicate: &str,
    value: f64,
    timestamp: u64,
) -> Assertion {
    // One-off keypair: the test only needs a signature that verifies, not a stable identity.
    let keypair = SigningKey::generate(&mut OsRng);
    let payload = format!("{}:{}", subject, predicate);

    let entry = SignatureEntry {
        version: 1,
        agent_id: keypair.verifying_key().to_bytes(),
        signature: keypair.sign(payload.as_bytes()).to_bytes(),
        timestamp,
    };

    AssertionBuilder::new()
        .subject(subject)
        .predicate(predicate)
        .object_number(value)
        .confidence(0.95)
        .lifecycle(LifecycleStage::Proposed)
        .timestamp(timestamp)
        .signatures(vec![entry])
        .build()
}
// ============================================================================
// DECAY INTEGRATION TESTS
// ============================================================================
/// Test: time-based decay lowers the effective confidence of older assertions.
///
/// Two assertions for the same subject/predicate are ingested through the WAL:
/// a year-old one with confidence 0.95 and a week-old one with confidence 0.6.
/// Queried without `decay_halflife`, the older assertion has the highest
/// confidence; queried with a one-year halflife (and `as_of` pinned to a fixed
/// "now"), the newer assertion overtakes it.
#[tokio::test]
async fn test_e2e_decay_reduces_old_confidence() {
    use std::cmp::Ordering;

    let tmp = tempdir().expect("create temp dir");
    let wal_dir = tmp.path().join("wal");
    let db_dir = tmp.path().join("db");

    // Fixed clock so the decay arithmetic below is deterministic.
    let one_year_seconds: u64 = 365 * 24 * 60 * 60;
    let now: u64 = 1_000_000_000;
    let one_year_ago = now - one_year_seconds;
    let one_week_ago = now - (7 * 24 * 60 * 60);

    let subject = "Semaglutide";
    let predicate = "muscle_effect";
    // The object value doubles as the marker for telling the two assertions apart.
    let old_object = ObjectValue::Number(-5.0);
    let new_object = ObjectValue::Number(-2.0);

    // Old assertion: HIGH original confidence (0.95), but one halflife old,
    // so its effective confidence is ~0.475.
    let mut old_assertion = create_signed_assertion(subject, predicate, -5.0, one_year_ago);
    old_assertion.confidence = 0.95;

    // New assertion: LOWER original confidence (0.6), only a week old,
    // so its effective confidence is ~0.59 (minimal decay).
    let mut new_assertion = create_signed_assertion(subject, predicate, -2.0, one_week_ago);
    new_assertion.confidence = 0.6;

    // Append both to the WAL (old first), then ingest one record per step.
    let mut journal = Journal::open(&wal_dir).expect("open journal");
    for assertion in [&old_assertion, &new_assertion] {
        journal.append(serialize_assertion(assertion).expect("ser")).expect("append");
    }
    let journal = Arc::new(Mutex::new(journal));
    let store = Arc::new(HybridStore::open(&db_dir).expect("open store"));

    let mut worker = IngestWorker::new(journal.clone(), store.clone()).await.expect("worker");
    worker.step().await.expect("step 1");
    worker.step().await.expect("step 2");

    // Both assertions must be present under their subject-scoped assertion keys.
    let old_bytes = stemedb_core::serde::serialize(&old_assertion).expect("ser");
    let old_hash = *blake3::hash(&old_bytes).as_bytes();
    let new_bytes = stemedb_core::serde::serialize(&new_assertion).expect("ser");
    let new_hash = *blake3::hash(&new_bytes).as_bytes();

    let old_key = key_codec::assertion_key(subject, &hex::encode(old_hash));
    let new_key = key_codec::assertion_key(subject, &hex::encode(new_hash));

    assert!(
        store.get(&old_key).await.expect("get old").is_some(),
        "old assertion should be stored"
    );
    assert!(
        store.get(&new_key).await.expect("get new").is_some(),
        "new assertion should be stored"
    );

    let engine = QueryEngine::new(store.clone());

    // --- Without decay: the old assertion wins (0.95 > 0.6). ---
    let plain_query = Query::builder().subject(subject).predicate(predicate).build();
    let plain_result = engine.execute(&plain_query).await.expect("query no decay");
    assert_eq!(plain_result.assertions.len(), 2);

    let top_plain = plain_result
        .assertions
        .iter()
        .max_by(|lhs, rhs| {
            lhs.confidence.partial_cmp(&rhs.confidence).unwrap_or(Ordering::Equal)
        })
        .expect("at least one assertion");
    assert_eq!(
        top_plain.object,
        old_object,
        "Without decay, old high-confidence assertion has highest confidence"
    );

    // --- With decay: the new assertion has the higher effective confidence. ---
    //   Old: 0.95 * 2^(-1)         = 0.475
    //   New: 0.6  * 2^(-(7/365))   ≈ 0.59
    let decayed_query = Query::builder()
        .subject(subject)
        .predicate(predicate)
        .decay_halflife(one_year_seconds)
        .as_of(now) // pin "now" so the result does not depend on wall-clock time
        .build();
    let decayed_result = engine.execute(&decayed_query).await.expect("query with decay");
    assert_eq!(decayed_result.assertions.len(), 2);

    let top_decayed = decayed_result
        .assertions
        .iter()
        .max_by(|lhs, rhs| {
            lhs.confidence.partial_cmp(&rhs.confidence).unwrap_or(Ordering::Equal)
        })
        .expect("at least one assertion");
    assert_eq!(
        top_decayed.object,
        new_object,
        "With decay, newer assertion should have higher effective confidence"
    );

    // Check the individual decayed confidence values, not just their ordering.
    let old_decayed = decayed_result
        .assertions
        .iter()
        .find(|candidate| candidate.object == old_object)
        .expect("find old assertion");
    let new_decayed = decayed_result
        .assertions
        .iter()
        .find(|candidate| candidate.object == new_object)
        .expect("find new assertion");

    // Old: 0.95 * 2^(-1) ≈ 0.475
    let old_delta = (old_decayed.confidence - 0.475).abs();
    assert!(
        old_delta < 0.02,
        "Old assertion should decay to ~0.475, got {}",
        old_decayed.confidence
    );

    // New: 0.6 * 2^(-(7/365)) ≈ 0.592
    let new_delta = (new_decayed.confidence - 0.592).abs();
    assert!(
        new_delta < 0.02,
        "New assertion should decay minimally to ~0.592, got {}",
        new_decayed.confidence
    );
}