stemedb/crates/stemedb-lens/src/hlc_recency.rs
jordan d3a88585fe feat: Phase 6 UAT - Admission control, HLC recency, cluster coordination
This commit includes comprehensive work on Phase 6 features:

## Admission Control (Phase 6 admission middleware)
- AdmissionStore implementation backed by TrustRankStore
- PoW verification with tier-based difficulty computation
- Trust tier progression (Newcomer → Established → Trusted → Authority)
- API integration with admission status endpoints

## HLC Recency Lens (Phase 6C)
- HlcRecencyLens for distributed system ordering
- Hybrid logical clock integration with causality preservation

## Cluster Coordination (Phase 6C)
- Multi-node cluster tests (availability, partition tolerance)
- CRDT convergence tests for anti-entropy sync
- Gateway handler improvements

## Aphoria Code Linter (Phase 2A)
- RFC/OWASP corpus builders with network fetching and caching
- Concept hierarchy with auto-alias creation on conflict detection
- Multiple security extractors (TLS, JWT, CORS, secrets, rate limiting)

## Code Organization
- Split large files into modules to comply with 500-line limit
- Improved test organization with separate test modules
- Fixed rkyv serialization for EigenTrustState (AgentScore struct)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 00:43:37 -07:00

287 lines
10 KiB
Rust

//! HLC-based Recency Lens: Hybrid Logical Clock timestamp wins.
//!
//! This lens provides distributed-consistent recency ordering using HLC timestamps,
//! which handle clock skew between nodes better than Unix timestamps alone.
//!
//! # Why HLC over Unix timestamp?
//!
//! - **Clock skew tolerance**: Two nodes with drifted clocks will still produce
//! consistent ordering because HLC combines physical time with logical counters.
//! - **Total ordering**: HLC + node_id provides deterministic ordering even for
//! concurrent events on different nodes.
//! - **Causal consistency**: HLC preserves happens-before relationships across
//! distributed nodes.
//!
//! # Resolution Strategy
//!
//! 1. Compare by `hlc_timestamp` (includes NTP64 time + logical counter)
//! 2. If HLC times are equal (concurrent events), compare by `node_id`
//! 3. Final tiebreaker: `source_hash` for determinism
use crate::traits::{compute_conflict_score, Lens, Resolution};
use stemedb_core::types::Assertion;
use tracing::instrument;
/// Recency lens driven by Hybrid Logical Clock (HLC) timestamps: the candidate
/// with the greatest `hlc_timestamp` is selected as the winner.
///
/// # Resolution Strategy
///
/// 1. Pick the assertion whose `hlc_timestamp` compares highest.
/// 2. On equal HLC time, the `node_id` carried by the HLC breaks the tie.
/// 3. If the HLC timestamps are fully identical, the higher `source_hash`
///    wins so the outcome is deterministic.
///
/// # Confidence Calculation
///
/// - Single candidate: 1.0 (trivial resolution).
/// - Multiple candidates: derived from the gap, in milliseconds, between the
///   winner's HLC timestamp and the next candidate:
///   - gap over 1 day: 0.95
///   - gap over 1 hour: 0.8
///   - gap over 1 minute: 0.6
///   - anything smaller: 0.5
#[derive(Clone, Copy, Debug, Default)]
pub struct HlcRecencyLens;
impl Lens for HlcRecencyLens {
    /// Resolves `candidates` to the assertion with the greatest HLC timestamp.
    ///
    /// Ordering is `hlc_timestamp` first, then `source_hash` as a final,
    /// deterministic tiebreaker for identical HLC timestamps.
    ///
    /// Returns:
    /// - `Resolution::empty()` when there are no candidates;
    /// - the sole candidate with confidence `1.0` and conflict `0.0` when
    ///   there is exactly one;
    /// - otherwise the winner, the candidate count, a confidence derived from
    ///   the millisecond gap between the winner and the closest other
    ///   candidate, and the score from `compute_conflict_score`.
    ///
    /// The gap is measured against every candidate except the winner itself,
    /// so a candidate that ties the winner on HLC time yields a gap of zero
    /// (lowest confidence) rather than being skipped.
    #[instrument(skip(self, candidates), fields(candidates_count = candidates.len(), lens = "HlcRecency"))]
    fn resolve(&self, candidates: &[Assertion]) -> Resolution {
        if candidates.is_empty() {
            return Resolution::empty();
        }
        if candidates.len() == 1 {
            return Resolution::with_winner(candidates[0].clone(), 1, 1.0, 0.0);
        }

        // Track the winner's index so it can be excluded (by position, not by
        // value) when looking for the runner-up below.
        let selected = candidates.iter().enumerate().max_by(|(_, a), (_, b)| {
            // Primary: highest HLC timestamp.
            // Final tiebreaker: source_hash for determinism.
            a.hlc_timestamp
                .cmp(&b.hlc_timestamp)
                .then_with(|| a.source_hash.cmp(&b.source_hash))
        });
        let (winner_idx, winner) = match selected {
            Some(found) => found,
            // Unreachable for a non-empty slice; kept as a defensive fallback.
            None => return Resolution::empty(),
        };

        let max_ms = winner.hlc_timestamp.millis();

        // Closest competitor: the largest HLC time among all *other*
        // candidates, including ones that tie the winner. Falling back to
        // `max_ms` gives a zero gap; with two or more candidates the fallback
        // is never actually taken.
        let runner_up_ms = candidates
            .iter()
            .enumerate()
            .filter(|(idx, _)| *idx != winner_idx)
            .map(|(_, other)| other.hlc_timestamp.millis())
            .max()
            .unwrap_or(max_ms);

        // Confidence grows with the distance to the closest competitor.
        let gap_ms = max_ms.saturating_sub(runner_up_ms);
        let confidence = if gap_ms > 86_400_000 {
            // More than a day: high confidence
            0.95
        } else if gap_ms > 3_600_000 {
            // More than an hour: good confidence
            0.8
        } else if gap_ms > 60_000 {
            // More than a minute: moderate confidence
            0.6
        } else {
            // Very close (or tied): low confidence
            0.5
        };

        let conflict = compute_conflict_score(candidates);
        Resolution::with_winner(winner.clone(), candidates.len(), confidence, conflict)
    }

    /// Returns the lens identifier, `"HlcRecency"`.
    fn name(&self) -> &'static str {
        "HlcRecency"
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use stemedb_core::testing::AssertionBuilder;
    use stemedb_core::types::HlcTimestamp;

    /// Builds an assertion for `subject` stamped with
    /// `HlcTimestamp::new(time_ntp64, node_id)`.
    fn create_assertion_with_hlc(subject: &str, time_ntp64: u64, node_id: [u8; 16]) -> Assertion {
        let stamp = HlcTimestamp::new(time_ntp64, node_id);
        AssertionBuilder::new().subject(subject).hlc_timestamp(stamp).build()
    }

    /// Returns `assertion` with its `source_hash` field overwritten.
    fn with_source_hash(mut assertion: Assertion, source_hash: [u8; 32]) -> Assertion {
        assertion.source_hash = source_hash;
        assertion
    }

    /// Subject of the resolution's winner, if there is one.
    fn winner_subject(resolution: &Resolution) -> Option<&str> {
        resolution.winner.as_ref().map(|w| w.subject.as_str())
    }

    #[test]
    fn test_empty_candidates() {
        let resolution = HlcRecencyLens.resolve(&[]);

        assert!(resolution.winner.is_none());
        assert_eq!(resolution.candidates_count, 0);
    }

    #[test]
    fn test_single_candidate() {
        let only = create_assertion_with_hlc("Tesla", 1000, [1u8; 16]);

        let resolution = HlcRecencyLens.resolve(std::slice::from_ref(&only));

        assert!(resolution.winner.is_some());
        assert_eq!(winner_subject(&resolution), Some("Tesla"));
        assert_eq!(resolution.candidates_count, 1);
        assert!((resolution.resolution_confidence - 1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn test_hlc_ordering_beats_unix_timestamp() {
        // Both assertions share the Unix timestamp; only the HLC differs, so
        // the winner must be decided by HLC ordering.
        let older = with_source_hash(
            AssertionBuilder::new()
                .subject("Older")
                .timestamp(1000)
                .hlc_timestamp(HlcTimestamp::new(1000, [1u8; 16]))
                .build(),
            [1u8; 32],
        );
        let newer = with_source_hash(
            AssertionBuilder::new()
                .subject("Newer")
                .timestamp(1000)
                .hlc_timestamp(HlcTimestamp::new(2000, [1u8; 16]))
                .build(),
            [2u8; 32],
        );

        let resolution = HlcRecencyLens.resolve(&[older, newer]);

        assert!(resolution.winner.is_some());
        assert_eq!(winner_subject(&resolution), Some("Newer"));
    }

    #[test]
    fn test_deterministic_tiebreaker_same_hlc_time() {
        // Equal HLC time: the node_id inside the HLC decides.
        let low_node = with_source_hash(create_assertion_with_hlc("A", 1000, [1u8; 16]), [1u8; 32]);
        let high_node =
            with_source_hash(create_assertion_with_hlc("B", 1000, [2u8; 16]), [2u8; 32]);

        let forward = HlcRecencyLens.resolve(&[low_node.clone(), high_node.clone()]);
        let reversed = HlcRecencyLens.resolve(&[high_node, low_node]);

        // The outcome must not depend on input order.
        assert_eq!(winner_subject(&forward), winner_subject(&reversed));
        // Node B carries the higher node_id: [2u8; 16] > [1u8; 16].
        assert_eq!(winner_subject(&forward), Some("B"));
    }

    #[test]
    fn test_clock_skew_scenario() {
        // Node A's wall clock runs ahead, yet Node B's event is causally later.
        // Node B observed A's timestamp and advanced past it, which in a real
        // HLC would be max(local_time, received_time) + 1.
        let node_a_ahead = create_assertion_with_hlc("NodeA_Ahead", 5000, [1u8; 16]);
        let node_b_later = create_assertion_with_hlc("NodeB_CausallyLater", 5001, [2u8; 16]);

        let resolution = HlcRecencyLens.resolve(&[node_a_ahead, node_b_later]);

        // The causally later assertion has the higher HLC and therefore wins.
        assert_eq!(winner_subject(&resolution), Some("NodeB_CausallyLater"));
    }

    #[test]
    fn test_source_hash_final_tiebreaker() {
        // Fully identical HLC timestamps: source_hash is all that is left.
        let lower_hash =
            with_source_hash(create_assertion_with_hlc("A", 1000, [1u8; 16]), [1u8; 32]);
        let higher_hash =
            with_source_hash(create_assertion_with_hlc("B", 1000, [1u8; 16]), [2u8; 32]);

        let resolution = HlcRecencyLens.resolve(&[lower_hash, higher_hash]);

        // The higher source_hash wins.
        assert_eq!(winner_subject(&resolution), Some("B"));
    }

    #[test]
    fn test_confidence_calculation() {
        // NTP64 keeps whole seconds in the upper 32 bits, so one second is
        // 1 << 32. The two assertions are placed two days apart.
        const NTP_UNIX_OFFSET: u64 = 2_208_988_800;
        const TWO_DAYS_SECS: u64 = 2 * 86_400;

        let base_seconds = NTP_UNIX_OFFSET + 1000;
        let earlier_ntp64 = base_seconds << 32;
        let later_ntp64 = (base_seconds + TWO_DAYS_SECS) << 32;

        let old = create_assertion_with_hlc("Old", earlier_ntp64, [1u8; 16]);
        let new = create_assertion_with_hlc("New", later_ntp64, [1u8; 16]);

        let resolution = HlcRecencyLens.resolve(&[old, new]);

        assert!(resolution.winner.is_some());
        // A gap of more than one day should land in the 0.95 bucket.
        assert!(
            resolution.resolution_confidence > 0.9,
            "Expected high confidence for large gap, got {}",
            resolution.resolution_confidence
        );
    }

    #[test]
    fn test_multiple_candidates_selects_newest() {
        let candidates = [
            create_assertion_with_hlc("Old", 1000, [1u8; 16]),
            create_assertion_with_hlc("Newer", 2000, [1u8; 16]),
            create_assertion_with_hlc("Newest", 3000, [1u8; 16]),
        ];

        let resolution = HlcRecencyLens.resolve(&candidates);

        assert!(resolution.winner.is_some());
        assert_eq!(winner_subject(&resolution), Some("Newest"));
        assert_eq!(resolution.candidates_count, 3);
    }

    #[test]
    fn test_lens_name() {
        assert_eq!(HlcRecencyLens.name(), "HlcRecency");
    }
}