tidaldb/tidal/tests/m7_uat.rs
2026-02-23 22:41:16 -07:00

792 lines
25 KiB
Rust

//! Milestone 7 Phase 5: End-to-end UAT integration test suite.
//!
//! Validates the complete M7 feature set through public API only:
//! - Crash recovery (state survives restart)
//! - Hard negatives survive restart
//! - Session TTL auto-cleanup
//! - Graceful degradation under load
//! - Per-agent rate limiting isolation
//! - `QueryStats` populated in results
//! - RLHF signal export
//! - User session aggregation
//! - `MetricsState` fields available
//! - tidalctl diagnostics structural check
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
use tidaldb::{AgentPolicy, DegradationLevel, ExportRequest, TidalDb, TidalError};
// ── Shared schema builder ───────────────────────────────────────────────────
/// Builds the schema shared by every UAT scenario in this file.
///
/// Registers four item-level signals (`view`, `like`, `hide`, `block`), all
/// with exponential decay, plus two session policies: `short_ttl` (500 ms
/// maximum session duration) and `default` (one hour).
///
/// # Panics
///
/// Panics if `SchemaBuilder::build` does not succeed.
fn uat_schema() -> tidaldb::schema::Schema {
    const SECS_PER_DAY: u64 = 24 * 3600;

    // Every signal decays exponentially; only the half-life differs.
    let exponential = |half_life_secs: u64| DecaySpec::Exponential {
        half_life: Duration::from_secs(half_life_secs),
    };

    let mut schema = SchemaBuilder::new();

    // "view": 7-day half-life, three windows, velocity tracking on.
    let _ = schema
        .signal("view", EntityKind::Item, exponential(7 * SECS_PER_DAY))
        .windows(&[Window::OneHour, Window::TwentyFourHours, Window::AllTime])
        .velocity(true)
        .add();

    // "like": 14-day half-life, two windows, no velocity.
    let _ = schema
        .signal("like", EntityKind::Item, exponential(14 * SECS_PER_DAY))
        .windows(&[Window::TwentyFourHours, Window::AllTime])
        .velocity(false)
        .add();

    // "hide" and "block": 1-day half-life, all-time window only.
    let _ = schema
        .signal("hide", EntityKind::Item, exponential(SECS_PER_DAY))
        .windows(&[Window::AllTime])
        .velocity(false)
        .add();
    let _ = schema
        .signal("block", EntityKind::Item, exponential(SECS_PER_DAY))
        .windows(&[Window::AllTime])
        .velocity(false)
        .add();

    // Short-lived sessions, used by the TTL sweep test.
    schema.session_policy(
        "short_ttl",
        AgentPolicy {
            max_session_duration: Duration::from_millis(500),
            max_signals_per_session: 1000,
            allowed_signals: Vec::new(),
            denied_signals: Vec::new(),
        },
    );

    // Long-lived sessions for everything else.
    schema.session_policy(
        "default",
        AgentPolicy {
            max_session_duration: Duration::from_secs(3600),
            max_signals_per_session: 10_000,
            allowed_signals: Vec::new(),
            denied_signals: Vec::new(),
        },
    );

    schema.build().unwrap()
}
/// Helper: build the metadata map for test item `id`.
///
/// The map always has exactly four keys:
/// - `title`: `"Item {id}"`
/// - `category`: `"test"`
/// - `format`: `"video"`
/// - `creator_id`: `(id % 5) + 1` rendered as a decimal string, so items are
///   spread round-robin over creators `1..=5`.
fn item_meta(id: u64) -> HashMap<String, String> {
    let creator_id = (id % 5) + 1;
    HashMap::from([
        ("title".to_string(), format!("Item {id}")),
        ("category".to_string(), "test".to_string()),
        ("format".to_string(), "video".to_string()),
        ("creator_id".to_string(), creator_id.to_string()),
    ])
}
// ── UAT-01: Crash Recovery -- State Survives Restart ────────────────────────
/// Restart scenario: fill a persistent database with 200 items (metadata plus
/// one `view` signal each), close it, reopen it from the same directory, and
/// check that a RETRIEVE with the `new` profile still yields results.
/// Closing and reopening stands in for a crash/restart.
#[test]
fn uat_01_crash_recovery_state_survives_restart() {
    use tidaldb::query::retrieve::Retrieve;

    const ITEM_COUNT: u64 = 200;
    let data_dir = tempfile::tempdir().unwrap();
    let schema = uat_schema();

    // First lifetime: write everything, then close. The block scope ensures
    // the handle is gone before the directory is opened again.
    {
        let db = TidalDb::builder()
            .with_data_dir(data_dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap();
        let written_at = Timestamp::now();
        for id in 1..=ITEM_COUNT {
            db.write_item_with_metadata(EntityId::new(id), &item_meta(id))
                .unwrap();
            db.signal("view", EntityId::new(id), 1.0, written_at)
                .unwrap();
        }
        db.close().unwrap();
    }

    // Second lifetime: reopen the same directory and query it.
    {
        let db = TidalDb::builder()
            .with_data_dir(data_dir.path())
            .with_schema(schema)
            .open()
            .unwrap();

        // The in-memory bitmap/universe indexes are not persisted, so the
        // item metadata is written again to rebuild them before querying.
        for id in 1..=ITEM_COUNT {
            db.write_item_with_metadata(EntityId::new(id), &item_meta(id))
                .unwrap();
        }

        let newest = Retrieve::builder()
            .profile("new")
            .limit(50)
            .build()
            .unwrap();
        let results = db.retrieve(&newest).unwrap();
        assert!(
            !results.is_empty(),
            "RETRIEVE after restart must return items; got 0"
        );
        db.close().unwrap();
    }
}
// ── UAT-02: Hard Negatives Don't Leak After Restart ─────────────────────────
/// Writes 10 items split across 2 creators, hides item 1 for user 1001 and
/// blocks creator 2 for the same user, then closes and reopens the database.
/// After the restart a user-scoped RETRIEVE must contain neither the hidden
/// item nor any item owned by the blocked creator, yet must not be empty.
#[test]
fn uat_02_hard_negatives_survive_restart() {
    use tidaldb::entities::RelationshipType;
    use tidaldb::query::retrieve::Retrieve;

    let dir = tempfile::tempdir().unwrap();
    let schema = uat_schema();
    let user_id = 1001u64;
    // Odd item ids map to creator 2, even item ids to creator 1.
    let creator_fn = |id: u64| (id % 2) + 1;
    // Both phases write the same metadata, so it is built in one place.
    let meta_for = |i: u64| {
        let mut meta = HashMap::new();
        meta.insert("title".to_string(), format!("Item {i}"));
        meta.insert("category".to_string(), "test".to_string());
        meta.insert("format".to_string(), "video".to_string());
        meta.insert("creator_id".to_string(), creator_fn(i).to_string());
        meta
    };

    // Phase 1: populate, hide, block, close.
    {
        let db = TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema.clone())
            .open()
            .unwrap();
        let now = Timestamp::now();
        let user = || EntityId::new(user_id);

        for i in 1..=10u64 {
            db.write_item_with_metadata(EntityId::new(i), &meta_for(i))
                .unwrap();
            db.signal("view", EntityId::new(i), 1.0, now).unwrap();
        }

        // The user must exist for user-context filtering to be active.
        db.write_user(user(), &HashMap::new()).unwrap();

        // Hide item 1.
        db.write_relationship(user(), RelationshipType::Hide, EntityId::new(1), 1.0, now)
            .unwrap();

        // Block creator 2, who owns the odd items 1, 3, 5, 7, 9.
        db.write_relationship(user(), RelationshipType::Blocks, EntityId::new(2), 1.0, now)
            .unwrap();

        db.close().unwrap();
    }

    // Phase 2: reopen, repopulate item metadata, verify the filters held.
    {
        let db = TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema)
            .open()
            .unwrap();
        for i in 1..=10u64 {
            db.write_item_with_metadata(EntityId::new(i), &meta_for(i))
                .unwrap();
        }

        let query = Retrieve::builder()
            .profile("new")
            .for_user(user_id)
            .limit(50)
            .build()
            .unwrap();
        let results = db.retrieve(&query).unwrap();
        let returned_ids: Vec<u64> = results
            .items
            .iter()
            .map(|hit| hit.entity_id.as_u64())
            .collect();

        // The hidden item must stay hidden.
        assert!(
            !returned_ids.contains(&1),
            "hidden item 1 must not appear after restart; got: {returned_ids:?}"
        );

        // Item 1 is covered above; the remaining odd ids belong to blocked
        // creator 2 and must be filtered out too.
        for blocked_item in [3u64, 5, 7, 9] {
            assert!(
                !returned_ids.contains(&blocked_item),
                "blocked creator 2's item {blocked_item} must not appear; got: {returned_ids:?}"
            );
        }

        // Creator 1's items are neither hidden nor blocked, so something
        // must come back.
        assert!(
            !results.is_empty(),
            "should have non-blocked, non-hidden items"
        );
        db.close().unwrap();
    }
}
// ── UAT-03: Session TTL Auto-Cleanup ────────────────────────────────────────
/// Starts a session under the `short_ttl` policy (500 ms), sleeps 600 ms so
/// the TTL has elapsed, forces a sweep, and checks that the session is no
/// longer active, that its snapshot is retrievable, and that the handle's
/// `closed` flag is set.
#[test]
fn uat_03_session_ttl_auto_cleanup() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(uat_schema())
        .open()
        .unwrap();

    // The short_ttl policy caps session duration at 500 ms.
    let handle = db
        .start_session(1, "agent-ttl", "short_ttl", HashMap::new())
        .unwrap();
    let session_id = handle.id;

    // One signal while the session is still alive.
    let sent_at = Timestamp::now();
    db.session_signal(&handle, "view", EntityId::new(1), 1.0, sent_at, None)
        .unwrap();
    assert_eq!(db.active_sessions().len(), 1);

    // Outlive the TTL.
    thread::sleep(Duration::from_millis(600));

    // Ephemeral mode does not auto-spawn the sweeper thread, so sweep by hand.
    db.force_sweep();

    // The expired session must be gone from the active set...
    let still_active = db.active_sessions().len();
    assert_eq!(still_active, 0, "expired session must be swept");

    // ...its snapshot must be available under the same id...
    let snapshot = db.session_snapshot(session_id).unwrap();
    assert_eq!(snapshot.id, session_id);

    // ...and the caller-side handle must reflect the auto-close.
    let handle_closed = handle.closed.load(Ordering::Acquire);
    assert!(handle_closed, "handle.closed must be true after auto-close");
}
// ── UAT-04: Graceful Degradation -- Queries Return Results Under Load ───────
/// Spawn 50 concurrent threads each doing up to 5 RETRIEVE queries against
/// 100 seeded items.
///
/// Every query must either succeed or fail with `TidalError::Backpressure`;
/// any other error fails the test. Each successful result must expose a
/// `degradation_level` that is one of the known `DegradationLevel` variants.
///
/// The first unexpected error is printed to stderr and raises a shared stop
/// flag, so the remaining workers stop issuing queries instead of piling
/// more load onto an already failing database.
#[test]
fn uat_04_graceful_degradation_queries_ok_under_load() {
    let schema = uat_schema();
    let db = Arc::new(
        TidalDb::builder()
            .ephemeral()
            .with_schema(schema)
            .open()
            .unwrap(),
    );

    // Seed items.
    for i in 1..=100u64 {
        let meta = item_meta(i);
        db.write_item_with_metadata(EntityId::new(i), &meta)
            .unwrap();
        db.signal("view", EntityId::new(i), 1.0, Timestamp::now())
            .unwrap();
    }

    // Relaxed ordering is enough for all three atomics: the counters are only
    // read after every worker has been joined, and the stop flag is a
    // best-effort hint that guards no other data.
    let error_count = Arc::new(AtomicUsize::new(0));
    let query_count = Arc::new(AtomicUsize::new(0));
    let stop = Arc::new(AtomicBool::new(false));

    let mut handles = Vec::new();
    for _ in 0..50 {
        let db = Arc::clone(&db);
        let errors = Arc::clone(&error_count);
        let queries = Arc::clone(&query_count);
        let stop = Arc::clone(&stop);
        handles.push(thread::spawn(move || {
            for _ in 0..5 {
                // Another worker already hit an unexpected error: bail out.
                if stop.load(Ordering::Relaxed) {
                    break;
                }
                let query = tidaldb::query::retrieve::Retrieve::builder()
                    .profile("hot")
                    .limit(10)
                    .build()
                    .unwrap();
                match db.retrieve(&query) {
                    Ok(results) => {
                        queries.fetch_add(1, Ordering::Relaxed);
                        // Verify degradation_level field is accessible and is a valid variant.
                        assert!(matches!(
                            results.degradation_level,
                            DegradationLevel::Full
                                | DegradationLevel::ReducedCandidates
                                | DegradationLevel::CoarseAggregates
                                | DegradationLevel::NoDiversity
                        ));
                    }
                    Err(TidalError::Backpressure { .. }) => {
                        // Backpressure is acceptable under overload.
                    }
                    Err(e) => {
                        // Surface the cause: the final assertion only reports a count.
                        eprintln!("unexpected retrieve error under load: {e}");
                        errors.fetch_add(1, Ordering::Relaxed);
                        stop.store(true, Ordering::Relaxed);
                    }
                }
            }
        }));
    }

    // A worker that panicked (e.g. on the variant assertion) fails the test here.
    for h in handles {
        h.join().unwrap();
    }

    let total_queries = query_count.load(Ordering::Relaxed);
    let total_errors = error_count.load(Ordering::Relaxed);

    // Check errors first: once the stop flag trips, the query count may be
    // low, and the error is the root cause worth reporting.
    assert_eq!(
        total_errors, 0,
        "expected zero non-backpressure errors, got {total_errors}"
    );
    assert!(
        total_queries > 0,
        "expected some queries to complete; got 0"
    );
}
// ── UAT-05: Per-Agent Rate Limiting Isolation ───────────────────────────────
/// Opens a database with a `RateLimiterConfig::limited(100.0, 200.0)` rate
/// limiter, floods agent A's session with 300 signals, then sends 10 signals
/// on agent B's session. Agent A must see both accepted and rate-limited
/// signals; none of agent B's signals may fail.
#[test]
fn uat_05_per_agent_rate_limiting_isolation() {
    let limiter = tidaldb::load::RateLimiterConfig::limited(100.0, 200.0);
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(uat_schema())
        .with_rate_limiter_config(limiter)
        .open()
        .unwrap();

    // One target item per agent.
    for item in [1u64, 2] {
        db.write_item_with_metadata(EntityId::new(item), &item_meta(item))
            .unwrap();
    }

    let handle_a = db
        .start_session(1, "agent-a", "default", HashMap::new())
        .unwrap();
    let handle_b = db
        .start_session(2, "agent-b", "default", HashMap::new())
        .unwrap();

    // "Now" nudged forward by `offset` nanoseconds.
    let stamp =
        |offset: u64| Timestamp::from_nanos(Timestamp::now().as_nanos().saturating_add(offset));

    // Push agent A past the burst capacity of 200 with 300 signals.
    let (mut a_accepted, mut a_rejected) = (0u32, 0u32);
    for i in 0..300u64 {
        let outcome = db.session_signal(&handle_a, "view", EntityId::new(1), 1.0, stamp(i), None);
        match outcome {
            Ok(()) => a_accepted += 1,
            Err(TidalError::RateLimited { .. }) => a_rejected += 1,
            Err(e) => panic!("unexpected error for agent-a: {e}"),
        }
    }

    // Agent B has its own bucket, so its writes must still go through.
    for i in 0..10u64 {
        let result = db.session_signal(&handle_b, "view", EntityId::new(2), 1.0, stamp(i), None);
        assert!(
            result.is_ok(),
            "agent-b signal {i} must not be rate-limited by agent-a: {result:?}"
        );
    }

    // Agent A must have been throttled, but not shut out entirely.
    assert!(
        a_rejected > 0,
        "expected agent-a to be rate-limited; accepted={a_accepted}, rejected={a_rejected}"
    );
    assert!(
        a_accepted > 0,
        "expected some agent-a signals to succeed; accepted={a_accepted}"
    );

    db.close_session(handle_a).unwrap();
    db.close_session(handle_b).unwrap();
}
// ── UAT-06: QueryStats Populated in Results ─────────────────────────────────
/// Seeds 20 items, runs one RETRIEVE (`hot` profile) and one SEARCH, and
/// checks that the `stats` attached to each result carries a non-zero
/// `total_time_us` and the expected `profile_name`.
#[test]
fn uat_06_query_stats_populated_in_results() {
    use tidaldb::query::retrieve::Retrieve;
    use tidaldb::query::search::Search;

    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(uat_schema())
        .open()
        .unwrap();

    for id in 1..=20u64 {
        db.write_item_with_metadata(EntityId::new(id), &item_meta(id))
            .unwrap();
        db.signal("view", EntityId::new(id), 1.0, Timestamp::now())
            .unwrap();
    }

    // RETRIEVE stats.
    let hot = Retrieve::builder()
        .profile("hot")
        .limit(10)
        .build()
        .unwrap();
    let retrieved = db.retrieve(&hot).unwrap();
    let stats = &retrieved.stats;
    assert!(
        stats.total_time_us > 0,
        "RETRIEVE total_time_us must be > 0; got {}",
        stats.total_time_us
    );
    assert!(
        !stats.profile_name.is_empty(),
        "RETRIEVE profile_name must be non-empty"
    );
    assert_eq!(
        stats.profile_name, "hot",
        "RETRIEVE profile_name must match the requested profile"
    );

    // SEARCH stats.
    let text_query = Search::builder().query("Item").limit(10).build().unwrap();
    let found = db.search(&text_query).unwrap();
    let stats = &found.stats;
    assert!(
        stats.total_time_us > 0,
        "SEARCH total_time_us must be > 0; got {}",
        stats.total_time_us
    );
    assert_eq!(
        stats.profile_name, "search",
        "SEARCH profile_name must be 'search'"
    );
}
// ── UAT-07: RLHF Signal Export ──────────────────────────────────────────────
/// Opens a persistent database, writes five `view` signals (entities 1-5),
/// and exports signals from the time range starting just before the writes,
/// while the database is still open. The export must contain at least five
/// signals, at least five of type `view`, and one for each entity 1-5.
#[cfg(feature = "test-utils")]
#[test]
fn uat_07_rlhf_signal_export() {
    use tidaldb::TempTidalHome;

    let home = TempTidalHome::new().unwrap();
    let db = TidalDb::builder()
        .with_data_dir(home.path())
        .with_schema(uat_schema())
        .open()
        .unwrap();

    // Lower bound of the export range, captured before any signal is written.
    let since = Timestamp::now().as_nanos();

    for entity in 1..=5u64 {
        db.signal("view", EntityId::new(entity), 1.0, Timestamp::now())
            .unwrap();
    }

    // Export while the database is still open -- WAL segments are live on disk.
    let request = ExportRequest::time_range(since, u64::MAX);
    let signals = db.export_signals(&request).unwrap();
    let exported = signals.len();
    assert!(
        exported >= 5,
        "expected at least 5 exported signals; got {}",
        exported
    );

    // At least the five signals written above must be of type "view".
    let view_count = signals.iter().filter(|s| s.signal_type == "view").count();
    assert!(
        view_count >= 5,
        "expected at least 5 view signals; got {}",
        view_count
    );

    // Each of the entities written above must show up in the export.
    for eid in 1..=5u64 {
        let present = signals.iter().any(|s| s.entity_id == eid);
        assert!(present, "entity {eid} must be present in exported signals");
    }

    db.close().unwrap();
}
// ── UAT-08: User Session Aggregation ────────────────────────────────────────
/// Runs two sessions for user 42 -- five `view` signals in the first, three
/// `view` plus two `like` signals in the second -- closes both, and checks
/// that `user_session_summary` reports at least 2 sessions, at least 10
/// signals, and `view` as the leading signal type.
#[test]
fn uat_08_user_session_aggregation() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(uat_schema())
        .open()
        .unwrap();
    let user_id = 42u64;

    // First session: views on items 1-5.
    let first = db
        .start_session(user_id, "agent-1", "default", HashMap::new())
        .unwrap();
    for item in 1..=5u64 {
        let at = Timestamp::now();
        db.session_signal(&first, "view", EntityId::new(item), 1.0, at, None)
            .unwrap();
    }
    db.close_session(first).unwrap();

    // Second session: views on items 6-8, then likes on items 9-10.
    let second = db
        .start_session(user_id, "agent-2", "default", HashMap::new())
        .unwrap();
    for item in 6..=8u64 {
        let at = Timestamp::now();
        db.session_signal(&second, "view", EntityId::new(item), 1.0, at, None)
            .unwrap();
    }
    for item in 9..=10u64 {
        let at = Timestamp::now();
        db.session_signal(&second, "like", EntityId::new(item), 1.0, at, None)
            .unwrap();
    }
    db.close_session(second).unwrap();

    // Aggregate across both closed sessions.
    let summary = db.user_session_summary(user_id, 0).unwrap();
    assert!(
        summary.sessions_count >= 2,
        "expected at least 2 sessions; got {}",
        summary.sessions_count
    );
    assert!(
        summary.total_signals >= 10,
        "expected at least 10 total signals; got {}",
        summary.total_signals
    );
    assert!(
        !summary.top_signal_types.is_empty(),
        "top_signal_types must be non-empty"
    );

    // 8 views against 2 likes: "view" has to lead the ranking.
    let leader = &summary.top_signal_types[0];
    assert_eq!(leader.0, "view", "top signal type should be 'view'");
}
// ── UAT-09: MetricsState Fields Available ───────────────────────────────────
/// Compilation + field-access check for the object returned by
/// `db.metrics()`: uptime is non-negative, health reads 1.0, and both the
/// Prometheus and healthz renderings contain the expected keys.
#[test]
fn uat_09_metrics_state_fields_available() {
    let db = TidalDb::builder()
        .ephemeral()
        .with_schema(uat_schema())
        .open()
        .unwrap();

    // Generate a little activity before reading the metrics.
    for id in 1..=5u64 {
        db.write_item_with_metadata(EntityId::new(id), &item_meta(id))
            .unwrap();
        db.signal("view", EntityId::new(id), 1.0, Timestamp::now())
            .unwrap();
    }

    let metrics = db.metrics();

    // Scalar accessors.
    let uptime = metrics.uptime_seconds();
    assert!(uptime >= 0.0, "uptime must be non-negative; got {uptime}");

    let health = metrics.health_ok_value();
    let health_is_one = (health - 1.0).abs() < f64::EPSILON;
    assert!(
        health_is_one,
        "health must be 1.0 for a healthy db; got {health}"
    );

    // Prometheus text rendering.
    let prom = metrics.render_prometheus();
    for (needle, failure) in [
        (
            "tidaldb_uptime_seconds",
            "Prometheus output must contain uptime metric",
        ),
        (
            "tidaldb_health_ok",
            "Prometheus output must contain health_ok metric",
        ),
    ] {
        assert!(prom.contains(needle), "{failure}");
    }

    // Healthz rendering: look for the quoted key followed by a colon.
    let healthz = metrics.render_healthz();
    for (needle, failure) in [
        ("\"status\":", "healthz must contain status"),
        ("\"uptime_seconds\":", "healthz must contain uptime_seconds"),
        ("\"version\":", "healthz must contain version"),
    ] {
        assert!(healthz.contains(needle), "{failure}");
    }
}
// ── UAT-10: tidalctl Diagnostics (Structural Test) ──────────────────────────
/// Structural check of the library APIs behind tidalctl's diagnostics.
///
/// The tidalctl binary has its own tests (`tidalctl/tests/cli.rs`); this test
/// only exercises the pieces it relies on from the tidal library: `Paths`
/// directory layout, WAL diagnostics on an empty home, the text index stats
/// reader, and the `BUILD_HASH` constant.
#[cfg(feature = "test-utils")]
#[test]
fn uat_10_tidalctl_diagnostics_structural() {
    use tidaldb::TempTidalHome;

    let home = TempTidalHome::new().unwrap();
    let root = home.path();

    // Directory layout: after `ensure_all`, every directory must exist.
    let paths = tidaldb::Paths::new(root);
    paths.ensure_all().unwrap();
    let layout = [
        (paths.base().exists(), "base dir must exist"),
        (paths.wal_dir().exists(), "wal dir must exist"),
        (paths.items_dir().exists(), "items dir must exist"),
        (paths.users_dir().exists(), "users dir must exist"),
        (paths.creators_dir().exists(), "creators dir must exist"),
    ];
    for (exists, failure) in layout {
        assert!(exists, "{failure}");
    }

    // WAL diagnostics on a home that has never seen a write.
    let report = tidaldb::wal::diagnostics::diagnose_wal(root).unwrap();
    assert_eq!(report.total_events, 0, "empty WAL should have 0 events");
    assert_eq!(report.segment_count, 0, "empty WAL should have 0 segments");
    assert_eq!(
        report.inconsistency_count, 0,
        "empty WAL should have 0 inconsistencies"
    );

    // Text index stats for a directory that does not exist; a failed read
    // falls back to (0, 0).
    let text_index_dir = root.join("text_index");
    let (segs, docs) = tidaldb::text::read_stats_from_dir(&text_index_dir).unwrap_or((0, 0));
    assert_eq!(segs, 0, "no text index should have 0 segments");
    assert_eq!(docs, 0, "no text index should have 0 docs");

    // BUILD_HASH must be defined and non-empty.
    let build_hash_present = !tidaldb::BUILD_HASH.is_empty();
    assert!(build_hash_present, "BUILD_HASH must be non-empty");
}