tidaldb/tidal/tests/m4_uat.rs
jordan 192c473f55 feat: complete Milestone 5 — full-text search, RRF fusion, and creator search
- M5p1: BM25 text indexing via Tantivy with background syncer (0.26ms @ 10K docs)
- M5p2: RRF fusion layer combining BM25 + ANN scores (46µs @ 1K candidates)
- M5p3: unified Search query API (8-stage pipeline, BM25 + vector + ranking)
- M5p4: creator text + vector indexing and creator search executor (< 20ms @ 200 creators)
- Refactor db/mod.rs into focused sub-modules (creators, items, sessions, signals, etc.)
- Decompose monolithic files into directory modules (query/executor, ranking/diversity, etc.)
- Split brute.rs → brute/mod.rs + brute/tests.rs; extract search executor helpers
- Add benches: fusion, search, session, text_index
- Add M5 UAT test suites (m5_uat, m5_search, m5p4_creator_search, text_index)
- Update blog posts, roadmap, content strategy, and M5 planning docs
- Add tmp/ and .claude/worktrees/ to .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 23:53:16 -07:00

468 lines
15 KiB
Rust

#![allow(clippy::unwrap_used)]
//! M4 User Acceptance Test: Agent Session Layer.
//!
//! Proves the full agent session lifecycle: schema-declared policies,
//! session start/close, session signals with policy enforcement, audit log,
//! session snapshot, FOR SESSION ranking boost, and session isolation.
use std::collections::HashMap;
use std::time::Duration;
use tidaldb::AgentId;
use tidaldb::TidalDb;
use tidaldb::query::retrieve::{ProfileRef, RetrieveBuilder};
use tidaldb::schema::{
AgentPolicy, DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window,
};
/// Builds the schema shared by every test in this file.
///
/// Registers five item-level signals (`view`, `like`, `share`, `reward`,
/// `skip`), each with an exponential decay whose half-life is seven days,
/// one-hour and 24-hour windows, and velocity enabled. Also declares one
/// session policy, `planner_policy`.
///
/// # Panics
///
/// Panics if `SchemaBuilder::build` returns an error.
fn test_schema() -> tidaldb::schema::Schema {
    // Names of the content signals attached to items.
    const CONTENT_SIGNALS: [&str; 5] = ["view", "like", "share", "reward", "skip"];
    // Seven days expressed in seconds.
    const HALF_LIFE_SECS: u64 = 7 * 24 * 3600;

    let mut schema_builder = SchemaBuilder::new();

    // Content signals: identical configuration for every name.
    for name in &CONTENT_SIGNALS {
        let decay = DecaySpec::Exponential {
            half_life: Duration::from_secs(HALF_LIFE_SECS),
        };
        let _ = schema_builder
            .signal(name, EntityKind::Item, decay)
            .windows(&[Window::OneHour, Window::TwentyFourHours])
            .velocity(true)
            .add();
    }

    // Schema-declared session policy: reward + view are on the allow list,
    // skip is on the deny list, and the per-session cap is 100 signals.
    let planner_policy = AgentPolicy {
        allowed_signals: vec![String::from("reward"), String::from("view")],
        denied_signals: vec![String::from("skip")],
        max_session_duration: Duration::from_secs(3600), // 1 hour
        max_signals_per_session: 100,
    };
    let _ = schema_builder.session_policy("planner_policy", planner_policy);

    schema_builder.build().unwrap()
}
/// Opens an ephemeral `TidalDb` configured with [`test_schema`].
///
/// # Panics
///
/// Panics if the schema cannot be built or the database fails to open.
fn test_db() -> TidalDb {
    let schema = test_schema();
    TidalDb::builder()
        .ephemeral()
        .with_schema(schema)
        .open()
        .unwrap()
}
// ── Step 1: Schema with session_policy compiles ─────────────────────────────
/// The schema builds and exposes `planner_policy` with the values declared
/// in `test_schema`.
#[test]
fn step1_schema_with_session_policy_builds() {
    // Building must succeed with a declared policy (test_schema unwraps).
    let built = test_schema();

    let planner = match built.session_policy("planner_policy") {
        Some(found) => found,
        None => panic!("planner_policy should be in schema"),
    };

    let reward = String::from("reward");
    let skip = String::from("skip");
    assert!(planner.allowed_signals.contains(&reward));
    assert!(planner.denied_signals.contains(&skip));
    assert_eq!(planner.max_signals_per_session, 100);
}
// ── Step 2: Session lifecycle — start and close ─────────────────────────────
/// A session can be started, shows up in `active_sessions`, and disappears
/// from that list once closed.
#[test]
fn step2_session_start_and_close() {
    let db = test_db();

    let metadata = HashMap::from([("context".to_string(), "video-feed".to_string())]);
    let session = db
        .start_session(42, "planner-agent", "planner_policy", metadata)
        .unwrap();

    // The handle echoes back what the session was started with.
    assert_eq!(session.user_id, 42);
    assert_eq!(session.agent_id.as_str(), "planner-agent");
    assert_eq!(session.policy_name, "planner_policy");

    // Exactly one session is active, and it belongs to user 42.
    let running = db.active_sessions();
    assert_eq!(running.len(), 1);
    assert_eq!(running[0].user_id, 42);

    // Closing consumes the handle, so remember the id first.
    let expected_id = session.id;
    let closed = db.close_session(session).unwrap();
    assert_eq!(closed.id, expected_id);
    assert_eq!(closed.rejections, 0);

    // Nothing should be left in the active list.
    assert!(db.active_sessions().is_empty());
}
// ── Step 3: session_signal writes and audit ─────────────────────────────────
/// Allowed session signals are counted in the snapshot and recorded as
/// accepted entries in the audit log.
#[test]
fn step3_session_signal_and_audit() {
    let db = test_db();

    // Seed five content items, each carrying only a title.
    for item in 1u64..=5 {
        let meta = HashMap::from([("title".to_string(), format!("item-{item}"))]);
        db.write_item_with_metadata(EntityId::new(item), &meta)
            .unwrap();
    }

    let session = db
        .start_session(10, "planner-agent", "planner_policy", HashMap::new())
        .unwrap();
    let sid = session.id;
    let now = Timestamp::now();

    // Both signal names are on the policy's allow list.
    for (name, entity, weight) in [("reward", 1u64, 1.0), ("view", 2u64, 0.5)] {
        db.session_signal(&session, name, EntityId::new(entity), weight, now, None)
            .unwrap();
    }

    // Snapshot counters: two writes, no rejections.
    let snapshot = db.session_snapshot(sid).unwrap();
    assert_eq!(snapshot.signals_written, 2);
    assert_eq!(snapshot.signals_rejected, 0);

    // Audit log: one accepted entry per write.
    let entries = db.session_audit(sid).unwrap();
    assert_eq!(entries.len(), 2);
    assert!(entries.iter().all(|entry| entry.accepted));

    db.close_session(session).unwrap();
}
// ── Step 4: Policy rejects denied signals ──────────────────────────────────
/// A signal on the policy's deny list is refused, counted as a rejection,
/// and logged in the audit trail with a reason.
#[test]
fn step4_policy_rejects_denied_signal() {
    let db = test_db();
    let session = db
        .start_session(20, "test-agent", "planner_policy", HashMap::new())
        .unwrap();
    let sid = session.id;
    let now = Timestamp::now();

    // "skip" is explicitly denied in planner_policy.
    let outcome = db.session_signal(&session, "skip", EntityId::new(99), 1.0, now, None);
    let rejection = match outcome {
        Err(err) => err,
        Ok(_) => panic!("skip should be rejected by policy"),
    };

    // Match case-insensitively; lowercase the message once for both checks.
    let message = rejection.to_string();
    let lowered = message.to_lowercase();
    assert!(
        lowered.contains("policy") || lowered.contains("denied"),
        "error should mention policy: {message}"
    );

    // The rejection is counted and nothing was written.
    let snapshot = db.session_snapshot(sid).unwrap();
    assert_eq!(snapshot.signals_rejected, 1);
    assert_eq!(snapshot.signals_written, 0);

    // The audit log holds a single, non-accepted entry with a reason.
    let entries = db.session_audit(sid).unwrap();
    assert_eq!(entries.len(), 1);
    let only = &entries[0];
    assert!(!only.accepted);
    assert!(only.reason.is_some());

    db.close_session(session).unwrap();
}
// ── Step 5: Policy rejects signals not in allow list ───────────────────────
/// A signal that is neither allowed nor denied by name is still refused,
/// because the policy's allow list does not contain it.
#[test]
fn step5_policy_rejects_non_allowed_signal() {
    let db = test_db();
    let session = db
        .start_session(30, "test-agent", "planner_policy", HashMap::new())
        .unwrap();
    let now = Timestamp::now();

    // planner_policy allows only reward + view, so "share" falls outside it.
    let outcome = db.session_signal(&session, "share", EntityId::new(1), 1.0, now, None);
    assert!(
        outcome.is_err(),
        "share should be rejected (not in allowed_signals)"
    );

    db.close_session(session).unwrap();
}
// ── Step 6: Session annotations and snapshot ──────────────────────────────
/// An annotation passed with a session signal appears in the session
/// snapshot, together with the id of the signaled entity.
#[test]
fn step6_session_annotations_and_snapshot() {
    let db = test_db();
    let session = db
        .start_session(40, "planner-agent", "planner_policy", HashMap::new())
        .unwrap();
    let sid = session.id;
    let now = Timestamp::now();

    // Write one reward signal that carries a free-text annotation.
    let note = Some(String::from("rust programming language"));
    db.session_signal(&session, "reward", EntityId::new(5), 1.0, now, note)
        .unwrap();

    let snapshot = db.session_snapshot(sid).unwrap();

    // The annotation text is stored in the second tuple field.
    assert!(!snapshot.annotations.is_empty());
    let first = &snapshot.annotations[0];
    assert!(first.1.contains("rust"));

    // signaled_entities should include entity 5.
    assert!(snapshot.signaled_entities.contains(&5));

    db.close_session(session).unwrap();
}
// ── Step 7: Session snapshot is accessible after close ─────────────────────
/// The snapshot of a session stays retrievable after the session is closed
/// and reports the same counters as the close summary.
#[test]
fn step7_closed_session_snapshot() {
    let db = test_db();
    let session = db
        .start_session(50, "planner-agent", "planner_policy", HashMap::new())
        .unwrap();
    let sid = session.id;
    let now = Timestamp::now();

    // Two allowed signals, written before the session is closed.
    for (name, entity, weight) in [("reward", 1u64, 1.0), ("view", 2u64, 0.5)] {
        db.session_signal(&session, name, EntityId::new(entity), weight, now, None)
            .unwrap();
    }

    let closed = db.close_session(session).unwrap();
    assert_eq!(closed.signals_written, 2);

    // Looking the session up by id must still work after close.
    let snapshot = db.session_snapshot(sid).unwrap();
    assert_eq!(snapshot.signals_written, 2);
    assert_eq!(snapshot.signals_rejected, 0);
}
// ── Step 8: FOR SESSION ranking boost ─────────────────────────────────────
/// A `for_session` query attaches a session snapshot to its results and
/// ranks a session-signaled entity at least as well as the same query
/// without a session.
#[test]
fn step8_for_session_ranking_boost() {
    let db = test_db();

    // Seed ten content items, each carrying only a title.
    for item in 1u64..=10 {
        let meta = HashMap::from([("title".to_string(), format!("item-{item}"))]);
        db.write_item_with_metadata(EntityId::new(item), &meta)
            .unwrap();
    }

    // Global signals on entity 1, so it ranks without any session.
    let now = Timestamp::now();
    for _ in 0..5 {
        db.signal("view", EntityId::new(1), 1.0, now).unwrap();
    }

    let session = db
        .start_session(60, "planner-agent", "planner_policy", HashMap::new())
        .unwrap();
    let sid = session.id;

    // Entity 5 is the "personalized" pick: signaled heavily, session-only.
    for _ in 0..10 {
        db.session_signal(&session, "reward", EntityId::new(5), 2.0, now, None)
            .unwrap();
    }

    // Same "hot" query twice: first scoped to the session...
    let session_query = RetrieveBuilder::new(EntityKind::Item, ProfileRef::new("hot"))
        .limit(10)
        .for_session(sid)
        .build()
        .unwrap();
    let boosted = db.retrieve(&session_query).unwrap();

    // ...then without it. Entity 5 has no global signals.
    let plain_query = RetrieveBuilder::new(EntityKind::Item, ProfileRef::new("hot"))
        .limit(10)
        .build()
        .unwrap();
    let baseline = db.retrieve(&plain_query).unwrap();

    // Both queries should return results.
    assert!(!boosted.items.is_empty());
    assert!(!baseline.items.is_empty());

    // Only the session-scoped query is required to carry a snapshot.
    assert!(
        boosted.session_snapshot.is_some(),
        "FOR SESSION query should populate session_snapshot in results"
    );

    // Locate entity 5 in each result list (index 0 is the top rank).
    let target = EntityId::new(5);
    let boosted_rank = boosted.items.iter().position(|hit| hit.entity_id == target);
    let baseline_rank = baseline.items.iter().position(|hit| hit.entity_id == target);

    // With the session boost, entity 5 must appear (even if entity 1 still
    // leads due to global signals).
    assert!(
        boosted_rank.is_some(),
        "entity 5 should appear in FOR SESSION results"
    );

    // The rank comparison only applies when entity 5 is present in both
    // lists; a lower index means a better rank.
    if let Some((rw, rwo)) = boosted_rank.zip(baseline_rank) {
        assert!(
            rw <= rwo,
            "entity 5 rank={rw} with session should be at least as good as rank={rwo} without"
        );
    }

    db.close_session(session).unwrap();
}
// ── Step 9: Session isolation — two sessions don't cross-contaminate ────────
/// Two concurrent sessions keep separate `signaled_entities`: a signal
/// written through one handle does not show up in the other's snapshot.
#[test]
fn step9_session_isolation() {
    let db = test_db();

    // Seed five content items, each carrying only a title.
    for item in 1u64..=5 {
        let meta = HashMap::from([("title".to_string(), format!("item-{item}"))]);
        db.write_item_with_metadata(EntityId::new(item), &meta)
            .unwrap();
    }

    // Session A (user 70) will signal entity 1.
    let session_a = db
        .start_session(70, "agent-a", "planner_policy", HashMap::new())
        .unwrap();
    let sid_a = session_a.id;

    // Session B (user 71) will signal entity 5.
    let session_b = db
        .start_session(71, "agent-b", "planner_policy", HashMap::new())
        .unwrap();
    let sid_b = session_b.id;

    let now = Timestamp::now();
    db.session_signal(&session_a, "reward", EntityId::new(1), 1.0, now, None)
        .unwrap();
    db.session_signal(&session_b, "reward", EntityId::new(5), 1.0, now, None)
        .unwrap();

    // Snapshot A should only see entity 1.
    let seen_by_a = db.session_snapshot(sid_a).unwrap();
    assert!(
        seen_by_a.signaled_entities.contains(&1),
        "session A should have entity 1"
    );
    assert!(
        !seen_by_a.signaled_entities.contains(&5),
        "session A should NOT have entity 5"
    );

    // Snapshot B should only see entity 5.
    let seen_by_b = db.session_snapshot(sid_b).unwrap();
    assert!(
        seen_by_b.signaled_entities.contains(&5),
        "session B should have entity 5"
    );
    assert!(
        !seen_by_b.signaled_entities.contains(&1),
        "session B should NOT have entity 1"
    );

    db.close_session(session_a).unwrap();
    db.close_session(session_b).unwrap();
}
// ── Step 10: Invalid policy name is rejected ────────────────────────────────
/// Starting a session with a policy name that the schema does not declare
/// must fail, and the error text must point at the policy.
///
/// The message check is case-insensitive, matching the denied-signal test
/// (step 4), so a capitalized message such as "Policy not found" does not
/// cause a spurious failure.
#[test]
fn step10_invalid_policy_name_rejected() {
    let db = test_db();

    // "nonexistent_policy" is not declared in the schema.
    let result = db.start_session(80, "test-agent", "nonexistent_policy", HashMap::new());
    let err = match result {
        Err(err) => err,
        Ok(_) => panic!("unknown policy should be rejected"),
    };

    // Keep the original text for the failure message; compare on a
    // lowercased copy so the check does not depend on capitalization.
    let err_str = err.to_string();
    let err_lower = err_str.to_lowercase();
    assert!(
        err_lower.contains("policy") || err_lower.contains("not found"),
        "error should mention policy: {err_str}"
    );
}
// ── Step 11: AgentId validation ─────────────────────────────────────────────
/// `AgentId::new` accepts lowercase ids with `-`, `_` and digits, and
/// rejects empty, uppercase, space-containing, and 65-character ids.
#[test]
fn step11_agent_id_validation() {
    // Valid AgentId formats.
    for candidate in ["planner-agent", "agent_01", "a"] {
        assert!(
            AgentId::new(candidate).is_ok(),
            "{candidate:?} should be a valid AgentId"
        );
    }

    // Invalid formats: empty, uppercase, space.
    for candidate in ["", "Agent-ID", "agent id"] {
        assert!(
            AgentId::new(candidate).is_err(),
            "{candidate:?} should be rejected as an AgentId"
        );
    }

    // Too long: 65 characters (presumably one past the maximum — confirm
    // against the AgentId implementation).
    let too_long = "a".repeat(65);
    assert!(AgentId::new(&too_long).is_err());
}
// ── Step 12: active_sessions tracks multiple sessions ──────────────────────
/// `active_sessions` follows sessions as they open and close, and a closed
/// session's snapshot can still be fetched by id.
#[test]
fn step12_active_sessions_tracking() {
    let db = test_db();

    let first = db
        .start_session(90, "agent-1", "planner_policy", HashMap::new())
        .unwrap();
    let second = db
        .start_session(91, "agent-2", "planner_policy", HashMap::new())
        .unwrap();

    // Both sessions are active.
    assert_eq!(db.active_sessions().len(), 2);

    // Close the first; only the second remains active.
    let first_id = first.id;
    db.close_session(first).unwrap();
    let remaining = db.active_sessions();
    assert_eq!(remaining.len(), 1);
    assert_eq!(remaining[0].id, second.id);

    // Close the second; nothing remains active.
    db.close_session(second).unwrap();
    assert!(db.active_sessions().is_empty());

    // Closed session snapshot still accessible.
    let snapshot = db.session_snapshot(first_id).unwrap();
    assert_eq!(snapshot.id, first_id);
}