tidaldb/tidal/tests/m6p4_collections.rs
2026-02-23 22:41:16 -07:00

446 lines
14 KiB
Rust

//! Milestone 6 Phase 4 Integration Tests: Collections, Saved Searches, Cross-Session Preference.
//!
//! Exercises the M6p4 feature set end-to-end through `TidalDb`:
//!
//! 1. Collection CRUD (create, add, list, remove).
//! 2. `FilterExpr::InCollection` constrains retrieve candidates.
//! 3. `InProgress` user-state filter evaluation.
//! 4. Saved search CRUD (save, list, retrieve, delete).
//! 5. Collection persistence across restart.
//! 6. Saved search persistence across restart.
//! 7. Cross-session preference aggregation.
//! 8. `in_collection` filter performance.
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
use std::collections::HashMap;
use std::time::Duration;
use tidaldb::TidalDb;
use tidaldb::entities::Visibility;
use tidaldb::query::retrieve::Retrieve;
use tidaldb::schema::{
AgentPolicy, DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window,
};
use tidaldb::storage::indexes::filter::FilterExpr;
// ── Schema ──────────────────────────────────────────────────────────────────
/// Builds the schema shared by the M6p4 integration tests.
///
/// Registers five `EntityKind::Item` signals (`view`, `like`, `share`, `skip`,
/// `completion`). Each one uses an exponential decay whose half-life is given
/// in days, the `OneHour` / `TwentyFourHours` / `SevenDays` / `AllTime`
/// windows, and `velocity(true)`.
///
/// # Panics
///
/// Panics with "m6p4 schema must be valid" if `SchemaBuilder::build` fails.
fn m6p4_schema() -> tidaldb::schema::Schema {
    const SECS_PER_DAY: u64 = 24 * 3600;
    // (signal name, decay half-life in days)
    const SIGNALS: [(&str, u64); 5] = [
        ("view", 7),
        ("like", 14),
        ("share", 7),
        ("skip", 1),
        ("completion", 30),
    ];

    let mut schema = SchemaBuilder::new();
    for (signal_name, days) in SIGNALS {
        let half_life = Duration::from_secs(days * SECS_PER_DAY);
        // The value returned by `add()` is not needed here.
        let _ = schema
            .signal(
                signal_name,
                EntityKind::Item,
                DecaySpec::Exponential { half_life },
            )
            .windows(&[
                Window::OneHour,
                Window::TwentyFourHours,
                Window::SevenDays,
                Window::AllTime,
            ])
            .velocity(true)
            .add();
    }
    schema.build().expect("m6p4 schema must be valid")
}
/// Opens an ephemeral `TidalDb` configured with the schema from `m6p4_schema`.
///
/// # Panics
///
/// Panics with "ephemeral open" if the database cannot be opened.
fn open_ephemeral() -> TidalDb {
    let ephemeral = TidalDb::builder().ephemeral();
    let opened = ephemeral.with_schema(m6p4_schema()).open();
    opened.expect("ephemeral open")
}
/// Writes one item per entry in `ids`, each with the same fixture metadata.
///
/// Every item gets `title = "Item {id}"`, `category = "jazz"`, and
/// `format = "audio"`.
///
/// # Panics
///
/// Panics if any `write_item_with_metadata` call returns an error.
fn write_items(db: &TidalDb, ids: &[u64]) {
    for id in ids.iter().copied() {
        let meta = HashMap::from([
            (String::from("title"), format!("Item {id}")),
            (String::from("category"), String::from("jazz")),
            (String::from("format"), String::from("audio")),
        ]);
        let entity = EntityId::new(id);
        db.write_item_with_metadata(entity, &meta).unwrap();
    }
}
// ── Test 1: Collection create, add, list ────────────────────────────────────
/// Creates a collection, adds items (one of them twice), and checks that the
/// owner's listing reports the expected name, item count, and visibility.
#[test]
fn collection_create_add_list() {
    let db = open_ephemeral();
    let owner = EntityId::new(1);

    let cid = db
        .create_collection(owner, "favorites", Visibility::Private)
        .unwrap();
    assert_eq!(cid.as_u64(), 1);

    // 100 is added twice on purpose: a repeated add must succeed and must not
    // inflate the item count checked below.
    for item in [100, 200, 100] {
        db.add_to_collection(cid, EntityId::new(item)).unwrap();
    }

    let collections = db.list_collections(owner).unwrap();
    assert_eq!(collections.len(), 1);

    let favorites = &collections[0];
    assert_eq!(favorites.name, "favorites");
    assert_eq!(favorites.item_count, 2); // 100 and 200
    assert_eq!(favorites.visibility, Visibility::Private);
}
// ── Test 2: Remove from collection ──────────────────────────────────────────
/// Removes an item from a collection (twice) and checks the remaining count.
///
/// The second removal of the same item must succeed as a no-op.
#[test]
fn collection_remove_item() {
    let db = open_ephemeral();
    let owner = EntityId::new(1);

    let cid = db
        .create_collection(owner, "watchlist", Visibility::Public)
        .unwrap();
    for item in [10, 20, 30] {
        db.add_to_collection(cid, EntityId::new(item)).unwrap();
    }

    db.remove_from_collection(cid, EntityId::new(20)).unwrap();
    // Removing again is a no-op.
    db.remove_from_collection(cid, EntityId::new(20)).unwrap();

    let collections = db.list_collections(owner).unwrap();
    // Check the length before indexing so that an unexpected listing fails
    // with a descriptive assertion instead of an out-of-bounds panic.
    assert_eq!(
        collections.len(),
        1,
        "owner should have exactly one collection"
    );
    assert_eq!(collections[0].item_count, 2); // 10 and 30
}
// ── Test 3: FilterExpr::InCollection constrains retrieve candidates ─────────
/// Checks that `FilterExpr::in_collection` restricts `retrieve` results to
/// the items that were added to the collection.
#[test]
fn retrieve_with_in_collection_filter() {
    let db = open_ephemeral();
    let owner = EntityId::new(1);

    // Ten items exist in total; only three of them go into the collection.
    let all_items: Vec<_> = (1..=10).collect();
    write_items(&db, &all_items);

    let cid = db
        .create_collection(owner, "picks", Visibility::Private)
        .unwrap();
    for member in [3, 7, 9] {
        db.add_to_collection(cid, EntityId::new(member)).unwrap();
    }

    // The limit (20) exceeds the total item count, so the filter alone
    // determines how many results come back.
    let query = Retrieve::builder()
        .profile("new")
        .filter(FilterExpr::in_collection(cid))
        .limit(20)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();

    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    assert_eq!(ids.len(), 3);
    assert!(ids.contains(&3));
    assert!(ids.contains(&7));
    assert!(ids.contains(&9));
}
// ── Test 4: InProgress user-state filter ────────────────────────────────────
/// Checks that `FilterExpr::InProgress` returns only items whose recorded
/// completion is below the threshold.
#[test]
fn retrieve_in_progress_filter() {
    let db = open_ephemeral();
    let user_id = 42u64;

    write_items(&db, &[1, 2, 3, 4, 5]);

    // Completion recorded through the user-state index:
    //   item 2 -> 30% (in progress)
    //   item 4 -> 95% (complete at threshold 0.9)
    //   item 5 -> 50% (in progress)
    // Items 1 and 3 have no recorded completion.
    for (item, fraction) in [(2, 0.3), (4, 0.95), (5, 0.5)] {
        db.user_state().record_completion(user_id, item, fraction);
    }

    let in_progress = FilterExpr::InProgress {
        user_id,
        threshold: 0.9,
    };
    let query = Retrieve::builder()
        .profile("new")
        .for_user(user_id)
        .filter(in_progress)
        .limit(20)
        .build()
        .unwrap();
    let results = db.retrieve(&query).unwrap();

    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    // Expect items 2 (0.3) and 5 (0.5), which satisfy 0 < v < 0.9.
    // Item 4 (0.95) counts as complete (>= 0.9).
    assert_eq!(ids.len(), 2, "expected 2 in-progress items, got {ids:?}");
    assert!(ids.contains(&2));
    assert!(ids.contains(&5));
}
// ── Test 5: Saved search CRUD ───────────────────────────────────────────────
/// Saves two searches for one user and checks that both appear in the
/// user's saved-search listing.
#[test]
fn saved_search_crud() {
    let db = open_ephemeral();
    let user = EntityId::new(1);

    // (saved-search name, query text)
    for (name, query_text) in [("jazz-tunes", "jazz music"), ("rust-db", "rust database")] {
        db.save_search(user, name, query_text, None).unwrap();
    }

    let searches = db.list_saved_searches(user).unwrap();
    assert_eq!(searches.len(), 2);

    // The listing order is not asserted; only membership is checked.
    let names: Vec<&str> = searches
        .iter()
        .map(|saved| saved.name.as_str())
        .collect();
    assert!(names.contains(&"jazz-tunes"));
    assert!(names.contains(&"rust-db"));
}
// ── Test 6: Delete saved search ─────────────────────────────────────────────
/// Saves a single search, deletes it by name, and checks that the user's
/// saved-search listing is empty afterwards.
#[test]
fn saved_search_delete() {
    let db = open_ephemeral();
    let user = EntityId::new(1);

    db.save_search(user, "temp-search", "temporary query", None)
        .unwrap();
    // Confirm the search exists before deleting it.
    assert_eq!(db.list_saved_searches(user).unwrap().len(), 1);

    db.delete_saved_search(user, "temp-search").unwrap();

    let after = db.list_saved_searches(user).unwrap();
    assert!(after.is_empty());
}
// ── Test 7: Collections persist across restart ──────────────────────────────
/// Creates a collection in an on-disk database, closes it, reopens the same
/// data directory, and checks that the collection and its items are listed.
#[test]
fn collections_persist_across_restart() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6p4_schema();
    let owner = EntityId::new(1);

    // Opens a database rooted at the temporary directory.
    let open_db = |schema: tidaldb::schema::Schema| {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema)
            .open()
            .unwrap()
    };

    // First open: create the collection and add two items.
    {
        let db = open_db(schema.clone());
        let cid = db
            .create_collection(owner, "watchlist", Visibility::Private)
            .unwrap();
        for item in [100, 200] {
            db.add_to_collection(cid, EntityId::new(item)).unwrap();
        }
        db.close().unwrap();
    }

    // Second open: the collection must still be there.
    {
        let db = open_db(schema);
        let collections = db.list_collections(owner).unwrap();
        assert_eq!(collections.len(), 1, "collection should survive restart");

        let watchlist = &collections[0];
        assert_eq!(watchlist.name, "watchlist");
        assert_eq!(watchlist.item_count, 2);
        db.close().unwrap();
    }
}
// ── Test 8: Saved searches persist across restart ───────────────────────────
/// Saves a search in an on-disk database, closes it, reopens the same data
/// directory, and checks that the saved search's name and query text are
/// listed.
#[test]
fn saved_searches_persist_across_restart() {
    let dir = tempfile::tempdir().unwrap();
    let schema = m6p4_schema();
    let user = EntityId::new(1);

    // Opens a database rooted at the temporary directory.
    let open_db = |schema: tidaldb::schema::Schema| {
        TidalDb::builder()
            .with_data_dir(dir.path())
            .with_schema(schema)
            .open()
            .unwrap()
    };

    // First open: save the search.
    {
        let db = open_db(schema.clone());
        db.save_search(user, "my-query", "rust database", None)
            .unwrap();
        db.close().unwrap();
    }

    // Second open: the saved search must still be there.
    {
        let db = open_db(schema);
        let searches = db.list_saved_searches(user).unwrap();
        assert_eq!(searches.len(), 1, "saved search should survive restart");

        let saved = &searches[0];
        assert_eq!(saved.name, "my-query");
        assert_eq!(saved.query_text, "rust database");
        db.close().unwrap();
    }
}
// ── Test 9: Cross-session preference aggregation ────────────────────────────
/// Checks that closing a session which signalled an embedded item leaves a
/// preference vector for the user.
///
/// The test uses its own schema: one `view` signal, a 4-dimensional `content`
/// embedding slot, and a `default` session policy that allows `view`.
#[test]
fn cross_session_preference_aggregation() {
    let dir = tempfile::tempdir().unwrap();

    let mut builder = SchemaBuilder::new();
    let one_week = Duration::from_secs(7 * 24 * 3600);
    let _ = builder
        .signal(
            "view",
            EntityKind::Item,
            DecaySpec::Exponential {
                half_life: one_week,
            },
        )
        .windows(&[Window::OneHour, Window::AllTime])
        .velocity(false)
        .add();
    // Declare the 4-dimensional embedding slot so PreferenceVectors initializes
    // with dim=4 (matching the embedding written below).
    builder.embedding_slot("content", EntityKind::Item, 4);
    // A session policy is required before sessions can be opened.
    let policy = AgentPolicy {
        allowed_signals: vec![String::from("view")],
        denied_signals: Vec::new(),
        max_session_duration: Duration::from_secs(3600),
        max_signals_per_session: 100,
    };
    builder.session_policy("default", policy);
    let schema = builder.build().unwrap();

    let db = TidalDb::builder()
        .with_data_dir(dir.path())
        .with_schema(schema)
        .open()
        .unwrap();

    // Write one item with metadata and a unit embedding along the first axis.
    let item_id = EntityId::new(42);
    let meta = HashMap::from([
        (String::from("title"), String::from("Test Item")),
        (String::from("category"), String::from("jazz")),
        (String::from("format"), String::from("audio")),
    ]);
    db.write_item_with_metadata(item_id, &meta).unwrap();
    let embedding = vec![1.0f32, 0.0, 0.0, 0.0];
    db.write_item_embedding(item_id, &embedding).unwrap();

    // The user must start without any preference vector.
    let user_id = 1u64;
    assert!(
        db.preference_vectors().get(user_id).is_none(),
        "no preference before session"
    );

    // Open a session, signal the item once, then close the session.
    let handle = db
        .start_session(user_id, "test-agent", "default", HashMap::new())
        .unwrap();
    db.session_signal(&handle, "view", item_id, 1.0, Timestamp::now(), None)
        .unwrap();
    db.close_session(handle).unwrap();

    // After close_session, the preference vector should be non-empty
    // (blended from the item's embedding with damping=0.1).
    let pref = match db.preference_vectors().get(user_id) {
        Some(vector) => vector,
        None => panic!("preference should exist after cross-session aggregation"),
    };
    assert_eq!(
        pref.len(),
        4,
        "preference should have same dim as embedding"
    );

    // The preference should be approximately [1, 0, 0, 0] (normalized).
    let first = pref[0];
    assert!(
        first > 0.5,
        "first component should be dominant, got {first:.4}"
    );

    db.close().unwrap();
}
// ── Test 10: InCollection filter performance ────────────────────────────────
/// Checks that a `retrieve` filtered by `FilterExpr::in_collection` over a
/// 100-item collection (out of 1000 written items) reports 100 candidates and
/// finishes in under 10 ms.
///
/// Only the `retrieve` call is timed; fixture setup and query construction
/// happen before the clock starts.
#[test]
fn in_collection_filter_performance() {
    let db = open_ephemeral();
    let owner = EntityId::new(1);

    // Write 1000 items through the shared fixture helper, so the metadata
    // matches what the other tests write.
    let item_ids: Vec<u64> = (1..=1000).collect();
    write_items(&db, &item_ids);

    // Create a collection with 100 items.
    let cid = db
        .create_collection(owner, "bench-collection", Visibility::Private)
        .unwrap();
    for id in 1..=100u64 {
        db.add_to_collection(cid, EntityId::new(id)).unwrap();
    }

    // Build the query before starting the clock, so that builder overhead is
    // not counted as filter time.
    let query = Retrieve::builder()
        .profile("new")
        .filter(FilterExpr::in_collection(cid))
        .limit(100)
        .build()
        .unwrap();

    // Time only the retrieve with the InCollection filter.
    let start = std::time::Instant::now();
    let results = db.retrieve(&query).unwrap();
    let elapsed = start.elapsed();

    assert_eq!(results.total_candidates, 100);
    assert!(
        elapsed.as_millis() < 10,
        "in_collection filter took {}ms, expected < 10ms",
        elapsed.as_millis()
    );
}