tidaldb/tidal/tests/m6p5_scope.rs
2026-02-23 22:41:16 -07:00

616 lines
20 KiB
Rust

//! Milestone 6 Phase 5 Integration Tests: `WithinScope` + Suggest API.
//!
//! Exercises m6p5 features end-to-end through `TidalDb`:
//!
//! 1. `WithinScope::Category` restricts SEARCH to a category.
//! 2. `WithinScope::Collection` restricts SEARCH to a collection's items.
//! 3. `WithinScope::Following` restricts SEARCH to followed creators' items.
//! 4. `WithinScope::Trending` scopes to high-velocity items.
//! 5. Scope with no matches returns empty results.
//! 6. `db.suggest()` prefix autocomplete from indexed titles.
//! 7. `db.suggest()` trending (empty prefix) returns top queries.
//! 8. `db.suggest()` limit validation rejects 0 and >50.
//! 9. Title indexing deduplicates terms across items.
//! 10. Scope + filter composition (`WithinScope` + metadata filter).
//! 11. Suggest performance with 10K terms.
//! 12. `WithinScope::CohortTrending` scopes to cohort-velocity items.
//! 13. `WithinScope::CohortTrending` with unknown cohort returns an error.
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
use std::collections::HashMap;
use std::time::Duration;
use tidaldb::TidalDb;
use tidaldb::cohort::{CohortDef, Predicate};
use tidaldb::entities::Visibility;
use tidaldb::query::search::{Search, WithinScope};
use tidaldb::query::suggest::Suggest;
use tidaldb::schema::{
DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window,
};
use tidaldb::storage::indexes::filter::FilterExpr;
// ── Schema ──────────────────────────────────────────────────────────────────
fn m6p5_schema() -> tidaldb::schema::Schema {
let mut builder = SchemaBuilder::new();
for &(name, half_life_days) in &[("view", 7), ("like", 14), ("share", 7)] {
let _ = builder
.signal(
name,
EntityKind::Item,
DecaySpec::Exponential {
half_life: Duration::from_secs(half_life_days * 24 * 3600),
},
)
.windows(&[
Window::OneHour,
Window::TwentyFourHours,
Window::SevenDays,
Window::AllTime,
])
.velocity(true)
.add();
}
builder.text_field("title", TextFieldType::Text);
builder.text_field("description", TextFieldType::Text);
builder.text_field("category", TextFieldType::Keyword);
builder.build().expect("m6p5 schema must be valid")
}
/// Opens an ephemeral database configured with the schema from `m6p5_schema`.
///
/// # Panics
///
/// Panics with "ephemeral open" if the database cannot be opened.
fn open_ephemeral() -> TidalDb {
    let opened = TidalDb::builder()
        .ephemeral()
        .with_schema(m6p5_schema())
        .open();
    opened.expect("ephemeral open")
}
/// Writes one item per entry of `ids`, all tagged with `category`.
///
/// Each item carries four metadata keys:
/// - `title`: "Item {id} about {category}"
/// - `category`: the given `category`
/// - `format`: always "audio"
/// - `creator_id`: `id % 5 + 1`, rendered as a decimal string
///
/// # Panics
///
/// Panics if any individual write fails.
fn write_items_with_category(db: &TidalDb, ids: &[u64], category: &str) {
    for id in ids.iter().copied() {
        let creator = id % 5 + 1;
        let meta = HashMap::from([
            (String::from("title"), format!("Item {id} about {category}")),
            (String::from("category"), String::from(category)),
            (String::from("format"), String::from("audio")),
            (String::from("creator_id"), creator.to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }
}
// ── Test 1: WithinScope::Category restricts SEARCH ──────────────────────────
/// A search scoped to `WithinScope::Category` must return only items from
/// that category, and must return at least one of them.
#[test]
fn search_within_category() {
    let db = open_ephemeral();

    // Seed two disjoint categories: IDs 1-5 are "music", IDs 6-10 are "tech".
    write_items_with_category(&db, &[1, 2, 3, 4, 5], "music");
    write_items_with_category(&db, &[6, 7, 8, 9, 10], "tech");

    // The text index has to be flushed before BM25 can find the new items.
    db.flush_text_index().unwrap();

    let music_only = WithinScope::Category {
        name: "music".into(),
    };
    let query = Search::builder()
        .query("item")
        .within(music_only)
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    let found: Vec<u64> = outcome
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Nothing outside the music ID range (1-5) may leak into the results.
    for id in found.iter().copied() {
        assert!(
            matches!(id, 1..=5),
            "expected music items (1-5), got ID {id}"
        );
    }

    // An empty result would make the check above pass vacuously.
    assert!(
        !found.is_empty(),
        "expected some results within music category"
    );
}
// ── Test 2: WithinScope::Collection restricts SEARCH ────────────────────────
/// A search scoped to `WithinScope::Collection` must return only items that
/// were added to that collection, and must return at least one of them.
#[test]
fn search_within_collection() {
    let db = open_ephemeral();
    let owner = EntityId::new(1);

    // Ten "jazz" items exist; only three of them go into the collection.
    let all_ids: Vec<u64> = (1..=10).collect();
    write_items_with_category(&db, &all_ids, "jazz");
    db.flush_text_index().unwrap();

    let cid = db
        .create_collection(owner, "favorites", Visibility::Private)
        .unwrap();
    let members = [3u64, 7, 9];
    for member in members {
        db.add_to_collection(cid, EntityId::new(member)).unwrap();
    }

    let query = Search::builder()
        .query("item")
        .within(WithinScope::Collection { id: cid })
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    let found: Vec<u64> = outcome
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Every hit must be one of the collection members.
    for id in found.iter().copied() {
        assert!(
            members.contains(&id),
            "expected collection items (3, 7, 9), got ID {id}"
        );
    }

    // Guard against the membership check passing vacuously.
    assert!(
        !found.is_empty(),
        "expected some results within the collection"
    );
}
// ── Test 3: WithinScope::Following restricts SEARCH ─────────────────────────
/// A search scoped to `WithinScope::Following` must return only items whose
/// `creator_id` belongs to a creator the user follows.
#[test]
fn search_within_following() {
    let db = open_ephemeral();
    let user_id = 42u64;

    // Writes a single "music"/"audio" item attributed to `creator_id`.
    let write_owned_item = |id: u64, title: String, creator_id: &str| {
        let meta = HashMap::from([
            ("title".to_string(), title),
            ("category".to_string(), "music".to_string()),
            ("format".to_string(), "audio".to_string()),
            ("creator_id".to_string(), creator_id.to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    };

    // Creator 1 owns items 1-5, creator 2 owns items 6-10.
    for id in 1..=5u64 {
        write_owned_item(id, format!("Creator1 item {id}"), "1");
    }
    for id in 6..=10u64 {
        write_owned_item(id, format!("Creator2 item {id}"), "2");
    }
    db.flush_text_index().unwrap();

    // The user follows creator 1 only.
    db.user_state().add_follow(user_id, 1);

    let query = Search::builder()
        .query("item")
        .for_user(user_id)
        .within(WithinScope::Following)
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    let found: Vec<u64> = outcome
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Items 6-10 belong to the unfollowed creator and must not appear.
    for id in found.iter().copied() {
        assert!(
            matches!(id, 1..=5),
            "expected creator 1's items (1-5), got ID {id}"
        );
    }

    // Guard against the range check passing vacuously.
    assert!(
        !found.is_empty(),
        "expected some results within followed creators"
    );
}
// ── Test 4: WithinScope::Trending scopes to high-velocity items ─────────────
/// Items that received a burst of `view` signals must show up in a search
/// scoped to `WithinScope::Trending`.
///
/// Only the presence of the signalled items is asserted; the test does not
/// require that the unsignalled items are absent.
#[test]
fn search_within_trending() {
    let db = open_ephemeral();
    let ts = Timestamp::now();

    let all_ids: Vec<u64> = (1..=10).collect();
    write_items_with_category(&db, &all_ids, "jazz");
    db.flush_text_index().unwrap();

    // Twenty rounds of views on items 1-3; items 4-10 receive no signals.
    let hot_items = [1u64, 2, 3];
    for _ in 0..20 {
        for hot in hot_items {
            db.signal("view", EntityId::new(hot), 1.0, ts).unwrap();
        }
    }

    // Scope the search to the one-hour trending window.
    let query = Search::builder()
        .query("item")
        .within(WithinScope::Trending { window_hours: 1 })
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    let ids: Vec<u64> = outcome
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    assert!(
        hot_items.iter().all(|hot| ids.contains(hot)),
        "trending items 1, 2, 3 should appear in results; got {ids:?}"
    );
}
// ── Test 5: Scope with no matches returns empty results ─────────────────────
/// Scoping a search to a category that no item belongs to must succeed and
/// yield an empty result set.
#[test]
fn search_within_empty_category_returns_empty() {
    let db = open_ephemeral();

    // "music" is the only category that exists in this database.
    write_items_with_category(&db, &[1, 2, 3], "music");
    db.flush_text_index().unwrap();

    let missing_category = WithinScope::Category {
        name: "nonexistent".into(),
    };
    let query = Search::builder()
        .query("item")
        .within(missing_category)
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    assert!(
        outcome.items.is_empty(),
        "expected no results for nonexistent category"
    );
}
// ── Test 6: db.suggest() prefix autocomplete ────────────────────────────────
/// `db.suggest()` with a non-empty prefix must return terms taken from the
/// written titles, and nothing for a prefix that matches no term.
#[test]
fn suggest_prefix_autocomplete() {
    let db = open_ephemeral();

    // Three items with distinct titles; all are "tech" / "video".
    for (id, title) in [
        (1u64, "Rust Tutorial Advanced"),
        (2, "Rust Async Programming"),
        (3, "Python Machine Learning"),
    ] {
        let meta = HashMap::from([
            ("title".to_string(), title.to_string()),
            ("category".to_string(), "tech".to_string()),
            ("format".to_string(), "video".to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // "rus" must complete to "rust".
    let rust_request = Suggest::new("rus");
    let rust_hits = db.suggest(&rust_request).unwrap();
    assert!(!rust_hits.is_empty(), "expected suggestions for 'rus'");
    assert!(
        rust_hits.iter().any(|s| s.text == "rust"),
        "expected 'rust' in suggestions; got {:?}",
        rust_hits.iter().map(|s| &s.text).collect::<Vec<_>>()
    );

    // "pyt" must complete to "python".
    let python_request = Suggest::new("pyt");
    let python_hits = db.suggest(&python_request).unwrap();
    assert!(
        python_hits.iter().any(|s| s.text == "python"),
        "expected 'python' in suggestions"
    );

    // "zzz" appears in no title, so nothing may be suggested.
    let unmatched_request = Suggest::new("zzz");
    let unmatched_hits = db.suggest(&unmatched_request).unwrap();
    assert!(
        unmatched_hits.is_empty(),
        "expected no suggestions for 'zzz'"
    );
}
// ── Test 7: db.suggest() trending (empty prefix) ────────────────────────────
/// `db.suggest()` with an empty prefix must return trending queries, ordered
/// so that the most frequently searched query comes first.
#[test]
fn suggest_trending_queries() {
    let db = open_ephemeral();

    // Some items must exist so the text index is usable.
    write_items_with_category(&db, &[1, 2, 3], "music");
    db.flush_text_index().unwrap();

    // Build up trending data: "jazz" is searched three times, "blues" once.
    // The search outcomes themselves are deliberately discarded.
    let jazz = Search::builder().query("jazz").build().unwrap();
    let blues = Search::builder().query("blues").build().unwrap();
    for _ in 0..3 {
        let _ = db.search(&jazz);
    }
    let _ = db.search(&blues);

    // An empty prefix asks for trending suggestions.
    let request = Suggest::new("");
    let trending = db.suggest(&request).unwrap();
    assert!(!trending.is_empty(), "expected trending suggestions");

    // "jazz" must lead, with a frequency equal to its three searches.
    let top = &trending[0];
    assert_eq!(
        top.text, "jazz",
        "expected 'jazz' as top trending; got '{}'",
        top.text
    );
    assert_eq!(top.frequency, 3);
}
// ── Test 8: db.suggest() limit validation ───────────────────────────────────
/// `db.suggest()` must reject limits of 0 and 51 and accept the boundary
/// values 50 and 1.
#[test]
fn suggest_limit_validation() {
    let db = open_ephemeral();

    // Out-of-range limits: one below and one above the accepted interval.
    assert!(
        db.suggest(&Suggest::new("a").limit(0)).is_err(),
        "limit 0 should be rejected"
    );
    assert!(
        db.suggest(&Suggest::new("a").limit(51)).is_err(),
        "limit 51 should be rejected"
    );

    // Boundary limits that must be accepted.
    assert!(
        db.suggest(&Suggest::new("a").limit(50)).is_ok(),
        "limit 50 should be accepted"
    );
    assert!(
        db.suggest(&Suggest::new("a").limit(1)).is_ok(),
        "limit 1 should be accepted"
    );
}
// ── Test 9: Title indexing deduplicates terms across items ──────────────────
/// A term shared by several titles must be suggested exactly once.
#[test]
fn suggest_deduplicates_across_items() {
    let db = open_ephemeral();

    // Five items whose titles all contain "Rust".
    for id in 1..=5u64 {
        let meta = HashMap::from([
            ("title".to_string(), format!("Rust Guide Part {id}")),
            ("category".to_string(), "tech".to_string()),
            ("format".to_string(), "article".to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // "rus" should surface a single "rust" suggestion rather than one per item.
    let request = Suggest::new("rus");
    let suggestions = db.suggest(&request).unwrap();
    let rust_count = suggestions.iter().filter(|s| s.text == "rust").count();
    assert_eq!(
        rust_count, 1,
        "expected exactly 1 'rust' suggestion, got {}",
        rust_count
    );
}
// ── Test 10: Scope + filter composition ─────────────────────────────────────
/// A category scope combined with a metadata filter must return only items
/// that satisfy both: category "music" and format "video".
#[test]
fn search_within_scope_plus_filter() {
    let db = open_ephemeral();

    // Writes a single "music" item with the given title and format.
    let write_music = |id: u64, title: String, media_format: &str| {
        let meta = HashMap::from([
            ("title".to_string(), title),
            ("category".to_string(), "music".to_string()),
            ("format".to_string(), media_format.to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    };

    // Music items: IDs 1-5 are audio, IDs 6-10 are video.
    for id in 1..=5u64 {
        write_music(id, format!("Music audio {id}"), "audio");
    }
    for id in 6..=10u64 {
        write_music(id, format!("Music video {id}"), "video");
    }

    // Tech items that the category scope must exclude.
    write_items_with_category(&db, &[11, 12, 13], "tech");
    db.flush_text_index().unwrap();

    let music_only = WithinScope::Category {
        name: "music".into(),
    };
    let query = Search::builder()
        .query("music")
        .within(music_only)
        .filter(FilterExpr::eq("format", "video"))
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    let found: Vec<u64> = outcome
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Only the music+video IDs (6-10) satisfy both the scope and the filter.
    for id in found.iter().copied() {
        assert!(
            matches!(id, 6..=10),
            "expected music+video items (6-10), got ID {id}"
        );
    }

    // Guard against the range check passing vacuously.
    assert!(
        !found.is_empty(),
        "expected results for music category + video format"
    );
}
// ── Test 11: Suggest performance with many terms ────────────────────────────
/// After 10,000 items with unique title terms have been written, a prefix
/// suggestion must return results and complete in under 20 ms wall-clock.
#[test]
fn suggest_performance() {
    let db = open_ephemeral();

    // Each title contributes one unique term plus two shared ones.
    for id in 1..=10_000u64 {
        let meta = HashMap::from([
            (
                "title".to_string(),
                format!("unique_term_{id} common_word content"),
            ),
            ("category".to_string(), "tech".to_string()),
            ("format".to_string(), "article".to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // The timed region covers building the request and running the suggestion.
    let timer = std::time::Instant::now();
    let request = Suggest::new("unique_term_5").limit(10);
    let suggestions = db.suggest(&request).unwrap();
    let elapsed = timer.elapsed();

    assert!(
        !suggestions.is_empty(),
        "expected suggestions for 'unique_term_5'"
    );
    assert!(
        elapsed.as_millis() < 20,
        "suggest took {}ms, expected < 20ms",
        elapsed.as_millis()
    );
}
// ── Test 12: WithinScope::CohortTrending scopes to cohort-velocity items ────
/// A search scoped to `WithinScope::CohortTrending` must return only items
/// that received signals from a user matching the cohort predicate.
#[test]
fn search_within_cohort_trending() {
    let db = open_ephemeral();
    let ts = Timestamp::now();

    // The "tech_en" cohort matches users whose `primary_category` is "tech".
    let cohort = CohortDef {
        name: "tech_en".to_string(),
        predicate: Predicate::Eq {
            field: "primary_category".into(),
            value: "tech".into(),
        },
    };
    db.define_cohort(cohort).unwrap();

    // User 1001 matches the cohort predicate; user 1002 ("sports") does not.
    for (user, primary_category) in [(1001u64, "tech"), (1002, "sports")] {
        let profile = HashMap::from([
            ("locale".to_string(), "en".to_string()),
            ("primary_category".to_string(), primary_category.to_string()),
        ]);
        db.write_user(EntityId::new(user), &profile).unwrap();
    }

    // Six "tech" items; which user views them decides whether they trend.
    let item_ids: Vec<u64> = (1..=6).collect();
    write_items_with_category(&db, &item_ids, "tech");
    db.flush_text_index().unwrap();

    // Fifteen rounds of views on items 1-3 from the tech user (1001).
    // The final argument repeats the item number, exactly as in each call.
    for _ in 0..15 {
        for (item, context) in [(1u64, 1), (2, 2), (3, 3)] {
            db.signal_with_context(
                "view",
                EntityId::new(item),
                1.0,
                ts,
                Some(1001),
                Some(context),
            )
            .unwrap();
        }
    }

    // Fifteen rounds of views on items 4-6 from the non-tech user (1002).
    for _ in 0..15 {
        for (item, context) in [(4u64, 4), (5, 5), (6, 6)] {
            db.signal_with_context(
                "view",
                EntityId::new(item),
                1.0,
                ts,
                Some(1002),
                Some(context),
            )
            .unwrap();
        }
    }

    let cohort_scope = WithinScope::CohortTrending {
        cohort: "tech_en".into(),
        window_hours: 1,
    };
    let query = Search::builder()
        .query("item")
        .within(cohort_scope)
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query).unwrap();
    let found: Vec<u64> = outcome
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    assert!(
        !found.is_empty(),
        "expected results within cohort trending; got none"
    );

    // Items 4-6 were viewed only by the non-cohort user and must be excluded.
    for id in found.iter().copied() {
        assert!(
            matches!(id, 1..=3),
            "expected only cohort-trending items (1-3), got ID {id}"
        );
    }
}
// ── Test 13: WithinScope::CohortTrending with unknown cohort returns error ───
/// Scoping a search to a cohort that was never defined must fail, and the
/// error text must contain either the cohort name or "not found".
#[test]
fn search_within_cohort_trending_unknown_cohort_returns_error() {
    let db = open_ephemeral();
    write_items_with_category(&db, &[1, 2, 3], "music");
    db.flush_text_index().unwrap();

    // No cohort has been defined in this database.
    let unknown_scope = WithinScope::CohortTrending {
        cohort: "nonexistent_cohort".into(),
        window_hours: 24,
    };
    let query = Search::builder()
        .query("item")
        .within(unknown_scope)
        .limit(20)
        .build()
        .unwrap();

    let outcome = db.search(&query);
    assert!(
        outcome.is_err(),
        "expected error for unknown cohort, got: {:?}",
        // Borrow so `outcome` stays available for `unwrap_err` below.
        outcome.as_ref().map(|found| found.items.len())
    );

    let err_str = outcome.unwrap_err().to_string();
    let names_cohort = err_str.contains("nonexistent_cohort");
    let says_not_found = err_str.contains("not found");
    assert!(
        names_cohort || says_not_found,
        "expected error message to mention the cohort name; got: {err_str}"
    );
}