616 lines
20 KiB
Rust
616 lines
20 KiB
Rust
//! Milestone 6 Phase 5 Integration Tests: `WithinScope` + Suggest API.
//!
//! Exercises m6p5 features end-to-end through `TidalDb`:
//!
//! 1. `WithinScope::Category` restricts SEARCH to a category.
//! 2. `WithinScope::Collection` restricts SEARCH to a collection's items.
//! 3. `WithinScope::Following` restricts SEARCH to followed creators' items.
//! 4. `WithinScope::Trending` scopes to high-velocity items.
//! 5. Scope with no matches returns empty results.
//! 6. `db.suggest()` prefix autocomplete from indexed titles.
//! 7. `db.suggest()` trending (empty prefix) returns top queries.
//! 8. `db.suggest()` limit validation rejects 0 and >50.
//! 9. Title indexing deduplicates terms across items.
//! 10. Scope + filter composition (`WithinScope` + metadata filter).
//! 11. Suggest performance with 10K terms.
//! 12. `WithinScope::CohortTrending` scopes to cohort-velocity items.
//! 13. `WithinScope::CohortTrending` with unknown cohort returns an error.
|
|
|
|
#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::cohort::{CohortDef, Predicate};
|
|
use tidaldb::entities::Visibility;
|
|
use tidaldb::query::search::{Search, WithinScope};
|
|
use tidaldb::query::suggest::Suggest;
|
|
use tidaldb::schema::{
|
|
DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window,
|
|
};
|
|
use tidaldb::storage::indexes::filter::FilterExpr;
|
|
|
|
// ── Schema ──────────────────────────────────────────────────────────────────
|
|
|
|
fn m6p5_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
|
|
for &(name, half_life_days) in &[("view", 7), ("like", 14), ("share", 7)] {
|
|
let _ = builder
|
|
.signal(
|
|
name,
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(half_life_days * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[
|
|
Window::OneHour,
|
|
Window::TwentyFourHours,
|
|
Window::SevenDays,
|
|
Window::AllTime,
|
|
])
|
|
.velocity(true)
|
|
.add();
|
|
}
|
|
|
|
builder.text_field("title", TextFieldType::Text);
|
|
builder.text_field("description", TextFieldType::Text);
|
|
builder.text_field("category", TextFieldType::Keyword);
|
|
builder.build().expect("m6p5 schema must be valid")
|
|
}
|
|
|
|
fn open_ephemeral() -> TidalDb {
|
|
TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(m6p5_schema())
|
|
.open()
|
|
.expect("ephemeral open")
|
|
}
|
|
|
|
fn write_items_with_category(db: &TidalDb, ids: &[u64], category: &str) {
|
|
for &id in ids {
|
|
let mut meta = HashMap::new();
|
|
meta.insert("title".to_string(), format!("Item {id} about {category}"));
|
|
meta.insert("category".to_string(), category.to_string());
|
|
meta.insert("format".to_string(), "audio".to_string());
|
|
meta.insert("creator_id".to_string(), format!("{}", id % 5 + 1));
|
|
db.write_item_with_metadata(EntityId::new(id), &meta)
|
|
.unwrap();
|
|
}
|
|
}
|
|
|
|
// ── Test 1: WithinScope::Category restricts SEARCH ──────────────────────────
|
|
|
|
/// Test 1: a search scoped with `WithinScope::Category { name: "music" }` must
/// return at least one hit and only items written in the "music" category.
#[test]
fn search_within_category() {
    let db = open_ephemeral();

    // "music" owns IDs 1-5, "tech" owns IDs 6-10.
    write_items_with_category(&db, &[1, 2, 3, 4, 5], "music");
    write_items_with_category(&db, &[6, 7, 8, 9, 10], "tech");

    // Flush the text index before searching.
    db.flush_text_index().unwrap();

    let scope = WithinScope::Category {
        name: "music".into(),
    };
    let query = Search::builder()
        .query("item")
        .within(scope)
        .limit(20)
        .build()
        .unwrap();

    let results = db.search(&query).unwrap();
    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // The scope must not filter everything out...
    assert!(
        !ids.is_empty(),
        "expected some results within music category"
    );
    // ...and every hit must be one of the music items.
    for id in ids.iter().copied() {
        assert!(
            (1..=5).contains(&id),
            "expected music items (1-5), got ID {id}"
        );
    }
}
|
|
|
|
// ── Test 2: WithinScope::Collection restricts SEARCH ────────────────────────
|
|
|
|
/// Test 2: a search scoped with `WithinScope::Collection` must return at least
/// one hit and only the items that were added to that collection (3, 7, 9).
#[test]
fn search_within_collection() {
    let db = open_ephemeral();
    let owner = EntityId::new(1);

    // Ten items exist, but only three of them go into the collection.
    let all_ids: Vec<u64> = (1..=10).collect();
    write_items_with_category(&db, &all_ids, "jazz");
    db.flush_text_index().unwrap();

    let cid = db
        .create_collection(owner, "favorites", Visibility::Private)
        .unwrap();
    for member in [3u64, 7, 9] {
        db.add_to_collection(cid, EntityId::new(member)).unwrap();
    }

    let scope = WithinScope::Collection { id: cid };
    let query = Search::builder()
        .query("item")
        .within(scope)
        .limit(20)
        .build()
        .unwrap();

    let results = db.search(&query).unwrap();
    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    assert!(
        !ids.is_empty(),
        "expected some results within the collection"
    );
    for id in ids.iter().copied() {
        assert!(
            matches!(id, 3 | 7 | 9),
            "expected collection items (3, 7, 9), got ID {id}"
        );
    }
}
|
|
|
|
// ── Test 3: WithinScope::Following restricts SEARCH ─────────────────────────
|
|
|
|
/// Test 3: a search scoped with `WithinScope::Following` for user 42, who
/// follows only creator 1, must return at least one hit and only creator 1's
/// items (IDs 1-5).
#[test]
fn search_within_following() {
    let db = open_ephemeral();
    let user_id = 42u64;

    // Writes a single "music"/"audio" item owned by `creator`.
    let write_item = |id: u64, title: String, creator: &str| {
        let meta = HashMap::from([
            ("title".to_string(), title),
            ("category".to_string(), "music".to_string()),
            ("format".to_string(), "audio".to_string()),
            ("creator_id".to_string(), creator.to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    };

    // Creator 1 owns items 1-5, creator 2 owns items 6-10.
    for id in 1..=5u64 {
        write_item(id, format!("Creator1 item {id}"), "1");
    }
    for id in 6..=10u64 {
        write_item(id, format!("Creator2 item {id}"), "2");
    }
    db.flush_text_index().unwrap();

    // The user follows creator 1 only.
    db.user_state().add_follow(user_id, 1);

    let query = Search::builder()
        .query("item")
        .for_user(user_id)
        .within(WithinScope::Following)
        .limit(20)
        .build()
        .unwrap();

    let results = db.search(&query).unwrap();
    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    assert!(
        !ids.is_empty(),
        "expected some results within followed creators"
    );
    for id in ids.iter().copied() {
        assert!(
            (1..=5).contains(&id),
            "expected creator 1's items (1-5), got ID {id}"
        );
    }
}
|
|
|
|
// ── Test 4: WithinScope::Trending scopes to high-velocity items ─────────────
|
|
|
|
/// Test 4: after 20 `view` signals on each of items 1-3, a search scoped with
/// `WithinScope::Trending { window_hours: 1 }` must include items 1, 2 and 3.
///
/// The test does not assert that the unsignalled items 4-10 are absent.
#[test]
fn search_within_trending() {
    let db = open_ephemeral();
    let ts = Timestamp::now();

    let all_ids: Vec<u64> = (1..=10).collect();
    write_items_with_category(&db, &all_ids, "jazz");
    db.flush_text_index().unwrap();

    // Heavy signal traffic on items 1-3 only; items 4-10 receive none.
    for _ in 0..20 {
        for hot in [1u64, 2, 3] {
            db.signal("view", EntityId::new(hot), 1.0, ts).unwrap();
        }
    }

    let scope = WithinScope::Trending { window_hours: 1 };
    let query = Search::builder()
        .query("item")
        .within(scope)
        .limit(20)
        .build()
        .unwrap();

    let results = db.search(&query).unwrap();
    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    // Trending items (1-3) should be present; items without signals may be excluded.
    let all_hot_present = (1..=3u64).all(|hot| ids.contains(&hot));
    assert!(
        all_hot_present,
        "trending items 1, 2, 3 should appear in results; got {ids:?}"
    );
}
|
|
|
|
// ── Test 5: Scope with no matches returns empty results ─────────────────────
|
|
|
|
/// Test 5: scoping a search to a category that no item belongs to must yield
/// an empty result list.
#[test]
fn search_within_empty_category_returns_empty() {
    let db = open_ephemeral();

    // Only the "music" category is populated.
    write_items_with_category(&db, &[1, 2, 3], "music");
    db.flush_text_index().unwrap();

    let missing_scope = WithinScope::Category {
        name: "nonexistent".into(),
    };
    let query = Search::builder()
        .query("item")
        .within(missing_scope)
        .limit(20)
        .build()
        .unwrap();

    let hits = db.search(&query).unwrap().items;
    assert!(
        hits.is_empty(),
        "expected no results for nonexistent category"
    );
}
|
|
|
|
// ── Test 6: db.suggest() prefix autocomplete ────────────────────────────────
|
|
|
|
/// Test 6: `db.suggest()` with a non-empty prefix returns terms from written
/// titles — "rus" yields "rust", "pyt" yields "python", "zzz" yields nothing.
#[test]
fn suggest_prefix_autocomplete() {
    let db = open_ephemeral();

    // Three items with distinct titles; IDs are assigned 1, 2, 3 in order.
    let titles = [
        "Rust Tutorial Advanced",
        "Rust Async Programming",
        "Python Machine Learning",
    ];
    for (id, title) in (1u64..).zip(titles) {
        let meta = HashMap::from([
            ("title".to_string(), title.to_string()),
            ("category".to_string(), "tech".to_string()),
            ("format".to_string(), "video".to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // "rus" should match "rust".
    let rus = Suggest::new("rus");
    let rus_hits = db.suggest(&rus).unwrap();
    assert!(!rus_hits.is_empty(), "expected suggestions for 'rus'");
    assert!(
        rus_hits.iter().any(|s| s.text == "rust"),
        "expected 'rust' in suggestions; got {:?}",
        rus_hits.iter().map(|s| &s.text).collect::<Vec<_>>()
    );

    // "pyt" should match "python".
    let pyt = Suggest::new("pyt");
    let pyt_hits = db.suggest(&pyt).unwrap();
    assert!(
        pyt_hits.iter().any(|s| s.text == "python"),
        "expected 'python' in suggestions"
    );

    // "zzz" matches nothing.
    let zzz = Suggest::new("zzz");
    let zzz_hits = db.suggest(&zzz).unwrap();
    assert!(zzz_hits.is_empty(), "expected no suggestions for 'zzz'");
}
|
|
|
|
// ── Test 7: db.suggest() trending (empty prefix) ────────────────────────────
|
|
|
|
/// Test 7: `db.suggest()` with an empty prefix returns trending queries; after
/// searching "jazz" three times and "blues" once, "jazz" must be first with a
/// frequency of 3.
#[test]
fn suggest_trending_queries() {
    let db = open_ephemeral();

    // Write items so the text index works.
    write_items_with_category(&db, &[1, 2, 3], "music");
    db.flush_text_index().unwrap();

    let jazz = Search::builder().query("jazz").build().unwrap();
    let blues = Search::builder().query("blues").build().unwrap();

    // Only the act of searching matters here; the outcomes are deliberately ignored.
    for _ in 0..3 {
        let _ = db.search(&jazz);
    }
    let _ = db.search(&blues);

    // Empty prefix returns trending.
    let trending_req = Suggest::new("");
    let results = db.suggest(&trending_req).unwrap();
    assert!(!results.is_empty(), "expected trending suggestions");

    // "jazz" should rank first (3 searches vs 1 for "blues").
    let top = &results[0];
    assert_eq!(
        top.text, "jazz",
        "expected 'jazz' as top trending; got '{}'",
        top.text
    );
    assert_eq!(top.frequency, 3);
}
|
|
|
|
// ── Test 8: db.suggest() limit validation ───────────────────────────────────
|
|
|
|
/// Test 8: `db.suggest()` rejects limits of 0 and 51 and accepts limits of 50
/// and 1.
#[test]
fn suggest_limit_validation() {
    let db = open_ephemeral();

    // Out-of-range limits.
    let zero = Suggest::new("a").limit(0);
    assert!(db.suggest(&zero).is_err(), "limit 0 should be rejected");

    let over_max = Suggest::new("a").limit(51);
    assert!(
        db.suggest(&over_max).is_err(),
        "limit 51 should be rejected"
    );

    // Boundary limits that must pass validation.
    let at_max = Suggest::new("a").limit(50);
    assert!(db.suggest(&at_max).is_ok(), "limit 50 should be accepted");

    let at_min = Suggest::new("a").limit(1);
    assert!(db.suggest(&at_min).is_ok(), "limit 1 should be accepted");
}
|
|
|
|
// ── Test 9: Title indexing deduplicates terms across items ──────────────────
|
|
|
|
/// Test 9: five items whose titles all contain "Rust" must produce exactly one
/// "rust" suggestion for the prefix "rus".
#[test]
fn suggest_deduplicates_across_items() {
    let db = open_ephemeral();

    // Five items, every title starting with "Rust".
    for id in 1..=5u64 {
        let meta = HashMap::from([
            ("title".to_string(), format!("Rust Guide Part {id}")),
            ("category".to_string(), "tech".to_string()),
            ("format".to_string(), "article".to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    let req = Suggest::new("rus");
    let results = db.suggest(&req).unwrap();

    // One "rust" entry is expected, not one per item.
    let rust_count = results.iter().filter(|s| s.text == "rust").count();
    assert_eq!(
        rust_count, 1,
        "expected exactly 1 'rust' suggestion, got {}",
        rust_count
    );
}
|
|
|
|
// ── Test 10: Scope + filter composition ─────────────────────────────────────
|
|
|
|
/// Test 10: combining `WithinScope::Category { name: "music" }` with a
/// `format == "video"` filter must return at least one hit and only the
/// music/video items (IDs 6-10).
#[test]
fn search_within_scope_plus_filter() {
    let db = open_ephemeral();

    // Writes a single "music" item with the given title and format.
    let write_music = |id: u64, title: String, format: &str| {
        let meta = HashMap::from([
            ("title".to_string(), title),
            ("category".to_string(), "music".to_string()),
            ("format".to_string(), format.to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    };

    // Music items: 1-5 are audio, 6-10 are video.
    for id in 1..=5u64 {
        write_music(id, format!("Music audio {id}"), "audio");
    }
    for id in 6..=10u64 {
        write_music(id, format!("Music video {id}"), "video");
    }
    // Tech items.
    write_items_with_category(&db, &[11, 12, 13], "tech");
    db.flush_text_index().unwrap();

    // Scope to the "music" category AND filter on format=video.
    let scope = WithinScope::Category {
        name: "music".into(),
    };
    let query = Search::builder()
        .query("music")
        .within(scope)
        .filter(FilterExpr::eq("format", "video"))
        .limit(20)
        .build()
        .unwrap();

    let results = db.search(&query).unwrap();
    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();

    assert!(
        !ids.is_empty(),
        "expected results for music category + video format"
    );
    for id in ids.iter().copied() {
        assert!(
            (6..=10).contains(&id),
            "expected music+video items (6-10), got ID {id}"
        );
    }
}
|
|
|
|
// ── Test 11: Suggest performance with many terms ────────────────────────────
|
|
|
|
/// Test 11: with 10,000 items written (each title carrying a unique term), a
/// prefix suggestion for "unique_term_5" must return results in under 20 ms.
///
/// The measured interval covers building the request and the `suggest` call.
#[test]
fn suggest_performance() {
    let db = open_ephemeral();

    // 10K items, each contributing one unique term plus two shared ones.
    for id in 1..=10_000u64 {
        let meta = HashMap::from([
            (
                "title".to_string(),
                format!("unique_term_{id} common_word content"),
            ),
            ("category".to_string(), "tech".to_string()),
            ("format".to_string(), "article".to_string()),
        ]);
        db.write_item_with_metadata(EntityId::new(id), &meta)
            .unwrap();
    }

    // Timed section.
    let timer = std::time::Instant::now();
    let req = Suggest::new("unique_term_5").limit(10);
    let results = db.suggest(&req).unwrap();
    let took_ms = timer.elapsed().as_millis();

    assert!(
        !results.is_empty(),
        "expected suggestions for 'unique_term_5'"
    );
    assert!(
        took_ms < 20,
        "suggest took {}ms, expected < 20ms",
        took_ms
    );
}
|
|
|
|
// ── Test 12: WithinScope::CohortTrending scopes to cohort-velocity items ────
|
|
|
|
/// Test 12: a search scoped with `WithinScope::CohortTrending` for the
/// "tech_en" cohort must return at least one hit and only items 1-3, which are
/// the items viewed by the user whose `primary_category` is "tech".
#[test]
fn search_within_cohort_trending() {
    let db = open_ephemeral();
    let ts = Timestamp::now();

    // The "tech_en" cohort is defined by `primary_category == "tech"`.
    let cohort = CohortDef {
        name: "tech_en".to_string(),
        predicate: Predicate::Eq {
            field: "primary_category".into(),
            value: "tech".into(),
        },
    };
    db.define_cohort(cohort).unwrap();

    // User 1001 matches the cohort predicate; user 1002 does not.
    let tech_user = HashMap::from([
        ("locale".to_string(), "en".to_string()),
        ("primary_category".to_string(), "tech".to_string()),
    ]);
    db.write_user(EntityId::new(1001), &tech_user).unwrap();

    let sports_user = HashMap::from([
        ("locale".to_string(), "en".to_string()),
        ("primary_category".to_string(), "sports".to_string()),
    ]);
    db.write_user(EntityId::new(1002), &sports_user).unwrap();

    // Six items: 1-3 get the tech user's views, 4-6 get the other user's.
    let item_ids: Vec<u64> = (1..=6).collect();
    write_items_with_category(&db, &item_ids, "tech");
    db.flush_text_index().unwrap();

    // Each pair is (item ID, value passed as the last context argument).
    for _ in 0..15 {
        for (item, ctx) in [(1, 1), (2, 2), (3, 3)] {
            db.signal_with_context("view", EntityId::new(item), 1.0, ts, Some(1001), Some(ctx))
                .unwrap();
        }
    }
    for _ in 0..15 {
        for (item, ctx) in [(4, 4), (5, 5), (6, 6)] {
            db.signal_with_context("view", EntityId::new(item), 1.0, ts, Some(1002), Some(ctx))
                .unwrap();
        }
    }

    let scope = WithinScope::CohortTrending {
        cohort: "tech_en".into(),
        window_hours: 1,
    };
    let query = Search::builder()
        .query("item")
        .within(scope)
        .limit(20)
        .build()
        .unwrap();

    let results = db.search(&query).unwrap();

    // Items 1-3 should appear (tech_en cohort velocity); items 4-6 had no cohort signal.
    let ids: Vec<u64> = results
        .items
        .iter()
        .map(|hit| hit.entity_id.as_u64())
        .collect();
    assert!(
        !ids.is_empty(),
        "expected results within cohort trending; got none"
    );
    for id in ids.iter().copied() {
        assert!(
            (1..=3).contains(&id),
            "expected only cohort-trending items (1-3), got ID {id}"
        );
    }
}
|
|
|
|
// ── Test 13: WithinScope::CohortTrending with unknown cohort returns error ───
|
|
|
|
/// Test 13: a search scoped with `WithinScope::CohortTrending` naming a cohort
/// that was never defined must fail, and the error text must contain either
/// the cohort name or the phrase "not found".
#[test]
fn search_within_cohort_trending_unknown_cohort_returns_error() {
    let db = open_ephemeral();

    write_items_with_category(&db, &[1, 2, 3], "music");
    db.flush_text_index().unwrap();

    let query = Search::builder()
        .query("item")
        .within(WithinScope::CohortTrending {
            cohort: "nonexistent_cohort".into(),
            window_hours: 24,
        })
        .limit(20)
        .build()
        .unwrap();

    // Extract the error in one step instead of moving the result inside an
    // `assert!` argument and unwrapping it afterwards.
    let err = match db.search(&query) {
        Ok(found) => panic!(
            "expected error for unknown cohort, got: Ok({})",
            found.items.len()
        ),
        Err(err) => err,
    };

    let err_str = err.to_string();
    // Either form of diagnostic is accepted, and the failure message says so.
    assert!(
        err_str.contains("nonexistent_cohort") || err_str.contains("not found"),
        "expected error message to mention the cohort name or \"not found\"; got: {err_str}"
    );
}
|