//! Milestone 6 Phase 5 Integration Tests: `WithinScope` + Suggest API. //! //! Exercises m6p5 features end-to-end through `TidalDb`: //! //! 1. `WithinScope::Category` restricts SEARCH to a category. //! 2. `WithinScope::Collection` restricts SEARCH to a collection's items. //! 3. `WithinScope::Following` restricts SEARCH to followed creators' items. //! 4. `WithinScope::Trending` scopes to high-velocity items. //! 5. Scope with no matches returns empty results. //! 6. `db.suggest()` prefix autocomplete from indexed titles. //! 7. `db.suggest()` trending (empty prefix) returns top queries. //! 8. `db.suggest()` limit validation rejects 0 and >50. //! 9. Title indexing deduplicates terms across items. //! 10. Scope + filter composition (`WithinScope` + metadata filter). //! 11. Suggest performance with 10K terms. //! 12. `WithinScope::CohortTrending` scopes to cohort-velocity items. //! 13. `WithinScope::CohortTrending` with unknown cohort returns an error. #![allow(clippy::unwrap_used, clippy::cast_precision_loss)] use std::collections::HashMap; use std::time::Duration; use tidaldb::TidalDb; use tidaldb::cohort::{CohortDef, Predicate}; use tidaldb::entities::Visibility; use tidaldb::query::search::{Search, WithinScope}; use tidaldb::query::suggest::Suggest; use tidaldb::schema::{ DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window, }; use tidaldb::storage::indexes::filter::FilterExpr; // ── Schema ────────────────────────────────────────────────────────────────── fn m6p5_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); for &(name, half_life_days) in &[("view", 7), ("like", 14), ("share", 7)] { let _ = builder .signal( name, EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(half_life_days * 24 * 3600), }, ) .windows(&[ Window::OneHour, Window::TwentyFourHours, Window::SevenDays, Window::AllTime, ]) .velocity(true) .add(); } builder.text_field("title", TextFieldType::Text); builder.text_field("description", TextFieldType::Text); builder.text_field("category", TextFieldType::Keyword); builder.build().expect("m6p5 schema must be valid") } fn open_ephemeral() -> TidalDb { TidalDb::builder() .ephemeral() .with_schema(m6p5_schema()) .open() .expect("ephemeral open") } fn write_items_with_category(db: &TidalDb, ids: &[u64], category: &str) { for &id in ids { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Item {id} about {category}")); meta.insert("category".to_string(), category.to_string()); meta.insert("format".to_string(), "audio".to_string()); meta.insert("creator_id".to_string(), format!("{}", id % 5 + 1)); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } } // ── Test 1: WithinScope::Category restricts SEARCH ────────────────────────── #[test] fn search_within_category() { let db = open_ephemeral(); // Write items in two categories. write_items_with_category(&db, &[1, 2, 3, 4, 5], "music"); write_items_with_category(&db, &[6, 7, 8, 9, 10], "tech"); // Flush text index so BM25 can find them. db.flush_text_index().unwrap(); // Search within "music" category. let query = Search::builder() .query("item") .within(WithinScope::Category { name: "music".into(), }) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); // All returned items should be in the music category (IDs 1-5). for &id in &ids { assert!( (1..=5).contains(&id), "expected music items (1-5), got ID {id}" ); } // Should have found some results. assert!( !ids.is_empty(), "expected some results within music category" ); } // ── Test 2: WithinScope::Collection restricts SEARCH ──────────────────────── #[test] fn search_within_collection() { let db = open_ephemeral(); let owner = EntityId::new(1); // Write 10 items. write_items_with_category(&db, &(1..=10).collect::>(), "jazz"); db.flush_text_index().unwrap(); // Create a collection with items 3, 7, 9. let cid = db .create_collection(owner, "favorites", Visibility::Private) .unwrap(); db.add_to_collection(cid, EntityId::new(3)).unwrap(); db.add_to_collection(cid, EntityId::new(7)).unwrap(); db.add_to_collection(cid, EntityId::new(9)).unwrap(); // Search within the collection. let query = Search::builder() .query("item") .within(WithinScope::Collection { id: cid }) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); for &id in &ids { assert!( id == 3 || id == 7 || id == 9, "expected collection items (3, 7, 9), got ID {id}" ); } assert!( !ids.is_empty(), "expected some results within the collection" ); } // ── Test 3: WithinScope::Following restricts SEARCH ───────────────────────── #[test] fn search_within_following() { let db = open_ephemeral(); let user_id = 42u64; // Creator 1 owns items 1-5, creator 2 owns items 6-10. for id in 1..=5u64 { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Creator1 item {id}")); meta.insert("category".to_string(), "music".to_string()); meta.insert("format".to_string(), "audio".to_string()); meta.insert("creator_id".to_string(), "1".to_string()); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } for id in 6..=10u64 { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Creator2 item {id}")); meta.insert("category".to_string(), "music".to_string()); meta.insert("format".to_string(), "audio".to_string()); meta.insert("creator_id".to_string(), "2".to_string()); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } db.flush_text_index().unwrap(); // User follows only creator 1. db.user_state().add_follow(user_id, 1); // Search within following. let query = Search::builder() .query("item") .for_user(user_id) .within(WithinScope::Following) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); for &id in &ids { assert!( (1..=5).contains(&id), "expected creator 1's items (1-5), got ID {id}" ); } assert!( !ids.is_empty(), "expected some results within followed creators" ); } // ── Test 4: WithinScope::Trending scopes to high-velocity items ───────────── #[test] fn search_within_trending() { let db = open_ephemeral(); let ts = Timestamp::now(); // Write 10 items. write_items_with_category(&db, &(1..=10).collect::>(), "jazz"); db.flush_text_index().unwrap(); // Generate heavy signal traffic on items 1-3, none on items 4-10. for _ in 0..20 { db.signal("view", EntityId::new(1), 1.0, ts).unwrap(); db.signal("view", EntityId::new(2), 1.0, ts).unwrap(); db.signal("view", EntityId::new(3), 1.0, ts).unwrap(); } // Search within trending (1-hour window). let query = Search::builder() .query("item") .within(WithinScope::Trending { window_hours: 1 }) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); // Trending items (1-3) should be present; items without signals may be excluded. assert!( ids.contains(&1) && ids.contains(&2) && ids.contains(&3), "trending items 1, 2, 3 should appear in results; got {ids:?}" ); } // ── Test 5: Scope with no matches returns empty results ───────────────────── #[test] fn search_within_empty_category_returns_empty() { let db = open_ephemeral(); // Write items only in "music" category. write_items_with_category(&db, &[1, 2, 3], "music"); db.flush_text_index().unwrap(); // Search within a non-existent category. let query = Search::builder() .query("item") .within(WithinScope::Category { name: "nonexistent".into(), }) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); assert!( results.items.is_empty(), "expected no results for nonexistent category" ); } // ── Test 6: db.suggest() prefix autocomplete ──────────────────────────────── #[test] fn suggest_prefix_autocomplete() { let db = open_ephemeral(); // Write items with distinct titles. let mut meta = HashMap::new(); meta.insert("title".to_string(), "Rust Tutorial Advanced".to_string()); meta.insert("category".to_string(), "tech".to_string()); meta.insert("format".to_string(), "video".to_string()); db.write_item_with_metadata(EntityId::new(1), &meta) .unwrap(); meta.insert("title".to_string(), "Rust Async Programming".to_string()); db.write_item_with_metadata(EntityId::new(2), &meta) .unwrap(); meta.insert("title".to_string(), "Python Machine Learning".to_string()); db.write_item_with_metadata(EntityId::new(3), &meta) .unwrap(); // Suggest "rus" -- should match "rust". let req = Suggest::new("rus"); let results = db.suggest(&req).unwrap(); assert!(!results.is_empty(), "expected suggestions for 'rus'"); assert!( results.iter().any(|s| s.text == "rust"), "expected 'rust' in suggestions; got {:?}", results.iter().map(|s| &s.text).collect::>() ); // Suggest "pyt" -- should match "python". let req = Suggest::new("pyt"); let results = db.suggest(&req).unwrap(); assert!( results.iter().any(|s| s.text == "python"), "expected 'python' in suggestions" ); // Suggest "zzz" -- no match. let req = Suggest::new("zzz"); let results = db.suggest(&req).unwrap(); assert!(results.is_empty(), "expected no suggestions for 'zzz'"); } // ── Test 7: db.suggest() trending (empty prefix) ──────────────────────────── #[test] fn suggest_trending_queries() { let db = open_ephemeral(); // Write items so text index works. write_items_with_category(&db, &[1, 2, 3], "music"); db.flush_text_index().unwrap(); // Perform several searches to build trending data. let q1 = Search::builder().query("jazz").build().unwrap(); let q2 = Search::builder().query("blues").build().unwrap(); let _ = db.search(&q1); let _ = db.search(&q1); let _ = db.search(&q1); let _ = db.search(&q2); // Empty prefix returns trending. let req = Suggest::new(""); let results = db.suggest(&req).unwrap(); assert!(!results.is_empty(), "expected trending suggestions"); // "jazz" should rank first (3 searches vs 1 for "blues"). assert_eq!( results[0].text, "jazz", "expected 'jazz' as top trending; got '{}'", results[0].text ); assert_eq!(results[0].frequency, 3); } // ── Test 8: db.suggest() limit validation ─────────────────────────────────── #[test] fn suggest_limit_validation() { let db = open_ephemeral(); // Limit 0 should be rejected. let req = Suggest::new("a").limit(0); let result = db.suggest(&req); assert!(result.is_err(), "limit 0 should be rejected"); // Limit 51 should be rejected. let req = Suggest::new("a").limit(51); let result = db.suggest(&req); assert!(result.is_err(), "limit 51 should be rejected"); // Limit 50 should be accepted. let req = Suggest::new("a").limit(50); let result = db.suggest(&req); assert!(result.is_ok(), "limit 50 should be accepted"); // Limit 1 should be accepted. let req = Suggest::new("a").limit(1); let result = db.suggest(&req); assert!(result.is_ok(), "limit 1 should be accepted"); } // ── Test 9: Title indexing deduplicates terms across items ────────────────── #[test] fn suggest_deduplicates_across_items() { let db = open_ephemeral(); // Write 5 items all with "Rust" in the title. for id in 1..=5u64 { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Rust Guide Part {id}")); meta.insert("category".to_string(), "tech".to_string()); meta.insert("format".to_string(), "article".to_string()); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } // Suggest "rus" -- should return exactly one "rust" result, not 5. let req = Suggest::new("rus"); let results = db.suggest(&req).unwrap(); let rust_results: Vec<_> = results.iter().filter(|s| s.text == "rust").collect(); assert_eq!( rust_results.len(), 1, "expected exactly 1 'rust' suggestion, got {}", rust_results.len() ); } // ── Test 10: Scope + filter composition ───────────────────────────────────── #[test] fn search_within_scope_plus_filter() { let db = open_ephemeral(); // Music items in audio and video formats. for id in 1..=5u64 { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Music audio {id}")); meta.insert("category".to_string(), "music".to_string()); meta.insert("format".to_string(), "audio".to_string()); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } for id in 6..=10u64 { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Music video {id}")); meta.insert("category".to_string(), "music".to_string()); meta.insert("format".to_string(), "video".to_string()); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } // Tech items. write_items_with_category(&db, &[11, 12, 13], "tech"); db.flush_text_index().unwrap(); // Search within "music" category AND filter format=video. let query = Search::builder() .query("music") .within(WithinScope::Category { name: "music".into(), }) .filter(FilterExpr::eq("format", "video")) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); for &id in &ids { assert!( (6..=10).contains(&id), "expected music+video items (6-10), got ID {id}" ); } // Should find some results. assert!( !ids.is_empty(), "expected results for music category + video format" ); } // ── Test 11: Suggest performance with many terms ──────────────────────────── #[test] fn suggest_performance() { let db = open_ephemeral(); // Index 10K items to build a large term set. for id in 1..=10_000u64 { let mut meta = HashMap::new(); meta.insert( "title".to_string(), format!("unique_term_{id} common_word content"), ); meta.insert("category".to_string(), "tech".to_string()); meta.insert("format".to_string(), "article".to_string()); db.write_item_with_metadata(EntityId::new(id), &meta) .unwrap(); } // Time prefix suggestion. let start = std::time::Instant::now(); let req = Suggest::new("unique_term_5").limit(10); let results = db.suggest(&req).unwrap(); let elapsed = start.elapsed(); assert!( !results.is_empty(), "expected suggestions for 'unique_term_5'" ); assert!( elapsed.as_millis() < 20, "suggest took {}ms, expected < 20ms", elapsed.as_millis() ); } // ── Test 12: WithinScope::CohortTrending scopes to cohort-velocity items ──── #[test] fn search_within_cohort_trending() { let db = open_ephemeral(); let ts = Timestamp::now(); // Define a "tech_en" cohort. db.define_cohort(CohortDef { name: "tech_en".to_string(), predicate: Predicate::Eq { field: "primary_category".into(), value: "tech".into(), }, }) .unwrap(); // Write a tech user and a non-tech user. let mut user_meta = HashMap::new(); user_meta.insert("locale".to_string(), "en".to_string()); user_meta.insert("primary_category".to_string(), "tech".to_string()); db.write_user(EntityId::new(1001), &user_meta).unwrap(); let mut non_tech = HashMap::new(); non_tech.insert("locale".to_string(), "en".to_string()); non_tech.insert("primary_category".to_string(), "sports".to_string()); db.write_user(EntityId::new(1002), &non_tech).unwrap(); // Write 6 items: items 1-3 get heavy tech-cohort signal, items 4-6 do not. write_items_with_category(&db, &(1..=6).collect::>(), "tech"); db.flush_text_index().unwrap(); // tech_en user views items 1-3 heavily; non-tech user views items 4-6. for _ in 0..15 { db.signal_with_context("view", EntityId::new(1), 1.0, ts, Some(1001), Some(1)) .unwrap(); db.signal_with_context("view", EntityId::new(2), 1.0, ts, Some(1001), Some(2)) .unwrap(); db.signal_with_context("view", EntityId::new(3), 1.0, ts, Some(1001), Some(3)) .unwrap(); } for _ in 0..15 { db.signal_with_context("view", EntityId::new(4), 1.0, ts, Some(1002), Some(4)) .unwrap(); db.signal_with_context("view", EntityId::new(5), 1.0, ts, Some(1002), Some(5)) .unwrap(); db.signal_with_context("view", EntityId::new(6), 1.0, ts, Some(1002), Some(6)) .unwrap(); } // Search within cohort trending for "tech_en". let query = Search::builder() .query("item") .within(WithinScope::CohortTrending { cohort: "tech_en".into(), window_hours: 1, }) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); // Items 1-3 should appear (tech_en cohort velocity); items 4-6 had no cohort signal. let ids: Vec = results.items.iter().map(|r| r.entity_id.as_u64()).collect(); assert!( !ids.is_empty(), "expected results within cohort trending; got none" ); for &id in &ids { assert!( (1..=3).contains(&id), "expected only cohort-trending items (1-3), got ID {id}" ); } } // ── Test 13: WithinScope::CohortTrending with unknown cohort returns error ─── #[test] fn search_within_cohort_trending_unknown_cohort_returns_error() { let db = open_ephemeral(); write_items_with_category(&db, &[1, 2, 3], "music"); db.flush_text_index().unwrap(); let query = Search::builder() .query("item") .within(WithinScope::CohortTrending { cohort: "nonexistent_cohort".into(), window_hours: 24, }) .limit(20) .build() .unwrap(); let result = db.search(&query); assert!( result.is_err(), "expected error for unknown cohort, got: {:?}", result.map(|r| r.items.len()) ); let err = result.unwrap_err(); let err_str = err.to_string(); assert!( err_str.contains("nonexistent_cohort") || err_str.contains("not found"), "expected error message to mention the cohort name; got: {err_str}" ); }