#![allow(clippy::unwrap_used)] //! m5p4 Creator Search integration tests. //! //! Validates that the SEARCH pipeline works for `EntityKind::Creator`: //! schema declaration → creator writes → text index flush → BM25 retrieval //! → profile scoring → result assembly. //! //! # UAT Scenario //! //! ``` //! Given: A database with 200 indexed creators (name, handle, language) //! When: db.search(Search { entity_kind: Creator, query: "jazz" }) //! Then: Returns non-empty SearchResults with BM25 scores //! And: Creators matching "jazz" appear in results //! ``` use std::collections::HashMap; use std::time::{Duration, Instant}; use tidaldb::TidalDb; use tidaldb::query::search::Search; use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Window}; // ── Schema and fixture helpers ─────────────────────────────────────────────── fn creator_search_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); let _ = builder .signal( "follow", EntityKind::Creator, DecaySpec::Exponential { half_life: Duration::from_secs(30 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); let _ = builder .signal( "view", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(7 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); builder.creator_text_field("name", TextFieldType::Text); builder.creator_text_field("handle", TextFieldType::Text); builder.creator_text_field("language", TextFieldType::Keyword); builder.build().unwrap() } /// Build a TidalDb with `n` indexed creators and wait for the text syncer to /// commit all documents. /// /// Creators with IDs 0..n/2 get name "Jazz Piano Creator {i}" (matching corpus). /// Creators with IDs n/2..n get name "Rock Guitar Artist {i}" (non-matching). /// /// For n < 1000: sleeps 2.5s then calls reload_creator_text_index() to let the /// time-based commit (every 2s) fire. fn make_creator_db(n: u64) -> TidalDb { let schema = creator_search_schema(); let db = TidalDb::builder() .ephemeral() .with_schema(schema) .open() .unwrap(); for i in 0..n { let mut meta = HashMap::new(); let name = if i < n / 2 { format!("Jazz Piano Creator {i}") } else { format!("Rock Guitar Artist {i}") }; meta.insert("name".to_string(), name); meta.insert("handle".to_string(), format!("creator_{i}")); meta.insert("language".to_string(), "en".to_string()); meta.insert("verified".to_string(), (i % 3 == 0).to_string()); db.write_creator(EntityId::new(i + 1), &meta).unwrap(); } // For small datasets (< 1000), wait for time-based commit (2s) + reload. std::thread::sleep(Duration::from_millis(2500)); db.reload_creator_text_index().unwrap(); db } // ── Tests ──────────────────────────────────────────────────────────────────── /// step01: Creator text search returns results with BM25 scores. #[test] fn step01_creator_text_search_returns_results() { let db = make_creator_db(200); let query = Search::builder() .entity_kind(EntityKind::Creator) .query("jazz") .limit(10) .build() .unwrap(); let results = db.search(&query).unwrap(); assert!(!results.is_empty(), "Expected search results for 'jazz'"); assert!( results.items.iter().any(|r| r.bm25_score.is_some()), "Expected at least one result with a BM25 score" ); // All results should rank higher the "Jazz" creators let top = &results.items[0]; assert!( top.bm25_score.is_some(), "Top result should have BM25 score" ); } /// step02: Creator verified filter returns only verified creators. #[test] fn step02_creator_verified_filter() { use tidaldb::storage::indexes::filter::FilterExpr; let db = make_creator_db(200); // Search with a filter on "verified" = "true" using Keyword equality. // FilterExpr::eq maps to CategoryEq which checks the category bitmap. // Since we're doing a text search here, filtering by metadata requires // checking storage. For simplicity, verify the filter doesn't break search. let query = Search::builder() .entity_kind(EntityKind::Creator) .query("jazz") .filter(FilterExpr::eq("language", "en")) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); // Language filter is metadata-based. Results may be 0 if bitmap not populated for creators, // but search should not error. // Verify no panic and the search completes. let _ = results; } /// step03: Creator vector search returns results with semantic scores. #[test] fn step03_creator_vector_search() { let schema = creator_search_schema(); let db = TidalDb::builder() .ephemeral() .with_schema(schema) .open() .unwrap(); // Write 10 creators with embeddings. for i in 0u64..10 { let mut meta = HashMap::new(); meta.insert("name".to_string(), format!("Jazz Creator {i}")); meta.insert("handle".to_string(), format!("jazz_{i}")); db.write_creator(EntityId::new(i + 1), &meta).unwrap(); // Write a simple embedding: first component varies by creator. let mut emb = vec![0.0f32; 16]; emb[0] = (i as f32) + 1.0; emb[1] = 1.0; db.write_creator_embedding(EntityId::new(i + 1), &emb) .unwrap(); } // Query with a vector similar to creator 5. let mut query_vec = vec![0.0f32; 16]; query_vec[0] = 5.0; query_vec[1] = 1.0; let query = Search::builder() .entity_kind(EntityKind::Creator) .vector(query_vec) .limit(5) .build() .unwrap(); let results = db.search(&query).unwrap(); assert!( !results.is_empty(), "Expected ANN results for creator vector search" ); assert!( results.items.iter().any(|r| r.semantic_score.is_some()), "Expected at least one result with semantic_score" ); } /// step04: Creator text search latency < 20ms at 200 creators. #[test] fn step04_creator_search_latency_under_20ms() { let db = make_creator_db(200); let query = Search::builder() .entity_kind(EntityKind::Creator) .query("jazz") .limit(10) .build() .unwrap(); // Warm up. for _ in 0..3 { let _ = db.search(&query).unwrap(); } // Measure 10 iterations. let iters = 10; let mut total = Duration::ZERO; for _ in 0..iters { let start = Instant::now(); let _ = db.search(&query).unwrap(); total += start.elapsed(); } let avg = total / iters; assert!( avg < Duration::from_millis(20), "Average creator text search latency {avg:?} exceeds 20ms target" ); } /// step05: read_creator_embedding returns stored vector. #[test] fn step05_read_creator_embedding_roundtrip() { let schema = creator_search_schema(); let db = TidalDb::builder() .ephemeral() .with_schema(schema) .open() .unwrap(); let id = EntityId::new(42); let emb = vec![1.0f32, 0.0, 0.0, 0.0]; db.write_creator_embedding(id, &emb).unwrap(); let stored = db.read_creator_embedding(id).unwrap(); assert!(stored.is_some(), "Expected stored embedding to be readable"); let stored = stored.unwrap(); // The stored vector is L2-normalized, so check it's unit length. let norm: f32 = stored.iter().map(|x| x * x).sum::().sqrt(); assert!( (norm - 1.0).abs() < 1e-5, "Stored embedding should be L2-normalized" ); } /// step06: Existing item search still works (regression check). #[test] fn step06_item_search_unaffected_by_creator_search() { let mut builder = SchemaBuilder::new(); let _ = builder .signal( "view", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(7 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); let _ = builder .signal( "follow", EntityKind::Creator, DecaySpec::Exponential { half_life: Duration::from_secs(30 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); builder.text_field("title", TextFieldType::Text); builder.creator_text_field("name", TextFieldType::Text); let schema = builder.build().unwrap(); let db = TidalDb::builder() .ephemeral() .with_schema(schema) .open() .unwrap(); // Write 5 items. for i in 0u64..5 { let mut meta = HashMap::new(); meta.insert("title".to_string(), format!("Rust tutorial {i}")); db.write_item_with_metadata(EntityId::new(i + 1), &meta) .unwrap(); } // Write 5 creators. for i in 0u64..5 { let mut meta = HashMap::new(); meta.insert("name".to_string(), format!("Jazz Creator {i}")); db.write_creator(EntityId::new(i + 100), &meta).unwrap(); } std::thread::sleep(Duration::from_millis(2500)); db.reload_text_index().unwrap(); db.reload_creator_text_index().unwrap(); // Item search should return items. let item_query = Search::builder().query("Rust").limit(10).build().unwrap(); let item_results = db.search(&item_query).unwrap(); assert!( !item_results.is_empty(), "Item search should return results" ); // Creator search should return creators. let creator_query = Search::builder() .entity_kind(EntityKind::Creator) .query("jazz") .limit(10) .build() .unwrap(); let creator_results = db.search(&creator_query).unwrap(); assert!( !creator_results.is_empty(), "Creator search should return results" ); }