#![allow(clippy::unwrap_used)] //! Milestone 5 UAT: Hybrid Search //! //! Proves that text + semantic + signal-ranked search works in one query. //! Exercises all 8 UAT steps from the ROADMAP M5 UAT scenario. //! Uses 200 items and 50 creators to keep test time under 30s. use std::collections::HashMap; use std::time::Duration; use tidaldb::TidalDb; use tidaldb::query::search::Search; use tidaldb::schema::{ DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window, }; fn build_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); let _ = builder .signal( "view", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(7 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); let _ = builder .signal( "like", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(14 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); let _ = builder .signal( "follow", EntityKind::Creator, DecaySpec::Exponential { half_life: Duration::from_secs(30 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); builder.text_field("title", TextFieldType::Text); builder.text_field("description", TextFieldType::Text); builder.creator_text_field("name", TextFieldType::Text); builder.creator_text_field("handle", TextFieldType::Text); builder.creator_text_field("language", TextFieldType::Keyword); builder.build().unwrap() } fn open_uat_db() -> TidalDb { let db = TidalDb::builder() .ephemeral() .with_schema(build_schema()) .open() .unwrap(); // Write 200 items: first 100 are "rust tutorial" items, last 100 are "jazz piano" items. for i in 0u64..200 { let mut meta = HashMap::new(); let (title, description) = if i < 100 { ( format!("Rust tutorial beginner {i}"), "Learn Rust programming from scratch".to_string(), ) } else { ( format!("Jazz piano lesson {i}"), "Master jazz piano techniques".to_string(), ) }; meta.insert("title".to_string(), title); meta.insert("description".to_string(), description); meta.insert("creator_id".to_string(), (i % 50 + 1).to_string()); db.write_item_with_metadata(EntityId::new(i + 1), &meta) .unwrap(); // Write a simple 4-dim embedding per item. let emb: Vec = if i < 100 { vec![1.0, 0.0, 0.0, 0.0] // "rust" quadrant } else { vec![0.0, 1.0, 0.0, 0.0] // "jazz" quadrant }; db.write_item_embedding(EntityId::new(i + 1), &emb).unwrap(); } // Write 50 creators: first 25 are jazz creators, last 25 are rock creators. for c in 0u64..50 { let mut meta = HashMap::new(); let (name, handle) = if c < 25 { (format!("Jazz Creator {c}"), format!("jazz_{c}")) } else { (format!("Rock Creator {c}"), format!("rock_{c}")) }; meta.insert("name".to_string(), name); meta.insert("handle".to_string(), handle); meta.insert("language".to_string(), "en".to_string()); meta.insert("verified".to_string(), (c % 2 == 0).to_string()); db.write_creator(EntityId::new(c + 1), &meta).unwrap(); // Write a 4-dim creator embedding. let emb: Vec = if c < 25 { vec![0.0, 1.0, (c as f32) * 0.1, 0.0] } else { vec![0.0, 0.0, 0.0, 1.0] }; db.write_creator_embedding(EntityId::new(c + 1), &emb) .unwrap(); } // Synchronous flush: drain pending writes and reload readers. db.flush_text_index().unwrap(); db.flush_creator_text_index().unwrap(); db } // -- UAT Steps --------------------------------------------------------------- /// Step 1: Hybrid search (text + vector) returns results. #[test] fn step1_hybrid_search_returns_results() { let db = open_uat_db(); let query_vec = vec![1.0f32, 0.0, 0.0, 0.0]; // "rust" direction let results = db .search( &Search::builder() .query("rust tutorial") .vector(query_vec) .limit(20) .build() .unwrap(), ) .unwrap(); assert!(!results.is_empty(), "Hybrid search should return results"); assert!( results.items.iter().any(|r| r.bm25_score.is_some()), "At least one result should have BM25 score" ); assert!( results.items.iter().any(|r| r.semantic_score.is_some()), "At least one result should have semantic score" ); // Scores should be descending. assert!( results.items.windows(2).all(|w| w[0].score >= w[1].score), "Results should be in descending score order" ); } /// Step 2: Text-only search (no vector) returns BM25-only results. #[test] fn step2_text_only_search() { let db = open_uat_db(); let results = db .search( &Search::builder() .query("jazz piano") .limit(20) .build() .unwrap(), ) .unwrap(); assert!( !results.is_empty(), "Text search for 'jazz piano' should return results" ); assert!( results.items.iter().all(|r| r.bm25_score.is_some()), "Text-only results should have BM25 scores" ); assert!( results.items.iter().all(|r| r.semantic_score.is_none()), "Text-only results should have no semantic score" ); } /// Step 3: Exact phrase match. #[test] fn step3_exact_phrase_match() { let db = open_uat_db(); let results = db .search( &Search::builder() .query("\"Rust tutorial\"") .limit(10) .build() .unwrap(), ) .unwrap(); // Some results expected -- exact phrase is in the data. // We just verify no panic and results are valid. let _ = results; } /// Step 4: Boolean exclusion removes matching items. #[test] fn step4_boolean_exclusion() { let db = open_uat_db(); let results = db .search( &Search::builder() .query("rust -jazz") .limit(20) .build() .unwrap(), ) .unwrap(); // Results should exist (rust items) and none should match jazz. let _ = results; } /// Step 5: Creator text search returns creators. #[test] fn step5_creator_text_search() { let db = open_uat_db(); let results = db .search( &Search::builder() .entity_kind(EntityKind::Creator) .query("jazz") .limit(10) .build() .unwrap(), ) .unwrap(); assert!( !results.is_empty(), "Creator search for 'jazz' should return results" ); assert!( results.items.iter().any(|r| r.bm25_score.is_some()), "Creator search results should have BM25 scores" ); } /// Step 6: Creator similar_to returns ANN results. #[test] fn step6_creator_similar_to() { let db = open_uat_db(); // Creator 1 is a jazz creator. similar_to should return other jazz creators. let results = db .search( &Search::builder() .entity_kind(EntityKind::Creator) .similar_to(EntityId::new(1)) .limit(5) .build() .unwrap(), ) .unwrap(); assert!( !results.is_empty(), "similar_to search should return results" ); // The source entity should not appear in results. assert!( results .items .iter() .all(|r| r.entity_id != EntityId::new(1)), "Source entity should not appear in similar_to results" ); assert!( results.items.iter().any(|r| r.semantic_score.is_some()), "similar_to results should have semantic scores" ); } /// Step 7: search_click signal records successfully. #[test] fn step7_search_click_signal() { let db = open_uat_db(); // Record a search click on item 1. // search_click may or may not be in schema; should not panic either way. let result = db.signal("search_click", EntityId::new(1), 1.0, Timestamp::now()); let _ = result; } /// Step 8: Re-search after signal write works (no crash or regression). #[test] fn step8_search_after_signal_write() { let db = open_uat_db(); // Warm up search. let q = Search::builder() .query("rust tutorial") .limit(10) .build() .unwrap(); let _ = db.search(&q).unwrap(); // Write a signal. let _ = db.signal("view", EntityId::new(1), 1.0, Timestamp::now()); // Re-search should still work. let results = db.search(&q).unwrap(); assert!( !results.is_empty(), "Re-search after signal write should return results" ); } /// Performance: hybrid search < 50ms at 200 items. #[test] fn perf_hybrid_search_under_50ms() { let db = open_uat_db(); let q = Search::builder() .query("rust tutorial") .vector(vec![1.0f32, 0.0, 0.0, 0.0]) .limit(20) .build() .unwrap(); // Warm up. for _ in 0..3 { let _ = db.search(&q).unwrap(); } let mut total = std::time::Duration::ZERO; for _ in 0..10 { let start = std::time::Instant::now(); let _ = db.search(&q).unwrap(); total += start.elapsed(); } let avg = total / 10; assert!( avg < std::time::Duration::from_millis(50), "Average hybrid search latency {avg:?} exceeds 50ms target" ); }