#![allow(clippy::unwrap_used)] //! m5p3 SEARCH Query end-to-end integration test (UAT). //! //! Validates the full SEARCH pipeline: schema declaration → item writes → //! text index flush → BM25 retrieval → profile scoring → result assembly. //! Also validates `search_click` as a positive engagement signal. //! //! # UAT Scenario //! //! ``` //! Given: A database with 1000 indexed items (title, description) //! When: db.search(Search { query: "Rust tutorial" }) //! Then: Returns non-empty SearchResults with BM25 scores //! And: Items matching the query appear before non-matching items //! ``` use std::collections::HashMap; use std::time::Duration; use tidaldb::TidalDb; use tidaldb::query::search::Search; use tidaldb::schema::{ DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window, }; // ── Schema and fixture helpers ─────────────────────────────────────────────── fn search_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); let _ = builder .signal( "view", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(7 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); let _ = builder .signal( "like", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(30 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); let _ = builder .signal( "search_click", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(3 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); builder.text_field("title", TextFieldType::Text); builder.text_field("description", TextFieldType::Text); builder.text_field("category", TextFieldType::Keyword); builder.build().unwrap() } /// Build a TidalDb with `n` indexed items and wait for the text syncer to /// commit all documents. /// /// Items with IDs 0..500 get title "Rust tutorial {i}" (matching corpus). /// Items with IDs 500..n get title "Python machine learning {i}" (non-matching). /// /// The text syncer commits every 1000 documents. Writing ≥ 1000 items /// guarantees at least one batch commit. A 500ms sleep gives the syncer time /// to drain the channel; `reload_text_index()` makes the reader see the /// committed documents. fn make_db(n: u64) -> TidalDb { assert!(n >= 1000, "n must be ≥ 1000 to trigger a batch commit"); let db = TidalDb::builder() .ephemeral() .with_schema(search_schema()) .open() .unwrap(); let ts = Timestamp::now(); for i in 0..n { let mut meta = HashMap::new(); if i < 500 { meta.insert("title".to_string(), format!("Rust tutorial {i}")); meta.insert( "description".to_string(), "Learn Rust systems programming.".to_string(), ); meta.insert("category".to_string(), "programming".to_string()); } else { meta.insert("title".to_string(), format!("Python machine learning {i}")); meta.insert( "description".to_string(), "Machine learning with Python.".to_string(), ); meta.insert("category".to_string(), "data-science".to_string()); } db.write_item_with_metadata(EntityId::new(i), &meta) .unwrap(); // Add view signals to items 0..100 to make profile scoring non-trivial. if i < 100 { db.signal("view", EntityId::new(i), 1.0, ts).unwrap(); } } // Wait for the background text syncer to drain the channel and commit // all documents (syncer commits every 1000 items; 1K items = 1 commit). std::thread::sleep(Duration::from_millis(500)); db.reload_text_index().unwrap(); db } // ── Step 1: SearchBuilder ──────────────────────────────────────────────────── #[test] fn step1_search_builder_requires_query() { let result = Search::builder().build(); assert!( result.is_err(), "build() without query_text or query_vector must fail" ); } #[test] fn step1_search_builder_defaults() { let s = Search::builder().query("jazz").build().unwrap(); assert_eq!(s.limit, 20); assert_eq!(s.profile.name, "search"); assert!(s.filters.is_empty()); assert!(s.for_user.is_none()); } #[test] fn step1_search_builder_vector_only() { let s = Search::builder() .vector(vec![0.1_f32; 4]) .limit(10) .build() .unwrap(); assert!(s.query_text.is_none()); assert!(s.query_vector.is_some()); assert_eq!(s.limit, 10); } // ── Step 2: Text search returns results ────────────────────────────────────── #[test] fn step2_text_search_returns_results() { let db = make_db(1000); let query = Search::builder() .query("Rust tutorial") .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); assert!( !results.is_empty(), "search for 'Rust tutorial' should return results" ); assert!(results.len() <= 20, "search results must not exceed limit"); } // ── Step 3: BM25 scores are present in results ─────────────────────────────── #[test] fn step3_bm25_scores_populated() { let db = make_db(1000); let query = Search::builder() .query("Rust systems") .limit(10) .build() .unwrap(); let results = db.search(&query).unwrap(); assert!( !results.is_empty(), "expected at least one result for 'Rust systems'" ); // All results from a text-only query must have a BM25 score. for item in &results.items { assert!( item.bm25_score.is_some(), "bm25_score should be populated for text-only search" ); assert!( item.semantic_score.is_none(), "no vector → no semantic_score" ); } } // ── Step 4: Ranks are 1-based and sequential ───────────────────────────────── #[test] fn step4_ranks_are_sequential() { let db = make_db(1000); let query = Search::builder().query("Rust").limit(10).build().unwrap(); let results = db.search(&query).unwrap(); assert!(!results.is_empty(), "expected results"); for (i, item) in results.items.iter().enumerate() { assert_eq!( item.rank, i + 1, "rank should be 1-based and sequential at position {i}" ); } } // ── Step 5: query_text that matches nothing returns empty results ───────────── #[test] fn step5_no_matching_query_returns_empty() { let db = make_db(1000); let query = Search::builder() .query("xyzzy123foobarquux") .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); assert!( results.is_empty(), "non-matching query should return empty results" ); } // ── Step 6: search_click is a positive engagement signal ───────────────────── #[test] fn step6_search_click_signal_recorded() { let db = TidalDb::builder() .ephemeral() .with_schema(search_schema()) .open() .unwrap(); let entity = EntityId::new(1); let ts = Timestamp::now(); // search_click should succeed as a registered signal type. db.signal("search_click", entity, 1.0, ts).unwrap(); // The signal should be readable as a decay score. let score = db.read_decay_score(entity, "search_click", 0).unwrap(); assert!( score.is_some() && score.unwrap() > 0.0, "search_click should produce a positive decay score" ); } // ── Step 7: search_click updates preference vector (positive engagement) ────── #[test] fn step7_search_click_updates_preference_vector() { let db = TidalDb::builder() .ephemeral() .with_schema(search_schema()) .open() .unwrap(); let user_id = 99_u64; let entity = EntityId::new(42); let ts = Timestamp::now(); // Write item with a creator so there is preference state to update. let mut meta = HashMap::new(); meta.insert("title".to_string(), "Rust embedded".to_string()); meta.insert("creator_id".to_string(), "1".to_string()); db.write_item_with_metadata(entity, &meta).unwrap(); // signal_with_context with a user triggers preference vector update. db.signal_with_context("search_click", entity, 1.0, ts, Some(user_id), None) .unwrap(); // The signal is recorded. let score = db.read_decay_score(entity, "search_click", 0).unwrap(); assert!(score.is_some(), "search_click signal should be recorded"); } // ── Step 8: Latency target < 50ms at 1K items ──────────────────────────────── #[test] fn step8_search_latency_under_50ms() { let db = make_db(1000); let query = Search::builder() .query("Rust tutorial") .limit(20) .build() .unwrap(); let start = std::time::Instant::now(); let _results = db.search(&query).unwrap(); let elapsed = start.elapsed(); assert!( elapsed.as_millis() < 50, "search at 1K items should complete in < 50ms, got {}ms", elapsed.as_millis() ); } // ── Step 9: search with for_user doesn't panic ─────────────────────────────── #[test] fn step9_personalized_search_executes() { let db = make_db(1000); let user_id = 7_u64; let ts = Timestamp::now(); // Give the user some signals so personalization has data. for i in 0u64..10 { db.signal_with_context("view", EntityId::new(i), 1.0, ts, Some(user_id), None) .unwrap(); } let query = Search::builder() .query("Rust") .for_user(user_id) .limit(20) .build() .unwrap(); let results = db.search(&query).unwrap(); // Personalized search should return results. assert!( !results.is_empty(), "personalized search should return results" ); } // ── Step 10: search builtin profile is registered ──────────────────────────── #[test] fn step10_search_profile_registered() { let db = TidalDb::builder() .ephemeral() .with_schema(search_schema()) .open() .unwrap(); // A search query with default profile ("search") must not fail with // "profile not found" — it should succeed even if results are empty. let query = Search::builder() .query("anything") .limit(1) .build() .unwrap(); // The search may return no results (text index not flushed), but must // not fail with a missing profile error. let result = db.search(&query); assert!( result.is_ok(), "search with default 'search' profile must not fail: {:?}", result.err() ); }