#![allow(clippy::unwrap_used)] //! m5p1 Text Index end-to-end integration test. //! //! Validates the full BM25 pipeline: schema declaration → index → write → //! commit → query parse → search → score. Uses an ephemeral in-RAM index so //! no disk I/O is required. use std::collections::HashMap; use tidaldb::schema::{EntityId, TextFieldDef, TextFieldType}; use tidaldb::text::{AllScoresCollector, TextIndex}; fn make_fields() -> Vec { vec![ TextFieldDef { key: "title".into(), field_type: TextFieldType::Text, }, TextFieldDef { key: "description".into(), field_type: TextFieldType::Text, }, TextFieldDef { key: "category".into(), field_type: TextFieldType::Keyword, }, ] } /// Validates the full m5p1 text index pipeline: /// index → write → commit → search → score #[test] fn text_index_end_to_end() { let fields = make_fields(); let idx = TextIndex::ephemeral(&fields).unwrap(); // Write 100 items. let mut w = idx.writer_guard().unwrap(); for i in 0..100u64 { let mut meta = HashMap::new(); meta.insert("title".into(), format!("Rust tutorial {i}")); meta.insert("description".into(), "Learn Rust programming".into()); meta.insert("category".into(), "programming".into()); w.index_item(EntityId::new(i), &meta).unwrap(); } w.commit(100).unwrap(); drop(w); idx.reload_reader().unwrap(); let searcher = idx.searcher(); let parser = idx.query_parser(); let collector = AllScoresCollector { entity_id_field: idx.fields().entity_id, }; // Test 1: bare terms (AND conjunction) — "Rust tutorial" matches all 100. let q = parser.parse("Rust tutorial").unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); assert!(!results.is_empty(), "bare terms should return results"); // Test 2: exact phrase — "Rust programming" is in every description. let q = parser.parse("\"Rust programming\"").unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); assert!(!results.is_empty(), "exact phrase should match description"); // Test 3: field-scoped keyword — category:programming matches all 100. let q = parser.parse("category:programming").unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); assert_eq!( results.len(), 100, "keyword field-scoped query should match all 100" ); // Test 4: exclusion — "Rust -foobarxyz" should match (exclusion term not in corpus). // MUST_NOT excludes at the document level; "foobarxyz" appears nowhere, so nothing excluded. let q = parser.parse("Rust -foobarxyz").unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); assert!( !results.is_empty(), "exclusion of absent term should still return matching documents" ); // Test 5: BM25 latency < 10ms at 100 docs (trivial at this scale). let start = std::time::Instant::now(); let q = parser.parse("Rust").unwrap(); let _ = searcher.search(q.as_ref(), &collector).unwrap(); assert!( start.elapsed().as_millis() < 10, "BM25 query should complete in < 10ms at 100 docs" ); } /// Boolean OR returns more results than AND for the same terms. #[test] fn boolean_or_returns_superset_of_and() { let fields = vec![TextFieldDef { key: "title".into(), field_type: TextFieldType::Text, }]; let idx = TextIndex::ephemeral(&fields).unwrap(); let mut w = idx.writer_guard().unwrap(); for (i, title) in [ (1u64, "jazz piano"), (2u64, "rock guitar"), (3u64, "jazz violin"), ] { let mut m = HashMap::new(); m.insert("title".into(), title.into()); w.index_item(EntityId::new(i), &m).unwrap(); } w.commit(3).unwrap(); drop(w); idx.reload_reader().unwrap(); let searcher = idx.searcher(); let parser = idx.query_parser(); let collector = AllScoresCollector { entity_id_field: idx.fields().entity_id, }; // AND: "jazz piano" requires both terms — only entity 1. let q_and = parser.parse("jazz piano").unwrap(); let and_results = searcher.search(q_and.as_ref(), &collector).unwrap(); // OR: "jazz OR piano" — entities 1 and 3. let q_or = parser.parse("jazz OR piano").unwrap(); let or_results = searcher.search(q_or.as_ref(), &collector).unwrap(); assert!( or_results.len() >= and_results.len(), "OR should return at least as many results as AND" ); assert_eq!( and_results.len(), 1, "AND requires both 'jazz' and 'piano' — only entity 1" ); assert_eq!(or_results.len(), 2, "OR jazz or piano — entities 1 and 3"); } /// Deleting an item removes it from search results after next commit. #[test] fn delete_removes_from_results() { let fields = vec![TextFieldDef { key: "title".into(), field_type: TextFieldType::Text, }]; let idx = TextIndex::ephemeral(&fields).unwrap(); let mut w = idx.writer_guard().unwrap(); let mut m = HashMap::new(); m.insert("title".into(), "jazz piano".into()); w.index_item(EntityId::new(1), &m).unwrap(); w.commit(1).unwrap(); // Delete and commit. w.delete_item(EntityId::new(1)); w.commit(2).unwrap(); drop(w); idx.reload_reader().unwrap(); let searcher = idx.searcher(); let parser = idx.query_parser(); let collector = AllScoresCollector { entity_id_field: idx.fields().entity_id, }; let q = parser.parse("jazz").unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); assert!( results.is_empty(), "deleted item should not appear in results" ); }