// Every fallible call in this benchmark file is unwrapped; a failure aborts the run.
#![allow(clippy::unwrap_used)]
//! Criterion benchmarks for the BM25 text index pipeline.
//!
//! Measures BM25 query latency at various corpus sizes to validate the
//! < 10ms target at 10K documents specified in the m5p1 phase acceptance criteria.

use std::collections::HashMap;

use criterion::{Criterion, black_box, criterion_group, criterion_main};
use tidaldb::schema::{EntityId, TextFieldDef, TextFieldType};
use tidaldb::text::{AllScoresCollector, TextIndex};

/// Builds an ephemeral [`TextIndex`] holding `n` synthetic documents.
///
/// Each document carries a `title` and `description` text field plus a
/// `category` keyword field. The writer is committed and released, and the
/// reader is reloaded, before the index is handed back.
///
/// # Panics
///
/// Panics if any index operation returns an error.
fn make_index(n: u64) -> TextIndex {
    let schema = vec![
        TextFieldDef {
            key: "title".into(),
            field_type: TextFieldType::Text,
        },
        TextFieldDef {
            key: "description".into(),
            field_type: TextFieldType::Text,
        },
        TextFieldDef {
            key: "category".into(),
            field_type: TextFieldType::Keyword,
        },
    ];
    let index = TextIndex::ephemeral(&schema).unwrap();

    // Scope the writer guard so it is released before the reader is reloaded.
    {
        let mut writer = index.writer_guard().unwrap();
        for doc_id in 0..n {
            // Even ids land in "programming", odd ids in "systems", so the
            // keyword field sees two distinct values.
            let category = match doc_id % 2 {
                0 => "programming",
                _ => "systems",
            };
            let document = HashMap::from([
                // The embedded id makes every title distinct, which keeps
                // BM25 IDF scoring meaningful.
                (
                    "title".into(),
                    format!("Rust tutorial {doc_id} async concurrency"),
                ),
                (
                    "description".into(),
                    "Learn Rust programming with practical examples and real projects.".into(),
                ),
                ("category".into(), category.into()),
            ]);
            writer
                .index_item(EntityId::new(doc_id), &document)
                .unwrap();
        }
        writer.commit(n).unwrap();
    }

    index.reload_reader().unwrap();
    index
}

/// Benchmarks a bare-term BM25 query (`Rust async`) against a 1K-document index.
///
/// The index is built once outside the measured closure; each iteration
/// parses the query string and runs the search.
fn bench_bm25_1k(c: &mut Criterion) {
    let index = make_index(1_000);
    let searcher = index.searcher();
    let parser = index.query_parser();
    let collector = AllScoresCollector {
        entity_id_field: index.fields().entity_id,
    };

    c.bench_function("bm25_query_1k_docs", |bencher| {
        bencher.iter(|| {
            let query = parser.parse(black_box("Rust async")).unwrap();
            // Route the hits through `black_box` so the search is not optimized away.
            black_box(searcher.search(query.as_ref(), &collector).unwrap())
        });
    });
}

/// BM25 bare-term query at 10K docs — must complete in < 10ms.
fn bench_bm25_10k(c: &mut Criterion) { let idx = make_index(10_000); let searcher = idx.searcher(); let parser = idx.query_parser(); let collector = AllScoresCollector { entity_id_field: idx.fields().entity_id, }; c.bench_function("bm25_query_10k_docs", |b| { b.iter(|| { let q = parser.parse(black_box("Rust async")).unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); black_box(results) }); }); } /// BM25 exact-phrase query at 10K docs. fn bench_bm25_phrase_10k(c: &mut Criterion) { let idx = make_index(10_000); let searcher = idx.searcher(); let parser = idx.query_parser(); let collector = AllScoresCollector { entity_id_field: idx.fields().entity_id, }; c.bench_function("bm25_phrase_10k_docs", |b| { b.iter(|| { let q = parser.parse(black_box("\"Rust programming\"")).unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); black_box(results) }); }); } /// BM25 keyword field-scoped query at 10K docs. fn bench_bm25_keyword_10k(c: &mut Criterion) { let idx = make_index(10_000); let searcher = idx.searcher(); let parser = idx.query_parser(); let collector = AllScoresCollector { entity_id_field: idx.fields().entity_id, }; c.bench_function("bm25_keyword_10k_docs", |b| { b.iter(|| { let q = parser.parse(black_box("category:programming")).unwrap(); let results = searcher.search(q.as_ref(), &collector).unwrap(); black_box(results) }); }); } criterion_group!( bm25_benches, bench_bm25_1k, bench_bm25_10k, bench_bm25_phrase_10k, bench_bm25_keyword_10k ); criterion_main!(bm25_benches);