tidaldb/tidal/benches/search.rs
jordan 192c473f55 feat: complete Milestone 5 — full-text search, RRF fusion, and creator search
- M5p1: BM25 text indexing via Tantivy with background syncer (0.26ms @ 10K docs)
- M5p2: RRF fusion layer combining BM25 + ANN scores (46µs @ 1K candidates)
- M5p3: unified Search query API (8-stage pipeline, BM25 + vector + ranking)
- M5p4: creator text + vector indexing and creator search executor (< 20ms @ 200 creators)
- Refactor db/mod.rs into focused sub-modules (creators, items, sessions, signals, etc.)
- Decompose monolithic files into directory modules (query/executor, ranking/diversity, etc.)
- Split brute.rs → brute/mod.rs + brute/tests.rs; extract search executor helpers
- Add benches: fusion, search, session, text_index
- Add M5 UAT test suites (m5_uat, m5_search, m5p4_creator_search, text_index)
- Update blog posts, roadmap, content strategy, and M5 planning docs
- Add tmp/ and .claude/worktrees/ to .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 23:53:16 -07:00

130 lines
3.8 KiB
Rust

#![allow(clippy::unwrap_used)]
//! Criterion benchmarks for the SEARCH query pipeline.
//!
//! Measures end-to-end `db.search()` latency at 10K items to validate the
//! < 50ms target specified in the m5p3 phase acceptance criteria.
use std::collections::HashMap;
use std::time::Duration;
use criterion::{Criterion, black_box, criterion_group, criterion_main};
use tidaldb::TidalDb;
use tidaldb::query::search::Search;
use tidaldb::schema::{
DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldDef, TextFieldType, Timestamp, Window,
};
/// Assemble the schema shared by every search benchmark in this file.
///
/// Declares two item-level signals with exponential decay — `view`
/// (7-day half-life) and `like` (30-day half-life) — each tracked over a
/// 24-hour window with velocity disabled, plus three text fields:
/// `title` and `description` as full text, `category` as a keyword.
///
/// # Panics
///
/// Panics if the schema builder rejects the definition.
fn search_schema() -> tidaldb::schema::Schema {
    const SECS_PER_DAY: u64 = 24 * 3600;

    let mut schema = SchemaBuilder::new();

    // (signal name, half-life in days)
    for (signal_name, half_life_days) in [("view", 7_u64), ("like", 30_u64)] {
        let _ = schema
            .signal(
                signal_name,
                EntityKind::Item,
                DecaySpec::Exponential {
                    half_life: Duration::from_secs(half_life_days * SECS_PER_DAY),
                },
            )
            .windows(&[Window::TwentyFourHours])
            .velocity(false)
            .add();
    }

    // (field name, indexing mode)
    for (field_name, field_type) in [
        ("title", TextFieldType::Text),
        ("description", TextFieldType::Text),
        ("category", TextFieldType::Keyword),
    ] {
        schema.text_field(field_name, field_type);
    }

    schema.build().unwrap()
}
/// Build a TidalDb with N items indexed for text search.
fn make_db(n: u64) -> TidalDb {
let db = TidalDb::builder()
.ephemeral()
.with_schema(search_schema())
.open()
.unwrap();
let ts = Timestamp::now();
for i in 0..n {
let mut meta = HashMap::new();
// Vary titles to produce realistic IDF scores.
meta.insert(
"title".to_string(),
format!("Rust tutorial {i} async concurrency programming"),
);
meta.insert(
"description".to_string(),
"Learn Rust with practical examples and real projects.".to_string(),
);
let cat = if i % 3 == 0 {
"programming"
} else if i % 3 == 1 {
"systems"
} else {
"web"
};
meta.insert("category".to_string(), cat.to_string());
db.write_item_with_metadata(EntityId::new(i), &meta)
.unwrap();
// Add some signals to make profile scoring non-trivial.
if i % 5 == 0 {
db.signal("view", EntityId::new(i), 1.0, ts).unwrap();
}
}
// Wait for the background text syncer to commit all pending documents
// (syncer commits every 1_000 items or 2s; 10K items = 10 batch commits).
// Then reload the reader so the searcher sees all committed documents.
std::thread::sleep(std::time::Duration::from_millis(500));
db.reload_text_index().unwrap();
db
}
/// Benchmark: free-text `db.search()` over a 10K-item database.
///
/// Runs the query `"Rust async"` with a result limit of 20.
///
/// Target: < 50ms.
fn bench_search_text_10k(c: &mut Criterion) {
    let db = make_db(10_000);
    let text_query = Search::builder()
        .query("Rust async")
        .limit(20)
        .build()
        .unwrap();

    c.bench_function("search_text_10k", |bencher| {
        bencher.iter(|| {
            // Hide the query from the optimizer so the search is not folded away.
            let opaque_query = black_box(&text_query);
            db.search(opaque_query).unwrap()
        });
    });
}
/// Benchmark: keyword-scoped `db.search()` over a 10K-item database.
///
/// Runs the query `"category:programming"` with a result limit of 20.
///
/// Target: < 50ms.
fn bench_search_keyword_10k(c: &mut Criterion) {
    let db = make_db(10_000);
    let keyword_query = Search::builder()
        .query("category:programming")
        .limit(20)
        .build()
        .unwrap();

    c.bench_function("search_keyword_10k", |bencher| {
        bencher.iter(|| {
            // Hide the query from the optimizer so the search is not folded away.
            let opaque_query = black_box(&keyword_query);
            db.search(opaque_query).unwrap()
        });
    });
}
// Register both search benchmarks under one group, using Criterion's
// default configuration, and generate the benchmark binary's `main`.
criterion_group! {
    name = search_benches;
    config = Criterion::default();
    targets = bench_search_text_10k, bench_search_keyword_10k
}
criterion_main!(search_benches);