- M5p1: BM25 text indexing via Tantivy with background syncer (0.26ms @ 10K docs) - M5p2: RRF fusion layer combining BM25 + ANN scores (46µs @ 1K candidates) - M5p3: unified Search query API (8-stage pipeline, BM25 + vector + ranking) - M5p4: creator text + vector indexing and creator search executor (< 20ms @ 200 creators) - Refactor db/mod.rs into focused sub-modules (creators, items, sessions, signals, etc.) - Decompose monolithic files into directory modules (query/executor, ranking/diversity, etc.) - Split brute.rs → brute/mod.rs + brute/tests.rs; extract search executor helpers - Add benches: fusion, search, session, text_index - Add M5 UAT test suites (m5_uat, m5_search, m5p4_creator_search, text_index) - Update blog posts, roadmap, content strategy, and M5 planning docs - Add tmp/ and .claude/worktrees/ to .gitignore Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
130 lines
3.8 KiB
Rust
130 lines
3.8 KiB
Rust
#![allow(clippy::unwrap_used)]
|
|
//! Criterion benchmarks for the SEARCH query pipeline.
//!
//! Measures end-to-end `db.search()` latency at 10K items to validate the
//! < 50ms target specified in the m5p3 phase acceptance criteria.
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
use criterion::{Criterion, black_box, criterion_group, criterion_main};
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::query::search::Search;
|
|
use tidaldb::schema::{
|
|
DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldDef, TextFieldType, Timestamp, Window,
|
|
};
|
|
|
|
fn search_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
let _ = builder
|
|
.signal(
|
|
"view",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(7 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::TwentyFourHours])
|
|
.velocity(false)
|
|
.add();
|
|
let _ = builder
|
|
.signal(
|
|
"like",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(30 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::TwentyFourHours])
|
|
.velocity(false)
|
|
.add();
|
|
builder.text_field("title", TextFieldType::Text);
|
|
builder.text_field("description", TextFieldType::Text);
|
|
builder.text_field("category", TextFieldType::Keyword);
|
|
builder.build().unwrap()
|
|
}
|
|
|
|
/// Build a TidalDb with N items indexed for text search.
|
|
fn make_db(n: u64) -> TidalDb {
|
|
let db = TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(search_schema())
|
|
.open()
|
|
.unwrap();
|
|
|
|
let ts = Timestamp::now();
|
|
for i in 0..n {
|
|
let mut meta = HashMap::new();
|
|
// Vary titles to produce realistic IDF scores.
|
|
meta.insert(
|
|
"title".to_string(),
|
|
format!("Rust tutorial {i} async concurrency programming"),
|
|
);
|
|
meta.insert(
|
|
"description".to_string(),
|
|
"Learn Rust with practical examples and real projects.".to_string(),
|
|
);
|
|
let cat = if i % 3 == 0 {
|
|
"programming"
|
|
} else if i % 3 == 1 {
|
|
"systems"
|
|
} else {
|
|
"web"
|
|
};
|
|
meta.insert("category".to_string(), cat.to_string());
|
|
db.write_item_with_metadata(EntityId::new(i), &meta)
|
|
.unwrap();
|
|
|
|
// Add some signals to make profile scoring non-trivial.
|
|
if i % 5 == 0 {
|
|
db.signal("view", EntityId::new(i), 1.0, ts).unwrap();
|
|
}
|
|
}
|
|
|
|
// Wait for the background text syncer to commit all pending documents
|
|
// (syncer commits every 1_000 items or 2s; 10K items = 10 batch commits).
|
|
// Then reload the reader so the searcher sees all committed documents.
|
|
std::thread::sleep(std::time::Duration::from_millis(500));
|
|
db.reload_text_index().unwrap();
|
|
db
|
|
}
|
|
|
|
/// Benchmark: `db.search()` with a text-only query at 10K items.
|
|
///
|
|
/// Target: < 50ms.
|
|
fn bench_search_text_10k(c: &mut Criterion) {
|
|
let db = make_db(10_000);
|
|
let query = Search::builder()
|
|
.query("Rust async")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("search_text_10k", |b| {
|
|
b.iter(|| db.search(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Benchmark: `db.search()` with a keyword-scoped query at 10K items.
|
|
///
|
|
/// Target: < 50ms.
|
|
fn bench_search_keyword_10k(c: &mut Criterion) {
|
|
let db = make_db(10_000);
|
|
let query = Search::builder()
|
|
.query("category:programming")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("search_keyword_10k", |b| {
|
|
b.iter(|| db.search(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
criterion_group!(
|
|
search_benches,
|
|
bench_search_text_10k,
|
|
bench_search_keyword_10k
|
|
);
|
|
criterion_main!(search_benches);
|