M2: RETRIEVE query pipeline with 5-stage execution (candidate → filter → score → diversify → limit),
usearch HNSW vector index, bitmap/range/universe filters, ranking profiles with signal scoring,
MMR diversity enforcement, and m2_uat integration tests.
M3: Entity system with typed metadata, relationship graph (follows/blocks/interactions),
creator entities, session tracking, and m3_uat integration tests.
M4: Advanced ranking with builtin functions (freshness, trending, controversy, wilson),
ranking executor with explain mode, query executor integration, benchmarks for
query/ranking/vector/filters/diversity, and m4_uat integration tests.
Includes: 9 new blog posts, marketing site updates, updated roadmap, and updated vision doc.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
481 lines
14 KiB
Rust
481 lines
14 KiB
Rust
#![allow(clippy::unwrap_used)]
|
|
|
|
//! Criterion benchmarks for the RETRIEVE query executor pipeline.
|
|
//!
|
|
//! Measures end-to-end latency of the 5-stage pipeline:
|
|
//! - Candidate generation (scan universe / signal-ranked)
|
|
//! - Filter evaluation (bitmap predicates)
|
|
//! - Signal scoring (profile executor)
|
|
//! - Diversity enforcement (per-creator, format-mix)
|
|
//! - Result assembly (pagination, signal snapshots)
|
|
//!
|
|
//! Scenarios:
|
|
//! - `retrieve_200_scan_new`: 200 items, "new" profile, no filters (baseline)
|
|
//! - `retrieve_1000_scan_new`: 1000 items, "new" profile, no filters (scale)
|
|
//! - `retrieve_200_with_category_filter`: 200 items, category filter (~50% selectivity)
|
|
//! - `retrieve_200_trending_with_signals`: 200 items, "trending" profile, signal data
|
|
//! - `retrieve_200_with_diversity`: 200 items, diversity constraints (`max_per_creator`)
|
|
//! - `retrieve_200_signal_ranked`: 200 items via signal-ranked candidate generation
|
|
|
|
use std::sync::RwLock;
|
|
use std::time::Duration;
|
|
|
|
use criterion::{Criterion, black_box, criterion_group, criterion_main};
|
|
use roaring::RoaringBitmap;
|
|
use tidaldb::query::executor::RetrieveExecutor;
|
|
use tidaldb::query::retrieve::Retrieve;
|
|
use tidaldb::ranking::builtins::register_builtins;
|
|
use tidaldb::ranking::diversity::DiversityConstraints;
|
|
use tidaldb::ranking::registry::ProfileRegistry;
|
|
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
|
|
use tidaldb::signals::{NoopWalWriter, SignalLedger};
|
|
use tidaldb::storage::indexes::bitmap::BitmapIndex;
|
|
use tidaldb::storage::indexes::filter::FilterExpr;
|
|
use tidaldb::storage::indexes::range::RangeIndex;
|
|
|
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
fn test_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
for sig in &["view", "like", "share"] {
|
|
let _ = builder
|
|
.signal(
|
|
sig,
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(7 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::OneHour, Window::SevenDays])
|
|
.velocity(true)
|
|
.add();
|
|
}
|
|
builder.build().unwrap()
|
|
}
|
|
|
|
fn setup_registry() -> ProfileRegistry {
|
|
let mut reg = ProfileRegistry::new();
|
|
register_builtins(&mut reg).unwrap();
|
|
reg
|
|
}
|
|
|
|
/// Populate indexes with `n` items.
|
|
///
|
|
/// Layout:
|
|
/// - category: even -> "jazz", odd -> "blues" (50/50)
|
|
/// - format: id % 3 == 0 -> "video", else -> "audio" (~33/67)
|
|
/// - creator: id % 50 -> distinct creator IDs (4 items per creator at n=200)
|
|
/// - `created_at`: decreasing timestamps so higher IDs are "newer"
|
|
#[allow(
|
|
clippy::cast_possible_truncation,
|
|
clippy::cast_precision_loss,
|
|
clippy::too_many_arguments
|
|
)]
|
|
fn populate_indexes(
|
|
n: u64,
|
|
cat: &BitmapIndex,
|
|
fmt: &BitmapIndex,
|
|
creator: &BitmapIndex,
|
|
tag: &BitmapIndex,
|
|
dur: &RangeIndex<u32>,
|
|
ts: &RangeIndex<u64>,
|
|
universe: &mut RoaringBitmap,
|
|
ledger: &SignalLedger,
|
|
with_signals: bool,
|
|
) {
|
|
let base_ns = 1_708_000_000_000_000_000u64;
|
|
for i in 1..=n {
|
|
let id_u32 = i as u32;
|
|
universe.insert(id_u32);
|
|
|
|
if i % 2 == 0 {
|
|
cat.insert(id_u32, "jazz");
|
|
} else {
|
|
cat.insert(id_u32, "blues");
|
|
}
|
|
|
|
if i % 3 == 0 {
|
|
fmt.insert(id_u32, "video");
|
|
} else {
|
|
fmt.insert(id_u32, "audio");
|
|
}
|
|
|
|
creator.insert(id_u32, (i % 50).to_string());
|
|
tag.insert(id_u32, "music");
|
|
dur.insert(id_u32, (i * 10) as u32);
|
|
ts.insert(id_u32, base_ns + i * 1_000_000_000); // 1s apart, ascending
|
|
|
|
if with_signals {
|
|
let sig_ts = Timestamp::from_nanos(base_ns - i * 3_600_000_000_000);
|
|
ledger
|
|
.record_signal("view", EntityId::new(i), (n - i + 1) as f64, sig_ts)
|
|
.unwrap();
|
|
if i % 3 == 0 {
|
|
ledger
|
|
.record_signal("share", EntityId::new(i), (i % 10) as f64, sig_ts)
|
|
.unwrap();
|
|
}
|
|
if i % 5 == 0 {
|
|
ledger
|
|
.record_signal("like", EntityId::new(i), (i % 5) as f64, sig_ts)
|
|
.unwrap();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::too_many_arguments, clippy::missing_const_for_fn)]
|
|
fn make_executor<'a>(
|
|
ledger: &'a SignalLedger,
|
|
profile_reg: &'a ProfileRegistry,
|
|
cat: &'a BitmapIndex,
|
|
fmt: &'a BitmapIndex,
|
|
creator: &'a BitmapIndex,
|
|
tag: &'a BitmapIndex,
|
|
dur: &'a RangeIndex<u32>,
|
|
ts: &'a RangeIndex<u64>,
|
|
universe: &'a RwLock<RoaringBitmap>,
|
|
) -> RetrieveExecutor<'a> {
|
|
RetrieveExecutor::new(
|
|
ledger,
|
|
profile_reg,
|
|
Some(cat),
|
|
Some(fmt),
|
|
Some(creator),
|
|
Some(tag),
|
|
Some(dur),
|
|
Some(ts),
|
|
Some(universe),
|
|
None, // embedding_registry
|
|
)
|
|
}
|
|
|
|
// ── Benchmarks ───────────────────────────────────────────────────────────────
|
|
|
|
/// Baseline: 200 items, "new" profile, no filters, no signals.
|
|
///
|
|
/// Exercises: scan universe -> score (`entity_id` sort) -> assemble.
|
|
fn bench_retrieve_200_scan_new(c: &mut Criterion) {
|
|
let schema = test_schema();
|
|
let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter));
|
|
let profile_reg = setup_registry();
|
|
let cat = BitmapIndex::new("category");
|
|
let fmt = BitmapIndex::new("format");
|
|
let creator = BitmapIndex::new("creator");
|
|
let tag = BitmapIndex::new("tags");
|
|
let dur: RangeIndex<u32> = RangeIndex::new("duration");
|
|
let ts: RangeIndex<u64> = RangeIndex::new("created_at");
|
|
let mut universe_bm = RoaringBitmap::new();
|
|
|
|
populate_indexes(
|
|
200,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&mut universe_bm,
|
|
&ledger,
|
|
false,
|
|
);
|
|
|
|
let universe = RwLock::new(universe_bm);
|
|
let exec = make_executor(
|
|
&ledger,
|
|
&profile_reg,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&universe,
|
|
);
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("new")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("retrieve_200_scan_new", |b| {
|
|
b.iter(|| exec.execute(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Scale test: 1000 items, "new" profile, no filters.
|
|
///
|
|
/// Measures how candidate generation and scoring scale with universe size.
|
|
fn bench_retrieve_1000_scan_new(c: &mut Criterion) {
|
|
let schema = test_schema();
|
|
let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter));
|
|
let profile_reg = setup_registry();
|
|
let cat = BitmapIndex::new("category");
|
|
let fmt = BitmapIndex::new("format");
|
|
let creator = BitmapIndex::new("creator");
|
|
let tag = BitmapIndex::new("tags");
|
|
let dur: RangeIndex<u32> = RangeIndex::new("duration");
|
|
let ts: RangeIndex<u64> = RangeIndex::new("created_at");
|
|
let mut universe_bm = RoaringBitmap::new();
|
|
|
|
populate_indexes(
|
|
1000,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&mut universe_bm,
|
|
&ledger,
|
|
false,
|
|
);
|
|
|
|
let universe = RwLock::new(universe_bm);
|
|
let exec = make_executor(
|
|
&ledger,
|
|
&profile_reg,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&universe,
|
|
);
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("new")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("retrieve_1000_scan_new", |b| {
|
|
b.iter(|| exec.execute(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Filter test: 200 items, category filter (~50% selectivity).
|
|
///
|
|
/// Exercises: scan -> bitmap filter -> score -> assemble.
|
|
fn bench_retrieve_200_with_category_filter(c: &mut Criterion) {
|
|
let schema = test_schema();
|
|
let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter));
|
|
let profile_reg = setup_registry();
|
|
let cat = BitmapIndex::new("category");
|
|
let fmt = BitmapIndex::new("format");
|
|
let creator = BitmapIndex::new("creator");
|
|
let tag = BitmapIndex::new("tags");
|
|
let dur: RangeIndex<u32> = RangeIndex::new("duration");
|
|
let ts: RangeIndex<u64> = RangeIndex::new("created_at");
|
|
let mut universe_bm = RoaringBitmap::new();
|
|
|
|
populate_indexes(
|
|
200,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&mut universe_bm,
|
|
&ledger,
|
|
false,
|
|
);
|
|
|
|
let universe = RwLock::new(universe_bm);
|
|
let exec = make_executor(
|
|
&ledger,
|
|
&profile_reg,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&universe,
|
|
);
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("new")
|
|
.limit(20)
|
|
.filter(FilterExpr::CategoryEq("jazz".into()))
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("retrieve_200_with_category_filter", |b| {
|
|
b.iter(|| exec.execute(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Signal-heavy scoring: 200 items with signal data, "trending" profile.
|
|
///
|
|
/// Exercises: scan -> score (decay reads, velocity) -> assemble.
|
|
fn bench_retrieve_200_trending_with_signals(c: &mut Criterion) {
|
|
let schema = test_schema();
|
|
let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter));
|
|
let profile_reg = setup_registry();
|
|
let cat = BitmapIndex::new("category");
|
|
let fmt = BitmapIndex::new("format");
|
|
let creator = BitmapIndex::new("creator");
|
|
let tag = BitmapIndex::new("tags");
|
|
let dur: RangeIndex<u32> = RangeIndex::new("duration");
|
|
let ts: RangeIndex<u64> = RangeIndex::new("created_at");
|
|
let mut universe_bm = RoaringBitmap::new();
|
|
|
|
populate_indexes(
|
|
200,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&mut universe_bm,
|
|
&ledger,
|
|
true, // with signals
|
|
);
|
|
|
|
let universe = RwLock::new(universe_bm);
|
|
let exec = make_executor(
|
|
&ledger,
|
|
&profile_reg,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&universe,
|
|
);
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("trending")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("retrieve_200_trending_with_signals", |b| {
|
|
b.iter(|| exec.execute(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Diversity enforcement: 200 items with diversity constraints.
|
|
///
|
|
/// Exercises: scan -> score -> diversity (`max_per_creator`=2) -> assemble.
|
|
/// With 50 creators and 4 items each, this forces the selector to balance.
|
|
fn bench_retrieve_200_with_diversity(c: &mut Criterion) {
|
|
let schema = test_schema();
|
|
let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter));
|
|
let profile_reg = setup_registry();
|
|
let cat = BitmapIndex::new("category");
|
|
let fmt = BitmapIndex::new("format");
|
|
let creator = BitmapIndex::new("creator");
|
|
let tag = BitmapIndex::new("tags");
|
|
let dur: RangeIndex<u32> = RangeIndex::new("duration");
|
|
let ts: RangeIndex<u64> = RangeIndex::new("created_at");
|
|
let mut universe_bm = RoaringBitmap::new();
|
|
|
|
populate_indexes(
|
|
200,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&mut universe_bm,
|
|
&ledger,
|
|
true,
|
|
);
|
|
|
|
let universe = RwLock::new(universe_bm);
|
|
let exec = make_executor(
|
|
&ledger,
|
|
&profile_reg,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&universe,
|
|
);
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("new")
|
|
.limit(20)
|
|
.diversity(DiversityConstraints::new().max_per_creator(2))
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("retrieve_200_with_diversity", |b| {
|
|
b.iter(|| exec.execute(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Signal-ranked candidate generation: 200 items, "hot" profile.
|
|
///
|
|
/// Exercises: `signal_ranked_candidates` -> score -> assemble.
|
|
/// The "hot" profile uses `SignalRanked { signal: "view" }` candidate strategy.
|
|
fn bench_retrieve_200_signal_ranked(c: &mut Criterion) {
|
|
let schema = test_schema();
|
|
let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter));
|
|
let profile_reg = setup_registry();
|
|
let cat = BitmapIndex::new("category");
|
|
let fmt = BitmapIndex::new("format");
|
|
let creator = BitmapIndex::new("creator");
|
|
let tag = BitmapIndex::new("tags");
|
|
let dur: RangeIndex<u32> = RangeIndex::new("duration");
|
|
let ts: RangeIndex<u64> = RangeIndex::new("created_at");
|
|
let mut universe_bm = RoaringBitmap::new();
|
|
|
|
populate_indexes(
|
|
200,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&mut universe_bm,
|
|
&ledger,
|
|
true, // needs signal data for signal-ranked candidate gen
|
|
);
|
|
|
|
let universe = RwLock::new(universe_bm);
|
|
let exec = make_executor(
|
|
&ledger,
|
|
&profile_reg,
|
|
&cat,
|
|
&fmt,
|
|
&creator,
|
|
&tag,
|
|
&dur,
|
|
&ts,
|
|
&universe,
|
|
);
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("hot")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
c.bench_function("retrieve_200_signal_ranked", |b| {
|
|
b.iter(|| exec.execute(black_box(&query)).unwrap());
|
|
});
|
|
}
|
|
|
|
criterion_group!(
|
|
benches,
|
|
bench_retrieve_200_scan_new,
|
|
bench_retrieve_1000_scan_new,
|
|
bench_retrieve_200_with_category_filter,
|
|
bench_retrieve_200_trending_with_signals,
|
|
bench_retrieve_200_with_diversity,
|
|
bench_retrieve_200_signal_ranked,
|
|
);
|
|
criterion_main!(benches);
|