#![allow(clippy::unwrap_used)] //! Criterion benchmarks for the RETRIEVE query executor pipeline. //! //! Measures end-to-end latency of the 5-stage pipeline: //! - Candidate generation (scan universe / signal-ranked) //! - Filter evaluation (bitmap predicates) //! - Signal scoring (profile executor) //! - Diversity enforcement (per-creator, format-mix) //! - Result assembly (pagination, signal snapshots) //! //! Scenarios: //! - `retrieve_200_scan_new`: 200 items, "new" profile, no filters (baseline) //! - `retrieve_1000_scan_new`: 1000 items, "new" profile, no filters (scale) //! - `retrieve_200_with_category_filter`: 200 items, category filter (~50% selectivity) //! - `retrieve_200_trending_with_signals`: 200 items, "trending" profile, signal data //! - `retrieve_200_with_diversity`: 200 items, diversity constraints (`max_per_creator`) //! - `retrieve_200_signal_ranked`: 200 items via signal-ranked candidate generation use std::sync::RwLock; use std::time::Duration; use criterion::{Criterion, black_box, criterion_group, criterion_main}; use roaring::RoaringBitmap; use tidaldb::query::executor::RetrieveExecutor; use tidaldb::query::retrieve::Retrieve; use tidaldb::ranking::builtins::register_builtins; use tidaldb::ranking::diversity::DiversityConstraints; use tidaldb::ranking::registry::ProfileRegistry; use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window}; use tidaldb::signals::{NoopWalWriter, SignalLedger}; use tidaldb::storage::indexes::bitmap::BitmapIndex; use tidaldb::storage::indexes::filter::FilterExpr; use tidaldb::storage::indexes::range::RangeIndex; // ── Helpers ────────────────────────────────────────────────────────────────── fn test_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); for sig in &["view", "like", "share"] { let _ = builder .signal( sig, EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(7 * 24 * 3600), }, ) .windows(&[Window::OneHour, 
Window::SevenDays]) .velocity(true) .add(); } builder.build().unwrap() } fn setup_registry() -> ProfileRegistry { let mut reg = ProfileRegistry::new(); register_builtins(&mut reg).unwrap(); reg } /// Populate indexes with `n` items. /// /// Layout: /// - category: even -> "jazz", odd -> "blues" (50/50) /// - format: id % 3 == 0 -> "video", else -> "audio" (~33/67) /// - creator: id % 50 -> distinct creator IDs (4 items per creator at n=200) /// - `created_at`: decreasing timestamps so higher IDs are "newer" #[allow( clippy::cast_possible_truncation, clippy::cast_precision_loss, clippy::too_many_arguments )] fn populate_indexes( n: u64, cat: &BitmapIndex, fmt: &BitmapIndex, creator: &BitmapIndex, tag: &BitmapIndex, dur: &RangeIndex, ts: &RangeIndex, universe: &mut RoaringBitmap, ledger: &SignalLedger, with_signals: bool, ) { let base_ns = 1_708_000_000_000_000_000u64; for i in 1..=n { let id_u32 = i as u32; universe.insert(id_u32); if i % 2 == 0 { cat.insert(id_u32, "jazz"); } else { cat.insert(id_u32, "blues"); } if i % 3 == 0 { fmt.insert(id_u32, "video"); } else { fmt.insert(id_u32, "audio"); } creator.insert(id_u32, (i % 50).to_string()); tag.insert(id_u32, "music"); dur.insert(id_u32, (i * 10) as u32); ts.insert(id_u32, base_ns + i * 1_000_000_000); // 1s apart, ascending if with_signals { let sig_ts = Timestamp::from_nanos(base_ns - i * 3_600_000_000_000); ledger .record_signal("view", EntityId::new(i), (n - i + 1) as f64, sig_ts) .unwrap(); if i % 3 == 0 { ledger .record_signal("share", EntityId::new(i), (i % 10) as f64, sig_ts) .unwrap(); } if i % 5 == 0 { ledger .record_signal("like", EntityId::new(i), (i % 5) as f64, sig_ts) .unwrap(); } } } } #[allow(clippy::too_many_arguments, clippy::missing_const_for_fn)] fn make_executor<'a>( ledger: &'a SignalLedger, profile_reg: &'a ProfileRegistry, cat: &'a BitmapIndex, fmt: &'a BitmapIndex, creator: &'a BitmapIndex, tag: &'a BitmapIndex, dur: &'a RangeIndex, ts: &'a RangeIndex, universe: &'a RwLock, ) -> 
RetrieveExecutor<'a> { RetrieveExecutor::new( ledger, profile_reg, Some(cat), Some(fmt), Some(creator), Some(tag), Some(dur), Some(ts), Some(universe), None, // embedding_registry ) } // ── Benchmarks ─────────────────────────────────────────────────────────────── /// Baseline: 200 items, "new" profile, no filters, no signals. /// /// Exercises: scan universe -> score (`entity_id` sort) -> assemble. fn bench_retrieve_200_scan_new(c: &mut Criterion) { let schema = test_schema(); let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter)); let profile_reg = setup_registry(); let cat = BitmapIndex::new("category"); let fmt = BitmapIndex::new("format"); let creator = BitmapIndex::new("creator"); let tag = BitmapIndex::new("tags"); let dur: RangeIndex = RangeIndex::new("duration"); let ts: RangeIndex = RangeIndex::new("created_at"); let mut universe_bm = RoaringBitmap::new(); populate_indexes( 200, &cat, &fmt, &creator, &tag, &dur, &ts, &mut universe_bm, &ledger, false, ); let universe = RwLock::new(universe_bm); let exec = make_executor( &ledger, &profile_reg, &cat, &fmt, &creator, &tag, &dur, &ts, &universe, ); let query = Retrieve::builder() .profile("new") .limit(20) .build() .unwrap(); c.bench_function("retrieve_200_scan_new", |b| { b.iter(|| exec.execute(black_box(&query)).unwrap()); }); } /// Scale test: 1000 items, "new" profile, no filters. /// /// Measures how candidate generation and scoring scale with universe size. 
fn bench_retrieve_1000_scan_new(c: &mut Criterion) {
    // Fixture size for this scenario.
    let item_count = 1000;

    // Signal ledger (left empty in this scenario) and ranking profiles.
    let signal_ledger = SignalLedger::new(test_schema(), Box::new(NoopWalWriter));
    let registry = setup_registry();

    // Attribute indexes, all empty until `populate_indexes` fills them.
    let category_idx = BitmapIndex::new("category");
    let format_idx = BitmapIndex::new("format");
    let creator_idx = BitmapIndex::new("creator");
    let tag_idx = BitmapIndex::new("tags");
    let duration_idx: RangeIndex = RangeIndex::new("duration");
    let created_idx: RangeIndex = RangeIndex::new("created_at");

    let mut item_ids = RoaringBitmap::new();
    populate_indexes(
        item_count,
        &category_idx,
        &format_idx,
        &creator_idx,
        &tag_idx,
        &duration_idx,
        &created_idx,
        &mut item_ids,
        &signal_ledger,
        false, // no signal data
    );
    let universe = RwLock::new(item_ids);

    let executor = make_executor(
        &signal_ledger,
        &registry,
        &category_idx,
        &format_idx,
        &creator_idx,
        &tag_idx,
        &duration_idx,
        &created_idx,
        &universe,
    );

    // Unfiltered top-20 request against the "new" profile.
    let request = Retrieve::builder().profile("new").limit(20).build().unwrap();

    c.bench_function("retrieve_1000_scan_new", |bencher| {
        bencher.iter(|| executor.execute(black_box(&request)).unwrap());
    });
}

/// Filter test: 200 items, category filter (~50% selectivity).
///
/// Exercises: scan -> bitmap filter -> score -> assemble.
fn bench_retrieve_200_with_category_filter(c: &mut Criterion) {
    // Fixture size for this scenario.
    let item_count = 200;

    // Signal ledger (left empty in this scenario) and ranking profiles.
    let signal_ledger = SignalLedger::new(test_schema(), Box::new(NoopWalWriter));
    let registry = setup_registry();

    // Attribute indexes, all empty until `populate_indexes` fills them.
    let category_idx = BitmapIndex::new("category");
    let format_idx = BitmapIndex::new("format");
    let creator_idx = BitmapIndex::new("creator");
    let tag_idx = BitmapIndex::new("tags");
    let duration_idx: RangeIndex = RangeIndex::new("duration");
    let created_idx: RangeIndex = RangeIndex::new("created_at");

    let mut item_ids = RoaringBitmap::new();
    populate_indexes(
        item_count,
        &category_idx,
        &format_idx,
        &creator_idx,
        &tag_idx,
        &duration_idx,
        &created_idx,
        &mut item_ids,
        &signal_ledger,
        false, // no signal data
    );
    let universe = RwLock::new(item_ids);

    let executor = make_executor(
        &signal_ledger,
        &registry,
        &category_idx,
        &format_idx,
        &creator_idx,
        &tag_idx,
        &duration_idx,
        &created_idx,
        &universe,
    );

    // The fixture assigns "jazz" to even item IDs, i.e. half of the universe.
    let jazz_only = FilterExpr::CategoryEq("jazz".into());
    let request = Retrieve::builder()
        .profile("new")
        .limit(20)
        .filter(jazz_only)
        .build()
        .unwrap();

    c.bench_function("retrieve_200_with_category_filter", |bencher| {
        bencher.iter(|| executor.execute(black_box(&request)).unwrap());
    });
}

/// Signal-heavy scoring: 200 items with signal data, "trending" profile.
///
/// Exercises: scan -> score (decay reads, velocity) -> assemble.
fn bench_retrieve_200_trending_with_signals(c: &mut Criterion) { let schema = test_schema(); let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter)); let profile_reg = setup_registry(); let cat = BitmapIndex::new("category"); let fmt = BitmapIndex::new("format"); let creator = BitmapIndex::new("creator"); let tag = BitmapIndex::new("tags"); let dur: RangeIndex = RangeIndex::new("duration"); let ts: RangeIndex = RangeIndex::new("created_at"); let mut universe_bm = RoaringBitmap::new(); populate_indexes( 200, &cat, &fmt, &creator, &tag, &dur, &ts, &mut universe_bm, &ledger, true, // with signals ); let universe = RwLock::new(universe_bm); let exec = make_executor( &ledger, &profile_reg, &cat, &fmt, &creator, &tag, &dur, &ts, &universe, ); let query = Retrieve::builder() .profile("trending") .limit(20) .build() .unwrap(); c.bench_function("retrieve_200_trending_with_signals", |b| { b.iter(|| exec.execute(black_box(&query)).unwrap()); }); } /// Diversity enforcement: 200 items with diversity constraints. /// /// Exercises: scan -> score -> diversity (`max_per_creator`=2) -> assemble. /// With 50 creators and 4 items each, this forces the selector to balance. 
fn bench_retrieve_200_with_diversity(c: &mut Criterion) {
    // Fixture size for this scenario.
    let item_count = 200;

    // Signal ledger (filled by `populate_indexes` below) and ranking profiles.
    let signal_ledger = SignalLedger::new(test_schema(), Box::new(NoopWalWriter));
    let registry = setup_registry();

    // Attribute indexes, all empty until `populate_indexes` fills them.
    let category_idx = BitmapIndex::new("category");
    let format_idx = BitmapIndex::new("format");
    let creator_idx = BitmapIndex::new("creator");
    let tag_idx = BitmapIndex::new("tags");
    let duration_idx: RangeIndex = RangeIndex::new("duration");
    let created_idx: RangeIndex = RangeIndex::new("created_at");

    let mut item_ids = RoaringBitmap::new();
    populate_indexes(
        item_count,
        &category_idx,
        &format_idx,
        &creator_idx,
        &tag_idx,
        &duration_idx,
        &created_idx,
        &mut item_ids,
        &signal_ledger,
        true, // signal data is recorded even though the "new" profile is requested
    );
    let universe = RwLock::new(item_ids);

    let executor = make_executor(
        &signal_ledger,
        &registry,
        &category_idx,
        &format_idx,
        &creator_idx,
        &tag_idx,
        &duration_idx,
        &created_idx,
        &universe,
    );

    // NOTE(review): creators are assigned as `id % 50`, so any 20 consecutive
    // item IDs belong to 20 distinct creators. If the "new" profile orders by
    // ID/recency, a cap of 2 per creator may never reject a candidate —
    // confirm the constraint actually binds in this scenario.
    let per_creator_cap = DiversityConstraints::new().max_per_creator(2);
    let request = Retrieve::builder()
        .profile("new")
        .limit(20)
        .diversity(per_creator_cap)
        .build()
        .unwrap();

    c.bench_function("retrieve_200_with_diversity", |bencher| {
        bencher.iter(|| executor.execute(black_box(&request)).unwrap());
    });
}

/// Signal-ranked candidate generation: 200 items, "hot" profile.
///
/// Exercises: `signal_ranked_candidates` -> score -> assemble.
/// The "hot" profile uses `SignalRanked { signal: "view" }` candidate strategy.
fn bench_retrieve_200_signal_ranked(c: &mut Criterion) { let schema = test_schema(); let ledger = SignalLedger::new(schema, Box::new(NoopWalWriter)); let profile_reg = setup_registry(); let cat = BitmapIndex::new("category"); let fmt = BitmapIndex::new("format"); let creator = BitmapIndex::new("creator"); let tag = BitmapIndex::new("tags"); let dur: RangeIndex = RangeIndex::new("duration"); let ts: RangeIndex = RangeIndex::new("created_at"); let mut universe_bm = RoaringBitmap::new(); populate_indexes( 200, &cat, &fmt, &creator, &tag, &dur, &ts, &mut universe_bm, &ledger, true, // needs signal data for signal-ranked candidate gen ); let universe = RwLock::new(universe_bm); let exec = make_executor( &ledger, &profile_reg, &cat, &fmt, &creator, &tag, &dur, &ts, &universe, ); let query = Retrieve::builder() .profile("hot") .limit(20) .build() .unwrap(); c.bench_function("retrieve_200_signal_ranked", |b| { b.iter(|| exec.execute(black_box(&query)).unwrap()); }); } criterion_group!( benches, bench_retrieve_200_scan_new, bench_retrieve_1000_scan_new, bench_retrieve_200_with_category_filter, bench_retrieve_200_trending_with_signals, bench_retrieve_200_with_diversity, bench_retrieve_200_signal_ranked, ); criterion_main!(benches);