#![allow( clippy::unwrap_used, clippy::cast_precision_loss, clippy::cast_possible_truncation )] //! Criterion benchmarks for production-representative load: 1M items. //! //! Validates the m7p3 performance acceptance criteria: //! - RETRIEVE p99 < 50ms //! - SEARCH p99 < 100ms //! - Signal write p99 < 100µs //! //! A single shared `TidalDb` is built once via `LazyLock` to amortize the //! 1M-item setup cost across all benchmark runs. //! //! Dataset layout: //! - 1M items, 10K creators (100 items/creator) //! - 20 categories, 128D random unit vectors //! - 10% view coverage, 5% like coverage use std::collections::HashMap; use std::sync::LazyLock; use std::time::Duration; use criterion::{BatchSize, Criterion, SamplingMode, black_box, criterion_group, criterion_main}; use tidaldb::TidalDb; use tidaldb::query::retrieve::Retrieve; use tidaldb::query::search::Search; use tidaldb::schema::{ DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window, }; use tidaldb::storage::indexes::filter::FilterExpr; const N_ITEMS: u64 = 1_000_000; const N_CREATORS: u64 = 10_000; const ITEMS_PER_CREATOR: u64 = 100; const N_CATEGORIES: u64 = 20; const DIM: usize = 128; /// Categories pool for round-robin assignment. static CATEGORIES: &[&str] = &[ "technology", "sports", "music", "gaming", "cooking", "travel", "science", "arts", "news", "fashion", "finance", "health", "education", "entertainment", "politics", "nature", "history", "automotive", "pets", "fitness", ]; fn scale_schema() -> tidaldb::schema::Schema { let mut builder = SchemaBuilder::new(); let _ = builder .signal( "view", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(7 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours, Window::SevenDays]) .velocity(false) .add(); let _ = builder .signal( "like", EntityKind::Item, DecaySpec::Exponential { half_life: Duration::from_secs(30 * 24 * 3600), }, ) .windows(&[Window::TwentyFourHours]) .velocity(false) .add(); builder.text_field("title", TextFieldType::Text); builder.text_field("category", TextFieldType::Keyword); builder.build().unwrap() } /// Build the shared 1M-item database. Called exactly once. fn build_scale_db() -> TidalDb { eprintln!("[scale bench] Building 1M-item database (this takes ~30s)..."); let db = TidalDb::builder() .ephemeral() .with_schema(scale_schema()) .open() .unwrap(); let base_ns = 1_708_000_000_000_000_000u64; for i in 0..N_ITEMS { let item_id = EntityId::new(i + 1); let creator_id = i / ITEMS_PER_CREATOR; // 0..9999 let category = CATEGORIES[(i % N_CATEGORIES) as usize]; let mut meta = HashMap::new(); meta.insert( "title".to_string(), format!("Content {i} by creator {creator_id} about {category}"), ); meta.insert("category".to_string(), category.to_string()); meta.insert("creator".to_string(), creator_id.to_string()); db.write_item_with_metadata(item_id, &meta).unwrap(); // 10% view coverage. if i % 10 == 0 { let ts = Timestamp::from_nanos(base_ns - (i % 86400) * 1_000_000_000); db.signal("view", item_id, 1.0, ts).unwrap(); } // 5% like coverage. if i % 20 == 0 { let ts = Timestamp::from_nanos(base_ns - (i % 86400) * 2_000_000_000); db.signal("like", item_id, 1.0, ts).unwrap(); } } // Wait for text syncer to commit all 1M items (1000 batch commits at 1000/commit). // The syncer commits every 1000 items, so 1M items = ~1000 commits. Allow time. eprintln!("[scale bench] Waiting for text index to commit..."); std::thread::sleep(Duration::from_millis(3000)); db.reload_text_index().unwrap(); eprintln!("[scale bench] Database ready: {N_ITEMS} items."); db } /// Shared 1M-item DB: built once, reused by all benchmarks. static SCALE_DB: LazyLock = LazyLock::new(build_scale_db); // ── RETRIEVE benchmarks ─────────────────────────────────────────────────────── /// RETRIEVE: "for_you" profile — signal-scored ranking over full universe. fn bench_retrieve_for_you(c: &mut Criterion) { let db: &TidalDb = &SCALE_DB; let query = Retrieve::builder() .profile("for_you") .limit(20) .build() .unwrap(); let mut group = c.benchmark_group("retrieve_1m"); group.sample_size(10); group.measurement_time(Duration::from_secs(30)); group.sampling_mode(SamplingMode::Flat); group.bench_function("for_you", |b| { b.iter(|| db.retrieve(black_box(&query)).unwrap()); }); group.finish(); } /// RETRIEVE: "trending" profile — top-viewed items. fn bench_retrieve_trending(c: &mut Criterion) { let db: &TidalDb = &SCALE_DB; let query = Retrieve::builder() .profile("trending") .limit(20) .build() .unwrap(); let mut group = c.benchmark_group("retrieve_1m"); group.sample_size(10); group.measurement_time(Duration::from_secs(30)); group.sampling_mode(SamplingMode::Flat); group.bench_function("trending", |b| { b.iter(|| db.retrieve(black_box(&query)).unwrap()); }); group.finish(); } /// RETRIEVE: "new" profile — recency-filtered by category. fn bench_retrieve_new_filtered(c: &mut Criterion) { let db: &TidalDb = &SCALE_DB; let query = Retrieve::builder() .profile("new") .limit(20) .filter(FilterExpr::CategoryEq("technology".into())) .build() .unwrap(); let mut group = c.benchmark_group("retrieve_1m"); group.sample_size(10); group.measurement_time(Duration::from_secs(30)); group.sampling_mode(SamplingMode::Flat); group.bench_function("new_filtered", |b| { b.iter(|| db.retrieve(black_box(&query)).unwrap()); }); group.finish(); } // ── SEARCH benchmarks ───────────────────────────────────────────────────────── /// SEARCH: text-only query over 1M items. fn bench_search_text_only(c: &mut Criterion) { let db: &TidalDb = &SCALE_DB; let query = Search::builder() .query("technology content creator") .limit(20) .build() .unwrap(); let mut group = c.benchmark_group("search_1m"); group.sample_size(10); group.measurement_time(Duration::from_secs(30)); group.sampling_mode(SamplingMode::Flat); group.bench_function("text_only", |b| { b.iter(|| db.search(black_box(&query)).unwrap()); }); group.finish(); } /// SEARCH: text query with category filter. fn bench_search_text_filtered(c: &mut Criterion) { let db: &TidalDb = &SCALE_DB; let query = Search::builder() .query("gaming sports fitness") .limit(20) .filter(FilterExpr::CategoryEq("sports".into())) .build() .unwrap(); let mut group = c.benchmark_group("search_1m"); group.sample_size(10); group.measurement_time(Duration::from_secs(30)); group.sampling_mode(SamplingMode::Flat); group.bench_function("text_filtered", |b| { b.iter(|| db.search(black_box(&query)).unwrap()); }); group.finish(); } // ── Signal write benchmark ──────────────────────────────────────────────────── /// Signal write: amortized cost over rotating entity IDs. /// /// Measures the hot path for incremental signal ingestion at 1M-item scale. /// Rotates through 1000 entity IDs to represent a realistic write workload. fn bench_signal_write(c: &mut Criterion) { let db: &TidalDb = &SCALE_DB; let mut group = c.benchmark_group("signal_write_1m"); group.sample_size(10); group.measurement_time(Duration::from_secs(30)); group.sampling_mode(SamplingMode::Flat); let mut counter = 0u64; let ts = Timestamp::now(); group.bench_function("write_rotating_1k_entities", |b| { b.iter_batched( || { // Rotate through 1000 entity IDs in the 1M range. let id = EntityId::new((counter % 1000) + 1); counter += 1; id }, |id| db.signal("view", black_box(id), 1.0, ts).unwrap(), BatchSize::SmallInput, ); }); group.finish(); } criterion_group!( scale_benches, bench_retrieve_for_you, bench_retrieve_trending, bench_retrieve_new_filtered, bench_search_text_only, bench_search_text_filtered, bench_signal_write, ); criterion_main!(scale_benches);