303 lines
8.9 KiB
Rust
303 lines
8.9 KiB
Rust
#![allow(
|
|
clippy::unwrap_used,
|
|
clippy::cast_precision_loss,
|
|
clippy::cast_possible_truncation
|
|
)]
|
|
|
|
//! Criterion benchmarks for a production-representative load of 1M items.
//!
//! Validates the m7p3 performance acceptance criteria:
//! - RETRIEVE p99 < 50ms
//! - SEARCH p99 < 100ms
//! - Signal write p99 < 100µs
//!
//! A single shared `TidalDb` is built once via `LazyLock` to amortize the
//! 1M-item setup cost across all benchmark runs.
//!
//! Dataset layout:
//! - 1M items, 10K creators (100 items/creator)
//! - 20 categories, 128D random unit vectors
//!   (NOTE(review): the setup code in this file assigns categories but does
//!   not appear to write any vectors — confirm whether this bullet is stale)
//! - 10% view coverage, 5% like coverage
|
|
|
|
use std::collections::HashMap;
|
|
use std::sync::LazyLock;
|
|
use std::time::Duration;
|
|
|
|
use criterion::{BatchSize, Criterion, SamplingMode, black_box, criterion_group, criterion_main};
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::query::retrieve::Retrieve;
|
|
use tidaldb::query::search::Search;
|
|
use tidaldb::schema::{
|
|
DecaySpec, EntityId, EntityKind, SchemaBuilder, TextFieldType, Timestamp, Window,
|
|
};
|
|
use tidaldb::storage::indexes::filter::FilterExpr;
|
|
|
|
/// Total number of items written into the shared benchmark database.
const N_ITEMS: u64 = 1_000_000;
/// Number of distinct creators implied by the dataset layout.
const N_CREATORS: u64 = 10_000;
/// Number of consecutive item indices that share one creator id.
const ITEMS_PER_CREATOR: u64 = 100;
/// Number of distinct categories that items are spread across.
const N_CATEGORIES: u64 = 20;
/// Vector dimensionality named in the dataset description.
///
/// NOTE(review): not referenced by the visible benchmark setup (no vectors are
/// written), hence the `dead_code` allowance — confirm whether it is still needed.
#[allow(dead_code)]
const DIM: usize = 128;

// Creator ids are derived as `item_index / ITEMS_PER_CREATOR`, so the three
// sizing constants must agree. Fail the build instead of letting them drift.
const _: () = assert!(
    N_CREATORS * ITEMS_PER_CREATOR == N_ITEMS,
    "N_CREATORS * ITEMS_PER_CREATOR must equal N_ITEMS"
);

/// Backing storage for [`CATEGORIES`].
///
/// The array length is tied to `N_CATEGORIES`, so adding or removing a name
/// without updating the constant (or vice versa) is a compile error rather
/// than an out-of-bounds panic or a silently unused category at run time.
const CATEGORY_NAMES: [&str; N_CATEGORIES as usize] = [
    "technology",
    "sports",
    "music",
    "gaming",
    "cooking",
    "travel",
    "science",
    "arts",
    "news",
    "fashion",
    "finance",
    "health",
    "education",
    "entertainment",
    "politics",
    "nature",
    "history",
    "automotive",
    "pets",
    "fitness",
];

/// Categories pool for round-robin assignment.
static CATEGORIES: &[&str] = &CATEGORY_NAMES;
|
|
|
|
fn scale_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
let _ = builder
|
|
.signal(
|
|
"view",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(7 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::TwentyFourHours, Window::SevenDays])
|
|
.velocity(false)
|
|
.add();
|
|
let _ = builder
|
|
.signal(
|
|
"like",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(30 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::TwentyFourHours])
|
|
.velocity(false)
|
|
.add();
|
|
builder.text_field("title", TextFieldType::Text);
|
|
builder.text_field("category", TextFieldType::Keyword);
|
|
builder.build().unwrap()
|
|
}
|
|
|
|
/// Build the shared 1M-item database. Called exactly once.
|
|
fn build_scale_db() -> TidalDb {
|
|
eprintln!("[scale bench] Building 1M-item database (this takes ~30s)...");
|
|
let db = TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(scale_schema())
|
|
.open()
|
|
.unwrap();
|
|
|
|
let base_ns = 1_708_000_000_000_000_000u64;
|
|
|
|
for i in 0..N_ITEMS {
|
|
let item_id = EntityId::new(i + 1);
|
|
let creator_id = i / ITEMS_PER_CREATOR; // 0..9999
|
|
let category = CATEGORIES[(i % N_CATEGORIES) as usize];
|
|
|
|
let mut meta = HashMap::new();
|
|
meta.insert(
|
|
"title".to_string(),
|
|
format!("Content {i} by creator {creator_id} about {category}"),
|
|
);
|
|
meta.insert("category".to_string(), category.to_string());
|
|
meta.insert("creator".to_string(), creator_id.to_string());
|
|
db.write_item_with_metadata(item_id, &meta).unwrap();
|
|
|
|
// 10% view coverage.
|
|
if i % 10 == 0 {
|
|
let ts = Timestamp::from_nanos(base_ns - (i % 86400) * 1_000_000_000);
|
|
db.signal("view", item_id, 1.0, ts).unwrap();
|
|
}
|
|
// 5% like coverage.
|
|
if i % 20 == 0 {
|
|
let ts = Timestamp::from_nanos(base_ns - (i % 86400) * 2_000_000_000);
|
|
db.signal("like", item_id, 1.0, ts).unwrap();
|
|
}
|
|
}
|
|
|
|
// Wait for text syncer to commit all 1M items (1000 batch commits at 1000/commit).
|
|
// The syncer commits every 1000 items, so 1M items = ~1000 commits. Allow time.
|
|
eprintln!("[scale bench] Waiting for text index to commit...");
|
|
std::thread::sleep(Duration::from_millis(3000));
|
|
db.reload_text_index().unwrap();
|
|
eprintln!("[scale bench] Database ready: {N_ITEMS} items.");
|
|
|
|
db
|
|
}
|
|
|
|
/// Shared 1M-item DB: built once, reused by all benchmarks.
|
|
static SCALE_DB: LazyLock<TidalDb> = LazyLock::new(build_scale_db);
|
|
|
|
// ── RETRIEVE benchmarks ───────────────────────────────────────────────────────
|
|
|
|
/// RETRIEVE: "for_you" profile — signal-scored ranking over full universe.
|
|
fn bench_retrieve_for_you(c: &mut Criterion) {
|
|
let db: &TidalDb = &SCALE_DB;
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("for_you")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
let mut group = c.benchmark_group("retrieve_1m");
|
|
group.sample_size(10);
|
|
group.measurement_time(Duration::from_secs(30));
|
|
group.sampling_mode(SamplingMode::Flat);
|
|
|
|
group.bench_function("for_you", |b| {
|
|
b.iter(|| db.retrieve(black_box(&query)).unwrap());
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// RETRIEVE: "trending" profile — top-viewed items.
|
|
fn bench_retrieve_trending(c: &mut Criterion) {
|
|
let db: &TidalDb = &SCALE_DB;
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("trending")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
let mut group = c.benchmark_group("retrieve_1m");
|
|
group.sample_size(10);
|
|
group.measurement_time(Duration::from_secs(30));
|
|
group.sampling_mode(SamplingMode::Flat);
|
|
|
|
group.bench_function("trending", |b| {
|
|
b.iter(|| db.retrieve(black_box(&query)).unwrap());
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// RETRIEVE: "new" profile — recency-filtered by category.
|
|
fn bench_retrieve_new_filtered(c: &mut Criterion) {
|
|
let db: &TidalDb = &SCALE_DB;
|
|
|
|
let query = Retrieve::builder()
|
|
.profile("new")
|
|
.limit(20)
|
|
.filter(FilterExpr::CategoryEq("technology".into()))
|
|
.build()
|
|
.unwrap();
|
|
|
|
let mut group = c.benchmark_group("retrieve_1m");
|
|
group.sample_size(10);
|
|
group.measurement_time(Duration::from_secs(30));
|
|
group.sampling_mode(SamplingMode::Flat);
|
|
|
|
group.bench_function("new_filtered", |b| {
|
|
b.iter(|| db.retrieve(black_box(&query)).unwrap());
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
// ── SEARCH benchmarks ─────────────────────────────────────────────────────────
|
|
|
|
/// SEARCH: text-only query over 1M items.
|
|
fn bench_search_text_only(c: &mut Criterion) {
|
|
let db: &TidalDb = &SCALE_DB;
|
|
|
|
let query = Search::builder()
|
|
.query("technology content creator")
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
let mut group = c.benchmark_group("search_1m");
|
|
group.sample_size(10);
|
|
group.measurement_time(Duration::from_secs(30));
|
|
group.sampling_mode(SamplingMode::Flat);
|
|
|
|
group.bench_function("text_only", |b| {
|
|
b.iter(|| db.search(black_box(&query)).unwrap());
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
/// SEARCH: text query with category filter.
|
|
fn bench_search_text_filtered(c: &mut Criterion) {
|
|
let db: &TidalDb = &SCALE_DB;
|
|
|
|
let query = Search::builder()
|
|
.query("gaming sports fitness")
|
|
.limit(20)
|
|
.filter(FilterExpr::CategoryEq("sports".into()))
|
|
.build()
|
|
.unwrap();
|
|
|
|
let mut group = c.benchmark_group("search_1m");
|
|
group.sample_size(10);
|
|
group.measurement_time(Duration::from_secs(30));
|
|
group.sampling_mode(SamplingMode::Flat);
|
|
|
|
group.bench_function("text_filtered", |b| {
|
|
b.iter(|| db.search(black_box(&query)).unwrap());
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
// ── Signal write benchmark ────────────────────────────────────────────────────
|
|
|
|
/// Signal write: amortized cost over rotating entity IDs.
|
|
///
|
|
/// Measures the hot path for incremental signal ingestion at 1M-item scale.
|
|
/// Rotates through 1000 entity IDs to represent a realistic write workload.
|
|
fn bench_signal_write(c: &mut Criterion) {
|
|
let db: &TidalDb = &SCALE_DB;
|
|
|
|
let mut group = c.benchmark_group("signal_write_1m");
|
|
group.sample_size(10);
|
|
group.measurement_time(Duration::from_secs(30));
|
|
group.sampling_mode(SamplingMode::Flat);
|
|
|
|
let mut counter = 0u64;
|
|
let ts = Timestamp::now();
|
|
|
|
group.bench_function("write_rotating_1k_entities", |b| {
|
|
b.iter_batched(
|
|
|| {
|
|
// Rotate through 1000 entity IDs in the 1M range.
|
|
let id = EntityId::new((counter % 1000) + 1);
|
|
counter += 1;
|
|
id
|
|
},
|
|
|id| db.signal("view", black_box(id), 1.0, ts).unwrap(),
|
|
BatchSize::SmallInput,
|
|
);
|
|
});
|
|
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
scale_benches,
|
|
bench_retrieve_for_you,
|
|
bench_retrieve_trending,
|
|
bench_retrieve_new_filtered,
|
|
bench_search_text_only,
|
|
bench_search_text_filtered,
|
|
bench_signal_write,
|
|
);
|
|
criterion_main!(scale_benches);
|