- M5p1: BM25 text indexing via Tantivy with background syncer (0.26ms @ 10K docs)
- M5p2: RRF fusion layer combining BM25 + ANN scores (46µs @ 1K candidates)
- M5p3: unified Search query API (8-stage pipeline, BM25 + vector + ranking)
- M5p4: creator text + vector indexing and creator search executor (< 20ms @ 200 creators)
- Refactor db/mod.rs into focused sub-modules (creators, items, sessions, signals, etc.)
- Decompose monolithic files into directory modules (query/executor, ranking/diversity, etc.)
- Split brute.rs → brute/mod.rs + brute/tests.rs; extract search executor helpers
- Add benches: fusion, search, session, text_index
- Add M5 UAT test suites (m5_uat, m5_search, m5p4_creator_search, text_index)
- Update blog posts, roadmap, content strategy, and M5 planning docs
- Add tmp/ and .claude/worktrees/ to .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
165 lines
4.8 KiB
Rust
165 lines
4.8 KiB
Rust
#![allow(clippy::unwrap_used)]
|
|
//! Benchmarks for the session layer: signal writes, snapshot reads,
|
|
//! and retrieve queries with active session context.
|
|
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
use criterion::{Criterion, black_box, criterion_group, criterion_main};
|
|
use tidaldb::TidalDb;
|
|
use tidaldb::query::retrieve::{ProfileRef, RetrieveBuilder};
|
|
use tidaldb::schema::{
|
|
AgentPolicy, DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window,
|
|
};
|
|
|
|
fn session_schema() -> tidaldb::schema::Schema {
|
|
let mut builder = SchemaBuilder::new();
|
|
let _ = builder
|
|
.signal(
|
|
"reward",
|
|
EntityKind::Item,
|
|
DecaySpec::Exponential {
|
|
half_life: Duration::from_secs(7 * 24 * 3600),
|
|
},
|
|
)
|
|
.windows(&[Window::OneHour, Window::TwentyFourHours])
|
|
.velocity(false)
|
|
.add();
|
|
let _ = builder.session_policy(
|
|
"bench_policy",
|
|
AgentPolicy {
|
|
allowed_signals: vec!["reward".to_string()],
|
|
denied_signals: vec![],
|
|
max_session_duration: Duration::from_secs(3600),
|
|
max_signals_per_session: 1_000_000,
|
|
},
|
|
);
|
|
builder.build().unwrap()
|
|
}
|
|
|
|
/// Benchmark: `session_signal()` write throughput.
|
|
/// Target: < 200µs per call.
|
|
fn bench_session_signal(c: &mut Criterion) {
|
|
let db = TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(session_schema())
|
|
.open()
|
|
.unwrap();
|
|
|
|
let mut meta = HashMap::new();
|
|
meta.insert("title".to_string(), "bench-item".to_string());
|
|
db.write_item_with_metadata(EntityId::new(1), &meta)
|
|
.unwrap();
|
|
|
|
let handle = db
|
|
.start_session(1, "bench-agent", "bench_policy", HashMap::new())
|
|
.unwrap();
|
|
let entity = EntityId::new(1);
|
|
let ts = Timestamp::now();
|
|
|
|
c.bench_function("session_signal", |b| {
|
|
b.iter(|| {
|
|
db.session_signal(
|
|
black_box(&handle),
|
|
black_box("reward"),
|
|
black_box(entity),
|
|
black_box(1.0_f64),
|
|
black_box(ts),
|
|
black_box(None),
|
|
)
|
|
.unwrap();
|
|
});
|
|
});
|
|
}
|
|
|
|
/// Benchmark: `session_snapshot()` on an active session with 100 signals.
|
|
/// Target: < 50µs per call.
|
|
fn bench_session_snapshot(c: &mut Criterion) {
|
|
let db = TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(session_schema())
|
|
.open()
|
|
.unwrap();
|
|
|
|
let mut meta = HashMap::new();
|
|
meta.insert("title".to_string(), "bench-item".to_string());
|
|
db.write_item_with_metadata(EntityId::new(1), &meta)
|
|
.unwrap();
|
|
|
|
let handle = db
|
|
.start_session(2, "bench-agent", "bench_policy", HashMap::new())
|
|
.unwrap();
|
|
let session_id = handle.id;
|
|
let entity = EntityId::new(1);
|
|
let ts = Timestamp::now();
|
|
|
|
// Pre-load 100 signals.
|
|
for _ in 0..100 {
|
|
db.session_signal(&handle, "reward", entity, 1.0, ts, None)
|
|
.unwrap();
|
|
}
|
|
|
|
c.bench_function("session_snapshot_100_signals", |b| {
|
|
b.iter(|| db.session_snapshot(black_box(session_id)).unwrap());
|
|
});
|
|
}
|
|
|
|
/// Benchmark: `retrieve()` with FOR SESSION vs without, over 1K items.
|
|
/// Measures the overhead of session context in ranking.
|
|
/// Target: < 5ms overhead vs without session.
|
|
fn bench_retrieve_with_session(c: &mut Criterion) {
|
|
let db = TidalDb::builder()
|
|
.ephemeral()
|
|
.with_schema(session_schema())
|
|
.open()
|
|
.unwrap();
|
|
|
|
// Write 1K items.
|
|
for i in 1u64..=1000 {
|
|
let mut meta = HashMap::new();
|
|
meta.insert("title".to_string(), format!("item-{i}"));
|
|
db.write_item_with_metadata(EntityId::new(i), &meta)
|
|
.unwrap();
|
|
}
|
|
|
|
let handle = db
|
|
.start_session(3, "bench-agent", "bench_policy", HashMap::new())
|
|
.unwrap();
|
|
let session_id = handle.id;
|
|
let ts = Timestamp::now();
|
|
|
|
// Signal 10 entities to create a non-trivial session context.
|
|
for i in 1u64..=10 {
|
|
db.session_signal(&handle, "reward", EntityId::new(i), 1.0, ts, None)
|
|
.unwrap();
|
|
}
|
|
|
|
let query_with = RetrieveBuilder::new(EntityKind::Item, ProfileRef::new("hot"))
|
|
.limit(20)
|
|
.for_session(session_id)
|
|
.build()
|
|
.unwrap();
|
|
|
|
let query_without = RetrieveBuilder::new(EntityKind::Item, ProfileRef::new("hot"))
|
|
.limit(20)
|
|
.build()
|
|
.unwrap();
|
|
|
|
let mut group = c.benchmark_group("retrieve_1k_items");
|
|
group.bench_function("without_session", |b| {
|
|
b.iter(|| db.retrieve(black_box(&query_without)).unwrap());
|
|
});
|
|
group.bench_function("with_session", |b| {
|
|
b.iter(|| db.retrieve(black_box(&query_with)).unwrap());
|
|
});
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(
|
|
benches,
|
|
bench_session_signal,
|
|
bench_session_snapshot,
|
|
bench_retrieve_with_session
|
|
);
|
|
criterion_main!(benches);
|