tidaldb/tidal/benches/recovery.rs
jordan f4cfd6c81f feat: complete M8 replication primitives + forage enhancements + docs
Milestone 8 (phases 1-4):
- Shard-aware WAL segment naming, BatchHeader v2, ShardRouter
- Transport trait, InProcessTransport, WalShipper, FollowerDb
- HLC, PNCounter, LWWRegister, CrdtSignalState, ReconciliationEngine
- Session replication bridge with SeqNo/HWM, idempotency store

Forage application:
- Multi-source discovery engine with MAB exploration
- Embedding-based label system, server handlers, UI refresh

Other:
- QUICKSTART.md, README.md, milestone-8 planning docs
- Hard negative union semantics, RLHF export enhancements
- Recovery benchmark and visibility test expansions
- Split 8 oversized source files per CODING_GUIDELINES §9

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 13:17:19 -07:00

220 lines
8.3 KiB
Rust

#![allow(clippy::unwrap_used, clippy::cast_precision_loss)]
//! Criterion benchmarks for cold-start recovery time.
//!
//! Measures the open-to-ready latency when `TidalDb::builder().open()` replays
//! a WAL + checkpoint from a previously populated database. This is the metric
//! operators care about most during restarts and crash recovery.
//!
//! ## Benchmarks
//!
//! - **`cold_start_10k_items`**: Measures checkpoint restore + in-memory index
//! rebuild from a clean checkpoint (no WAL backlog). This is the realistic
//! production recovery path for graceful shutdowns.
//!
//! - **`cold_start_with_wal_backlog_10k`**: Measures recovery from a checkpoint
//! plus 2K WAL backlog signals that were never checkpointed (simulating a crash
//! before checkpoint). The WAL backlog is re-injected after each iteration's
//! `close()` to ensure every iteration measures the same replay workload.
//!
//! ## Scale
//!
//! The Criterion benchmark uses 10K entities (scaled down from the 1M specified
//! in task-05 for local iteration speed). Hard SLA bounds are enforced by the
//! integration tests in `tests/m7_recovery_sla.rs` (run as part of `cargo test`).
//! The full 1M-item SLA test (`recovery_under_30_seconds`) is marked
//! `#[ignore = "expensive"]` and must be run explicitly:
//!
//! ```bash
//! cargo test --manifest-path tidal/Cargo.toml --test m7_recovery_sla -- --ignored
//! ```
use std::time::Duration;
use criterion::{Criterion, criterion_group, criterion_main};
use tidaldb::TidalDb;
use tidaldb::replication::ShardId;
use tidaldb::schema::{DecaySpec, EntityId, EntityKind, SchemaBuilder, Timestamp, Window};
use tidaldb::wal::checkpoint::CheckpointManager;
use tidaldb::wal::format::{EventRecord, MAX_EVENTS_PER_BATCH, encode_batch};
use tidaldb::wal::segment::segment_filename;
fn bench_schema() -> tidaldb::schema::Schema {
let mut builder = SchemaBuilder::new();
let _ = builder
.signal(
"view",
EntityKind::Item,
DecaySpec::Exponential {
half_life: Duration::from_secs(7 * 24 * 3600),
},
)
.windows(&[Window::AllTime])
.velocity(false)
.add();
builder.build().expect("valid schema")
}
fn generate_test_data(dir: &std::path::Path) {
let schema = bench_schema();
// Write 10K entities with signals (scaled down from 1M for CI).
// The benchmark is designed for local profiling; the smoke test (below)
// is the gatekeeping test for CI.
let db = TidalDb::builder()
.with_data_dir(dir)
.with_schema(schema)
.open()
.expect("open should succeed");
let base_ns = 1_000_000_000_000u64;
// Write signals for entities.
let entity_count = 10_000u64;
for entity_id in 1..=entity_count {
let ts = Timestamp::from_nanos(base_ns + entity_id * 1_000_000);
db.signal("view", EntityId::new(entity_id), 1.0, ts)
.expect("signal should succeed");
}
// Force clean shutdown (triggers checkpoint + WAL compaction).
db.close().expect("close should succeed");
}
/// Inject `backlog_count` raw WAL signal events into the WAL directory,
/// starting at sequence `checkpoint_seq + 1`. Uses entity IDs
/// `base_entity + 1 ..= base_entity + backlog_count` with `signal_type = 0`
/// ("view", the only signal in `bench_schema`, assigned ID 0 alphabetically).
fn inject_wal_backlog(data_dir: &std::path::Path, base_entity: u64, backlog_count: u64) {
let wal_dir = data_dir.join("wal");
std::fs::create_dir_all(&wal_dir).expect("create wal dir");
// Read the current checkpoint to determine where to start injected seqs.
let checkpoint = CheckpointManager::read(&wal_dir).expect("read checkpoint");
let checkpoint_seq = checkpoint.map_or(0, |(seq, _)| seq);
let base_ns = 1_000_000_000_000u64;
// Build event records for the backlog.
let events: Vec<EventRecord> = (1..=backlog_count)
.map(|i| EventRecord {
entity_id: base_entity + i,
signal_type: 0, // "view" is the only signal, assigned ID 0
weight: 1.0,
timestamp_nanos: base_ns + (base_entity + i) * 1_000_000,
})
.collect();
// Encode into batches (max 256 events per batch), write as a single
// WAL segment file starting at checkpoint_seq + 1.
let mut seq = checkpoint_seq + 1;
let seg_path = wal_dir.join(segment_filename(ShardId::SINGLE, seq));
let mut segment_bytes: Vec<u8> = Vec::new();
for chunk in events.chunks(usize::from(MAX_EVENTS_PER_BATCH)) {
let batch_ts = chunk[0].timestamp_nanos;
let batch_bytes = encode_batch(chunk, seq, batch_ts).expect("encode batch");
segment_bytes.extend_from_slice(&batch_bytes);
seq += chunk.len() as u64;
}
std::fs::write(&seg_path, &segment_bytes).expect("write WAL segment");
}
fn recovery_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("recovery");
// Recovery benchmarks can be slower -- allow more time.
group.sample_size(10);
group.measurement_time(Duration::from_secs(30));
// Generate the test data directory (done once, reused across iterations).
let dir = tempfile::tempdir().expect("tempdir");
generate_test_data(dir.path());
let schema = bench_schema();
group.bench_function("cold_start_10k_items", |b| {
b.iter(|| {
let db = TidalDb::builder()
.with_data_dir(dir.path())
.with_schema(schema.clone())
.open()
.expect("open should succeed");
// Verify the database is actually functional.
let count = db
.read_windowed_count(EntityId::new(1), "view", Window::AllTime)
.expect("read should succeed");
assert!(count > 0, "entity 1 should have signals after recovery");
db.close().expect("close should succeed");
});
});
group.finish();
}
/// Generates a data directory with a clean checkpoint (10K base items)
/// plus 2K WAL backlog signals on entity IDs `10_001..=12_000` that are
/// NOT covered by the checkpoint.
///
/// The approach:
/// 1. Write 10K base signals, call `close()` -> checkpoint + WAL compaction.
/// 2. Inject raw WAL segment files with 2K events at sequence numbers above
/// the checkpoint boundary, simulating events written to the WAL but never
/// checkpointed (i.e., a crash before checkpoint).
///
/// NOTE: We inject WAL segments post-close rather than using `std::mem::forget`
/// because `TidalDb::Drop` calls `shutdown_inner()` which checkpoints + compacts,
/// and `forget` would leak the file lock preventing reopen in the same process.
fn generate_wal_backlog_data(dir: &std::path::Path) {
// Phase 1: Write base signals and checkpoint via clean close.
generate_test_data(dir);
// Phase 2: Inject raw WAL events that simulate a crash before checkpoint.
inject_wal_backlog(dir, 10_000, 2_000);
}
fn recovery_with_wal_backlog_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("recovery_wal_backlog");
group.sample_size(10);
group.measurement_time(Duration::from_secs(30));
// Generate the test data directory once (checkpoint + WAL backlog).
let dir = tempfile::tempdir().expect("tempdir");
generate_wal_backlog_data(dir.path());
let schema = bench_schema();
group.bench_function("cold_start_with_wal_backlog_10k", |b| {
b.iter(|| {
let db = TidalDb::builder()
.with_data_dir(dir.path())
.with_schema(schema.clone())
.open()
.expect("open should succeed");
// Verify that a backlog entity was replayed from WAL.
let count = db
.read_windowed_count(EntityId::new(10_001), "view", Window::AllTime)
.expect("read should succeed");
assert!(
count > 0,
"backlog entity 10001 should have signals after WAL replay"
);
db.close().expect("close should succeed");
// Re-inject WAL backlog for next iteration (close() checkpoints it away)
inject_wal_backlog(dir.path(), 10_000, 2_000);
});
});
group.finish();
}
criterion_group!(
benches,
recovery_benchmark,
recovery_with_wal_backlog_benchmark
);
criterion_main!(benches);