tidaldb/tidal/benches/storage.rs
jordan 29400d48db feat: implement Milestone 1 phases 1-3 — schema, WAL, and storage layer
Implements the foundation of tidalDB's data pipeline:

**Phase 1 – Schema primitives**
- EntityId newtype (u64, big-endian ordering)
- SignalTypeDefinition with pre-computed decay λ, deduped/sorted windows
- SchemaBuilder with full constraint validation (duplicates, identifiers,
  half-life, windows, velocity)
- LumenError wrapping all subsystems with required From impls

**Phase 2 – Write-Ahead Log**
- Length-prefixed, BLAKE3-protected entry format
- Group-commit writer (batch up to 100 events / 10 ms)
- Double-buffered content-hash deduplication
- Checkpoint, truncation, and crash-recovery with full replay
- Integration, property, and UAT tests (incl. 5,500-event deterministic UAT)
- Proptest coverage scaled to 10 000 events/run (was ≤500) to meet
  acceptance criterion; cases reduced 100→10 to keep runtime comparable

**Phase 3 – Storage engine**
- StorageEngine trait (get/put/delete/scan/batch/flush)
- Key encoding: [EntityId][0x00][Tag][suffix] with ordering/prefix helpers
- InMemoryBackend (BTreeMap + RwLock)
- FjallStorage with three isolated keyspaces and atomic batch helper
- Property tests for key ordering and round-trip correctness

Also adds planning docs for phases 4-5, research docs, architecture
overview, and roadmap updates.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-20 16:43:24 -07:00

199 lines
6.4 KiB
Rust

use criterion::{BatchSize, Criterion, criterion_group, criterion_main};
use tidaldb::schema::{EntityId, EntityKind};
use tidaldb::storage::{
FjallStorage, InMemoryBackend, StorageEngine, Tag, WriteBatch, encode_key, entity_prefix,
};
/// Benchmarks 10,000 single-key `put` calls issued in ascending `EntityId` order.
///
/// Two variants are registered in the `sequential_put` group:
/// - `in_memory_10k` writes into a fresh `InMemoryBackend`;
/// - `fjall_10k` writes into the `EntityKind::Item` backend of a `FjallStorage`
///   opened in a fresh temporary directory.
///
/// Every iteration starts from an empty store built by the setup closure. The
/// store is lent to the routine with `iter_batched_ref`, so tearing it down
/// (freeing 10k entries, closing the store, deleting the temp directory) happens
/// after the timer has stopped instead of being counted as `put` time.
fn bench_sequential_put(c: &mut Criterion) {
    let mut group = c.benchmark_group("sequential_put");

    group.bench_function("in_memory_10k", |b| {
        b.iter_batched_ref(
            InMemoryBackend::new,
            |engine| {
                for i in 0u64..10_000 {
                    let key = encode_key(EntityId::new(i), Tag::Sig, b"");
                    engine.put(&key, b"value_data_here").unwrap();
                }
            },
            // Each input ends its iteration holding 10k entries and stays alive until
            // the batch finishes, so keep the number of live inputs low.
            BatchSize::LargeInput,
        );
    });

    group.bench_function("fjall_10k", |b| {
        b.iter_batched_ref(
            || {
                let dir = tempfile::tempdir().unwrap();
                let storage = FjallStorage::open(dir.path()).unwrap();
                // `storage` comes first: tuple fields drop in order, so the store is
                // dropped before the directory it lives in is removed.
                (storage, dir)
            },
            |(storage, _dir)| {
                let items = storage.backend(EntityKind::Item);
                for i in 0u64..10_000 {
                    let key = encode_key(EntityId::new(i), Tag::Sig, b"");
                    items.put(&key, b"value_data_here").unwrap();
                }
            },
            // An opened on-disk store is a limited resource; never hold more than one.
            BatchSize::PerIteration,
        );
    });

    group.finish();
}
/// Benchmarks 10,000 point lookups against a store pre-populated with 10,000 keys.
///
/// Two variants are registered in the `random_get` group:
/// - `in_memory_10k` reads from an `InMemoryBackend`;
/// - `fjall_10k` reads from the `EntityKind::Item` backend of a `FjallStorage`
///   opened in a fresh temporary directory.
///
/// Lookup keys are encoded during setup in a deterministic scattered order (a
/// fixed-stride permutation of the id range), so the timed routine measures only
/// `get` calls with a non-sequential access pattern. The store is lent to the
/// routine with `iter_batched_ref`, which keeps its teardown out of the timing.
fn bench_random_get(c: &mut Criterion) {
    use std::hint::black_box;

    /// Number of keys stored and looked up per iteration.
    const KEY_COUNT: u64 = 10_000;
    /// Stride used to scatter lookups. 7919 is prime and shares no factor with
    /// `KEY_COUNT` (2^4 * 5^4), so `i * STRIDE % KEY_COUNT` visits every id once.
    const STRIDE: u64 = 7_919;

    /// Yields every id in `0..KEY_COUNT` exactly once, in scattered order.
    fn scattered_ids() -> impl Iterator<Item = u64> {
        (0..KEY_COUNT).map(|i| i * STRIDE % KEY_COUNT)
    }

    let mut group = c.benchmark_group("random_get");

    group.bench_function("in_memory_10k", |b| {
        b.iter_batched_ref(
            || {
                let engine = InMemoryBackend::new();
                for i in 0..KEY_COUNT {
                    let key = encode_key(EntityId::new(i), Tag::Sig, b"");
                    engine.put(&key, b"value_data_here").unwrap();
                }
                let lookups: Vec<_> = scattered_ids()
                    .map(|id| encode_key(EntityId::new(id), Tag::Sig, b""))
                    .collect();
                (engine, lookups)
            },
            |(engine, lookups)| {
                for key in lookups.iter() {
                    // Keep the optimizer from discarding the lookup result.
                    black_box(engine.get(key).unwrap());
                }
            },
            // Each input is a fully populated 10k-entry store; keep few alive at once.
            BatchSize::LargeInput,
        );
    });

    group.bench_function("fjall_10k", |b| {
        b.iter_batched_ref(
            || {
                let dir = tempfile::tempdir().unwrap();
                let storage = FjallStorage::open(dir.path()).unwrap();
                let items = storage.backend(EntityKind::Item);
                for i in 0..KEY_COUNT {
                    let key = encode_key(EntityId::new(i), Tag::Sig, b"");
                    items.put(&key, b"value_data_here").unwrap();
                }
                let lookups: Vec<_> = scattered_ids()
                    .map(|id| encode_key(EntityId::new(id), Tag::Sig, b""))
                    .collect();
                // `storage` precedes `dir`: tuple fields drop in order, so the store
                // is dropped before the directory it lives in is removed.
                (storage, lookups, dir)
            },
            |(storage, lookups, _dir)| {
                let items = storage.backend(EntityKind::Item);
                for key in lookups.iter() {
                    black_box(items.get(key).unwrap());
                }
            },
            // An opened on-disk store is a limited resource; never hold more than one.
            BatchSize::PerIteration,
        );
    });

    group.finish();
}
/// Benchmarks a prefix scan over all keys of a single entity.
///
/// Setup stores ten keys for entity 42 in an `InMemoryBackend`: one per tag
/// (six keys) plus four additional `Tag::Evt` keys. The timed routine scans the
/// entity prefix, collects the results, and checks that ten entries came back.
///
/// The scan itself is tiny, so everything that is not the scan is kept out of
/// the timed section: the prefix is built during setup and the engine is lent
/// to the routine with `iter_batched_ref` so it is freed after timing.
fn bench_prefix_scan(c: &mut Criterion) {
    use std::hint::black_box;

    let mut group = c.benchmark_group("prefix_scan");

    // Scan an entity with 10 keys (various tags/suffixes)
    group.bench_function("in_memory_10_keys", |b| {
        b.iter_batched_ref(
            || {
                let engine = InMemoryBackend::new();
                let id = EntityId::new(42);

                // One key per tag: 6 keys.
                let tags = [Tag::Evt, Tag::Sig, Tag::Meta, Tag::Rel, Tag::Mv, Tag::Idx];
                for (i, tag) in tags.iter().enumerate() {
                    let key = encode_key(id, *tag, format!("suffix_{i}").as_bytes());
                    engine.put(&key, b"data").unwrap();
                }

                // Add extra keys under same tag: 4 more, 10 in total.
                for i in 0..4 {
                    let key = encode_key(id, Tag::Evt, format!("evt_{i}").as_bytes());
                    engine.put(&key, b"event_data").unwrap();
                }

                (engine, entity_prefix(id))
            },
            |(engine, prefix)| {
                // `black_box` on the prefix stops the scan from being specialized
                // for a value the optimizer can see is constant.
                let results = engine
                    .scan_prefix(black_box(&*prefix))
                    .collect::<Result<Vec<_>, _>>()
                    .unwrap();
                assert_eq!(results.len(), 10);
                black_box(results);
            },
            BatchSize::SmallInput,
        );
    });

    group.finish();
}
fn bench_batch_write(c: &mut Criterion) {
let mut group = c.benchmark_group("batch_write");
group.bench_function("in_memory_100_ops", |b| {
b.iter_batched(
|| {
let engine = InMemoryBackend::new();
let mut batch = WriteBatch::with_capacity(100);
for i in 0u64..100 {
let key = encode_key(EntityId::new(i), Tag::Sig, b"");
batch.put(key, b"value".to_vec());
}
(engine, batch)
},
|(engine, batch)| {
engine.write_batch(batch).unwrap();
},
BatchSize::SmallInput,
);
});
group.bench_function("fjall_100_ops", |b| {
b.iter_batched(
|| {
let dir = tempfile::tempdir().unwrap();
let storage = FjallStorage::open(dir.path()).unwrap();
let mut batch = WriteBatch::with_capacity(100);
for i in 0u64..100 {
let key = encode_key(EntityId::new(i), Tag::Sig, b"");
batch.put(key, b"value".to_vec());
}
(dir, storage, batch)
},
|(_dir, storage, batch)| {
let items = storage.backend(EntityKind::Item);
items.write_batch(batch).unwrap();
},
BatchSize::SmallInput,
);
});
group.bench_function("in_memory_1000_ops", |b| {
b.iter_batched(
|| {
let engine = InMemoryBackend::new();
let mut batch = WriteBatch::with_capacity(1000);
for i in 0u64..1000 {
let key = encode_key(EntityId::new(i), Tag::Sig, b"");
batch.put(key, b"value".to_vec());
}
(engine, batch)
},
|(engine, batch)| {
engine.write_batch(batch).unwrap();
},
BatchSize::SmallInput,
);
});
group.finish();
}
// Collects the four storage benchmarks above into a Criterion group named
// `benches`; they are listed in the order: put, get, prefix scan, batch write.
criterion_group!(
benches,
bench_sequential_put,
bench_random_get,
bench_prefix_scan,
bench_batch_write,
);
// Entry point of the benchmark binary: hands the `benches` group to Criterion.
criterion_main!(benches);