tidaldb/tidal/tests/storage.rs
jordan 29400d48db feat: implement Milestone 1 phases 1-3 — schema, WAL, and storage layer
Implements the foundation of tidalDB's data pipeline:

**Phase 1 – Schema primitives**
- EntityId newtype (u64, big-endian ordering)
- SignalTypeDefinition with pre-computed decay λ, deduped/sorted windows
- SchemaBuilder with full constraint validation (duplicates, identifiers,
  half-life, windows, velocity)
- LumenError wrapping all subsystems with required From impls

**Phase 2 – Write-Ahead Log**
- Length-prefixed, BLAKE3-protected entry format
- Group-commit writer (batch up to 100 events / 10 ms)
- Double-buffered content-hash deduplication
- Checkpoint, truncation, and crash-recovery with full replay
- Integration, property, and UAT tests (incl. 5,500-event deterministic UAT)
- Proptest coverage scaled to 10,000 events/run (was ≤500) to meet the
  acceptance criterion; cases reduced 100→10 to keep runtime comparable

**Phase 3 – Storage engine**
- StorageEngine trait (get/put/delete/scan/batch/flush)
- Key encoding: [EntityId][0x00][Tag][suffix] with ordering/prefix helpers
- InMemoryBackend (BTreeMap + RwLock)
- FjallStorage with three isolated keyspaces and atomic batch helper
- Property tests for key ordering and round-trip correctness

Also adds planning docs for phases 4-5, research docs, architecture
overview, and roadmap updates.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-20 16:43:24 -07:00

358 lines
11 KiB
Rust

use tidaldb::schema::EntityId;
use tidaldb::storage::{
FjallStorage, InMemoryBackend, StorageEngine, StorageError, Tag, WriteBatch, encode_key,
entity_prefix, entity_tag_prefix, parse_key,
};
// =============================================================================
// Shared test suite — runs identical tests against both backends
// =============================================================================
/// Drives one `StorageEngine` implementation through the operations that
/// every backend in this file is expected to agree on: point reads and
/// writes, overwrite, delete, prefix scans, batched writes, and flush.
///
/// Panics on the first operation that errors or returns an unexpected result.
fn storage_engine_tests(engine: &dyn StorageEngine) {
    // A freshly written key reads back with the stored value.
    engine.put(b"key1", b"value1").unwrap();
    let stored = engine.get(b"key1").unwrap();
    assert_eq!(stored.as_deref(), Some(&b"value1"[..]));

    // A key that was never written reads back as `None`.
    let absent = engine.get(b"nonexistent").unwrap();
    assert_eq!(absent, None);

    // A second put on the same key replaces the earlier value.
    engine.put(b"key1", b"updated").unwrap();
    let replaced = engine.get(b"key1").unwrap();
    assert_eq!(replaced.as_deref(), Some(&b"updated"[..]));

    // After a delete the key is gone.
    engine.delete(b"key1").unwrap();
    let removed = engine.get(b"key1").unwrap();
    assert_eq!(removed, None);

    // Deleting a key that does not exist is not an error.
    engine.delete(b"nope").unwrap();

    // Prefix scan: three keys share "pfx_", one key does not.
    engine.put(b"pfx_a", b"1").unwrap();
    engine.put(b"pfx_b", b"2").unwrap();
    engine.put(b"pfx_c", b"3").unwrap();
    engine.put(b"other", b"x").unwrap();
    let hits = engine
        .scan_prefix(b"pfx_")
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(hits.len(), 3);
    // The matching keys are expected in ascending order.
    for (position, expected_key) in [b"pfx_a", b"pfx_b", b"pfx_c"].iter().enumerate() {
        assert_eq!(hits[position].0, *expected_key);
    }

    // A prefix that matches nothing yields an empty scan.
    let misses = engine
        .scan_prefix(b"zzz")
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert!(misses.is_empty());

    // Write batch: two puts plus a delete of a key written beforehand.
    engine.put(b"batch_del", b"old").unwrap();
    let mut pending = WriteBatch::new();
    pending.put(b"batch_a".to_vec(), b"va".to_vec());
    pending.put(b"batch_b".to_vec(), b"vb".to_vec());
    pending.delete(b"batch_del".to_vec());
    engine.write_batch(pending).unwrap();

    let batch_a = engine.get(b"batch_a").unwrap();
    assert_eq!(batch_a.as_deref(), Some(&b"va"[..]));
    let batch_b = engine.get(b"batch_b").unwrap();
    assert_eq!(batch_b.as_deref(), Some(&b"vb"[..]));
    let batch_del = engine.get(b"batch_del").unwrap();
    assert_eq!(batch_del, None);

    // Flushing must succeed.
    engine.flush().unwrap();
}
/// Runs the shared `StorageEngine` suite against a fresh `InMemoryBackend`.
#[test]
fn shared_suite_in_memory() {
    let backend = InMemoryBackend::new();
    storage_engine_tests(&backend);
}
/// Runs the shared `StorageEngine` suite against the `EntityKind::Item`
/// backend of a `FjallStorage` opened in a temporary directory.
#[test]
fn shared_suite_fjall() {
    // The temp dir binding stays alive until the end of the test.
    let scratch = tempfile::tempdir().unwrap();
    let fjall = FjallStorage::open(scratch.path()).unwrap();
    storage_engine_tests(fjall.backend(tidaldb::schema::EntityKind::Item));
}
// =============================================================================
// Key encoding integration tests with real storage
// =============================================================================
/// Stores encoded keys for two entities in `engine` and checks that
/// entity-level and tag-level prefix scans return the expected number of
/// entries, and that the scanned keys parse back to the owning entity.
fn key_encoding_tests(engine: &dyn StorageEngine) {
    let first = EntityId::new(1000);
    let second = EntityId::new(2000);

    // Entity 1000 gets one key per tag; entity 2000 gets a single Evt key.
    let first_evt = encode_key(first, Tag::Evt, b"event1");
    let first_sig = encode_key(first, Tag::Sig, b"sig1");
    let first_meta = encode_key(first, Tag::Meta, b"");
    let second_evt = encode_key(second, Tag::Evt, b"event2");

    engine.put(&first_evt, b"evt_data").unwrap();
    engine.put(&first_sig, b"sig_data").unwrap();
    engine.put(&first_meta, b"meta_data").unwrap();
    engine.put(&second_evt, b"evt2_data").unwrap();

    // Scanning entity 1000's prefix should find all three of its keys.
    let first_prefix = entity_prefix(first);
    let first_hits = engine
        .scan_prefix(&first_prefix)
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(
        first_hits.len(),
        3,
        "entity prefix scan should return all 3 keys for entity 1000"
    );

    // Each scanned key must parse and report entity 1000 as its owner.
    first_hits.iter().for_each(|(raw_key, _)| {
        let (owner, _, _) = parse_key(raw_key).expect("key should parse");
        assert_eq!(owner, first);
    });

    // Narrowing the scan to entity 1000's Evt tag leaves a single entry.
    let first_evt_prefix = entity_tag_prefix(first, Tag::Evt);
    let first_evt_hits = engine
        .scan_prefix(&first_evt_prefix)
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(first_evt_hits.len(), 1);

    // Entity 2000's prefix scan sees only its own key.
    let second_prefix = entity_prefix(second);
    let second_hits = engine
        .scan_prefix(&second_prefix)
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(second_hits.len(), 1);
}
/// Runs the key-encoding checks against a fresh `InMemoryBackend`.
#[test]
fn key_encoding_in_memory() {
    let backend = InMemoryBackend::new();
    key_encoding_tests(&backend);
}
/// Runs the key-encoding checks against the `EntityKind::Item` backend of a
/// `FjallStorage` opened in a temporary directory.
#[test]
fn key_encoding_fjall() {
    // The temp dir binding stays alive until the end of the test.
    let scratch = tempfile::tempdir().unwrap();
    let fjall = FjallStorage::open(scratch.path()).unwrap();
    key_encoding_tests(fjall.backend(tidaldb::schema::EntityKind::Item));
}
// =============================================================================
// FjallStorage-specific tests
// =============================================================================
/// A value written and flushed through one `FjallStorage` handle must be
/// readable after that handle is dropped and the same directory is reopened.
#[test]
fn fjall_persistence_across_reopen() {
    let scratch = tempfile::tempdir().unwrap();
    let meta_key = encode_key(EntityId::new(42), Tag::Meta, b"");

    // First session: write, flush everything, then release the handle.
    let writer = FjallStorage::open(scratch.path()).unwrap();
    writer
        .backend(tidaldb::schema::EntityKind::Item)
        .put(&meta_key, b"persisted_value")
        .unwrap();
    writer.flush_all().unwrap();
    drop(writer);

    // Second session: reopen the same directory and read the value back.
    let reader = FjallStorage::open(scratch.path()).unwrap();
    let found = reader
        .backend(tidaldb::schema::EntityKind::Item)
        .get(&meta_key)
        .unwrap();
    assert_eq!(found.as_deref(), Some(&b"persisted_value"[..]));
}
/// The same encoded key written through the `Item` and `User` backends must
/// keep separate values, and must be absent from the `Creator` backend, which
/// is never written to.
#[test]
fn fjall_entity_kind_isolation_with_encoded_keys() {
    let scratch = tempfile::tempdir().unwrap();
    let fjall = FjallStorage::open(scratch.path()).unwrap();
    let shared_key = encode_key(EntityId::new(1), Tag::Meta, b"");

    // Write a different value under the identical key in two entity kinds.
    let items = fjall.backend(tidaldb::schema::EntityKind::Item);
    items.put(&shared_key, b"item_meta").unwrap();
    let users = fjall.backend(tidaldb::schema::EntityKind::User);
    users.put(&shared_key, b"user_meta").unwrap();

    // Each kind sees only its own value; the untouched kind sees nothing.
    let expectations = [
        (tidaldb::schema::EntityKind::Item, Some(&b"item_meta"[..])),
        (tidaldb::schema::EntityKind::User, Some(&b"user_meta"[..])),
        (tidaldb::schema::EntityKind::Creator, None),
    ];
    for (kind, expected) in expectations {
        let found = fjall.backend(kind).get(&shared_key).unwrap();
        assert_eq!(found.as_deref(), expected);
    }
}
// =============================================================================
// StorageError tests
// =============================================================================
/// Converting a `std::io::Error` yields `StorageError::Io`, and the rendered
/// message still contains the original I/O error text.
#[test]
fn storage_error_from_io() {
    let source = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "access denied");
    let converted = StorageError::from(source);

    assert!(matches!(converted, StorageError::Io(_)));

    let rendered = converted.to_string();
    assert!(rendered.contains("access denied"));
}
/// Checks the `Display` text of the `Corruption`, `Closed`, and
/// `BatchConflict` variants of `StorageError`.
#[test]
fn storage_error_display_all_variants() {
    // Corruption renders both a fixed label and the supplied message.
    let corruption = StorageError::Corruption {
        message: "bad data".into(),
    };
    for fragment in ["data corruption", "bad data"] {
        assert!(corruption.to_string().contains(fragment));
    }

    // The unit variants render as fixed strings.
    let fixed_messages = [
        (StorageError::Closed, "storage closed"),
        (StorageError::BatchConflict, "batch conflict"),
    ];
    for (variant, expected) in fixed_messages {
        assert_eq!(variant.to_string(), expected);
    }
}
// =============================================================================
// Property tests
// =============================================================================
mod proptests {
use super::*;
use proptest::prelude::*;
proptest! {
/// Key encoding preserves EntityId ordering when compared as byte slices.
#[test]
fn key_ordering_preserves_entity_id_ordering(a: u64, b: u64) {
let key_a = encode_key(EntityId::new(a), Tag::Sig, b"");
let key_b = encode_key(EntityId::new(b), Tag::Sig, b"");
prop_assert_eq!(a.cmp(&b), key_a.cmp(&key_b));
}
/// Prefix scan returns exactly the keys with matching prefix
/// (tested against InMemoryBackend).
#[test]
fn prefix_scan_correctness(
entity_ids in proptest::collection::vec(1u64..10000, 1..20),
target_id in 1u64..10000,
) {
let engine = InMemoryBackend::new();
let target = EntityId::new(target_id);
// Insert keys for various entities
for &id_val in &entity_ids {
let id = EntityId::new(id_val);
let key = encode_key(id, Tag::Meta, b"");
engine.put(&key, b"data").unwrap();
}
// Also ensure target entity has a key
let target_key = encode_key(target, Tag::Meta, b"");
engine.put(&target_key, b"target").unwrap();
// Scan for target entity
let prefix = entity_prefix(target);
let results: Vec<_> = engine
.scan_prefix(&prefix)
.collect::<Result<Vec<_>, _>>()
.unwrap();
// All results must be for the target entity
for (k, _) in &results {
prop_assert!(k.starts_with(&prefix));
let (parsed_id, _, _) = parse_key(k).unwrap();
prop_assert_eq!(parsed_id, target);
}
// We always get at least 1 result (the target key we inserted)
prop_assert!(!results.is_empty());
}
/// Put/get round-trip for arbitrary byte sequences.
#[test]
fn put_get_roundtrip_arbitrary(
key in proptest::collection::vec(any::<u8>(), 1..200),
value in proptest::collection::vec(any::<u8>(), 0..2000),
) {
let engine = InMemoryBackend::new();
engine.put(&key, &value).unwrap();
let retrieved = engine.get(&key).unwrap().unwrap();
prop_assert_eq!(retrieved, value);
}
/// Batch writes make all ops visible atomically.
#[test]
fn batch_all_or_nothing(
ops in proptest::collection::vec(
(
proptest::collection::vec(any::<u8>(), 1..50),
proptest::collection::vec(any::<u8>(), 1..100),
),
1..20
)
) {
let engine = InMemoryBackend::new();
let mut batch = WriteBatch::new();
for (key, value) in &ops {
batch.put(key.clone(), value.clone());
}
engine.write_batch(batch).unwrap();
// All ops should be visible
for (key, value) in &ops {
let retrieved = engine.get(key).unwrap();
prop_assert_eq!(retrieved.as_deref(), Some(value.as_slice()));
}
}
}
}