Implements the foundation of tidalDB's data pipeline: **Phase 1 – Schema primitives** - EntityId newtype (u64, big-endian ordering) - SignalTypeDefinition with pre-computed decay λ, deduped/sorted windows - SchemaBuilder with full constraint validation (duplicates, identifiers, half-life, windows, velocity) - LumenError wrapping all subsystems with required From impls **Phase 2 – Write-Ahead Log** - Length-prefixed, BLAKE3-protected entry format - Group-commit writer (batch up to 100 events / 10 ms) - Double-buffered content-hash deduplication - Checkpoint, truncation, and crash-recovery with full replay - Integration, property, and UAT tests (incl. 5,500-event deterministic UAT) - Proptest coverage scaled to 10 000 events/run (was ≤500) to meet acceptance criterion; cases reduced 100→10 to keep runtime comparable **Phase 3 – Storage engine** - StorageEngine trait (get/put/delete/scan/batch/flush) - Key encoding: [EntityId][0x00][Tag][suffix] with ordering/prefix helpers - InMemoryBackend (BTreeMap + RwLock) - FjallStorage with three isolated keyspaces and atomic batch helper - Property tests for key ordering and round-trip correctness Also adds planning docs for phases 4-5, research docs, architecture overview, and roadmap updates. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
358 lines
11 KiB
Rust
358 lines
11 KiB
Rust
use tidaldb::schema::EntityId;
|
|
use tidaldb::storage::{
|
|
FjallStorage, InMemoryBackend, StorageEngine, StorageError, Tag, WriteBatch, encode_key,
|
|
entity_prefix, entity_tag_prefix, parse_key,
|
|
};
|
|
|
|
// =============================================================================
|
|
// Shared test suite — runs identical tests against both backends
|
|
// =============================================================================
|
|
|
|
/// Exercises the `StorageEngine` contract against any implementation.
|
|
fn storage_engine_tests(engine: &dyn StorageEngine) {
|
|
// -- put/get round-trip --
|
|
engine.put(b"key1", b"value1").unwrap();
|
|
assert_eq!(
|
|
engine.get(b"key1").unwrap().as_deref(),
|
|
Some(b"value1".as_slice())
|
|
);
|
|
|
|
// -- get missing returns None --
|
|
assert_eq!(engine.get(b"nonexistent").unwrap(), None);
|
|
|
|
// -- overwrite --
|
|
engine.put(b"key1", b"updated").unwrap();
|
|
assert_eq!(
|
|
engine.get(b"key1").unwrap().as_deref(),
|
|
Some(b"updated".as_slice())
|
|
);
|
|
|
|
// -- delete --
|
|
engine.delete(b"key1").unwrap();
|
|
assert_eq!(engine.get(b"key1").unwrap(), None);
|
|
|
|
// -- delete nonexistent is ok --
|
|
engine.delete(b"nope").unwrap();
|
|
|
|
// -- scan_prefix --
|
|
engine.put(b"pfx_a", b"1").unwrap();
|
|
engine.put(b"pfx_b", b"2").unwrap();
|
|
engine.put(b"pfx_c", b"3").unwrap();
|
|
engine.put(b"other", b"x").unwrap();
|
|
|
|
let results: Vec<_> = engine
|
|
.scan_prefix(b"pfx_")
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.unwrap();
|
|
assert_eq!(results.len(), 3);
|
|
assert_eq!(results[0].0, b"pfx_a");
|
|
assert_eq!(results[1].0, b"pfx_b");
|
|
assert_eq!(results[2].0, b"pfx_c");
|
|
|
|
// -- scan_prefix with no matches --
|
|
let empty: Vec<_> = engine
|
|
.scan_prefix(b"zzz")
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.unwrap();
|
|
assert!(empty.is_empty());
|
|
|
|
// -- write_batch --
|
|
engine.put(b"batch_del", b"old").unwrap();
|
|
let mut batch = WriteBatch::new();
|
|
batch.put(b"batch_a".to_vec(), b"va".to_vec());
|
|
batch.put(b"batch_b".to_vec(), b"vb".to_vec());
|
|
batch.delete(b"batch_del".to_vec());
|
|
|
|
engine.write_batch(batch).unwrap();
|
|
|
|
assert_eq!(
|
|
engine.get(b"batch_a").unwrap().as_deref(),
|
|
Some(b"va".as_slice())
|
|
);
|
|
assert_eq!(
|
|
engine.get(b"batch_b").unwrap().as_deref(),
|
|
Some(b"vb".as_slice())
|
|
);
|
|
assert_eq!(engine.get(b"batch_del").unwrap(), None);
|
|
|
|
// -- flush doesn't error --
|
|
engine.flush().unwrap();
|
|
}
|
|
|
|
/// Runs the shared `StorageEngine` contract suite against `InMemoryBackend`.
#[test]
fn shared_suite_in_memory() {
    let backend = InMemoryBackend::new();
    storage_engine_tests(&backend);
}
|
|
|
|
/// Runs the shared `StorageEngine` contract suite against the `Item` backend
/// of a `FjallStorage` opened in a fresh temporary directory.
#[test]
fn shared_suite_fjall() {
    // `tmp` is declared first so it outlives the storage opened inside it.
    let tmp = tempfile::tempdir().unwrap();
    let fjall = FjallStorage::open(tmp.path()).unwrap();
    storage_engine_tests(fjall.backend(tidaldb::schema::EntityKind::Item));
}
|
|
|
|
// =============================================================================
|
|
// Key encoding integration tests with real storage
|
|
// =============================================================================
|
|
|
|
fn key_encoding_tests(engine: &dyn StorageEngine) {
|
|
let id1 = EntityId::new(1000);
|
|
let id2 = EntityId::new(2000);
|
|
|
|
// Write keys for entity 1000 with different tags
|
|
let k1_evt = encode_key(id1, Tag::Evt, b"event1");
|
|
let k1_sig = encode_key(id1, Tag::Sig, b"sig1");
|
|
let k1_meta = encode_key(id1, Tag::Meta, b"");
|
|
let k2_evt = encode_key(id2, Tag::Evt, b"event2");
|
|
|
|
engine.put(&k1_evt, b"evt_data").unwrap();
|
|
engine.put(&k1_sig, b"sig_data").unwrap();
|
|
engine.put(&k1_meta, b"meta_data").unwrap();
|
|
engine.put(&k2_evt, b"evt2_data").unwrap();
|
|
|
|
// Prefix scan for entity 1000 — should return all 3 keys
|
|
let prefix = entity_prefix(id1);
|
|
let results: Vec<_> = engine
|
|
.scan_prefix(&prefix)
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.unwrap();
|
|
assert_eq!(
|
|
results.len(),
|
|
3,
|
|
"entity prefix scan should return all 3 keys for entity 1000"
|
|
);
|
|
|
|
// All results should parse correctly
|
|
for (k, _) in &results {
|
|
let (parsed_id, _tag, _suffix) = parse_key(k).expect("key should parse");
|
|
assert_eq!(parsed_id, id1);
|
|
}
|
|
|
|
// Tag-scoped scan for entity 1000, Evt tag
|
|
let evt_prefix = entity_tag_prefix(id1, Tag::Evt);
|
|
let evt_results: Vec<_> = engine
|
|
.scan_prefix(&evt_prefix)
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.unwrap();
|
|
assert_eq!(evt_results.len(), 1);
|
|
|
|
// Entity 2000 prefix scan — should return only its key
|
|
let prefix2 = entity_prefix(id2);
|
|
let results2: Vec<_> = engine
|
|
.scan_prefix(&prefix2)
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.unwrap();
|
|
assert_eq!(results2.len(), 1);
|
|
}
|
|
|
|
/// Runs the key-encoding scan checks against `InMemoryBackend`.
#[test]
fn key_encoding_in_memory() {
    let backend = InMemoryBackend::new();
    key_encoding_tests(&backend);
}
|
|
|
|
/// Runs the key-encoding scan checks against the `Item` backend of a
/// `FjallStorage` opened in a fresh temporary directory.
#[test]
fn key_encoding_fjall() {
    // `tmp` is declared first so it outlives the storage opened inside it.
    let tmp = tempfile::tempdir().unwrap();
    let fjall = FjallStorage::open(tmp.path()).unwrap();
    key_encoding_tests(fjall.backend(tidaldb::schema::EntityKind::Item));
}
|
|
|
|
// =============================================================================
|
|
// FjallStorage-specific tests
|
|
// =============================================================================
|
|
|
|
/// A value written and flushed through one `FjallStorage` handle must still be
/// readable after that handle is dropped and the same directory is reopened.
#[test]
fn fjall_persistence_across_reopen() {
    let tmp = tempfile::tempdir().unwrap();
    let key = encode_key(EntityId::new(42), Tag::Meta, b"");

    // First session: write, flush, then close the handle.
    let writer = FjallStorage::open(tmp.path()).unwrap();
    writer
        .backend(tidaldb::schema::EntityKind::Item)
        .put(&key, b"persisted_value")
        .unwrap();
    writer.flush_all().unwrap();
    drop(writer);

    // Second session: reopen the same directory and read the value back.
    let reader = FjallStorage::open(tmp.path()).unwrap();
    let stored = reader
        .backend(tidaldb::schema::EntityKind::Item)
        .get(&key)
        .unwrap();
    assert_eq!(stored.as_deref(), Some(&b"persisted_value"[..]));
}
|
|
|
|
/// The same encoded key written through the `Item` and `User` backends must
/// hold independent values, and must stay absent from the `Creator` backend
/// that was never written to.
#[test]
fn fjall_entity_kind_isolation_with_encoded_keys() {
    let tmp = tempfile::tempdir().unwrap();
    let fjall = FjallStorage::open(tmp.path()).unwrap();

    // One key, reused verbatim for every entity kind.
    let shared_key = encode_key(EntityId::new(1), Tag::Meta, b"");

    let items = fjall.backend(tidaldb::schema::EntityKind::Item);
    let users = fjall.backend(tidaldb::schema::EntityKind::User);
    let creators = fjall.backend(tidaldb::schema::EntityKind::Creator);

    items.put(&shared_key, b"item_meta").unwrap();
    users.put(&shared_key, b"user_meta").unwrap();

    // Each written backend returns its own value for the key.
    let item_value = items.get(&shared_key).unwrap();
    assert_eq!(item_value.as_deref(), Some(&b"item_meta"[..]));

    let user_value = users.get(&shared_key).unwrap();
    assert_eq!(user_value.as_deref(), Some(&b"user_meta"[..]));

    // The backend that received no write must not see the key at all.
    assert!(creators.get(&shared_key).unwrap().is_none());
}
|
|
|
|
// =============================================================================
|
|
// StorageError tests
|
|
// =============================================================================
|
|
|
|
/// Converting a `std::io::Error` must produce `StorageError::Io` and keep the
/// original message in the `Display` output.
#[test]
fn storage_error_from_io() {
    let source = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "access denied");
    let converted: StorageError = source.into();

    assert!(matches!(converted, StorageError::Io(_)));

    let rendered = converted.to_string();
    assert!(rendered.contains("access denied"));
}
|
|
|
|
/// Pins the `Display` output of the `Corruption`, `Closed` and `BatchConflict`
/// variants of `StorageError`.
#[test]
fn storage_error_display_all_variants() {
    // Corruption: the text names the error class and carries the message.
    let corruption = StorageError::Corruption {
        message: "bad data".into(),
    };
    let rendered = corruption.to_string();
    assert!(rendered.contains("data corruption"));
    assert!(rendered.contains("bad data"));

    // Field-less variants render as fixed strings.
    for (variant, expected) in [
        (StorageError::Closed, "storage closed"),
        (StorageError::BatchConflict, "batch conflict"),
    ] {
        assert_eq!(variant.to_string(), expected);
    }
}
|
|
|
|
// =============================================================================
|
|
// Property tests
|
|
// =============================================================================
|
|
|
|
mod proptests {
|
|
use super::*;
|
|
use proptest::prelude::*;
|
|
|
|
proptest! {
|
|
/// Key encoding preserves EntityId ordering when compared as byte slices.
|
|
#[test]
|
|
fn key_ordering_preserves_entity_id_ordering(a: u64, b: u64) {
|
|
let key_a = encode_key(EntityId::new(a), Tag::Sig, b"");
|
|
let key_b = encode_key(EntityId::new(b), Tag::Sig, b"");
|
|
prop_assert_eq!(a.cmp(&b), key_a.cmp(&key_b));
|
|
}
|
|
|
|
/// Prefix scan returns exactly the keys with matching prefix
|
|
/// (tested against InMemoryBackend).
|
|
#[test]
|
|
fn prefix_scan_correctness(
|
|
entity_ids in proptest::collection::vec(1u64..10000, 1..20),
|
|
target_id in 1u64..10000,
|
|
) {
|
|
let engine = InMemoryBackend::new();
|
|
let target = EntityId::new(target_id);
|
|
|
|
// Insert keys for various entities
|
|
for &id_val in &entity_ids {
|
|
let id = EntityId::new(id_val);
|
|
let key = encode_key(id, Tag::Meta, b"");
|
|
engine.put(&key, b"data").unwrap();
|
|
}
|
|
|
|
// Also ensure target entity has a key
|
|
let target_key = encode_key(target, Tag::Meta, b"");
|
|
engine.put(&target_key, b"target").unwrap();
|
|
|
|
// Scan for target entity
|
|
let prefix = entity_prefix(target);
|
|
let results: Vec<_> = engine
|
|
.scan_prefix(&prefix)
|
|
.collect::<Result<Vec<_>, _>>()
|
|
.unwrap();
|
|
|
|
// All results must be for the target entity
|
|
for (k, _) in &results {
|
|
prop_assert!(k.starts_with(&prefix));
|
|
let (parsed_id, _, _) = parse_key(k).unwrap();
|
|
prop_assert_eq!(parsed_id, target);
|
|
}
|
|
|
|
// We always get at least 1 result (the target key we inserted)
|
|
prop_assert!(!results.is_empty());
|
|
}
|
|
|
|
/// Put/get round-trip for arbitrary byte sequences.
|
|
#[test]
|
|
fn put_get_roundtrip_arbitrary(
|
|
key in proptest::collection::vec(any::<u8>(), 1..200),
|
|
value in proptest::collection::vec(any::<u8>(), 0..2000),
|
|
) {
|
|
let engine = InMemoryBackend::new();
|
|
engine.put(&key, &value).unwrap();
|
|
let retrieved = engine.get(&key).unwrap().unwrap();
|
|
prop_assert_eq!(retrieved, value);
|
|
}
|
|
|
|
/// Batch writes make all ops visible atomically.
|
|
#[test]
|
|
fn batch_all_or_nothing(
|
|
ops in proptest::collection::vec(
|
|
(
|
|
proptest::collection::vec(any::<u8>(), 1..50),
|
|
proptest::collection::vec(any::<u8>(), 1..100),
|
|
),
|
|
1..20
|
|
)
|
|
) {
|
|
let engine = InMemoryBackend::new();
|
|
let mut batch = WriteBatch::new();
|
|
|
|
for (key, value) in &ops {
|
|
batch.put(key.clone(), value.clone());
|
|
}
|
|
|
|
engine.write_batch(batch).unwrap();
|
|
|
|
// All ops should be visible
|
|
for (key, value) in &ops {
|
|
let retrieved = engine.get(key).unwrap();
|
|
prop_assert_eq!(retrieved.as_deref(), Some(value.as_slice()));
|
|
}
|
|
}
|
|
}
|
|
}
|