tidaldb/tidal/tests/m1p3_storage_uat.rs
jordan 192c473f55 feat: complete Milestone 5 — full-text search, RRF fusion, and creator search
- M5p1: BM25 text indexing via Tantivy with background syncer (0.26ms @ 10K docs)
- M5p2: RRF fusion layer combining BM25 + ANN scores (46µs @ 1K candidates)
- M5p3: unified Search query API (8-stage pipeline, BM25 + vector + ranking)
- M5p4: creator text + vector indexing and creator search executor (< 20ms @ 200 creators)
- Refactor db/mod.rs into focused sub-modules (creators, items, sessions, signals, etc.)
- Decompose monolithic files into directory modules (query/executor, ranking/diversity, etc.)
- Split brute.rs → brute/mod.rs + brute/tests.rs; extract search executor helpers
- Add benches: fusion, search, session, text_index
- Add M5 UAT test suites (m5_uat, m5_search, m5p4_creator_search, text_index)
- Update blog posts, roadmap, content strategy, and M5 planning docs
- Add tmp/ and .claude/worktrees/ to .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 23:53:16 -07:00

521 lines
17 KiB
Rust

#![allow(clippy::unwrap_used)]
//! UAT for Milestone 1, Phase 3: Storage Engine Trait and fjall Backend.
//!
//! These tests exercise acceptance criteria gaps not covered by
//! `tidal/tests/storage.rs` or the unit tests in `storage/`.
use tidaldb::schema::{EntityId, EntityKind};
use tidaldb::storage::{
FjallAtomicBatch, FjallStorage, InMemoryBackend, StorageEngine, Tag, WriteBatch, encode_key,
entity_prefix, entity_tag_prefix, parse_key,
};
// =============================================================================
// UAT-01: Out-of-order entity ID insert, scan_prefix returns numeric order
// (Fjall backend)
// =============================================================================
#[test]
fn uat01_fjall_scan_prefix_returns_numeric_order_after_out_of_order_insert() {
    let tmp = tempfile::tempdir().unwrap();
    let db = FjallStorage::open(tmp.path()).unwrap();
    let item_backend = db.backend(EntityKind::Item);

    // Write one Meta key per entity, deliberately shuffled so that insertion
    // order differs from numeric order.
    let shuffled: Vec<u64> = vec![9999, 1, 500, 42, 10000, 7, 256, 3];
    for raw in shuffled.iter().copied() {
        let key = encode_key(EntityId::new(raw), Tag::Meta, b"");
        item_backend.put(&key, b"data").unwrap();
    }

    // Expected outcome: the same IDs, ascending and de-duplicated.
    let expected_ids = {
        let mut ascending = shuffled;
        ascending.sort_unstable();
        ascending.dedup();
        ascending
    };

    // Scan with an empty prefix so that every key written above is returned.
    let scanned: Vec<_> = item_backend
        .scan_prefix(b"")
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(scanned.len(), expected_ids.len());

    // Pass 1: every key is a Meta key, and the big-endian encoding of each
    // entity ID is strictly greater than that of the key before it.
    let mut last_raw: Option<u64> = None;
    for (key, _value) in &scanned {
        let (entity_id, tag, _suffix) = parse_key(key).unwrap();
        assert_eq!(tag, Tag::Meta);
        let encoded = entity_id.to_be_bytes();
        if let Some(prev) = last_raw {
            assert!(
                encoded > EntityId::new(prev).to_be_bytes(),
                "entity {entity_id:?} should sort after entity with raw id {prev}"
            );
        }
        last_raw = Some(u64::from_be_bytes(encoded));
    }

    // Pass 2: the decoded ID sequence is exactly the sorted input.
    let scanned_ids: Vec<u64> = scanned
        .iter()
        .map(|(key, _)| {
            parse_key(key)
                .map(|(id, _, _)| u64::from_be_bytes(id.to_be_bytes()))
                .unwrap()
        })
        .collect();
    assert_eq!(scanned_ids, expected_ids);
}
// =============================================================================
// UAT-02: Persistence across reopen for ALL three keyspaces
// =============================================================================
#[test]
fn uat02_fjall_persistence_all_three_keyspaces() {
    let tmp = tempfile::tempdir().unwrap();

    // One distinct key per keyspace; the suffix names the keyspace it goes to.
    let item_key = encode_key(EntityId::new(1), Tag::Meta, b"item");
    let user_key = encode_key(EntityId::new(2), Tag::Meta, b"user");
    let creator_key = encode_key(EntityId::new(3), Tag::Meta, b"creator");

    // Session 1: write one value per keyspace, flush, then drop the storage
    // handle by leaving the scope.
    {
        let db = FjallStorage::open(tmp.path()).unwrap();

        let items = db.backend(EntityKind::Item);
        items.put(&item_key, b"item_value").unwrap();

        let users = db.backend(EntityKind::User);
        users.put(&user_key, b"user_value").unwrap();

        let creators = db.backend(EntityKind::Creator);
        creators.put(&creator_key, b"creator_value").unwrap();

        db.flush_all().unwrap();
    }

    // Session 2: reopen the same directory and read each value back.
    {
        let db = FjallStorage::open(tmp.path()).unwrap();

        let items = db.backend(EntityKind::Item);
        assert_eq!(
            items.get(&item_key).unwrap().as_deref(),
            Some(&b"item_value"[..]),
            "Item keyspace data should survive reopen"
        );

        let users = db.backend(EntityKind::User);
        assert_eq!(
            users.get(&user_key).unwrap().as_deref(),
            Some(&b"user_value"[..]),
            "User keyspace data should survive reopen"
        );

        let creators = db.backend(EntityKind::Creator);
        assert_eq!(
            creators.get(&creator_key).unwrap().as_deref(),
            Some(&b"creator_value"[..]),
            "Creator keyspace data should survive reopen"
        );
    }
}
// =============================================================================
// UAT-03: FjallAtomicBatch remove operation
// =============================================================================
#[test]
fn uat03_fjall_atomic_batch_remove() {
    let tmp = tempfile::tempdir().unwrap();
    let db = FjallStorage::open(tmp.path()).unwrap();

    let item_key = encode_key(EntityId::new(10), Tag::Meta, b"");
    let user_key = encode_key(EntityId::new(20), Tag::Meta, b"");

    // Seed both keyspaces so the batch has something to remove and overwrite.
    let items = db.backend(EntityKind::Item);
    items.put(&item_key, b"old_item").unwrap();
    let users = db.backend(EntityKind::User);
    users.put(&user_key, b"old_user").unwrap();

    // A single atomic batch spanning two keyspaces: drop the item key and
    // replace the user value.
    let mut atomic = FjallAtomicBatch::new(&db);
    atomic.remove(db.backend(EntityKind::Item), &item_key);
    atomic.put(db.backend(EntityKind::User), &user_key, b"new_user");
    atomic.commit().unwrap();

    // The removed key must no longer resolve.
    assert_eq!(
        items.get(&item_key).unwrap(),
        None,
        "Atomic batch remove should delete the item key"
    );

    // The overwritten key must carry the value written by the batch.
    assert_eq!(
        users.get(&user_key).unwrap().as_deref(),
        Some(&b"new_user"[..]),
        "Atomic batch put should update the user key"
    );
}
// =============================================================================
// UAT-04: Entity kind isolation with scan_prefix (not just get)
// =============================================================================
#[test]
fn uat04_entity_kind_isolation_scan_prefix() {
    let tmp = tempfile::tempdir().unwrap();
    let db = FjallStorage::open(tmp.path()).unwrap();

    let id = EntityId::new(100);
    let prefix = entity_prefix(id);

    // Three keys for the same entity, one per tag, all in the Item keyspace.
    let meta_key = encode_key(id, Tag::Meta, b"");
    let sig_key = encode_key(id, Tag::Sig, b"score");
    let evt_key = encode_key(id, Tag::Evt, b"ev1");
    let items = db.backend(EntityKind::Item);
    items.put(&meta_key, b"meta").unwrap();
    items.put(&sig_key, b"sig").unwrap();
    items.put(&evt_key, b"evt").unwrap();

    // The identical Meta key, but written to the User keyspace with a
    // different payload.
    db.backend(EntityKind::User)
        .put(&meta_key, b"user_meta")
        .unwrap();

    // Counts how many keys a prefix scan for this entity yields in `kind`.
    let keys_under_entity = |kind: EntityKind| -> usize {
        let hits: Vec<_> = db
            .backend(kind)
            .scan_prefix(&prefix)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        hits.len()
    };

    // Each keyspace must report only what was written to it.
    assert_eq!(
        keys_under_entity(EntityKind::Item),
        3,
        "Item keyspace should have 3 keys for entity 100"
    );
    assert_eq!(
        keys_under_entity(EntityKind::User),
        1,
        "User keyspace should have 1 key for entity 100"
    );
    assert_eq!(
        keys_under_entity(EntityKind::Creator),
        0,
        "Creator keyspace should have 0 keys for entity 100"
    );
}
// =============================================================================
// UAT-05: entity_tag_prefix scan isolates tags within an entity on fjall
// =============================================================================
#[test]
fn uat05_entity_tag_prefix_scan_fjall() {
    let tmp = tempfile::tempdir().unwrap();
    let db = FjallStorage::open(tmp.path()).unwrap();
    let items = db.backend(EntityKind::Item);
    let id = EntityId::new(777);

    // Populate one entity with two Evt keys, one Sig key and one Meta key.
    let first_evt = encode_key(id, Tag::Evt, b"e1");
    items.put(&first_evt, b"event1").unwrap();
    let second_evt = encode_key(id, Tag::Evt, b"e2");
    items.put(&second_evt, b"event2").unwrap();
    let only_sig = encode_key(id, Tag::Sig, b"s1");
    items.put(&only_sig, b"sig1").unwrap();
    let only_meta = encode_key(id, Tag::Meta, b"");
    items.put(&only_meta, b"meta").unwrap();

    // Counts the keys returned when scanning this entity restricted to `tag`.
    let count_for_tag = |tag: Tag| -> usize {
        let tag_prefix = entity_tag_prefix(id, tag);
        let hits: Vec<_> = items
            .scan_prefix(&tag_prefix)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        hits.len()
    };

    // Each tag-scoped scan must see only its own keys; Rel was never written.
    assert_eq!(count_for_tag(Tag::Evt), 2, "Should find exactly 2 Evt keys");
    assert_eq!(count_for_tag(Tag::Sig), 1, "Should find exactly 1 Sig key");
    assert_eq!(count_for_tag(Tag::Meta), 1, "Should find exactly 1 Meta key");
    assert_eq!(count_for_tag(Tag::Rel), 0, "Should find 0 Rel keys");
}
// =============================================================================
// UAT-06: WriteBatch with deletes and puts interleaved on fjall
// =============================================================================
#[test]
fn uat06_fjall_write_batch_interleaved_ops() {
    let tmp = tempfile::tempdir().unwrap();
    let db = FjallStorage::open(tmp.path()).unwrap();
    let items = db.backend(EntityKind::Item);

    // Meta keys for entities 1, 2 and 3; only the first two exist up front.
    let [k1, k2, k3] = [1u64, 2, 3].map(|raw| encode_key(EntityId::new(raw), Tag::Meta, b""));
    items.put(&k1, b"v1").unwrap();
    items.put(&k2, b"v2").unwrap();

    // One batch that mixes operation kinds: delete, put, delete.
    let mut ops = WriteBatch::new();
    ops.delete(k1.clone());
    ops.put(k3.clone(), b"v3".to_vec());
    ops.delete(k2.clone());
    items.write_batch(ops).unwrap();

    // Both pre-existing keys are gone...
    for (key, why) in [(&k1, "k1 should be deleted"), (&k2, "k2 should be deleted")] {
        assert_eq!(items.get(key).unwrap(), None, "{why}");
    }

    // ...and the key introduced by the batch is readable.
    assert_eq!(
        items.get(&k3).unwrap().as_deref(),
        Some(&b"v3"[..]),
        "k3 should exist"
    );
}
// =============================================================================
// UAT-07: encode_key/parse_key roundtrip for ALL Tag variants (explicit)
// =============================================================================
#[test]
fn uat07_encode_parse_roundtrip_all_tags() {
    // The tags exercised by this test, listed explicitly.
    let tags = [
        Tag::Evt,
        Tag::Sig,
        Tag::Meta,
        Tag::Rel,
        Tag::Mv,
        Tag::Idx,
        Tag::Session,
    ];

    // Upper boundary: the largest raw ID, with a non-empty per-tag suffix.
    let max_id = EntityId::new(u64::MAX);
    for tag in tags.iter().copied() {
        let suffix_text = format!("test_{tag:?}");
        let expected_suffix = suffix_text.as_bytes();
        let key = encode_key(max_id, tag, expected_suffix);

        let Some((got_id, got_tag, got_suffix)) = parse_key(&key) else {
            panic!("parse_key should succeed for tag {tag:?}");
        };

        assert_eq!(got_id, max_id, "EntityId roundtrip for tag {tag:?}");
        assert_eq!(got_tag, tag, "Tag roundtrip for tag {tag:?}");
        assert_eq!(
            got_suffix, expected_suffix,
            "Suffix roundtrip for tag {tag:?}",
        );
    }

    // Lower boundary: raw ID zero, with an empty suffix.
    let zero_id = EntityId::new(0);
    for tag in tags {
        let key = encode_key(zero_id, tag, b"");
        let (got_id, got_tag, got_suffix) = parse_key(&key).unwrap();
        assert_eq!(got_id, zero_id);
        assert_eq!(got_tag, tag);
        assert!(got_suffix.is_empty());
    }
}
// =============================================================================
// UAT-08: Persistence survives reopen with scan_prefix verification
// =============================================================================
#[test]
fn uat08_persistence_verified_via_scan_prefix() {
    let tmp = tempfile::tempdir().unwrap();
    let id = EntityId::new(55);

    // Session 1: write one Meta key and two Sig keys for the entity, flush,
    // then drop the storage handle by leaving the scope.
    {
        let db = FjallStorage::open(tmp.path()).unwrap();
        let items = db.backend(EntityKind::Item);

        let meta_key = encode_key(id, Tag::Meta, b"");
        items.put(&meta_key, b"meta").unwrap();
        let sig_a_key = encode_key(id, Tag::Sig, b"a");
        items.put(&sig_a_key, b"sig_a").unwrap();
        let sig_b_key = encode_key(id, Tag::Sig, b"b");
        items.put(&sig_b_key, b"sig_b").unwrap();

        db.flush_all().unwrap();
    }

    // Session 2: reopen and discover the keys through a prefix scan rather
    // than individual gets.
    {
        let db = FjallStorage::open(tmp.path()).unwrap();
        let items = db.backend(EntityKind::Item);

        let whole_entity = entity_prefix(id);
        let survivors: Vec<_> = items
            .scan_prefix(&whole_entity)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(
            survivors.len(),
            3,
            "All 3 keys should survive reopen and be scan-discoverable"
        );

        // Check the payloads as well. Presumably the scan yields the two Sig
        // keys before the Meta key (tag byte order; confirm against the key
        // encoding), but these checks are order-independent: each expected
        // value only has to be present somewhere in the result.
        let payloads: Vec<&[u8]> = survivors
            .iter()
            .map(|(_, value)| value.as_slice())
            .collect();
        for expected in [&b"meta"[..], &b"sig_a"[..], &b"sig_b"[..]] {
            assert!(payloads.contains(&expected));
        }
    }
}
// =============================================================================
// UAT-09: FjallAtomicBatch persists across reopen
// =============================================================================
#[test]
fn uat09_atomic_batch_persists_across_reopen() {
    let tmp = tempfile::tempdir().unwrap();

    // The same two keys are written in the first session and read back in
    // the second.
    let item_key = encode_key(EntityId::new(1), Tag::Meta, b"");
    let creator_key = encode_key(EntityId::new(2), Tag::Meta, b"");

    // Session 1: commit one atomic batch touching two keyspaces, then flush.
    {
        let db = FjallStorage::open(tmp.path()).unwrap();

        let mut atomic = FjallAtomicBatch::new(&db);
        atomic.put(db.backend(EntityKind::Item), &item_key, b"atomic_item");
        atomic.put(
            db.backend(EntityKind::Creator),
            &creator_key,
            b"atomic_creator",
        );
        atomic.commit().unwrap();

        db.flush_all().unwrap();
    }

    // Session 2: both writes from the batch must be readable after reopen.
    {
        let db = FjallStorage::open(tmp.path()).unwrap();

        let items = db.backend(EntityKind::Item);
        assert_eq!(
            items.get(&item_key).unwrap().as_deref(),
            Some(&b"atomic_item"[..]),
            "Atomic batch item should persist across reopen"
        );

        let creators = db.backend(EntityKind::Creator);
        assert_eq!(
            creators.get(&creator_key).unwrap().as_deref(),
            Some(&b"atomic_creator"[..]),
            "Atomic batch creator should persist across reopen"
        );
    }
}
// =============================================================================
// UAT-10: InMemoryBackend scan_prefix returns lexicographic order
// with encoded keys inserted out of order
// =============================================================================
#[test]
fn uat10_in_memory_scan_all_returns_numeric_order() {
    let backend = InMemoryBackend::new();

    // Insert Meta keys for IDs 20 down to 1, i.e. the reverse of the order
    // the scan is expected to produce.
    (1u64..=20).rev().for_each(|raw| {
        let key = encode_key(EntityId::new(raw), Tag::Meta, b"");
        backend.put(&key, b"data").unwrap();
    });

    // Empty prefix: scan everything that was stored.
    let scanned: Vec<_> = backend
        .scan_prefix(b"")
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(scanned.len(), 20);

    // Decode the raw entity ID out of every returned key.
    let decoded: Vec<u64> = scanned
        .iter()
        .map(|(key, _)| {
            parse_key(key)
                .map(|(id, _, _)| u64::from_be_bytes(id.to_be_bytes()))
                .unwrap()
        })
        .collect();

    // Every adjacent pair must be strictly increasing...
    decoded.windows(2).for_each(|pair| {
        assert!(
            pair[0] < pair[1],
            "IDs must be in ascending order: {} < {}",
            pair[0],
            pair[1]
        );
    });

    // ...and the full sequence must be exactly 1 through 20.
    let expected: Vec<u64> = (1u64..=20).collect();
    assert_eq!(decoded, expected);
}