- M5p1: BM25 text indexing via Tantivy with background syncer (0.26ms @ 10K docs) - M5p2: RRF fusion layer combining BM25 + ANN scores (46µs @ 1K candidates) - M5p3: unified Search query API (8-stage pipeline, BM25 + vector + ranking) - M5p4: creator text + vector indexing and creator search executor (< 20ms @ 200 creators) - Refactor db/mod.rs into focused sub-modules (creators, items, sessions, signals, etc.) - Decompose monolithic files into directory modules (query/executor, ranking/diversity, etc.) - Split brute.rs → brute/mod.rs + brute/tests.rs; extract search executor helpers - Add benches: fusion, search, session, text_index - Add M5 UAT test suites (m5_uat, m5_search, m5p4_creator_search, text_index) - Update blog posts, roadmap, content strategy, and M5 planning docs - Add tmp/ and .claude/worktrees/ to .gitignore Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
521 lines
17 KiB
Rust
521 lines
17 KiB
Rust
#![allow(clippy::unwrap_used)]
|
|
|
|
//! UAT for Milestone 1, Phase 3: Storage Engine Trait and fjall Backend.
//!
//! These tests exercise acceptance criteria gaps not covered by
//! `tidal/tests/storage.rs` or the unit tests in `storage/`.
|
|
|
|
use tidaldb::schema::{EntityId, EntityKind};
|
|
use tidaldb::storage::{
|
|
FjallAtomicBatch, FjallStorage, InMemoryBackend, StorageEngine, Tag, WriteBatch, encode_key,
|
|
entity_prefix, entity_tag_prefix, parse_key,
|
|
};
|
|
|
|
// =============================================================================
// UAT-01: Out-of-order entity ID insert, scan_prefix returns numeric order
//         (Fjall backend)
// =============================================================================
|
|
|
|
/// Inserts `Meta` keys for a shuffled set of entity IDs into the Item keyspace
/// and checks that an empty-prefix scan hands them back in ascending ID order.
#[test]
fn uat01_fjall_scan_prefix_returns_numeric_order_after_out_of_order_insert() {
    let tmp = tempfile::tempdir().unwrap();
    let storage = FjallStorage::open(tmp.path()).unwrap();
    let item_backend = storage.backend(EntityKind::Item);

    // Shuffled on purpose so that insertion order cannot coincide with key order.
    let inserted: Vec<u64> = vec![9999, 1, 500, 42, 10000, 7, 256, 3];
    for raw in inserted.iter().copied() {
        let key = encode_key(EntityId::new(raw), Tag::Meta, b"");
        item_backend.put(&key, b"data").unwrap();
    }

    // Expected scan result: the same IDs, ascending, with duplicates removed.
    let mut expected = inserted;
    expected.sort_unstable();
    expected.dedup();

    // An empty prefix is used to scan every key in the keyspace.
    let scanned = item_backend
        .scan_prefix(b"")
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(scanned.len(), expected.len());

    // Single pass: check the tag, check strict ordering against the previous
    // key, and record the raw ID for the exact-sequence comparison below.
    let mut returned: Vec<u64> = Vec::with_capacity(scanned.len());
    let mut previous: Option<u64> = None;
    for (key_bytes, _) in &scanned {
        let (entity_id, tag, _suffix) = parse_key(key_bytes).unwrap();
        assert_eq!(tag, Tag::Meta);
        if let Some(prev) = previous {
            assert!(
                entity_id.to_be_bytes() > EntityId::new(prev).to_be_bytes(),
                "entity {entity_id:?} should sort after entity with raw id {prev}"
            );
        }
        let raw = u64::from_be_bytes(entity_id.to_be_bytes());
        returned.push(raw);
        previous = Some(raw);
    }

    // The returned sequence must be exactly the sorted input.
    assert_eq!(returned, expected);
}
|
|
|
|
// =============================================================================
// UAT-02: Persistence across reopen for ALL three keyspaces
// =============================================================================
|
|
|
|
/// Writes one key into each of the Item, User and Creator keyspaces, flushes,
/// drops the storage, reopens the same directory and checks all three values.
#[test]
fn uat02_fjall_persistence_all_three_keyspaces() {
    let tmp = tempfile::tempdir().unwrap();

    let key_for_item = encode_key(EntityId::new(1), Tag::Meta, b"item");
    let key_for_user = encode_key(EntityId::new(2), Tag::Meta, b"user");
    let key_for_creator = encode_key(EntityId::new(3), Tag::Meta, b"creator");

    // First session: populate every keyspace, flush, then let the storage drop.
    {
        let storage = FjallStorage::open(tmp.path()).unwrap();

        let items = storage.backend(EntityKind::Item);
        items.put(&key_for_item, b"item_value").unwrap();

        let users = storage.backend(EntityKind::User);
        users.put(&key_for_user, b"user_value").unwrap();

        let creators = storage.backend(EntityKind::Creator);
        creators.put(&key_for_creator, b"creator_value").unwrap();

        storage.flush_all().unwrap();
    }

    // Second session: a fresh handle on the same directory must see the data.
    {
        let storage = FjallStorage::open(tmp.path()).unwrap();

        let items = storage.backend(EntityKind::Item);
        let item_after = items.get(&key_for_item).unwrap();
        assert_eq!(
            item_after.as_deref(),
            Some(b"item_value".as_slice()),
            "Item keyspace data should survive reopen"
        );

        let users = storage.backend(EntityKind::User);
        let user_after = users.get(&key_for_user).unwrap();
        assert_eq!(
            user_after.as_deref(),
            Some(b"user_value".as_slice()),
            "User keyspace data should survive reopen"
        );

        let creators = storage.backend(EntityKind::Creator);
        let creator_after = creators.get(&key_for_creator).unwrap();
        assert_eq!(
            creator_after.as_deref(),
            Some(b"creator_value".as_slice()),
            "Creator keyspace data should survive reopen"
        );
    }
}
|
|
|
|
// =============================================================================
// UAT-03: FjallAtomicBatch remove operation
// =============================================================================
|
|
|
|
/// Commits a `FjallAtomicBatch` that removes an Item key and overwrites a User
/// key, then checks that both effects are visible after the commit.
#[test]
fn uat03_fjall_atomic_batch_remove() {
    let tmp = tempfile::tempdir().unwrap();
    let storage = FjallStorage::open(tmp.path()).unwrap();

    let doomed_item_key = encode_key(EntityId::new(10), Tag::Meta, b"");
    let updated_user_key = encode_key(EntityId::new(20), Tag::Meta, b"");

    // Seed both keyspaces so the batch has something to remove and to replace.
    {
        let items = storage.backend(EntityKind::Item);
        items.put(&doomed_item_key, b"old_item").unwrap();
    }
    {
        let users = storage.backend(EntityKind::User);
        users.put(&updated_user_key, b"old_user").unwrap();
    }

    // One batch spanning two keyspaces: a remove and a put.
    let mut batch = FjallAtomicBatch::new(&storage);
    batch.remove(storage.backend(EntityKind::Item), &doomed_item_key);
    batch.put(
        storage.backend(EntityKind::User),
        &updated_user_key,
        b"new_user",
    );
    batch.commit().unwrap();

    // The removed key must no longer resolve.
    let item_after = storage
        .backend(EntityKind::Item)
        .get(&doomed_item_key)
        .unwrap();
    assert_eq!(
        item_after, None,
        "Atomic batch remove should delete the item key"
    );

    // The overwritten key must carry the value written by the batch.
    let users = storage.backend(EntityKind::User);
    let user_after = users.get(&updated_user_key).unwrap();
    assert_eq!(
        user_after.as_deref(),
        Some(b"new_user".as_slice()),
        "Atomic batch put should update the user key"
    );
}
|
|
|
|
// =============================================================================
// UAT-04: Entity kind isolation with scan_prefix (not just get)
// =============================================================================
|
|
|
|
/// Writes keys for the same entity ID into the Item and User keyspaces and
/// checks that an entity-prefix scan on each keyspace (including the untouched
/// Creator keyspace) only sees that keyspace's own keys.
#[test]
fn uat04_entity_kind_isolation_scan_prefix() {
    let tmp = tempfile::tempdir().unwrap();
    let storage = FjallStorage::open(tmp.path()).unwrap();

    let entity = EntityId::new(100);
    let prefix = entity_prefix(entity);

    let meta_key = encode_key(entity, Tag::Meta, b"");
    let sig_key = encode_key(entity, Tag::Sig, b"score");
    let evt_key = encode_key(entity, Tag::Evt, b"ev1");

    // Three differently tagged keys for the entity in the Item keyspace.
    {
        let items = storage.backend(EntityKind::Item);
        items.put(&meta_key, b"meta").unwrap();
        items.put(&sig_key, b"sig").unwrap();
        items.put(&evt_key, b"evt").unwrap();
    }

    // The same Meta key, with a different value, in the User keyspace.
    {
        let users = storage.backend(EntityKind::User);
        users.put(&meta_key, b"user_meta").unwrap();
    }

    // Number of keys under the entity prefix in the given keyspace.
    let keys_under_prefix = |kind: EntityKind| -> usize {
        let found = storage
            .backend(kind)
            .scan_prefix(&prefix)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        found.len()
    };

    assert_eq!(
        keys_under_prefix(EntityKind::Item),
        3,
        "Item keyspace should have 3 keys for entity 100"
    );
    assert_eq!(
        keys_under_prefix(EntityKind::User),
        1,
        "User keyspace should have 1 key for entity 100"
    );
    assert_eq!(
        keys_under_prefix(EntityKind::Creator),
        0,
        "Creator keyspace should have 0 keys for entity 100"
    );
}
|
|
|
|
// =============================================================================
// UAT-05: entity_tag_prefix scan isolates tags within an entity on fjall
// =============================================================================
|
|
|
|
/// Stores keys under several tags for one entity and checks that scanning with
/// `entity_tag_prefix` returns only the keys carrying the requested tag.
#[test]
fn uat05_entity_tag_prefix_scan_fjall() {
    let tmp = tempfile::tempdir().unwrap();
    let storage = FjallStorage::open(tmp.path()).unwrap();
    let items = storage.backend(EntityKind::Item);

    let entity = EntityId::new(777);

    // (tag, key suffix, value): two Evt keys, one Sig key, one Meta key.
    let fixtures = [
        (Tag::Evt, b"e1".as_slice(), b"event1".as_slice()),
        (Tag::Evt, b"e2".as_slice(), b"event2".as_slice()),
        (Tag::Sig, b"s1".as_slice(), b"sig1".as_slice()),
        (Tag::Meta, b"".as_slice(), b"meta".as_slice()),
    ];
    for (tag, suffix, value) in fixtures {
        items.put(&encode_key(entity, tag, suffix), value).unwrap();
    }

    // Number of keys found when scanning the (entity, tag) prefix.
    let count_for_tag = |tag: Tag| -> usize {
        let prefix = entity_tag_prefix(entity, tag);
        let hits = items
            .scan_prefix(&prefix)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        hits.len()
    };

    assert_eq!(count_for_tag(Tag::Evt), 2, "Should find exactly 2 Evt keys");
    assert_eq!(count_for_tag(Tag::Sig), 1, "Should find exactly 1 Sig key");
    assert_eq!(count_for_tag(Tag::Meta), 1, "Should find exactly 1 Meta key");
    // Nothing was written under Rel, so that scan must come back empty.
    assert_eq!(count_for_tag(Tag::Rel), 0, "Should find 0 Rel keys");
}
|
|
|
|
// =============================================================================
// UAT-06: WriteBatch with deletes and puts interleaved on fjall
// =============================================================================
|
|
|
|
/// Applies a `WriteBatch` that interleaves deletes and a put on the Item
/// backend and checks the resulting state of all three keys.
#[test]
fn uat06_fjall_write_batch_interleaved_ops() {
    let tmp = tempfile::tempdir().unwrap();
    let storage = FjallStorage::open(tmp.path()).unwrap();
    let items = storage.backend(EntityKind::Item);

    let first = encode_key(EntityId::new(1), Tag::Meta, b"");
    let second = encode_key(EntityId::new(2), Tag::Meta, b"");
    let third = encode_key(EntityId::new(3), Tag::Meta, b"");

    // Only the first two keys exist before the batch runs.
    items.put(&first, b"v1").unwrap();
    items.put(&second, b"v2").unwrap();

    // delete / put / delete, in that order.
    let mut ops = WriteBatch::new();
    ops.delete(first.clone());
    ops.put(third.clone(), b"v3".to_vec());
    ops.delete(second.clone());
    items.write_batch(ops).unwrap();

    // Both deleted keys must be gone.
    for (key, why) in [
        (&first, "k1 should be deleted"),
        (&second, "k2 should be deleted"),
    ] {
        assert_eq!(items.get(key).unwrap(), None, "{why}");
    }

    // The key introduced by the batch must be readable.
    let third_value = items.get(&third).unwrap();
    assert_eq!(
        third_value.as_deref(),
        Some(b"v3".as_slice()),
        "k3 should exist"
    );
}
|
|
|
|
// =============================================================================
// UAT-07: encode_key/parse_key roundtrip for ALL Tag variants (explicit)
// =============================================================================
|
|
|
|
/// Round-trips `encode_key` / `parse_key` for each listed `Tag` variant at both
/// ends of the entity ID range (`u64::MAX` with a suffix, `0` without one).
#[test]
fn uat07_encode_parse_roundtrip_all_tags() {
    let tags = [
        Tag::Evt,
        Tag::Sig,
        Tag::Meta,
        Tag::Rel,
        Tag::Mv,
        Tag::Idx,
        Tag::Session,
    ];

    // Upper boundary: largest possible ID, with a tag-specific suffix.
    let max_id = EntityId::new(u64::MAX);
    for tag in tags.iter().copied() {
        let suffix = format!("test_{tag:?}");
        let encoded = encode_key(max_id, tag, suffix.as_bytes());

        let (parsed_id, parsed_tag, parsed_suffix) = match parse_key(&encoded) {
            Some(parts) => parts,
            None => panic!("parse_key should succeed for tag {tag:?}"),
        };

        assert_eq!(parsed_id, max_id, "EntityId roundtrip for tag {tag:?}");
        assert_eq!(parsed_tag, tag, "Tag roundtrip for tag {tag:?}");
        assert_eq!(
            parsed_suffix,
            suffix.as_bytes(),
            "Suffix roundtrip for tag {tag:?}",
        );
    }

    // Lower boundary: ID zero, with an empty suffix.
    let min_id = EntityId::new(0);
    for tag in tags.iter().copied() {
        let encoded = encode_key(min_id, tag, b"");
        let (parsed_id, parsed_tag, parsed_suffix) = parse_key(&encoded).unwrap();
        assert_eq!(parsed_id, min_id);
        assert_eq!(parsed_tag, tag);
        assert!(parsed_suffix.is_empty());
    }
}
|
|
|
|
// =============================================================================
// UAT-08: Persistence survives reopen with scan_prefix verification
// =============================================================================
|
|
|
|
/// Writes three keys for one entity, flushes, reopens the storage and checks
/// that an entity-prefix scan finds all three keys with their values.
#[test]
fn uat08_persistence_verified_via_scan_prefix() {
    let tmp = tempfile::tempdir().unwrap();
    let entity = EntityId::new(55);

    // First session: one Meta key and two Sig keys, flushed before drop.
    {
        let storage = FjallStorage::open(tmp.path()).unwrap();
        let items = storage.backend(EntityKind::Item);

        let meta_key = encode_key(entity, Tag::Meta, b"");
        let sig_a_key = encode_key(entity, Tag::Sig, b"a");
        let sig_b_key = encode_key(entity, Tag::Sig, b"b");

        items.put(&meta_key, b"meta").unwrap();
        items.put(&sig_a_key, b"sig_a").unwrap();
        items.put(&sig_b_key, b"sig_b").unwrap();

        storage.flush_all().unwrap();
    }

    // Second session: discover the keys through a scan rather than point gets.
    {
        let storage = FjallStorage::open(tmp.path()).unwrap();
        let items = storage.backend(EntityKind::Item);

        let prefix = entity_prefix(entity);
        let found = items
            .scan_prefix(&prefix)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        assert_eq!(
            found.len(),
            3,
            "All 3 keys should survive reopen and be scan-discoverable"
        );

        // Values are checked by membership only; this test does not pin the
        // order in which the scan returns them.
        let values: Vec<&[u8]> = found.iter().map(|(_, v)| v.as_slice()).collect();
        let expected: [&[u8]; 3] = [b"meta", b"sig_a", b"sig_b"];
        for want in expected {
            assert!(values.contains(&want));
        }
    }
}
|
|
|
|
// =============================================================================
// UAT-09: FjallAtomicBatch persists across reopen
// =============================================================================
|
|
|
|
/// Commits a `FjallAtomicBatch` touching the Item and Creator keyspaces,
/// flushes, reopens the storage and checks that both writes are still there.
#[test]
fn uat09_atomic_batch_persists_across_reopen() {
    let tmp = tempfile::tempdir().unwrap();

    // Encoded once and reused for both the write and the read-back.
    let item_key = encode_key(EntityId::new(1), Tag::Meta, b"");
    let creator_key = encode_key(EntityId::new(2), Tag::Meta, b"");

    // First session: write through an atomic batch, then flush.
    {
        let storage = FjallStorage::open(tmp.path()).unwrap();

        let mut batch = FjallAtomicBatch::new(&storage);
        batch.put(storage.backend(EntityKind::Item), &item_key, b"atomic_item");
        batch.put(
            storage.backend(EntityKind::Creator),
            &creator_key,
            b"atomic_creator",
        );
        batch.commit().unwrap();

        storage.flush_all().unwrap();
    }

    // Second session: read both keys back from a freshly opened storage.
    {
        let storage = FjallStorage::open(tmp.path()).unwrap();

        let items = storage.backend(EntityKind::Item);
        let item_after = items.get(&item_key).unwrap();
        assert_eq!(
            item_after.as_deref(),
            Some(b"atomic_item".as_slice()),
            "Atomic batch item should persist across reopen"
        );

        let creators = storage.backend(EntityKind::Creator);
        let creator_after = creators.get(&creator_key).unwrap();
        assert_eq!(
            creator_after.as_deref(),
            Some(b"atomic_creator".as_slice()),
            "Atomic batch creator should persist across reopen"
        );
    }
}
|
|
|
|
// =============================================================================
// UAT-10: InMemoryBackend scan_prefix returns lexicographic order
//         with encoded keys inserted out of order
// =============================================================================
|
|
|
|
/// Inserts entity IDs 20 down to 1 into an `InMemoryBackend` and checks that an
/// empty-prefix scan yields them as 1 through 20.
#[test]
fn uat10_in_memory_scan_all_returns_numeric_order() {
    let backend = InMemoryBackend::new();

    // Descending insertion order, so any ascending result comes from the scan.
    (1u64..=20).rev().for_each(|raw| {
        let key = encode_key(EntityId::new(raw), Tag::Meta, b"");
        backend.put(&key, b"data").unwrap();
    });

    let scanned = backend
        .scan_prefix(b"")
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(scanned.len(), 20);

    // Decode the raw entity ID out of every returned key.
    let mut ids: Vec<u64> = Vec::with_capacity(scanned.len());
    for (key_bytes, _) in &scanned {
        let (entity_id, _, _) = parse_key(key_bytes).unwrap();
        ids.push(u64::from_be_bytes(entity_id.to_be_bytes()));
    }

    // Every adjacent pair must be strictly increasing.
    for (lower, upper) in ids.iter().zip(ids.iter().skip(1)) {
        assert!(
            lower < upper,
            "IDs must be in ascending order: {} < {}",
            lower,
            upper
        );
    }

    // And the full sequence must be exactly 1..=20.
    let expected: Vec<u64> = (1u64..=20).collect();
    assert_eq!(ids, expected);
}
|