stemedb/crates/stemedb-query/tests/battery/battery5_prefix_scan.rs
jordan 42d4e09508 feat: Index persistence (Phase 5C) - vector hot/cold, visual checkpoint
Phase 5C (Index Persistence) implementation:
- PersistentVectorIndex with hot/cold architecture
  - Hot: in-memory HNSW for recent vectors
  - Cold: memory-mapped HNSW loaded from disk
  - Background builder for WAL replay and atomic swap
  - BLAKE3 integrity verification
- PersistentVisualIndex with checkpoint persistence
  - BkTreeSnapshot with rkyv serialization
  - CRC32C corruption detection
  - Atomic write pattern (temp → fsync → rename)
- Key codec additions for vector index metadata
- Split large files into modules (<500 lines each)
  - battery_pre_sentinel.rs → battery/ directory
  - visual_index.rs → visual_index/ directory
  - persistent.rs → persistent/ directory
- Refactored ingest worker tests for clarity
- Updated roadmap to mark Phase 5 complete

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 15:43:18 -07:00

179 lines
7.5 KiB
Rust

//! Battery 5: scan_prefix with ConceptPath Keys.
//!
//! Tests hierarchical prefix scanning for concept path subjects.
//!
//! # Test Coverage
//!
//! | Test | Feature | Validates |
//! |------|---------|-----------|
//! | `test_prefix_scan_concept_path_keys` | Hierarchical paths | Multi-level prefix matching |
//! | `test_prefix_scan_no_false_positives` | Trailing slash | Prevents substring false positives |
//! | `test_prefix_scan_sp_keys_with_concept_paths` | SP: keys | Compound key prefix scanning |
#![allow(clippy::expect_used)] // Test code uses expect() for clear failure messages
use super::helpers::*;
/// Test 5.1: Hierarchical prefix scans over ConceptPath-shaped subject keys.
///
/// Four subject index keys are written via `key_codec::subject_index_key`,
/// one for each of these subjects:
/// - code://rust/citadeldb/auth/jwt/aud_validation
/// - code://rust/citadeldb/auth/jwt/expiry
/// - code://rust/citadeldb/net/tls/verify
/// - code://rust/citadeldb/auth/oauth/scopes
///
/// Every scan uses raw subject text as its prefix, so the test relies on the
/// encoded key beginning with the subject bytes (expected layout:
/// `{subject}\x00S:` -- confirm against `key_codec`).
///
/// Expected number of hits per prefix:
/// - "code://rust/citadeldb/auth/jwt/" -> 2
/// - "code://rust/citadeldb/auth/" -> 3
/// - "code://rust/citadeldb/" -> 4
/// - "code://" -> 4
/// - "rfc://" -> 0 (different scheme)
#[tokio::test]
async fn test_prefix_scan_concept_path_keys() {
    let store = Arc::new(HybridStore::open_temp().expect("store"));

    // Encode all four keys first, then write them in the same order.
    let subjects = [
        "code://rust/citadeldb/auth/jwt/aud_validation",
        "code://rust/citadeldb/auth/jwt/expiry",
        "code://rust/citadeldb/net/tls/verify",
        "code://rust/citadeldb/auth/oauth/scopes",
    ];
    let keys = subjects.map(key_codec::subject_index_key);
    let writes = [
        (b"hash_a", "put key1"),
        (b"hash_b", "put key2"),
        (b"hash_c", "put key3"),
        (b"hash_d", "put key4"),
    ];
    for (key, (value, put_label)) in keys.iter().zip(writes) {
        store.put(key, value).await.expect(put_label);
    }

    // One row per scan: (prefix, expected hit count, scan label, failure message).
    // Rows run from the narrowest prefix to the widest, then a foreign scheme.
    let cases: [(&[u8], usize, &str, &str); 5] = [
        (
            b"code://rust/citadeldb/auth/jwt/",
            2,
            "scan jwt",
            "Prefix 'code://rust/citadeldb/auth/jwt/' should match 2 keys (aud_validation, expiry)",
        ),
        (
            b"code://rust/citadeldb/auth/",
            3,
            "scan auth",
            "Prefix 'code://rust/citadeldb/auth/' should match 3 keys (jwt/aud, jwt/expiry, oauth/scopes)",
        ),
        (
            b"code://rust/citadeldb/",
            4,
            "scan citadeldb",
            "Prefix 'code://rust/citadeldb/' should match 4 keys (all)",
        ),
        (
            b"code://",
            4,
            "scan code",
            "Prefix 'code://' should match 4 keys (all)",
        ),
        (
            b"rfc://",
            0,
            "scan rfc",
            "Prefix 'rfc://' should match 0 keys (different scheme)",
        ),
    ];
    for (prefix, expected, scan_label, failure) in cases {
        let hits = store.scan_prefix(prefix).await.expect(scan_label);
        assert_eq!(hits.len(), expected, "{}", failure);
    }
}
/// Test 5.2: A trailing slash on the prefix rules out sibling-name matches.
///
/// Two subject index keys are written, for subjects where one is a textual
/// prefix of the other:
/// - code://rust/citadeldb/auth
/// - code://rust/citadeldb/authentication
///
/// Expected results:
/// - Prefix "code://rust/citadeldb/auth/" -> 0 keys. Neither stored key has
///   a `/` right after "auth" (expected key layout is `{subject}\x00S:` --
///   confirm against `key_codec`).
/// - Prefix "code://rust/citadeldb/auth" -> 2 keys, since both subjects start
///   with that text.
///
/// Together these show that ending a path prefix with `/` keeps "auth" from
/// also selecting "authentication".
#[tokio::test]
async fn test_prefix_scan_no_false_positives() {
    let store = Arc::new(HybridStore::open_temp().expect("store"));

    // "auth" and "authentication" share their leading bytes on purpose.
    let short_key = key_codec::subject_index_key("code://rust/citadeldb/auth");
    let long_key = key_codec::subject_index_key("code://rust/citadeldb/authentication");
    store.put(&short_key, b"hash_a").await.expect("put key1");
    store.put(&long_key, b"hash_b").await.expect("put key2");

    // With the trailing slash, the prefix is not a prefix of either stored key.
    let slash_hits = store
        .scan_prefix(b"code://rust/citadeldb/auth/")
        .await
        .expect("scan with slash");
    assert_eq!(
        slash_hits.len(),
        0,
        "Prefix 'code://rust/citadeldb/auth/' with trailing slash should match 0 keys \
         (prevents 'auth' from matching 'authentication')"
    );

    // Without the slash, the bare text is a prefix of both stored keys.
    let bare_hits = store
        .scan_prefix(b"code://rust/citadeldb/auth")
        .await
        .expect("scan without slash");
    assert_eq!(
        bare_hits.len(),
        2,
        "Prefix 'code://rust/citadeldb/auth' without trailing slash should match 2 keys \
         (both 'auth' and 'authentication' share the prefix)"
    );
}
/// Test 5.3: Prefix scan over subject+predicate keys with ConceptPath subjects.
///
/// Two keys are built with `key_codec::subject_predicate_key`, both using the
/// predicate `config_value`:
/// - subject code://rust/citadeldb/auth/jwt/aud_validation
/// - subject code://rust/citadeldb/auth/jwt/expiry
///
/// The scan prefix is the raw subject path "code://rust/citadeldb/auth/jwt/",
/// which assumes the encoded key starts with the subject bytes (expected
/// layout: `{subject}\x00SP:{predicate}` -- confirm against `key_codec`).
///
/// Expected results:
/// - the scan returns exactly 2 entries
/// - every returned key, decoded lossily as UTF-8, contains `config_value`
#[tokio::test]
async fn test_prefix_scan_sp_keys_with_concept_paths() {
    let store = Arc::new(HybridStore::open_temp().expect("store"));

    // Both compound keys share one predicate and differ only in the subject leaf.
    let predicate = "config_value";
    let aud_key = key_codec::subject_predicate_key(
        "code://rust/citadeldb/auth/jwt/aud_validation",
        predicate,
    );
    let expiry_key =
        key_codec::subject_predicate_key("code://rust/citadeldb/auth/jwt/expiry", predicate);
    store.put(&aud_key, b"hash_a").await.expect("put key1");
    store.put(&expiry_key, b"hash_b").await.expect("put key2");

    // Scan by the subject hierarchy only, i.e. the bytes before the separator.
    let matches = store
        .scan_prefix(b"code://rust/citadeldb/auth/jwt/")
        .await
        .expect("scan SP:");
    assert_eq!(
        matches.len(),
        2,
        "Prefix 'code://rust/citadeldb/auth/jwt/' should match 2 SP: keys"
    );

    // Each hit must still carry the predicate portion of the compound key.
    for (raw_key, _) in &matches {
        let rendered = String::from_utf8_lossy(raw_key);
        assert!(
            rendered.contains(predicate),
            "SP: key should contain predicate 'config_value', got: {}",
            rendered
        );
    }
}