//! Central key encoding/decoding for subject-prefix range sharding. //! //! ALL storage keys flow through this module. Keys are partitioned into two families: //! //! **Subject-prefixed keys** — co-located by subject for range sharding: //! ```text //! {subject}\x00{TAG}:{suffix} //! ``` //! //! **Global keys** — metadata, trust, quotas, epochs (sort first under `\x00`): //! ```text //! \x00{TAG}:{suffix} //! ``` //! //! A prefix scan on `{subject}\x00` returns ALL data for that subject. //! A prefix scan on `\x00` returns ALL global metadata. use crate::error::{Result, StorageError}; /// Separator byte between subject and tag. Also serves as global key prefix. pub const SEPARATOR: u8 = 0x00; // ── Subject validation ────────────────────────────────────────────── /// Validate that a subject string does not contain the separator byte. /// /// Subjects containing `\x00` would corrupt key boundaries. This MUST be /// called on all inbound subjects at the ingestion boundary. pub fn validate_subject(subject: &str) -> Result<()> { if subject.as_bytes().contains(&SEPARATOR) { return Err(StorageError::InputValidation( "Subject must not contain null byte (\\x00)".to_string(), )); } if subject.is_empty() { return Err(StorageError::InputValidation("Subject must not be empty".to_string())); } Ok(()) } // ── Key builders ──────────────────────────────────────────────────── /// Build a subject-prefixed key: `{subject}\x00{tag}{suffix}`. fn subject_key(subject: &str, tag: &[u8], suffix: &[u8]) -> Vec { let mut key = Vec::with_capacity(subject.len() + 1 + tag.len() + suffix.len()); key.extend_from_slice(subject.as_bytes()); key.push(SEPARATOR); key.extend_from_slice(tag); key.extend_from_slice(suffix); key } /// Build a global key: `\x00{tag}{suffix}`. fn global_key(tag: &[u8], suffix: &[u8]) -> Vec { let mut key = Vec::with_capacity(1 + tag.len() + suffix.len()); key.push(SEPARATOR); key.extend_from_slice(tag); key.extend_from_slice(suffix); key } // ── Subject-prefixed keys ─────────────────────────────────────────── /// Assertion key: `{subject}\x00H:{hash_hex}` pub fn assertion_key(subject: &str, hash_hex: &str) -> Vec { subject_key(subject, b"H:", hash_hex.as_bytes()) } /// Subject index key: `{subject}\x00S:` pub fn subject_index_key(subject: &str) -> Vec { subject_key(subject, b"S:", b"") } /// Subject+predicate index key: `{subject}\x00SP:{predicate}` pub fn subject_predicate_key(subject: &str, predicate: &str) -> Vec { subject_key(subject, b"SP:", predicate.as_bytes()) } /// Materialized view key: `{subject}\x00MV:{predicate}` pub fn mv_key(subject: &str, predicate: &str) -> Vec { subject_key(subject, b"MV:", predicate.as_bytes()) } /// Vote key: `{subject}\x00V:{assert_hex}:{vote_hex}` pub fn vote_key(subject: &str, assertion_hex: &str, vote_hex: &str) -> Vec { let suffix = format!("{}:{}", assertion_hex, vote_hex); subject_key(subject, b"V:", suffix.as_bytes()) } /// Vote scan prefix: `{subject}\x00V:{assert_hex}:` pub fn vote_scan_prefix(subject: &str, assertion_hex: &str) -> Vec { let suffix = format!("{}:", assertion_hex); subject_key(subject, b"V:", suffix.as_bytes()) } /// Vote count cache key: `{subject}\x00VC:{assert_hex}` pub fn vote_count_key(subject: &str, assertion_hex: &str) -> Vec { subject_key(subject, b"VC:", assertion_hex.as_bytes()) } /// Vote weight cache key: `{subject}\x00VW:{assert_hex}` pub fn vote_weight_key(subject: &str, assertion_hex: &str) -> Vec { subject_key(subject, b"VW:", assertion_hex.as_bytes()) } /// Vote count scan prefix: `{subject}\x00VC:` - for scanning all vote counts under a subject. pub fn vote_count_prefix(subject: &str) -> Vec { subject_key(subject, b"VC:", b"") } /// Assertion scan prefix: `{subject}\x00H:` - for scanning all assertions under a subject. pub fn assertion_prefix(subject: &str) -> Vec { subject_key(subject, b"H:", b"") } /// Gold standard key: `{subject}\x00GS:{predicate}` pub fn gold_standard_key(subject: &str, predicate: &str) -> Vec { subject_key(subject, b"GS:", predicate.as_bytes()) } /// Subject+predicate scan prefix: `{subject}\x00SP:` — returns all SP keys for a subject. pub fn subject_predicate_scan_prefix(subject: &str) -> Vec { subject_key(subject, b"SP:", b"") } /// Subject scan prefix: `{subject}\x00` — returns ALL data for a subject. pub fn subject_scan_prefix(subject: &str) -> Vec { let mut key = Vec::with_capacity(subject.len() + 1); key.extend_from_slice(subject.as_bytes()); key.push(SEPARATOR); key } // ── Global keys ───────────────────────────────────────────────────── /// Trust rank key: `\x00TRUST:{agent_id_hex}` pub fn trust_rank_key(agent_id_hex: &str) -> Vec { global_key(b"TRUST:", agent_id_hex.as_bytes()) } /// Quota record key: `\x00QUOTA:{agent_hex}:{window}` pub fn quota_key(agent_hex: &str, window: u64) -> Vec { let suffix = format!("{}:{}", agent_hex, window); global_key(b"QUOTA:", suffix.as_bytes()) } /// Quota limit key: `\x00QLIMIT:{agent_id_hex}` pub fn quota_limit_key(agent_id_hex: &str) -> Vec { global_key(b"QLIMIT:", agent_id_hex.as_bytes()) } /// Epoch key: `\x00E:{epoch_id_hex}` pub fn epoch_key(epoch_id_hex: &str) -> Vec { global_key(b"E:", epoch_id_hex.as_bytes()) } /// Superseded marker key: `\x00SUPERSEDED:{epoch_id_hex}` pub fn superseded_key(epoch_id_hex: &str) -> Vec { global_key(b"SUPERSEDED:", epoch_id_hex.as_bytes()) } /// Supersession record key: `\x00SUP:{target_hash_hex}` pub fn supersession_key(target_hash_hex: &str) -> Vec { global_key(b"SUP:", target_hash_hex.as_bytes()) } /// Supersession agent index key: `\x00SUP:IDX:{agent_hex}:{ts_be_bytes}` pub fn supersession_index_key(agent_hex: &str, timestamp_be_bytes: &[u8]) -> Vec { let mut suffix = Vec::with_capacity(agent_hex.len() + 1 + timestamp_be_bytes.len()); suffix.extend_from_slice(agent_hex.as_bytes()); suffix.push(b':'); suffix.extend_from_slice(timestamp_be_bytes); global_key(b"SUP:IDX:", &suffix) } /// Supersession agent scan prefix: `\x00SUP:IDX:{agent_hex}:` pub fn supersession_index_prefix(agent_hex: &str) -> Vec { let suffix = format!("{}:", agent_hex); global_key(b"SUP:IDX:", suffix.as_bytes()) } /// Audit record key: `\x00AUD:{query_id_hex}` pub fn audit_key(query_id_hex: &str) -> Vec { global_key(b"AUD:", query_id_hex.as_bytes()) } /// Audit agent index key: `\x00AUDA:{agent_hex}:{timestamp_hex}:{query_hex}` pub fn audit_agent_index_key(agent_hex: &str, timestamp_hex: &str, query_hex: &str) -> Vec { let suffix = format!("{}:{}:{}", agent_hex, timestamp_hex, query_hex); global_key(b"AUDA:", suffix.as_bytes()) } /// Audit agent scan prefix: `\x00AUDA:{agent_hex}:` pub fn audit_agent_prefix(agent_hex: &str) -> Vec { let suffix = format!("{}:", agent_hex); global_key(b"AUDA:", suffix.as_bytes()) } /// Audit listing prefix: `\x00AUD:` pub fn audit_scan_prefix() -> Vec { global_key(b"AUD:", b"") } /// Escalation key: `\x00ESC:{timestamp}:{id_hex}` pub fn escalation_key(timestamp: u64, id_hex: &str) -> Vec { let suffix = format!("{}:{}", timestamp, id_hex); global_key(b"ESC:", suffix.as_bytes()) } /// Escalation scan prefix: `\x00ESC:` pub fn escalation_scan_prefix() -> Vec { global_key(b"ESC:", b"") } /// Trust pack key: `\x00TP:{pack_id_bytes}` pub fn trust_pack_key(pack_id: &[u8]) -> Vec { global_key(b"TP:", pack_id) } /// Trust pack scan prefix: `\x00TP:` pub fn trust_pack_scan_prefix() -> Vec { global_key(b"TP:", b"") } /// Gold standard verified key: `\x00GS_VERIFIED:{agent_hex}:{subject}:{predicate}` pub fn gs_verified_key(agent_hex: &str, subject: &str, predicate: &str) -> Vec { let suffix = format!("{}:{}:{}", agent_hex, subject, predicate); global_key(b"GS_VERIFIED:", suffix.as_bytes()) } /// Cursor key: `\x00META:cursor:ingest` pub fn cursor_key() -> Vec { global_key(b"META:cursor:ingest", b"") } /// Assertion count key: `\x00META:assertion_count` pub fn assertion_count_key() -> Vec { global_key(b"META:assertion_count", b"") } /// Trust rank scan prefix for decay: `\x00TRUST:` pub fn trust_rank_scan_prefix() -> Vec { global_key(b"TRUST:", b"") } // ── Secondary indexes ─────────────────────────────────────────────── /// Known subjects index key: `\x00SUBJECTS:{subject}` pub fn subjects_index_key(subject: &str) -> Vec { global_key(b"SUBJECTS:", subject.as_bytes()) } /// Known subjects scan prefix: `\x00SUBJECTS:` pub fn subjects_scan_prefix() -> Vec { global_key(b"SUBJECTS:", b"") } /// Gold standard listing index: `\x00GS_LIST:{subject}:{predicate}` pub fn gs_list_key(subject: &str, predicate: &str) -> Vec { let suffix = format!("{}:{}", subject, predicate); global_key(b"GS_LIST:", suffix.as_bytes()) } /// Gold standard listing scan prefix: `\x00GS_LIST:` pub fn gs_list_scan_prefix() -> Vec { global_key(b"GS_LIST:", b"") } /// Hash-to-subject reverse index: `\x00HASH_SUBJECT:{hash_hex}` pub fn hash_subject_key(hash_hex: &str) -> Vec { global_key(b"HASH_SUBJECT:", hash_hex.as_bytes()) } // ── Vector Index Persistence ───────────────────────────────────────── // // These keys are reserved for KV-backed cursor persistence (future phase). // Currently, PersistentVectorIndex stores version in filename and cursors // are rebuilt from WAL replay. /// Vector index metadata key: `\x00VI:meta` #[allow(dead_code)] pub fn vi_meta_key() -> Vec { global_key(b"VI:meta", b"") } /// Vector index hot cursor key: `\x00VI:hot_cursor` /// /// Stores the WAL offset from which the hot index should replay on restart. #[allow(dead_code)] pub fn vi_hot_cursor_key() -> Vec { global_key(b"VI:hot_cursor", b"") } /// Vector index cold version key: `\x00VI:cold_version` /// /// Stores the version number of the current cold index snapshot. #[allow(dead_code)] pub fn vi_cold_version_key() -> Vec { global_key(b"VI:cold_version", b"") } // ── Visual Index Persistence ───────────────────────────────────────── /// Visual index metadata key: `\x00VH:meta` #[allow(dead_code)] pub fn vh_meta_key() -> Vec { global_key(b"VH:meta", b"") } // ── Concept Alias Keys ─────────────────────────────────────────────── /// Alias forward key: `\x00CA:{alias_path}` /// /// Maps an alias path to its canonical ConceptPath. pub fn alias_key(alias_path: &str) -> Vec { global_key(b"CA:", alias_path.as_bytes()) } /// Alias reverse key: `\x00CAR:{canonical_path}` /// /// Maps a canonical path to all alias paths (stored as Vec). pub fn alias_reverse_key(canonical_path: &str) -> Vec { global_key(b"CAR:", canonical_path.as_bytes()) } /// Alias scan prefix: `\x00CA:` /// /// Used to list all aliases in the store. pub fn alias_scan_prefix() -> Vec { global_key(b"CA:", b"") } // ── Key extraction / parsing ──────────────────────────────────────── /// Extract subject from a `\x00SUBJECTS:{subject}` key. /// /// Returns the subject string, or `None` if the key doesn't match the expected format. pub fn extract_subject_from_subjects_key(key: &[u8]) -> Option { let prefix = b"\x00SUBJECTS:"; if key.starts_with(prefix) { std::str::from_utf8(&key[prefix.len()..]).ok().map(|s| s.to_string()) } else { None } } /// Extract subject and predicate from a `{subject}\x00SP:{predicate}` key. /// /// Returns `(subject, predicate)` or `None` if the key doesn't match. pub fn extract_sp_key(key: &[u8]) -> Option<(String, String)> { // Find the \x00 separator let sep_pos = memchr::memchr(SEPARATOR, key)?; if sep_pos == 0 { return None; // Global key, not subject-prefixed } let subject = std::str::from_utf8(&key[..sep_pos]).ok()?; let after_sep = &key[sep_pos + 1..]; // Check for SP: tag if !after_sep.starts_with(b"SP:") { return None; } let predicate = std::str::from_utf8(&after_sep[3..]).ok()?; if subject.is_empty() || predicate.is_empty() { return None; } Some((subject.to_string(), predicate.to_string())) } /// Extract the tag portion from a key (the part after the separator). /// /// For subject-prefixed keys: returns bytes after `{subject}\x00` /// For global keys: returns bytes after `\x00` pub fn extract_tag(key: &[u8]) -> &[u8] { if key.first() == Some(&SEPARATOR) { // Global key: \x00TAG:rest &key[1..] } else if let Some(pos) = memchr::memchr(SEPARATOR, key) { // Subject-prefixed: subject\x00TAG:rest &key[pos + 1..] } else { key } } /// Check if a key is a global key (starts with `\x00`). pub fn is_global_key(key: &[u8]) -> bool { key.first() == Some(&SEPARATOR) } /// Extract the subject from a subject-prefixed key. /// /// Returns `None` for global keys or keys without a separator. pub fn extract_subject(key: &[u8]) -> Option<&str> { if is_global_key(key) { return None; } if let Some(pos) = memchr::memchr(SEPARATOR, key) { std::str::from_utf8(&key[..pos]).ok() } else { None } } /// Extract alias path from a `\x00CA:{alias_path}` key. /// /// Returns the alias path string, or `None` if the key doesn't match the expected format. pub fn extract_alias_path(key: &[u8]) -> Option { let prefix = b"\x00CA:"; if key.starts_with(prefix) { std::str::from_utf8(&key[prefix.len()..]).ok().map(|s| s.to_string()) } else { None } } #[cfg(test)] mod tests;