stemedb/crates/stemedb-storage/src/supersession_store.rs
jordan 2b0923f20e feat: Distributed replication foundation (Phase 6A) - HLC, Merkle trees, CRDT stores, sync protocol
- Add Hybrid Logical Clock (HLC) for causality tracking across nodes
- Implement Merkle tree for efficient diff/sync with BLAKE3 hashing
- Add CRDT-aware stores for assertions and votes with vector clocks
- Create stemedb-sync crate with anti-entropy and gossip protocols
- Add stemedb-rpc crate with gRPC sync service (proto definitions)
- Implement SupersessionChain for tracking assertion lifecycles
- Add Aphoria application for code analysis/reporting
- Add battery11 replication test scaffolding
- Fix .gitignore to exclude nested target directories

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 19:31:54 -07:00

329 lines
12 KiB
Rust

//! Specialized storage for assertion supersession records.
//!
//! The supersession pattern enables error correction without violating append-only
//! semantics. Instead of mutating an assertion, we create a supersession record
//! that points from the old (target) to the new (replacement) assertion.
//!
//! # Storage Layout
//!
//! | Key Pattern | Value | Purpose |
//! |-------------|-------|---------|
//! | `SUP:{target_hash}` | Serialized Supersession | Quick lookup: "Is this superseded?" |
//! | `SUP:IDX:{agent_id}:{timestamp}` | target_hash | Audit: "What did this agent supersede?" |
//!
//! # Use Case: 3am Incident Investigation
//!
//! 1. SRE finds agent deployed bad config
//! 2. Trace shows agent queried assertion X with hash `abc123`
//! 3. Supervisor creates supersession: `abc123` → `new_hash` (Invalidate)
//! 4. Future queries skip `abc123` automatically
//! 5. Audit trail preserved: "Who fixed it? When? Why?"
use crate::error::{Result, StorageError};
use crate::key_codec;
use crate::traits::KVStore;
use async_trait::async_trait;
use stemedb_core::serde::{deserialize, serialize};
use stemedb_core::types::{Hash, Supersession};
use tracing::{debug, instrument};
/// Specialized storage trait for supersession operations.
///
/// Implementations persist [`Supersession`] records keyed by the hash of the
/// assertion they supersede, plus a per-agent index used for audit queries.
/// `GenericSupersessionStore` in this module implements it on top of any `KVStore`.
#[async_trait]
pub trait SupersessionStore: Send + Sync {
/// Store a supersession record.
///
/// In the generic implementation this operation:
/// 1. Serializes the supersession via `stemedb_core::serde::serialize`
/// 2. Stores the bytes at `SUP:{target_hash}`
/// 3. Creates an index entry at `SUP:IDX:{agent_id}:{timestamp}` whose value
///    is the target hash
///
/// # Returns
/// `Ok(())` on success.
///
/// # Errors
/// Returns an error if serialization fails or if either write to the
/// underlying store fails.
async fn put_supersession(&self, supersession: &Supersession) -> Result<()>;
/// Fetch the supersession record for an assertion, if one exists.
///
/// # Arguments
/// * `target_hash` - The hash of the assertion to check
///
/// # Returns
/// The supersession record if the assertion is superseded, `None` otherwise.
///
/// # Errors
/// Returns an error if the store read fails or the stored bytes cannot be
/// deserialized.
async fn get_supersession(&self, target_hash: &Hash) -> Result<Option<Supersession>>;
/// Check if an assertion is superseded (quick existence check).
///
/// Unlike [`get_supersession`](Self::get_supersession), the generic
/// implementation does not deserialize the stored record.
///
/// # Arguments
/// * `target_hash` - The hash of the assertion to check
///
/// # Returns
/// `true` if the assertion has been superseded, `false` otherwise.
async fn is_superseded(&self, target_hash: &Hash) -> Result<bool>;
/// Get all supersessions created by a specific agent within a time range.
///
/// # Arguments
/// * `agent_id` - The Ed25519 public key of the agent
/// * `from_timestamp` - Start of time range (Unix timestamp, inclusive)
/// * `to_timestamp` - End of time range (Unix timestamp, inclusive); `None`
///   means no upper bound
/// * `limit` - Maximum number of results to return; the generic
///   implementation uses 1000 when `None`
///
/// # Returns
/// Vector of supersession records, ordered by timestamp descending.
///
/// Note: the generic implementation stops collecting once `limit` records
/// have been gathered in index-scan order and only then sorts, so a limited
/// result is not necessarily the `limit` most recent records.
async fn get_supersessions_by_agent(
&self,
agent_id: &[u8; 32],
from_timestamp: u64,
to_timestamp: Option<u64>,
limit: Option<usize>,
) -> Result<Vec<Supersession>>;
}
/// Generic implementation of SupersessionStore for any KVStore backend.
///
/// Both the primary supersession records and the per-agent index entries are
/// written to the single wrapped store.
pub struct GenericSupersessionStore<S: KVStore> {
/// Backing key-value store holding primary records and index entries.
store: S,
}
impl<S: KVStore> GenericSupersessionStore<S> {
/// Create a new GenericSupersessionStore wrapping the given KVStore.
pub fn new(store: S) -> Self {
Self { store }
}
}
#[async_trait]
impl<S: KVStore + Send + Sync> SupersessionStore for GenericSupersessionStore<S> {
    /// Persist `supersession` under its primary key and add the per-agent index entry.
    ///
    /// The two writes are issued one after the other and are not atomic: if the
    /// index write fails, the primary record has already been stored.
    ///
    /// # Errors
    /// Returns `StorageError::Serialization` if the record cannot be serialized,
    /// or whatever error the underlying store reports for either write.
    #[instrument(skip(self, supersession), fields(target_hash = ?supersession.target_hash))]
    async fn put_supersession(&self, supersession: &Supersession) -> Result<()> {
        // Serialize the supersession
        let bytes = serialize(supersession).map_err(|e| {
            StorageError::Serialization(format!("Failed to serialize supersession: {}", e))
        })?;

        // Store at primary key
        let key = key_codec::supersession_key(&hex::encode(supersession.target_hash));
        self.store.put(&key, &bytes).await?;

        // Store index entry (value is the target_hash for lookup). The timestamp
        // is encoded big-endian so that byte-wise key order matches numeric order.
        let timestamp_bytes = supersession.timestamp.to_be_bytes();
        let index_key = key_codec::supersession_index_key(
            &hex::encode(supersession.agent_id),
            &timestamp_bytes,
        );
        self.store.put(&index_key, &supersession.target_hash).await?;

        debug!(
            target_hash = ?supersession.target_hash,
            supersession_type = ?supersession.supersession_type,
            "Stored supersession record"
        );
        Ok(())
    }

    /// Load and deserialize the supersession record stored for `target_hash`.
    ///
    /// Returns `Ok(None)` when no record exists for that hash.
    ///
    /// # Errors
    /// Returns `StorageError::Serialization` if the stored bytes cannot be
    /// deserialized, or the underlying store's error if the read fails.
    #[instrument(skip(self))]
    async fn get_supersession(&self, target_hash: &Hash) -> Result<Option<Supersession>> {
        let key = key_codec::supersession_key(&hex::encode(target_hash));
        match self.store.get(&key).await? {
            Some(bytes) => {
                let supersession: Supersession = deserialize(&bytes).map_err(|e| {
                    StorageError::Serialization(format!(
                        "Failed to deserialize supersession: {}",
                        e
                    ))
                })?;
                Ok(Some(supersession))
            }
            None => Ok(None),
        }
    }

    /// Report whether a supersession record exists for `target_hash`.
    ///
    /// Only the presence of the primary key is checked; the stored bytes are
    /// not deserialized.
    #[instrument(skip(self))]
    async fn is_superseded(&self, target_hash: &Hash) -> Result<bool> {
        let key = key_codec::supersession_key(&hex::encode(target_hash));
        Ok(self.store.get(&key).await?.is_some())
    }

    /// Collect the supersessions recorded by `agent_id` whose timestamps fall in
    /// `[from_timestamp, to_timestamp]` (both inclusive).
    ///
    /// `to_timestamp` defaults to `u64::MAX` and `limit` to 1000. Index entries
    /// are visited in the order `scan_prefix` yields them and collection stops
    /// once `limit` records have been gathered; the gathered records are then
    /// sorted most-recent-first. A `limit` of `Some(0)` yields an empty vector.
    ///
    /// Index entries with a malformed key or value, and entries whose primary
    /// record is missing, are skipped.
    ///
    /// # Errors
    /// Returns the underlying store's error if the scan or a record read fails,
    /// or `StorageError::Serialization` if a record cannot be deserialized.
    #[instrument(skip(self), fields(agent_id = ?agent_id, from = from_timestamp, to = ?to_timestamp))]
    async fn get_supersessions_by_agent(
        &self,
        agent_id: &[u8; 32],
        from_timestamp: u64,
        to_timestamp: Option<u64>,
        limit: Option<usize>,
    ) -> Result<Vec<Supersession>> {
        let prefix = key_codec::supersession_index_prefix(&hex::encode(agent_id));
        let entries = self.store.scan_prefix(&prefix).await?;
        let to_ts = to_timestamp.unwrap_or(u64::MAX);
        let max_results = limit.unwrap_or(1000);
        let mut supersessions = Vec::new();

        for (key, target_hash_bytes) in entries {
            // Checked before doing any work so that a limit of zero returns
            // nothing instead of one record.
            if supersessions.len() >= max_results {
                break;
            }

            // Extract timestamp from key
            // Key format: \x00SUP:IDX:{agent_hex}:{timestamp_be_bytes}
            // The timestamp is raw binary and may itself contain the byte 0x3A
            // (':'), so searching for the last colon is unreliable. Instead take
            // the fixed-width 8-byte suffix and verify the separator before it.
            let timestamp_start = match key.len().checked_sub(8) {
                Some(start) if start > 0 && key[start - 1] == b':' => start,
                _ => continue, // Malformed key
            };
            let timestamp_bytes: [u8; 8] =
                key[timestamp_start..timestamp_start + 8].try_into().map_err(|_| {
                    StorageError::Serialization("Invalid timestamp in index key".to_string())
                })?;
            let timestamp = u64::from_be_bytes(timestamp_bytes);

            // Filter by time range
            if timestamp < from_timestamp || timestamp > to_ts {
                continue;
            }

            // Parse target hash
            if target_hash_bytes.len() != 32 {
                continue; // Malformed value
            }
            let target_hash: Hash = target_hash_bytes.try_into().map_err(|_| {
                StorageError::Serialization("Invalid target hash in index".to_string())
            })?;

            // Fetch the actual supersession record; an index entry without a
            // primary record is ignored.
            if let Some(supersession) = self.get_supersession(&target_hash).await? {
                supersessions.push(supersession);
            }
        }

        // Sort by temporal ordering descending (most recent first)
        // Uses HLC comparison when available for causal ordering across
        // distributed nodes, falling back to Unix timestamp for legacy data
        supersessions.sort_by(|a, b| b.temporal_cmp(a));
        Ok(supersessions)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::HybridStore;
    use stemedb_core::types::SupersessionType;
    use tempfile::{tempdir, TempDir};

    /// Open a `HybridStore` in a fresh temporary directory and wrap it.
    ///
    /// The `TempDir` guard is returned together with the store because dropping
    /// the guard deletes the directory. Callers must keep it bound (for example
    /// as `_dir`, not `_`) for as long as the store is in use. It comes first in
    /// the tuple so that, when destructured in that order, the store is dropped
    /// before the directory is removed.
    async fn create_test_store() -> (TempDir, GenericSupersessionStore<HybridStore>) {
        let dir = tempdir().expect("Failed to create temp dir");
        let store = HybridStore::open(dir.path()).expect("Failed to open store");
        (dir, GenericSupersessionStore::new(store))
    }

    /// A stored record can be read back by its target hash with fields intact.
    #[tokio::test]
    async fn test_put_and_get_supersession() {
        let (_dir, store) = create_test_store().await;
        let supersession = Supersession {
            target_hash: [1u8; 32],
            supersession_type: SupersessionType::Invalidate,
            reason: "Test invalidation".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None,
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };
        store.put_supersession(&supersession).await.expect("Failed to put supersession");

        let retrieved =
            store.get_supersession(&[1u8; 32]).await.expect("Failed to get supersession");
        assert!(retrieved.is_some());
        let retrieved = retrieved.expect("Expected supersession");
        assert_eq!(retrieved.target_hash, [1u8; 32]);
        assert_eq!(retrieved.supersession_type, SupersessionType::Invalidate);
        assert_eq!(retrieved.reason, "Test invalidation");
    }

    /// `is_superseded` flips to true only for the hash that was superseded.
    #[tokio::test]
    async fn test_is_superseded() {
        let (_dir, store) = create_test_store().await;

        // Not superseded initially
        assert!(!store.is_superseded(&[1u8; 32]).await.expect("Failed to check"));

        let supersession = Supersession {
            target_hash: [1u8; 32],
            supersession_type: SupersessionType::Temporal,
            reason: "Outdated".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None,
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };
        store.put_supersession(&supersession).await.expect("Failed to put");

        // Now superseded
        assert!(store.is_superseded(&[1u8; 32]).await.expect("Failed to check"));
        // Other hash not superseded
        assert!(!store.is_superseded(&[99u8; 32]).await.expect("Failed to check"));
    }

    /// The per-agent query honours ordering, `limit`, and the inclusive time range.
    #[tokio::test]
    async fn test_get_supersessions_by_agent() {
        let (_dir, store) = create_test_store().await;
        let agent_id = [42u8; 32];

        // Create multiple supersessions at different times
        for i in 0..5 {
            let supersession = Supersession {
                target_hash: [i; 32],
                supersession_type: SupersessionType::Invalidate,
                reason: format!("Supersession {}", i),
                new_hash: None,
                timestamp: 1704067200 + (i as u64 * 100),
                hlc_timestamp: None,
                agent_id,
                signature: [0u8; 64],
            };
            store.put_supersession(&supersession).await.expect("Failed to put");
        }

        // Get all supersessions for agent
        let results = store
            .get_supersessions_by_agent(&agent_id, 0, None, None)
            .await
            .expect("Failed to get supersessions");
        assert_eq!(results.len(), 5);
        // Should be sorted by timestamp descending
        assert_eq!(results[0].target_hash, [4u8; 32]); // Most recent

        // Test with limit
        let limited =
            store.get_supersessions_by_agent(&agent_id, 0, None, Some(2)).await.expect("Failed");
        assert_eq!(limited.len(), 2);

        // Test with time range
        let ranged = store
            .get_supersessions_by_agent(&agent_id, 1704067200 + 100, Some(1704067200 + 300), None)
            .await
            .expect("Failed");
        assert_eq!(ranged.len(), 3); // timestamps 100, 200, 300
    }
}