- Add Hybrid Logical Clock (HLC) for causality tracking across nodes - Implement Merkle tree for efficient diff/sync with BLAKE3 hashing - Add CRDT-aware stores for assertions and votes with vector clocks - Create stemedb-sync crate with anti-entropy and gossip protocols - Add stemedb-rpc crate with gRPC sync service (proto definitions) - Implement SupersessionChain for tracking assertion lifecycles - Add Aphoria application for code analysis/reporting - Add battery11 replication test scaffolding - Fix .gitignore to exclude nested target directories Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
329 lines
12 KiB
Rust
329 lines
12 KiB
Rust
//! Specialized storage for assertion supersession records.
//!
//! The supersession pattern enables error correction without violating append-only
//! semantics. Instead of mutating an assertion, we create a supersession record
//! that points from the old (target) assertion to the new (replacement) assertion.
//!
//! # Storage Layout
//!
//! | Key Pattern | Value | Purpose |
//! |-------------|-------|---------|
//! | `SUP:{target_hash}` | Serialized Supersession | Quick lookup: "Is this superseded?" |
//! | `SUP:IDX:{agent_id}:{timestamp}` | target_hash | Audit: "What did this agent supersede?" |
//!
//! # Use Case: 3am Incident Investigation
//!
//! 1. An SRE finds that an agent deployed a bad config
//! 2. The trace shows the agent queried assertion X with hash `abc123`
//! 3. A supervisor creates a supersession: `abc123` → `new_hash` (Invalidate)
//! 4. Future queries skip `abc123` automatically
//! 5. The audit trail is preserved: "Who fixed it? When? Why?"
|
|
|
|
use crate::error::{Result, StorageError};
|
|
use crate::key_codec;
|
|
use crate::traits::KVStore;
|
|
use async_trait::async_trait;
|
|
use stemedb_core::serde::{deserialize, serialize};
|
|
use stemedb_core::types::{Hash, Supersession};
|
|
use tracing::{debug, instrument};
|
|
|
|
/// Specialized storage trait for supersession operations.
|
|
///
|
|
/// This trait provides supersession-specific operations on top of a generic KVStore,
|
|
/// enabling error correction with full audit trail.
|
|
#[async_trait]
|
|
pub trait SupersessionStore: Send + Sync {
|
|
/// Store a supersession record.
|
|
///
|
|
/// This operation:
|
|
/// 1. Serializes the supersession using rkyv
|
|
/// 2. Stores at `SUP:{target_hash}`
|
|
/// 3. Creates index entry at `SUP:IDX:{agent_id}:{timestamp}`
|
|
///
|
|
/// # Returns
|
|
/// Ok(()) on success, error if serialization fails.
|
|
async fn put_supersession(&self, supersession: &Supersession) -> Result<()>;
|
|
|
|
/// Check if an assertion has been superseded.
|
|
///
|
|
/// # Arguments
|
|
/// * `target_hash` - The hash of the assertion to check
|
|
///
|
|
/// # Returns
|
|
/// The supersession record if the assertion is superseded, None otherwise.
|
|
async fn get_supersession(&self, target_hash: &Hash) -> Result<Option<Supersession>>;
|
|
|
|
/// Check if an assertion is superseded (quick existence check).
|
|
///
|
|
/// # Arguments
|
|
/// * `target_hash` - The hash of the assertion to check
|
|
///
|
|
/// # Returns
|
|
/// true if the assertion has been superseded, false otherwise.
|
|
async fn is_superseded(&self, target_hash: &Hash) -> Result<bool>;
|
|
|
|
/// Get all supersessions created by a specific agent within a time range.
|
|
///
|
|
/// # Arguments
|
|
/// * `agent_id` - The Ed25519 public key of the agent
|
|
/// * `from_timestamp` - Start of time range (Unix timestamp, inclusive)
|
|
/// * `to_timestamp` - End of time range (Unix timestamp, inclusive, optional)
|
|
/// * `limit` - Maximum number of results to return (optional)
|
|
///
|
|
/// # Returns
|
|
/// Vector of supersession records, ordered by timestamp descending.
|
|
async fn get_supersessions_by_agent(
|
|
&self,
|
|
agent_id: &[u8; 32],
|
|
from_timestamp: u64,
|
|
to_timestamp: Option<u64>,
|
|
limit: Option<usize>,
|
|
) -> Result<Vec<Supersession>>;
|
|
}
|
|
|
|
/// Generic implementation of SupersessionStore for any KVStore backend.
|
|
pub struct GenericSupersessionStore<S: KVStore> {
|
|
store: S,
|
|
}
|
|
|
|
impl<S: KVStore> GenericSupersessionStore<S> {
|
|
/// Create a new GenericSupersessionStore wrapping the given KVStore.
|
|
pub fn new(store: S) -> Self {
|
|
Self { store }
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl<S: KVStore + Send + Sync> SupersessionStore for GenericSupersessionStore<S> {
|
|
#[instrument(skip(self, supersession), fields(target_hash = ?supersession.target_hash))]
|
|
async fn put_supersession(&self, supersession: &Supersession) -> Result<()> {
|
|
// Serialize the supersession
|
|
let bytes = serialize(supersession).map_err(|e| {
|
|
StorageError::Serialization(format!("Failed to serialize supersession: {}", e))
|
|
})?;
|
|
|
|
// Store at primary key
|
|
let key = key_codec::supersession_key(&hex::encode(supersession.target_hash));
|
|
self.store.put(&key, &bytes).await?;
|
|
|
|
// Store index entry (value is the target_hash for lookup)
|
|
let timestamp_bytes = supersession.timestamp.to_be_bytes();
|
|
let index_key = key_codec::supersession_index_key(
|
|
&hex::encode(supersession.agent_id),
|
|
×tamp_bytes,
|
|
);
|
|
self.store.put(&index_key, &supersession.target_hash).await?;
|
|
|
|
debug!(
|
|
target_hash = ?supersession.target_hash,
|
|
supersession_type = ?supersession.supersession_type,
|
|
"Stored supersession record"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[instrument(skip(self))]
|
|
async fn get_supersession(&self, target_hash: &Hash) -> Result<Option<Supersession>> {
|
|
let key = key_codec::supersession_key(&hex::encode(target_hash));
|
|
|
|
match self.store.get(&key).await? {
|
|
Some(bytes) => {
|
|
let supersession: Supersession = deserialize(&bytes).map_err(|e| {
|
|
StorageError::Serialization(format!(
|
|
"Failed to deserialize supersession: {}",
|
|
e
|
|
))
|
|
})?;
|
|
Ok(Some(supersession))
|
|
}
|
|
None => Ok(None),
|
|
}
|
|
}
|
|
|
|
#[instrument(skip(self))]
|
|
async fn is_superseded(&self, target_hash: &Hash) -> Result<bool> {
|
|
let key = key_codec::supersession_key(&hex::encode(target_hash));
|
|
Ok(self.store.get(&key).await?.is_some())
|
|
}
|
|
|
|
#[instrument(skip(self), fields(agent_id = ?agent_id, from = from_timestamp, to = ?to_timestamp))]
|
|
async fn get_supersessions_by_agent(
|
|
&self,
|
|
agent_id: &[u8; 32],
|
|
from_timestamp: u64,
|
|
to_timestamp: Option<u64>,
|
|
limit: Option<usize>,
|
|
) -> Result<Vec<Supersession>> {
|
|
let prefix = key_codec::supersession_index_prefix(&hex::encode(agent_id));
|
|
let entries = self.store.scan_prefix(&prefix).await?;
|
|
|
|
let to_ts = to_timestamp.unwrap_or(u64::MAX);
|
|
let max_results = limit.unwrap_or(1000);
|
|
|
|
let mut supersessions = Vec::new();
|
|
|
|
for (key, target_hash_bytes) in entries {
|
|
// Extract timestamp from key
|
|
// Key format: \x00SUP:IDX:{agent_hex}:{timestamp_be_bytes}
|
|
// We need to find the last colon and extract the 8 bytes after it
|
|
if let Some(last_colon_pos) = key.iter().rposition(|&b| b == b':') {
|
|
let timestamp_start = last_colon_pos + 1;
|
|
if key.len() < timestamp_start + 8 {
|
|
continue; // Malformed key
|
|
}
|
|
|
|
let timestamp_bytes: [u8; 8] =
|
|
key[timestamp_start..timestamp_start + 8].try_into().map_err(|_| {
|
|
StorageError::Serialization("Invalid timestamp in index key".to_string())
|
|
})?;
|
|
let timestamp = u64::from_be_bytes(timestamp_bytes);
|
|
|
|
// Filter by time range
|
|
if timestamp < from_timestamp || timestamp > to_ts {
|
|
continue;
|
|
}
|
|
|
|
// Parse target hash
|
|
if target_hash_bytes.len() != 32 {
|
|
continue; // Malformed value
|
|
}
|
|
let target_hash: Hash = target_hash_bytes.try_into().map_err(|_| {
|
|
StorageError::Serialization("Invalid target hash in index".to_string())
|
|
})?;
|
|
|
|
// Fetch the actual supersession record
|
|
if let Some(supersession) = self.get_supersession(&target_hash).await? {
|
|
supersessions.push(supersession);
|
|
|
|
if supersessions.len() >= max_results {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sort by temporal ordering descending (most recent first)
|
|
// Uses HLC comparison when available for causal ordering across
|
|
// distributed nodes, falling back to Unix timestamp for legacy data
|
|
supersessions.sort_by(|a, b| b.temporal_cmp(a));
|
|
|
|
Ok(supersessions)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::HybridStore;
    use stemedb_core::types::SupersessionType;
    use tempfile::{tempdir, TempDir};

    /// Open a `GenericSupersessionStore` backed by a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the store: dropping it deletes
    /// the directory, so it must stay alive for as long as the store is used.
    /// It comes first in the tuple so that a `let (_dir, store) = ...` binding
    /// drops the store before the directory is removed.
    async fn create_test_store() -> (TempDir, GenericSupersessionStore<HybridStore>) {
        let dir = tempdir().expect("Failed to create temp dir");
        let store = HybridStore::open(dir.path()).expect("Failed to open store");
        (dir, GenericSupersessionStore::new(store))
    }

    /// A stored record can be read back with its fields intact.
    #[tokio::test]
    async fn test_put_and_get_supersession() {
        let (_dir, store) = create_test_store().await;

        let supersession = Supersession {
            target_hash: [1u8; 32],
            supersession_type: SupersessionType::Invalidate,
            reason: "Test invalidation".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None,
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };

        store.put_supersession(&supersession).await.expect("Failed to put supersession");

        let retrieved = store
            .get_supersession(&[1u8; 32])
            .await
            .expect("Failed to get supersession")
            .expect("Expected supersession");

        assert_eq!(retrieved.target_hash, [1u8; 32]);
        assert_eq!(retrieved.supersession_type, SupersessionType::Invalidate);
        assert_eq!(retrieved.reason, "Test invalidation");
    }

    /// `is_superseded` flips to true only for the hash that was superseded.
    #[tokio::test]
    async fn test_is_superseded() {
        let (_dir, store) = create_test_store().await;

        // Not superseded initially
        assert!(!store.is_superseded(&[1u8; 32]).await.expect("Failed to check"));

        let supersession = Supersession {
            target_hash: [1u8; 32],
            supersession_type: SupersessionType::Temporal,
            reason: "Outdated".to_string(),
            new_hash: Some([2u8; 32]),
            timestamp: 1704067200,
            hlc_timestamp: None,
            agent_id: [3u8; 32],
            signature: [4u8; 64],
        };

        store.put_supersession(&supersession).await.expect("Failed to put");

        // Now superseded
        assert!(store.is_superseded(&[1u8; 32]).await.expect("Failed to check"));

        // Other hash not superseded
        assert!(!store.is_superseded(&[99u8; 32]).await.expect("Failed to check"));
    }

    /// The per-agent query honours ordering, the limit and the time range.
    #[tokio::test]
    async fn test_get_supersessions_by_agent() {
        let (_dir, store) = create_test_store().await;

        let agent_id = [42u8; 32];

        // Create multiple supersessions at different times (100 s apart)
        for i in 0..5 {
            let supersession = Supersession {
                target_hash: [i; 32],
                supersession_type: SupersessionType::Invalidate,
                reason: format!("Supersession {}", i),
                new_hash: None,
                timestamp: 1704067200 + (i as u64 * 100),
                hlc_timestamp: None,
                agent_id,
                signature: [0u8; 64],
            };
            store.put_supersession(&supersession).await.expect("Failed to put");
        }

        // Get all supersessions for agent
        let results = store
            .get_supersessions_by_agent(&agent_id, 0, None, None)
            .await
            .expect("Failed to get supersessions");

        assert_eq!(results.len(), 5);

        // Should be sorted by timestamp descending
        assert_eq!(results[0].target_hash, [4u8; 32]); // Most recent

        // Test with limit
        let limited =
            store.get_supersessions_by_agent(&agent_id, 0, None, Some(2)).await.expect("Failed");

        assert_eq!(limited.len(), 2);

        // Test with time range (both bounds inclusive)
        let ranged = store
            .get_supersessions_by_agent(&agent_id, 1704067200 + 100, Some(1704067200 + 300), None)
            .await
            .expect("Failed");

        assert_eq!(ranged.len(), 3); // timestamps 100, 200, 300
    }
}
|