Add CRC32C checksums to WAL record format (v2), implement crash recovery with automatic truncation of corrupt records, add feature-gated group commit buffer for batched fsync under concurrent load, and implement log rotation via segment files with global offset addressing. Key changes: - Record format v2: [len:u32][crc32c:u32][blake3:32][payload:N] - recover_file() scans and truncates corrupt tail records - GroupCommitBuffer batches fsync via MPSC channel (tokio feature gate) - SegmentManager with binary search resolution and cursor-based cleanup - Journal::read() auto-refreshes segments on miss for writer/reader split - Split recovery.rs and key_codec.rs into directory modules for 500-line max Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
286 lines
10 KiB
Rust
286 lines
10 KiB
Rust
//! Query audit trail storage for incident investigation.
//!
//! Every query is logged with provenance to enable "Why did you think that?"
//! debugging. This is critical for SRE incident investigation and agent
//! decision auditing.
//!
//! # Storage Layout
//!
//! | Key Pattern | Value | Purpose |
//! |-------------|-------|---------|
//! | `\x00AUD:{query_id}` | Serialized QueryAudit | Individual audit records |
//! | `\x00AUDA:{agent_id}:{timestamp}:{query_id}` | Empty | Agent index for temporal queries |
//!
//! # Design Philosophy
//!
//! Following the "Deep Module" principle:
//! - Simple interface hiding complex indexing
//! - O(1) individual audit lookups
//! - Agent+time range queries via prefix scan
//!
//! All operations are append-only (audits are never modified or deleted).
|
|
|
|
mod store_impl;
|
|
|
|
use async_trait::async_trait;
|
|
use stemedb_core::types::{QueryAudit, QueryId};
|
|
|
|
use crate::error::Result;
|
|
|
|
pub use store_impl::GenericAuditStore;
|
|
|
|
/// Specialized storage trait for query audit operations.
|
|
///
|
|
/// This trait provides audit-specific operations on top of a generic KVStore,
|
|
/// enabling efficient audit logging and retrieval for incident investigation.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```ignore
|
|
/// let audit_store = GenericAuditStore::new(kv_store);
|
|
///
|
|
/// // Log a query audit
|
|
/// audit_store.put_audit(&audit).await?;
|
|
///
|
|
/// // Retrieve a specific audit
|
|
/// let audit = audit_store.get_audit(&query_id).await?;
|
|
///
|
|
/// // Find all audits for an agent in a time range
|
|
/// let audits = audit_store.get_audits_for_agent(&agent_id, from, to).await?;
|
|
/// ```
|
|
#[async_trait]
|
|
pub trait AuditStore: Send + Sync {
|
|
/// Store a query audit record.
|
|
///
|
|
/// This operation:
|
|
/// 1. Serializes the audit using rkyv
|
|
/// 2. Stores at `\x00AUD:{query_id}`
|
|
/// 3. Creates agent index entry at `\x00AUDA:{agent_id}:{timestamp}:{query_id}`
|
|
///
|
|
/// # Returns
|
|
/// The query_id for reference.
|
|
async fn put_audit(&self, audit: &QueryAudit) -> Result<QueryId>;
|
|
|
|
/// Get a specific audit record by its query ID.
|
|
///
|
|
/// # Returns
|
|
/// The audit record if found, None otherwise.
|
|
async fn get_audit(&self, query_id: &QueryId) -> Result<Option<QueryAudit>>;
|
|
|
|
/// Get audit records for a specific agent within a time range.
|
|
///
|
|
/// Uses the agent index for efficient temporal queries.
|
|
///
|
|
/// # Arguments
|
|
/// * `agent_id` - The agent's public key
|
|
/// * `from_timestamp` - Start of time range (inclusive)
|
|
/// * `to_timestamp` - End of time range (inclusive), None for unbounded
|
|
/// * `limit` - Maximum number of records to return
|
|
///
|
|
/// # Returns
|
|
/// Vector of audit records, sorted by timestamp ascending, capped at limit.
|
|
async fn get_audits_for_agent(
|
|
&self,
|
|
agent_id: &[u8; 32],
|
|
from_timestamp: u64,
|
|
to_timestamp: Option<u64>,
|
|
limit: usize,
|
|
) -> Result<Vec<QueryAudit>>;
|
|
|
|
/// List recent audit records across all agents.
|
|
///
|
|
/// Scans all `\x00AUD:` keys and returns the most recent audits.
|
|
///
|
|
/// # Arguments
|
|
/// * `limit` - Maximum number of records to return
|
|
///
|
|
/// # Returns
|
|
/// Vector of audit records, sorted by timestamp descending (most recent first).
|
|
async fn list_recent_audits(&self, limit: usize) -> Result<Vec<QueryAudit>>;
|
|
|
|
/// Check if any audits exist for an agent.
|
|
async fn has_audits_for_agent(&self, agent_id: &[u8; 32]) -> Result<bool>;
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::HybridStore;
|
|
use std::sync::Arc;
|
|
use stemedb_core::types::{ContributingAssertion, LifecycleStage};
|
|
|
|
fn create_test_audit(
|
|
query_id: QueryId,
|
|
agent_id: Option<[u8; 32]>,
|
|
timestamp: u64,
|
|
) -> QueryAudit {
|
|
QueryAudit {
|
|
query_id,
|
|
agent_id,
|
|
timestamp,
|
|
params: stemedb_core::types::QueryParams {
|
|
subject: Some("Tesla".to_string()),
|
|
predicate: Some("revenue".to_string()),
|
|
lifecycle: Some(LifecycleStage::Approved),
|
|
epoch: None,
|
|
lens: Some("Recency".to_string()),
|
|
},
|
|
result_hash: Some([1u8; 32]),
|
|
result_confidence: 0.95,
|
|
contributing_assertions: vec![ContributingAssertion {
|
|
assertion_hash: [2u8; 32],
|
|
weight: 1.0,
|
|
source_hash: [3u8; 32],
|
|
lifecycle: LifecycleStage::Approved,
|
|
}],
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_put_and_get_audit() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
let query_id = [10u8; 32];
|
|
let agent_id = Some([1u8; 32]);
|
|
let audit = create_test_audit(query_id, agent_id, 1000);
|
|
|
|
// Put audit
|
|
let returned_id = audit_store.put_audit(&audit).await.expect("Failed to put audit");
|
|
assert_eq!(returned_id, query_id);
|
|
|
|
// Get audit back
|
|
let retrieved = audit_store.get_audit(&query_id).await.expect("Failed to get audit");
|
|
assert!(retrieved.is_some());
|
|
|
|
let retrieved_audit = retrieved.expect("Audit should exist");
|
|
assert_eq!(retrieved_audit.query_id, query_id);
|
|
assert_eq!(retrieved_audit.agent_id, agent_id);
|
|
assert_eq!(retrieved_audit.timestamp, 1000);
|
|
assert!((retrieved_audit.result_confidence - 0.95).abs() < f32::EPSILON);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_get_audits_for_agent() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
let agent1 = [1u8; 32];
|
|
let agent2 = [2u8; 32];
|
|
|
|
// Create audits for different agents and times
|
|
let audit1 = create_test_audit([10u8; 32], Some(agent1), 1000);
|
|
let audit2 = create_test_audit([11u8; 32], Some(agent1), 2000);
|
|
let audit3 = create_test_audit([12u8; 32], Some(agent1), 3000);
|
|
let audit4 = create_test_audit([13u8; 32], Some(agent2), 2500);
|
|
|
|
audit_store.put_audit(&audit1).await.expect("put");
|
|
audit_store.put_audit(&audit2).await.expect("put");
|
|
audit_store.put_audit(&audit3).await.expect("put");
|
|
audit_store.put_audit(&audit4).await.expect("put");
|
|
|
|
// Get all audits for agent1
|
|
let agent1_audits =
|
|
audit_store.get_audits_for_agent(&agent1, 0, None, 100).await.expect("get");
|
|
assert_eq!(agent1_audits.len(), 3);
|
|
|
|
// Get audits for agent1 in time range
|
|
let agent1_range =
|
|
audit_store.get_audits_for_agent(&agent1, 1500, Some(2500), 100).await.expect("get");
|
|
assert_eq!(agent1_range.len(), 1);
|
|
assert_eq!(agent1_range[0].timestamp, 2000);
|
|
|
|
// Get audits for agent2
|
|
let agent2_audits =
|
|
audit_store.get_audits_for_agent(&agent2, 0, None, 100).await.expect("get");
|
|
assert_eq!(agent2_audits.len(), 1);
|
|
|
|
// Test limit parameter
|
|
let limited = audit_store.get_audits_for_agent(&agent1, 0, None, 2).await.expect("get");
|
|
assert_eq!(limited.len(), 2);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_list_recent_audits() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
// Create audits with different timestamps
|
|
for i in 0..5 {
|
|
let mut query_id = [0u8; 32];
|
|
query_id[0] = i;
|
|
let audit = create_test_audit(query_id, Some([1u8; 32]), 1000 + (i as u64) * 100);
|
|
audit_store.put_audit(&audit).await.expect("put");
|
|
}
|
|
|
|
// List recent with limit
|
|
let recent = audit_store.list_recent_audits(3).await.expect("list");
|
|
assert_eq!(recent.len(), 3);
|
|
|
|
// Should be in descending timestamp order
|
|
assert_eq!(recent[0].timestamp, 1400);
|
|
assert_eq!(recent[1].timestamp, 1300);
|
|
assert_eq!(recent[2].timestamp, 1200);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_audit_without_agent() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
// Audit without agent_id (anonymous query)
|
|
let query_id = [20u8; 32];
|
|
let audit = create_test_audit(query_id, None, 1000);
|
|
|
|
audit_store.put_audit(&audit).await.expect("put");
|
|
|
|
// Should still be retrievable
|
|
let retrieved = audit_store.get_audit(&query_id).await.expect("get");
|
|
assert!(retrieved.is_some());
|
|
assert!(retrieved.expect("exists").agent_id.is_none());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_has_audits_for_agent() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
let agent1 = [1u8; 32];
|
|
let agent2 = [2u8; 32];
|
|
|
|
// No audits initially
|
|
assert!(!audit_store.has_audits_for_agent(&agent1).await.expect("has"));
|
|
assert!(!audit_store.has_audits_for_agent(&agent2).await.expect("has"));
|
|
|
|
// Add audit for agent1
|
|
let audit = create_test_audit([10u8; 32], Some(agent1), 1000);
|
|
audit_store.put_audit(&audit).await.expect("put");
|
|
|
|
// Now agent1 has audits, agent2 still doesn't
|
|
assert!(audit_store.has_audits_for_agent(&agent1).await.expect("has"));
|
|
assert!(!audit_store.has_audits_for_agent(&agent2).await.expect("has"));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_get_nonexistent_audit() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
let nonexistent = [99u8; 32];
|
|
let result = audit_store.get_audit(&nonexistent).await.expect("get");
|
|
|
|
assert!(result.is_none(), "Should return None for nonexistent audit");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_empty_agent_audits() {
|
|
let store = Arc::new(HybridStore::open_temp().expect("Failed to create store"));
|
|
let audit_store = GenericAuditStore::new(store);
|
|
|
|
let agent = [1u8; 32];
|
|
let audits = audit_store.get_audits_for_agent(&agent, 0, None, 100).await.expect("get");
|
|
|
|
assert!(audits.is_empty(), "Should return empty vec for agent with no audits");
|
|
}
|
|
}
|