Add CRC32C checksums to WAL record format (v2), implement crash recovery with automatic truncation of corrupt records, add feature-gated group commit buffer for batched fsync under concurrent load, and implement log rotation via segment files with global offset addressing. Key changes: - Record format v2: [len:u32][crc32c:u32][blake3:32][payload:N] - recover_file() scans and truncates corrupt tail records - GroupCommitBuffer batches fsync via MPSC channel (tokio feature gate) - SegmentManager with binary search resolution and cursor-based cleanup - Journal::read() auto-refreshes segments on miss for writer/reader split - Split recovery.rs and key_codec.rs into directory modules for 500-line max Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
321 lines
11 KiB
Rust
321 lines
11 KiB
Rust
//! Storage for gold standard assertions.
|
|
//!
|
|
//! Gold standards are stored at `{subject}\x00GS:{predicate}` with a secondary
|
|
//! index at `\x00GS_LIST:{subject}:{predicate}` for listing all gold standards.
|
|
|
|
use crate::{key_codec, KVStore, Result, StorageError};
|
|
use async_trait::async_trait;
|
|
use std::sync::Arc;
|
|
use stemedb_core::types::GoldStandard;
|
|
use tracing::{debug, instrument};
|
|
|
|
/// Storage trait for gold standard operations.
|
|
///
|
|
/// Provides operations for creating, reading, listing, and removing gold standards
|
|
/// that define ground truth for agent verification.
|
|
#[async_trait]
|
|
pub trait GoldStandardStore: Send + Sync {
|
|
/// Store a gold standard.
|
|
///
|
|
/// Key format: `GS:{subject}:{predicate}`
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// - `gs` - The gold standard to store
|
|
async fn set_gold_standard(&self, gs: &GoldStandard) -> Result<()>;
|
|
|
|
/// Get a gold standard by subject and predicate.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// - `subject` - Subject entity (e.g., "Earth")
|
|
/// - `predicate` - Predicate (e.g., "has_shape")
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// `Some(GoldStandard)` if found, `None` otherwise.
|
|
async fn get_gold_standard(
|
|
&self,
|
|
subject: &str,
|
|
predicate: &str,
|
|
) -> Result<Option<GoldStandard>>;
|
|
|
|
/// List all gold standards.
|
|
///
|
|
/// Returns all gold standards ordered by subject and predicate.
|
|
async fn list_gold_standards(&self) -> Result<Vec<GoldStandard>>;
|
|
|
|
/// Remove a gold standard.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// - `subject` - Subject entity
|
|
/// - `predicate` - Predicate
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// `true` if the gold standard was found and removed, `false` if not found.
|
|
async fn remove_gold_standard(&self, subject: &str, predicate: &str) -> Result<bool>;
|
|
}
|
|
|
|
/// [`GoldStandardStore`] implementation layered on top of any [`KVStore`].
///
/// The backing store is held through an [`Arc`], so the same store handle can
/// also be used by other components.
pub struct GenericGoldStandardStore<S> {
    /// Shared handle to the backing key-value store.
    store: Arc<S>,
}
|
|
|
|
impl<S: KVStore> GenericGoldStandardStore<S> {
|
|
/// Create a new gold standard store backed by the given KV store.
|
|
pub fn new(store: Arc<S>) -> Self {
|
|
Self { store }
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl<S: KVStore + 'static> GoldStandardStore for GenericGoldStandardStore<S> {
|
|
#[instrument(skip(self, gs), fields(subject = %gs.subject, predicate = %gs.predicate))]
|
|
async fn set_gold_standard(&self, gs: &GoldStandard) -> Result<()> {
|
|
let key = key_codec::gold_standard_key(&gs.subject, &gs.predicate);
|
|
let list_key = key_codec::gs_list_key(&gs.subject, &gs.predicate);
|
|
let serialized = stemedb_core::serde::serialize(gs)
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
|
|
// Write primary key
|
|
self.store.put(&key, &serialized).await?;
|
|
|
|
// Write secondary index for listing (empty value, just presence matters)
|
|
self.store.put(&list_key, &[]).await?;
|
|
|
|
debug!(
|
|
subject = %gs.subject,
|
|
predicate = %gs.predicate,
|
|
assertion_hash = %hex::encode(gs.assertion_hash),
|
|
"Stored gold standard"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[instrument(skip(self), fields(subject = %subject, predicate = %predicate))]
|
|
async fn get_gold_standard(
|
|
&self,
|
|
subject: &str,
|
|
predicate: &str,
|
|
) -> Result<Option<GoldStandard>> {
|
|
let key = key_codec::gold_standard_key(subject, predicate);
|
|
|
|
match self.store.get(&key).await? {
|
|
Some(data) => {
|
|
let gs: GoldStandard = stemedb_core::serde::deserialize(&data)
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
|
|
debug!(
|
|
subject = %subject,
|
|
predicate = %predicate,
|
|
expected_object = %gs.expected_object,
|
|
"Retrieved gold standard"
|
|
);
|
|
|
|
Ok(Some(gs))
|
|
}
|
|
None => {
|
|
debug!(
|
|
subject = %subject,
|
|
predicate = %predicate,
|
|
"Gold standard not found"
|
|
);
|
|
Ok(None)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[instrument(skip(self))]
|
|
async fn list_gold_standards(&self) -> Result<Vec<GoldStandard>> {
|
|
// Scan the GS_LIST secondary index
|
|
let list_entries = self.store.scan_prefix(&key_codec::gs_list_scan_prefix()).await?;
|
|
|
|
let mut gold_standards = Vec::new();
|
|
for (list_key, _) in list_entries {
|
|
// Extract subject and predicate from GS_LIST key: \x00GS_LIST:{subject}:{predicate}
|
|
let tag = key_codec::extract_tag(&list_key);
|
|
if let Some(suffix) = tag.strip_prefix(b"GS_LIST:") {
|
|
if let Ok(suffix_str) = std::str::from_utf8(suffix) {
|
|
// Split by first colon to get subject and predicate
|
|
if let Some(colon_pos) = suffix_str.find(':') {
|
|
let subject = &suffix_str[..colon_pos];
|
|
let predicate = &suffix_str[colon_pos + 1..];
|
|
|
|
// Fetch the actual gold standard from the primary key
|
|
let key = key_codec::gold_standard_key(subject, predicate);
|
|
if let Some(data) = self.store.get(&key).await? {
|
|
match stemedb_core::serde::deserialize::<GoldStandard>(&data) {
|
|
Ok(gs) => gold_standards.push(gs),
|
|
Err(e) => {
|
|
debug!(error = %e, subject = %subject, predicate = %predicate, "Skipping malformed gold standard");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sort by subject, then predicate for deterministic output
|
|
gold_standards
|
|
.sort_by(|a, b| a.subject.cmp(&b.subject).then_with(|| a.predicate.cmp(&b.predicate)));
|
|
|
|
debug!(count = gold_standards.len(), "Listed gold standards");
|
|
|
|
Ok(gold_standards)
|
|
}
|
|
|
|
#[instrument(skip(self), fields(subject = %subject, predicate = %predicate))]
|
|
async fn remove_gold_standard(&self, subject: &str, predicate: &str) -> Result<bool> {
|
|
let key = key_codec::gold_standard_key(subject, predicate);
|
|
let list_key = key_codec::gs_list_key(subject, predicate);
|
|
|
|
// Check if it exists first
|
|
let exists = self.store.get(&key).await?.is_some();
|
|
|
|
if exists {
|
|
// Delete both primary key and secondary index
|
|
self.store.delete(&key).await?;
|
|
self.store.delete(&list_key).await?;
|
|
debug!(
|
|
subject = %subject,
|
|
predicate = %predicate,
|
|
"Removed gold standard"
|
|
);
|
|
Ok(true)
|
|
} else {
|
|
debug!(
|
|
subject = %subject,
|
|
predicate = %predicate,
|
|
"Gold standard not found for removal"
|
|
);
|
|
Ok(false)
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::HybridStore;
    use stemedb_core::types::GoldStandard;

    /// Build a gold standard with fixed hash, timestamp and author so tests
    /// only vary the subject/predicate/object triple.
    fn create_gold_standard(subject: &str, predicate: &str, expected_object: &str) -> GoldStandard {
        GoldStandard::new(
            [42u8; 32],
            subject.to_string(),
            predicate.to_string(),
            expected_object.to_string(),
            1000,
            "admin".to_string(),
        )
    }

    /// Open a fresh temporary `HybridStore` and wrap it in the store under test.
    fn temp_gs_store() -> impl GoldStandardStore {
        let backing = HybridStore::open_temp().expect("store");
        GenericGoldStandardStore::new(Arc::new(backing))
    }

    #[tokio::test]
    async fn test_set_and_get_gold_standard() {
        let gs_store = temp_gs_store();

        let gs = create_gold_standard("Earth", "has_shape", "oblate_spheroid");
        gs_store.set_gold_standard(&gs).await.expect("set");

        let lookup = gs_store.get_gold_standard("Earth", "has_shape").await.expect("get");
        let retrieved = lookup.expect("should exist");

        assert_eq!(retrieved, gs);
    }

    #[tokio::test]
    async fn test_get_nonexistent_gold_standard() {
        let gs_store = temp_gs_store();

        let result = gs_store.get_gold_standard("NonExistent", "predicate").await.expect("get");

        assert!(result.is_none());
    }

    #[tokio::test]
    async fn test_list_gold_standards() {
        let gs_store = temp_gs_store();

        // Inserted deliberately out of order to exercise the sort.
        let gs1 = create_gold_standard("Earth", "has_shape", "oblate_spheroid");
        let gs2 = create_gold_standard("Semaglutide", "treats_condition", "type_2_diabetes");
        let gs3 = create_gold_standard("Earth", "has_moon", "Luna");

        gs_store.set_gold_standard(&gs1).await.expect("set gs1");
        gs_store.set_gold_standard(&gs2).await.expect("set gs2");
        gs_store.set_gold_standard(&gs3).await.expect("set gs3");

        let list = gs_store.list_gold_standards().await.expect("list");

        assert_eq!(list.len(), 3);

        // Should be sorted by subject, then predicate
        let (first, second, third) = (&list[0], &list[1], &list[2]);
        assert_eq!(first.subject, "Earth");
        assert_eq!(first.predicate, "has_moon");
        assert_eq!(second.subject, "Earth");
        assert_eq!(second.predicate, "has_shape");
        assert_eq!(third.subject, "Semaglutide");
    }

    #[tokio::test]
    async fn test_remove_gold_standard() {
        let gs_store = temp_gs_store();

        let gs = create_gold_standard("Earth", "has_shape", "oblate_spheroid");
        gs_store.set_gold_standard(&gs).await.expect("set");

        // Verify it exists
        let before_removal = gs_store.get_gold_standard("Earth", "has_shape").await.expect("get");
        assert!(before_removal.is_some());

        // Remove it
        let removed = gs_store.remove_gold_standard("Earth", "has_shape").await.expect("remove");
        assert!(removed);

        // Verify it's gone
        let after_removal = gs_store.get_gold_standard("Earth", "has_shape").await.expect("get");
        assert!(after_removal.is_none());
    }

    #[tokio::test]
    async fn test_remove_nonexistent_gold_standard() {
        let gs_store = temp_gs_store();

        let removed =
            gs_store.remove_gold_standard("NonExistent", "predicate").await.expect("remove");

        assert!(!removed);
    }

    #[tokio::test]
    async fn test_overwrite_gold_standard() {
        let gs_store = temp_gs_store();

        let original = create_gold_standard("Earth", "has_shape", "sphere");
        gs_store.set_gold_standard(&original).await.expect("set");

        // Overwrite with more accurate answer
        let replacement = create_gold_standard("Earth", "has_shape", "oblate_spheroid");
        gs_store.set_gold_standard(&replacement).await.expect("set");

        let lookup = gs_store.get_gold_standard("Earth", "has_shape").await.expect("get");
        let retrieved = lookup.expect("should exist");

        assert_eq!(retrieved.expected_object, "oblate_spheroid");
    }
}
|