stemedb/crates/stemedb-query/src/engine/execution.rs
jordan 3320c24afa feat: WAL hardening (Phase 5B) - CRC32C, crash recovery, group commit, log rotation
Add CRC32C checksums to WAL record format (v2), implement crash recovery
with automatic truncation of corrupt records, add feature-gated group commit
buffer for batched fsync under concurrent load, and implement log rotation
via segment files with global offset addressing.

Key changes:
- Record format v2: [len:u32][crc32c:u32][blake3:32][payload:N]
- recover_file() scans and truncates corrupt tail records
- GroupCommitBuffer batches fsync via MPSC channel (tokio feature gate)
- SegmentManager with binary search resolution and cursor-based cleanup
- Journal::read() auto-refreshes segments on miss for writer/reader split
- Split recovery.rs and key_codec.rs into directory modules for 500-line max

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 12:36:35 -07:00

173 lines
6.0 KiB
Rust

//! Query execution helpers: fast path, filtering, decay, ranking.
//!
//! This module handles the post-candidate-fetch logic:
//! - Fast path: materialized view lookup and staleness checks
//! - Filtering: lifecycle, epoch, as_of, since
//! - Decay: time-based confidence decay (uniform or source-class-aware)
//! - Limit/pagination
use std::time::{SystemTime, UNIX_EPOCH};
use stemedb_core::types::{Assertion, MaterializedView};
use stemedb_storage::{key_codec, KVStore};
use tracing::debug;
use crate::decay::{apply_decay, apply_source_class_decay};
use crate::error::Result;
use crate::query::{Query, QueryResult};
use super::QueryEngine;
impl<S: KVStore + 'static> QueryEngine<S> {
/// Attempt to answer the query from the stored `MaterializedView` for
/// `subject` + `predicate`, without touching the candidate set.
///
/// # Returns
///
/// - `Ok(None)`: the caller should fall through to the slow path. This
///   happens when no view is stored under the key, the stored bytes fail to
///   deserialize, the view is older than `query.max_stale`, or the view's
///   winner is rejected by `query.matches`.
/// - `Ok(Some(result))` with no assertions: the view's conflict score lies
///   outside `query.min_conflict_score` / `query.max_conflict_score`.
/// - `Ok(Some(result))` with exactly the winner: every check passed.
///
/// # Errors
///
/// Propagates any error returned by the store lookup.
///
/// # Staleness Check
///
/// Only performed when `query.max_stale` is set. The view's
/// `materialized_at` is subtracted (saturating) from the current Unix time in
/// seconds; an age strictly greater than `max_stale` counts as stale. If the
/// system clock reads earlier than the Unix epoch, "now" is taken as 0, so
/// the computed age is 0 and the view is treated as fresh.
///
/// NOTE(review): `query.decay_halflife` and `query.limit` are not consulted
/// here — confirm the caller only takes the fast path when that is acceptable.
pub(super) async fn try_fast_path(
    &self,
    subject: &str,
    predicate: &str,
    query: &Query,
) -> Result<Option<QueryResult>> {
    // Shared shape for "the view answered the query, but nothing qualifies".
    let no_hits = || QueryResult { assertions: vec![], total_count: 0, has_more: false };

    let lookup_key = key_codec::mv_key(subject, predicate);
    let Some(raw) = self.store.get(&lookup_key).await? else {
        return Ok(None);
    };

    // A view that cannot be decoded is not fatal: the slow path can still
    // compute an answer from the underlying candidates.
    let view: MaterializedView = match stemedb_core::serde::deserialize(&raw) {
        Err(err) => {
            debug!(
                subject,
                predicate,
                error = %err,
                "Malformed materialized view, falling back to slow path"
            );
            return Ok(None);
        }
        Ok(decoded) => decoded,
    };

    // Freshness gate, only when the query asks for one.
    if let Some(threshold) = query.max_stale {
        let now_secs = match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_secs(),
            Err(_) => 0,
        };
        let age_secs = now_secs.saturating_sub(view.materialized_at);
        if age_secs > threshold {
            debug!(
                subject,
                predicate,
                age_seconds = age_secs,
                max_stale_seconds = threshold,
                materialized_at = view.materialized_at,
                "Materialized view is stale, falling back to slow path"
            );
            return Ok(None);
        }
    }

    // The view only stores the winner. If the winner is filtered out, another
    // candidate might still qualify, so defer to the slow path.
    if !query.matches(&view.winner) {
        return Ok(None);
    }

    // Lower bound on the conflict score.
    match query.min_conflict_score {
        Some(floor) if view.conflict_score < floor => {
            debug!(
                conflict_score = view.conflict_score,
                min_conflict_score = floor,
                "Materialized view conflict score below threshold"
            );
            return Ok(Some(no_hits()));
        }
        _ => {}
    }

    // Upper bound on the conflict score.
    match query.max_conflict_score {
        Some(ceiling) if view.conflict_score > ceiling => {
            debug!(
                conflict_score = view.conflict_score,
                max_conflict_score = ceiling,
                "Materialized view conflict score above threshold"
            );
            return Ok(Some(no_hits()));
        }
        _ => {}
    }

    let single = QueryResult { assertions: vec![view.winner], total_count: 1, has_more: false };
    Ok(Some(single))
}
/// Narrow `candidates` down to the final `QueryResult` for `query`.
///
/// Steps, in order:
/// 1. Keep only the assertions accepted by `query.matches`.
/// 2. If `query.decay_halflife` is set, replace the survivors with the output
///    of `apply_source_class_decay` (when `query.source_class_decay` is true)
///    or `apply_decay` (otherwise). The reference time is `query.as_of` when
///    present, else the current Unix time in seconds (0 if the system clock
///    reads earlier than the epoch).
/// 3. Record `total_count` as the number of assertions at this point.
/// 4. If `query.limit` is set and exceeded, truncate to the limit and report
///    `has_more = true`; otherwise `has_more = false`.
pub(super) async fn apply_filters_and_return(
    &self,
    candidates: Vec<Assertion>,
    query: &Query,
) -> Result<QueryResult> {
    debug!(candidate_count = candidates.len(), "Applying filters to candidates");

    // Filter in place, preserving the incoming order.
    let mut survivors: Vec<Assertion> = candidates;
    survivors.retain(|assertion| query.matches(assertion));

    // Optional confidence decay.
    if let Some(halflife) = query.decay_halflife {
        // A time-travel query decays relative to its `as_of` instant rather
        // than the wall clock.
        let now = match query.as_of {
            Some(pinned) => pinned,
            None => SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .map_or(0, |elapsed| elapsed.as_secs()),
        };

        survivors = if query.source_class_decay {
            debug!(
                halflife_seconds = halflife,
                now,
                source_class_aware = true,
                "Applying source-class-aware decay"
            );
            apply_source_class_decay(&survivors, halflife, now)
        } else {
            debug!(halflife_seconds = halflife, now, "Applying uniform decay");
            apply_decay(&survivors, halflife, now)
        };
    }

    // Counted before the limit is applied, so it reflects every match.
    let total_count = survivors.len();

    let has_more = match query.limit {
        Some(cap) if total_count > cap => {
            survivors.truncate(cap);
            true
        }
        _ => false,
    };

    debug!(matched_count = survivors.len(), total_count, has_more, "Query complete");
    Ok(QueryResult { assertions: survivors, total_count, has_more })
}
}