Add CRC32C checksums to WAL record format (v2), implement crash recovery with automatic truncation of corrupt records, add feature-gated group commit buffer for batched fsync under concurrent load, and implement log rotation via segment files with global offset addressing. Key changes: - Record format v2: [len:u32][crc32c:u32][blake3:32][payload:N] - recover_file() scans and truncates corrupt tail records - GroupCommitBuffer batches fsync via MPSC channel (tokio feature gate) - SegmentManager with binary search resolution and cursor-based cleanup - Journal::read() auto-refreshes segments on miss for writer/reader split - Split recovery.rs and key_codec.rs into directory modules for 500-line max Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
173 lines
6.0 KiB
Rust
173 lines
6.0 KiB
Rust
//! Query execution helpers: fast path, filtering, decay, limiting.
|
|
//!
|
|
//! This module handles the post-candidate-fetch logic:
|
|
//! - Fast path: materialized view lookup and staleness checks
|
|
//! - Filtering: lifecycle, epoch, as_of, since
|
|
//! - Decay: time-based confidence decay (uniform or source-class-aware)
|
|
//! - Limit: truncation to `query.limit`, reporting `has_more` when results were cut
|
|
|
|
use std::time::{SystemTime, UNIX_EPOCH};
|
|
|
|
use stemedb_core::types::{Assertion, MaterializedView};
|
|
use stemedb_storage::{key_codec, KVStore};
|
|
use tracing::debug;
|
|
|
|
use crate::decay::{apply_decay, apply_source_class_decay};
|
|
use crate::error::Result;
|
|
use crate::query::{Query, QueryResult};
|
|
|
|
use super::QueryEngine;
|
|
|
|
impl<S: KVStore + 'static> QueryEngine<S> {
|
|
/// Try the fast path: read a pre-computed MaterializedView for this subject+predicate.
|
|
///
|
|
/// Returns `Some(QueryResult)` if a materialized view exists, is fresh enough,
|
|
/// and the winner passes the query's filters. Returns `None` to fall through
|
|
/// to the slow path.
|
|
///
|
|
/// # Staleness Check
|
|
///
|
|
/// If `query.max_stale` is set, the MV's age is checked against this threshold.
|
|
/// If the MV is older than `max_stale` seconds, we fall through to the slow path.
|
|
pub(super) async fn try_fast_path(
|
|
&self,
|
|
subject: &str,
|
|
predicate: &str,
|
|
query: &Query,
|
|
) -> Result<Option<QueryResult>> {
|
|
let mv_key = key_codec::mv_key(subject, predicate);
|
|
|
|
let data = match self.store.get(&mv_key).await? {
|
|
Some(data) => data,
|
|
None => return Ok(None),
|
|
};
|
|
|
|
let view: MaterializedView = match stemedb_core::serde::deserialize(&data) {
|
|
Ok(v) => v,
|
|
Err(e) => {
|
|
debug!(
|
|
subject,
|
|
predicate,
|
|
error = %e,
|
|
"Malformed materialized view, falling back to slow path"
|
|
);
|
|
return Ok(None);
|
|
}
|
|
};
|
|
|
|
// Check staleness if max_stale is specified
|
|
if let Some(max_stale) = query.max_stale {
|
|
let now =
|
|
SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0);
|
|
let age = now.saturating_sub(view.materialized_at);
|
|
|
|
if age > max_stale {
|
|
debug!(
|
|
subject,
|
|
predicate,
|
|
age_seconds = age,
|
|
max_stale_seconds = max_stale,
|
|
materialized_at = view.materialized_at,
|
|
"Materialized view is stale, falling back to slow path"
|
|
);
|
|
return Ok(None);
|
|
}
|
|
}
|
|
|
|
// Apply query filters to the winner
|
|
if !query.matches(&view.winner) {
|
|
// Winner doesn't match filters (e.g., wrong lifecycle stage).
|
|
// Fall through to slow path which considers all candidates.
|
|
return Ok(None);
|
|
}
|
|
|
|
// Check conflict score filters if specified
|
|
if let Some(min_score) = query.min_conflict_score {
|
|
if view.conflict_score < min_score {
|
|
debug!(
|
|
conflict_score = view.conflict_score,
|
|
min_conflict_score = min_score,
|
|
"Materialized view conflict score below threshold"
|
|
);
|
|
return Ok(Some(QueryResult {
|
|
assertions: vec![],
|
|
total_count: 0,
|
|
has_more: false,
|
|
}));
|
|
}
|
|
}
|
|
|
|
if let Some(max_score) = query.max_conflict_score {
|
|
if view.conflict_score > max_score {
|
|
debug!(
|
|
conflict_score = view.conflict_score,
|
|
max_conflict_score = max_score,
|
|
"Materialized view conflict score above threshold"
|
|
);
|
|
return Ok(Some(QueryResult {
|
|
assertions: vec![],
|
|
total_count: 0,
|
|
has_more: false,
|
|
}));
|
|
}
|
|
}
|
|
|
|
Ok(Some(QueryResult { assertions: vec![view.winner], total_count: 1, has_more: false }))
|
|
}
|
|
|
|
/// Apply filters to candidates and construct the final QueryResult.
|
|
///
|
|
/// This is used by similarity search paths to apply post-filtering
|
|
/// (subject, predicate, lifecycle, epoch) and handle limit/truncation.
|
|
pub(super) async fn apply_filters_and_return(
|
|
&self,
|
|
candidates: Vec<Assertion>,
|
|
query: &Query,
|
|
) -> Result<QueryResult> {
|
|
debug!(candidate_count = candidates.len(), "Applying filters to candidates");
|
|
|
|
// Apply filters
|
|
let mut matching: Vec<Assertion> =
|
|
candidates.into_iter().filter(|a| query.matches(a)).collect();
|
|
|
|
// Apply decay if decay_halflife is set
|
|
if let Some(halflife) = query.decay_halflife {
|
|
// Use as_of timestamp if set (time-travel), otherwise current time
|
|
let now = query.as_of.unwrap_or_else(|| {
|
|
SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0)
|
|
});
|
|
|
|
if query.source_class_decay {
|
|
debug!(
|
|
halflife_seconds = halflife,
|
|
now,
|
|
source_class_aware = true,
|
|
"Applying source-class-aware decay"
|
|
);
|
|
matching = apply_source_class_decay(&matching, halflife, now);
|
|
} else {
|
|
debug!(halflife_seconds = halflife, now, "Applying uniform decay");
|
|
matching = apply_decay(&matching, halflife, now);
|
|
}
|
|
}
|
|
|
|
let total_count = matching.len();
|
|
|
|
// Apply limit if specified
|
|
let has_more = if let Some(limit) = query.limit {
|
|
if matching.len() > limit {
|
|
matching.truncate(limit);
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
} else {
|
|
false
|
|
};
|
|
|
|
debug!(matched_count = matching.len(), total_count, has_more, "Query complete");
|
|
|
|
Ok(QueryResult { assertions: matching, total_count, has_more })
|
|
}
|
|
}
|