This commit includes comprehensive work on Phase 6 features:

## Admission Control (Phase 6 admission middleware)
- AdmissionStore implementation backed by TrustRankStore
- PoW verification with tier-based difficulty computation
- Trust tier progression (Newcomer → Established → Trusted → Authority)
- API integration with admission status endpoints

## HLC Recency Lens (Phase 6C)
- HlcRecencyLens for distributed system ordering
- Hybrid logical clock integration with causality preservation

## Cluster Coordination (Phase 6C)
- Multi-node cluster tests (availability, partition tolerance)
- CRDT convergence tests for anti-entropy sync
- Gateway handler improvements

## Aphoria Code Linter (Phase 2A)
- RFC/OWASP corpus builders with network fetching and caching
- Concept hierarchy with auto-alias creation on conflict detection
- Multiple security extractors (TLS, JWT, CORS, secrets, rate limiting)

## Code Organization
- Split large files into modules to comply with 500-line limit
- Improved test organization with separate test modules
- Fixed rkyv serialization for EigenTrustState (AgentScore struct)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
324 lines
12 KiB
Rust
324 lines
12 KiB
Rust
//! Query types and builder for filtering assertions.
//!
//! The [`Query`] struct represents a read request against the knowledge graph.
//! Queries can filter by any combination of subject, predicate, lifecycle,
//! epoch, visual similarity (`visual_near`), and point in time (`as_of`).
|
|
|
|
use stemedb_core::types::{Assertion, EpochId, LifecycleStage, PHash};
|
|
|
|
// Re-export hamming_distance from stemedb_storage for public API consumers
|
|
pub use stemedb_storage::hamming_distance;
|
|
|
|
mod builder;
|
|
mod result;
|
|
|
|
pub use builder::QueryBuilder;
|
|
pub use result::QueryResult;
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|
|
|
|
/// Parse hex string to 8-byte pHash.
|
|
///
|
|
/// Returns `None` if the hex string is not exactly 16 characters
|
|
/// or contains invalid hex digits. Case-insensitive: both "A3F2..."
|
|
/// and "a3f2..." are valid and produce identical results.
|
|
pub(crate) fn parse_hex_phash(hex_str: &str) -> Option<PHash> {
|
|
if hex_str.len() != 16 {
|
|
return None;
|
|
}
|
|
let bytes = hex::decode(hex_str).ok()?;
|
|
if bytes.len() != 8 {
|
|
return None;
|
|
}
|
|
let mut hash = [0u8; 8];
|
|
hash.copy_from_slice(&bytes);
|
|
Some(hash)
|
|
}
|
|
|
|
/// A query against the knowledge graph.
|
|
///
|
|
/// All fields are optional filters. If a field is `None`, it matches any value.
|
|
/// Multiple filters combine with AND semantics.
|
|
#[derive(Debug, Clone, Default)]
|
|
pub struct Query {
|
|
/// Filter by subject entity.
|
|
pub subject: Option<String>,
|
|
|
|
/// Filter by predicate/relation.
|
|
pub predicate: Option<String>,
|
|
|
|
/// Filter by lifecycle stage.
|
|
pub lifecycle: Option<LifecycleStage>,
|
|
|
|
/// Filter by epoch (paradigm context).
|
|
pub epoch: Option<EpochId>,
|
|
|
|
/// Maximum number of results to return.
|
|
pub limit: Option<usize>,
|
|
|
|
/// Maximum acceptable staleness of materialized views in seconds.
|
|
///
|
|
/// When set, the fast path (MV lookup) will be skipped if the materialized
|
|
/// view is older than this threshold. This causes the query to fall through
|
|
/// to the slow path, which re-computes the result from all candidate assertions.
|
|
///
|
|
/// - `None` (default): Accept any MV age (backward-compatible behavior)
|
|
/// - `Some(0)`: Never use MV, always use slow path
|
|
/// - `Some(60)`: Only use MV if materialized within the last 60 seconds
|
|
pub max_stale: Option<u64>,
|
|
|
|
/// Filter by visual similarity to a reference pHash (hex-encoded, 16 chars).
|
|
///
|
|
/// Returns assertions whose `visual_hash` has hamming distance <= `visual_threshold`.
|
|
/// Assertions without a `visual_hash` are excluded from results when this is set.
|
|
pub visual_near: Option<String>,
|
|
|
|
/// Maximum hamming distance for `visual_near` matching.
|
|
///
|
|
/// Range: 0-64 (8 bytes = 64 bits). Default: 8 (12.5% bit difference).
|
|
/// Lower values require closer visual similarity.
|
|
pub visual_threshold: Option<u32>,
|
|
|
|
/// Query state as of this Unix timestamp (time-travel).
|
|
///
|
|
/// When set, returns only assertions created at or before this timestamp.
|
|
/// The fast path (MV lookup) is bypassed since MVs reflect current state.
|
|
///
|
|
/// - `None` (default): Query current state (backward-compatible)
|
|
/// - `Some(ts)`: Query historical state as it existed at timestamp `ts`
|
|
pub as_of: Option<u64>,
|
|
|
|
/// Decay half-life in seconds for confidence decay.
|
|
///
|
|
/// When set, older assertions have their confidence scores reduced based on age.
|
|
/// This implements semantic decay: a Reddit post from 2022 shouldn't compete
|
|
/// equally with a 2024 RCT.
|
|
///
|
|
/// Formula: `effective_confidence = confidence * 2^(-(age / halflife))`
|
|
///
|
|
/// - `None` (default): No decay, all assertions weighted by original confidence
|
|
/// - `Some(31536000)`: 1-year half-life (assertions lose ~50% confidence per year)
|
|
/// - `Some(86400)`: 1-day half-life (fast decay for rapidly changing data)
|
|
///
|
|
/// **Note**: When decay is enabled, the fast path (materialized view lookup) is
|
|
/// bypassed because MVs store pre-computed winners without decay applied.
|
|
/// Queries with decay always use the slow path for accurate results.
|
|
///
|
|
/// # Example
|
|
/// ```rust
|
|
/// use stemedb_query::Query;
|
|
///
|
|
/// // Medical queries with 6-month decay half-life
|
|
/// let query = Query::builder()
|
|
/// .subject("Semaglutide")
|
|
/// .predicate("muscle_effect")
|
|
/// .decay_halflife(15768000) // 6 months in seconds
|
|
/// .build();
|
|
/// ```
|
|
pub decay_halflife: Option<u64>,
|
|
|
|
/// Use source-class-aware decay instead of uniform decay.
|
|
///
|
|
/// When `true` and `decay_halflife` is also set, the decay half-life
|
|
/// is determined by each assertion's `source_class` tier:
|
|
/// - Tier 0 (Regulatory): No decay
|
|
/// - Tier 1 (Clinical): 2-year half-life
|
|
/// - Tier 2 (Observational): 1-year half-life
|
|
/// - Tier 3 (Expert): 6-month half-life
|
|
/// - Tier 4 (Community): 3-month half-life
|
|
/// - Tier 5 (Anecdotal): 1-month half-life
|
|
///
|
|
/// The `decay_halflife` field serves as a fallback for assertions
|
|
/// without a source_class, or when this flag is `false`.
|
|
pub source_class_decay: bool,
|
|
|
|
/// Query by semantic vector similarity (k-nearest neighbors).
|
|
///
|
|
/// When set, the QueryEngine uses the vector index for candidate retrieval
|
|
/// instead of the standard SP/S indexes. This enables semantic similarity
|
|
/// queries like "find assertions with embeddings similar to this one."
|
|
///
|
|
/// The `k` field specifies how many nearest neighbors to return.
|
|
///
|
|
/// - `None` (default): Use standard index-based lookup
|
|
/// - `Some(vec)`: Use vector index for k-NN search
|
|
///
|
|
/// **Note**: When `vector_near` is set:
|
|
/// - The fast path (MV lookup) is bypassed
|
|
/// - Subject/predicate filters are applied AFTER vector search
|
|
/// - Results are sorted by distance, not by lens resolution
|
|
///
|
|
/// # Example
|
|
/// ```rust
|
|
/// use stemedb_query::Query;
|
|
///
|
|
/// // Find 10 assertions with similar embeddings
|
|
/// let embedding = vec![0.1, 0.2, 0.3, /* ... */];
|
|
/// let query = Query::builder()
|
|
/// .vector_near(embedding, 10)
|
|
/// .subject("Semaglutide") // Optional: filter results
|
|
/// .build();
|
|
/// ```
|
|
pub vector_near: Option<Vec<f32>>,
|
|
|
|
/// Number of nearest neighbors to return for vector search.
|
|
///
|
|
/// Only used when `vector_near` is set. Defaults to 10 if not specified.
|
|
pub k: Option<usize>,
|
|
|
|
/// Minimum conflict score threshold (0.0 to 1.0).
|
|
///
|
|
/// When set, only returns results where the materialized view's conflict_score
|
|
/// is >= this value. Used to filter for controversial claims where assertions
|
|
/// significantly disagree.
|
|
///
|
|
/// - `None` (default): No conflict filtering
|
|
/// - `Some(0.7)`: Only show claims with high conflict (disagreement)
|
|
/// - `Some(0.0)`: Show all claims (equivalent to None)
|
|
///
|
|
/// **Note**: This is a POST-resolution filter. The query executes normally,
|
|
/// but results are filtered by conflict score after lens resolution.
|
|
///
|
|
/// # Example
|
|
/// ```rust
|
|
/// use stemedb_query::Query;
|
|
///
|
|
/// // Only show controversial claims (high disagreement)
|
|
/// let query = Query::builder()
|
|
/// .subject("Semaglutide")
|
|
/// .predicate("muscle_effect")
|
|
/// .min_conflict_score(0.7)
|
|
/// .build();
|
|
/// ```
|
|
pub min_conflict_score: Option<f32>,
|
|
|
|
/// Maximum conflict score threshold (0.0 to 1.0).
|
|
///
|
|
/// When set, only returns results where the materialized view's conflict_score
|
|
/// is <= this value. Used to filter for claims with strong agreement.
|
|
///
|
|
/// - `None` (default): No conflict filtering
|
|
/// - `Some(0.2)`: Only show claims with high agreement
|
|
/// - `Some(1.0)`: Show all claims (equivalent to None)
|
|
///
|
|
/// **Note**: This is a POST-resolution filter. The query executes normally,
|
|
/// but results are filtered by conflict score after lens resolution.
|
|
///
|
|
/// # Example
|
|
/// ```rust
|
|
/// use stemedb_query::Query;
|
|
///
|
|
/// // Only show claims with strong consensus
|
|
/// let query = Query::builder()
|
|
/// .subject("Semaglutide")
|
|
/// .predicate("muscle_effect")
|
|
/// .max_conflict_score(0.2)
|
|
/// .build();
|
|
/// ```
|
|
pub max_conflict_score: Option<f32>,
|
|
|
|
/// Resolve aliases when querying by subject.
|
|
///
|
|
/// When `true` and `subject` is specified, the QueryEngine will:
|
|
/// 1. Call `alias_store.resolve_all(&subject)` to find all related subjects
|
|
/// 2. Fetch assertions for ALL resolved subjects
|
|
/// 3. Deduplicate results by assertion hash
|
|
///
|
|
/// This enables cross-scheme concept resolution. For example, querying
|
|
/// `code://rust/myapp/tls/cert_verification` with aliases enabled would also
|
|
/// return assertions from `rfc://5246/tls/cert_verification` if they are aliased.
|
|
///
|
|
/// - `false` (default): Query exact subject only (backward-compatible)
|
|
/// - `true`: Expand subject to all aliased paths before querying
|
|
///
|
|
/// **Note**: Requires an `AliasStore` to be configured on the `QueryEngine`.
|
|
/// If no alias store is configured, this flag has no effect.
|
|
///
|
|
/// # Example
|
|
/// ```rust
|
|
/// use stemedb_query::Query;
|
|
///
|
|
/// // Find assertions from both code and RFC sources
|
|
/// let query = Query::builder()
|
|
/// .subject("code://rust/myapp/tls/cert_verification")
|
|
/// .resolve_aliases(true)
|
|
/// .build();
|
|
/// ```
|
|
pub resolve_aliases: bool,
|
|
}
|
|
|
|
impl Query {
|
|
/// Create a new empty query (matches all assertions).
|
|
pub fn new() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
/// Create a query builder for ergonomic query construction.
|
|
pub fn builder() -> QueryBuilder {
|
|
QueryBuilder::new()
|
|
}
|
|
|
|
/// Check if an assertion matches this query's filters.
|
|
pub fn matches(&self, assertion: &Assertion) -> bool {
|
|
// Check subject filter
|
|
// Skip subject check when resolve_aliases is true, since the expanded
|
|
// subjects (including aliases) were already used to fetch candidates.
|
|
if !self.resolve_aliases {
|
|
if let Some(ref subject) = self.subject {
|
|
if &assertion.subject != subject {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check predicate filter
|
|
if let Some(ref predicate) = self.predicate {
|
|
if &assertion.predicate != predicate {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Check lifecycle filter
|
|
if let Some(lifecycle) = self.lifecycle {
|
|
if assertion.lifecycle != lifecycle {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Check epoch filter
|
|
if let Some(epoch) = self.epoch {
|
|
match assertion.epoch {
|
|
Some(assertion_epoch) if assertion_epoch == epoch => {}
|
|
_ => return false,
|
|
}
|
|
}
|
|
|
|
// Check visual similarity filter
|
|
if let Some(ref target_hex) = self.visual_near {
|
|
let target = match parse_hex_phash(target_hex) {
|
|
Some(h) => h,
|
|
None => return false, // Invalid hex = no match
|
|
};
|
|
match assertion.visual_hash {
|
|
Some(ref assertion_hash) => {
|
|
let threshold = self.visual_threshold.unwrap_or(8);
|
|
if hamming_distance(&target, assertion_hash) > threshold {
|
|
return false;
|
|
}
|
|
}
|
|
None => return false, // No visual_hash = no match when visual_near is specified
|
|
}
|
|
}
|
|
|
|
// Check as_of (time-travel) filter
|
|
if let Some(as_of_ts) = self.as_of {
|
|
if assertion.timestamp > as_of_ts {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
true
|
|
}
|
|
}
|