//! Query types and builder for filtering assertions. //! //! The Query struct represents a read request against the knowledge graph. //! Queries can filter by any combination of subject, predicate, lifecycle, //! and epoch. use stemedb_core::types::{Assertion, EpochId, LifecycleStage, PHash}; // Re-export hamming_distance from stemedb_storage for public API consumers pub use stemedb_storage::hamming_distance; mod builder; mod result; pub use builder::QueryBuilder; pub use result::QueryResult; #[cfg(test)] mod tests; /// Parse hex string to 8-byte pHash. /// /// Returns `None` if the hex string is not exactly 16 characters /// or contains invalid hex digits. Case-insensitive: both "A3F2..." /// and "a3f2..." are valid and produce identical results. pub(crate) fn parse_hex_phash(hex_str: &str) -> Option { if hex_str.len() != 16 { return None; } let bytes = hex::decode(hex_str).ok()?; if bytes.len() != 8 { return None; } let mut hash = [0u8; 8]; hash.copy_from_slice(&bytes); Some(hash) } /// A query against the knowledge graph. /// /// All fields are optional filters. If a field is `None`, it matches any value. /// Multiple filters combine with AND semantics. #[derive(Debug, Clone, Default)] pub struct Query { /// Filter by subject entity. pub subject: Option, /// Filter by predicate/relation. pub predicate: Option, /// Filter by lifecycle stage. pub lifecycle: Option, /// Filter by epoch (paradigm context). pub epoch: Option, /// Maximum number of results to return. pub limit: Option, /// Maximum acceptable staleness of materialized views in seconds. /// /// When set, the fast path (MV lookup) will be skipped if the materialized /// view is older than this threshold. This causes the query to fall through /// to the slow path, which re-computes the result from all candidate assertions. /// /// - `None` (default): Accept any MV age (backward-compatible behavior) /// - `Some(0)`: Never use MV, always use slow path /// - `Some(60)`: Only use MV if materialized within the last 60 seconds pub max_stale: Option, /// Filter by visual similarity to a reference pHash (hex-encoded, 16 chars). /// /// Returns assertions whose `visual_hash` has hamming distance <= `visual_threshold`. /// Assertions without a `visual_hash` are excluded from results when this is set. pub visual_near: Option, /// Maximum hamming distance for `visual_near` matching. /// /// Range: 0-64 (8 bytes = 64 bits). Default: 8 (12.5% bit difference). /// Lower values require closer visual similarity. pub visual_threshold: Option, /// Query state as of this Unix timestamp (time-travel). /// /// When set, returns only assertions created at or before this timestamp. /// The fast path (MV lookup) is bypassed since MVs reflect current state. /// /// - `None` (default): Query current state (backward-compatible) /// - `Some(ts)`: Query historical state as it existed at timestamp `ts` pub as_of: Option, /// Decay half-life in seconds for confidence decay. /// /// When set, older assertions have their confidence scores reduced based on age. /// This implements semantic decay: a Reddit post from 2022 shouldn't compete /// equally with a 2024 RCT. /// /// Formula: `effective_confidence = confidence * 2^(-(age / halflife))` /// /// - `None` (default): No decay, all assertions weighted by original confidence /// - `Some(31536000)`: 1-year half-life (assertions lose ~50% confidence per year) /// - `Some(86400)`: 1-day half-life (fast decay for rapidly changing data) /// /// **Note**: When decay is enabled, the fast path (materialized view lookup) is /// bypassed because MVs store pre-computed winners without decay applied. /// Queries with decay always use the slow path for accurate results. /// /// # Example /// ```rust /// use stemedb_query::Query; /// /// // Medical queries with 6-month decay half-life /// let query = Query::builder() /// .subject("Semaglutide") /// .predicate("muscle_effect") /// .decay_halflife(15768000) // 6 months in seconds /// .build(); /// ``` pub decay_halflife: Option, /// Use source-class-aware decay instead of uniform decay. /// /// When `true` and `decay_halflife` is also set, the decay half-life /// is determined by each assertion's `source_class` tier: /// - Tier 0 (Regulatory): No decay /// - Tier 1 (Clinical): 2-year half-life /// - Tier 2 (Observational): 1-year half-life /// - Tier 3 (Expert): 6-month half-life /// - Tier 4 (Community): 3-month half-life /// - Tier 5 (Anecdotal): 1-month half-life /// /// The `decay_halflife` field serves as a fallback for assertions /// without a source_class, or when this flag is `false`. pub source_class_decay: bool, /// Query by semantic vector similarity (k-nearest neighbors). /// /// When set, the QueryEngine uses the vector index for candidate retrieval /// instead of the standard SP/S indexes. This enables semantic similarity /// queries like "find assertions with embeddings similar to this one." /// /// The `k` field specifies how many nearest neighbors to return. /// /// - `None` (default): Use standard index-based lookup /// - `Some(vec)`: Use vector index for k-NN search /// /// **Note**: When `vector_near` is set: /// - The fast path (MV lookup) is bypassed /// - Subject/predicate filters are applied AFTER vector search /// - Results are sorted by distance, not by lens resolution /// /// # Example /// ```rust /// use stemedb_query::Query; /// /// // Find 10 assertions with similar embeddings /// let embedding = vec![0.1, 0.2, 0.3, /* ... */]; /// let query = Query::builder() /// .vector_near(embedding, 10) /// .subject("Semaglutide") // Optional: filter results /// .build(); /// ``` pub vector_near: Option>, /// Number of nearest neighbors to return for vector search. /// /// Only used when `vector_near` is set. Defaults to 10 if not specified. pub k: Option, /// Minimum conflict score threshold (0.0 to 1.0). /// /// When set, only returns results where the materialized view's conflict_score /// is >= this value. Used to filter for controversial claims where assertions /// significantly disagree. /// /// - `None` (default): No conflict filtering /// - `Some(0.7)`: Only show claims with high conflict (disagreement) /// - `Some(0.0)`: Show all claims (equivalent to None) /// /// **Note**: This is a POST-resolution filter. The query executes normally, /// but results are filtered by conflict score after lens resolution. /// /// # Example /// ```rust /// use stemedb_query::Query; /// /// // Only show controversial claims (high disagreement) /// let query = Query::builder() /// .subject("Semaglutide") /// .predicate("muscle_effect") /// .min_conflict_score(0.7) /// .build(); /// ``` pub min_conflict_score: Option, /// Maximum conflict score threshold (0.0 to 1.0). /// /// When set, only returns results where the materialized view's conflict_score /// is <= this value. Used to filter for claims with strong agreement. /// /// - `None` (default): No conflict filtering /// - `Some(0.2)`: Only show claims with high agreement /// - `Some(1.0)`: Show all claims (equivalent to None) /// /// **Note**: This is a POST-resolution filter. The query executes normally, /// but results are filtered by conflict score after lens resolution. /// /// # Example /// ```rust /// use stemedb_query::Query; /// /// // Only show claims with strong consensus /// let query = Query::builder() /// .subject("Semaglutide") /// .predicate("muscle_effect") /// .max_conflict_score(0.2) /// .build(); /// ``` pub max_conflict_score: Option, /// Resolve aliases when querying by subject. /// /// When `true` and `subject` is specified, the QueryEngine will: /// 1. Call `alias_store.resolve_all(&subject)` to find all related subjects /// 2. Fetch assertions for ALL resolved subjects /// 3. Deduplicate results by assertion hash /// /// This enables cross-scheme concept resolution. For example, querying /// `code://rust/myapp/tls/cert_verification` with aliases enabled would also /// return assertions from `rfc://5246/tls/cert_verification` if they are aliased. /// /// - `false` (default): Query exact subject only (backward-compatible) /// - `true`: Expand subject to all aliased paths before querying /// /// **Note**: Requires an `AliasStore` to be configured on the `QueryEngine`. /// If no alias store is configured, this flag has no effect. /// /// # Example /// ```rust /// use stemedb_query::Query; /// /// // Find assertions from both code and RFC sources /// let query = Query::builder() /// .subject("code://rust/myapp/tls/cert_verification") /// .resolve_aliases(true) /// .build(); /// ``` pub resolve_aliases: bool, } impl Query { /// Create a new empty query (matches all assertions). pub fn new() -> Self { Self::default() } /// Create a query builder for ergonomic query construction. pub fn builder() -> QueryBuilder { QueryBuilder::new() } /// Check if an assertion matches this query's filters. pub fn matches(&self, assertion: &Assertion) -> bool { // Check subject filter // Skip subject check when resolve_aliases is true, since the expanded // subjects (including aliases) were already used to fetch candidates. if !self.resolve_aliases { if let Some(ref subject) = self.subject { if &assertion.subject != subject { return false; } } } // Check predicate filter if let Some(ref predicate) = self.predicate { if &assertion.predicate != predicate { return false; } } // Check lifecycle filter if let Some(lifecycle) = self.lifecycle { if assertion.lifecycle != lifecycle { return false; } } // Check epoch filter if let Some(epoch) = self.epoch { match assertion.epoch { Some(assertion_epoch) if assertion_epoch == epoch => {} _ => return false, } } // Check visual similarity filter if let Some(ref target_hex) = self.visual_near { let target = match parse_hex_phash(target_hex) { Some(h) => h, None => return false, // Invalid hex = no match }; match assertion.visual_hash { Some(ref assertion_hash) => { let threshold = self.visual_threshold.unwrap_or(8); if hamming_distance(&target, assertion_hash) > threshold { return false; } } None => return false, // No visual_hash = no match when visual_near is specified } } // Check as_of (time-travel) filter if let Some(as_of_ts) = self.as_of { if assertion.timestamp > as_of_ts { return false; } } true } }