Enterprise Features: - Hosted mode with remote sync for team pattern aggregation - Community sharing with privacy-preserving anonymization - LLM-based semantic claim extraction with Gemini integration - Pattern learning with promotion to declarative extractors - High-entropy secrets extractor with configurable thresholds - Auth bypass and insecure cookies extractors Module Refactoring: - Split oversized files to comply with 500-line limit - Config split: types/core.rs, types/extractors.rs, types/hosted.rs, etc. - Handlers split: scan.rs, policy.rs, report.rs modules - Extractors split: declarative/, high_entropy_secrets/, insecure_cookies/ - Learning split: store modules with metrics and persistence SDK & Ontology: - stemedb-ontology SDK with fluent builders and StemeDB client - Pharma domain extractors for FDA Orange Book data - Consumer health UAT test infrastructure Code Quality: - Fixed clippy warnings (needless_borrows_for_generic_args) - Added KVStore trait imports where needed - Fixed utoipa path re-exports for OpenAPI docs Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
126 lines
4.4 KiB
Rust
126 lines
4.4 KiB
Rust
//! In-memory index for concept matching by tail path segments.
//!
//! Maps `{tail_seg1}/{tail_seg2}::{predicate}` → `Vec<Assertion>`.
//! This enables matching claims across different URI schemes by their
//! trailing path components.
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use stemedb_core::types::Assertion;
|
|
|
|
use crate::types::PredicateAliasSet;
|
|
|
|
/// In-memory index for concept matching by tail path segments.
|
|
///
|
|
/// Maps `{tail_seg1}/{tail_seg2}::{predicate}` → `Vec<Assertion>`.
|
|
/// This enables matching claims across different URI schemes by their
|
|
/// trailing path components.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// Both of these subjects produce the same key `"tls/cert_verification::enabled"`:
|
|
/// - `rfc://5246/tls/cert_verification`
|
|
/// - `code://rust/myapp/client/tls/cert_verification`
|
|
pub struct ConceptIndex {
|
|
pub entries: HashMap<String, Vec<Assertion>>,
|
|
}
|
|
|
|
impl ConceptIndex {
|
|
/// Build a ConceptIndex from a slice of assertions.
|
|
pub fn build(assertions: &[Assertion]) -> Self {
|
|
// Pre-allocate based on expected unique keys
|
|
let mut entries: HashMap<String, Vec<Assertion>> = HashMap::with_capacity(assertions.len());
|
|
|
|
for assertion in assertions {
|
|
if let Some(key) = Self::make_key(&assertion.subject, &assertion.predicate) {
|
|
entries.entry(key).or_default().push(assertion.clone());
|
|
}
|
|
}
|
|
|
|
Self { entries }
|
|
}
|
|
|
|
/// Look up assertions matching the tail segments of a subject and predicate.
|
|
pub fn lookup(&self, subject: &str, predicate: &str) -> Option<&Vec<Assertion>> {
|
|
let key = Self::make_key(subject, predicate)?;
|
|
self.entries.get(&key)
|
|
}
|
|
|
|
/// Create a lookup key from subject and predicate.
|
|
///
|
|
/// Algorithm:
|
|
/// 1. Split subject on `"://"`, take path part
|
|
/// 2. Split path on `"/"` in reverse, get last 2 non-empty segments
|
|
/// 3. If < 2 segments, return None
|
|
/// 4. Return `"{seg[-2]}/{seg[-1]}::{predicate}"`
|
|
pub fn make_key(subject: &str, predicate: &str) -> Option<String> {
|
|
Self::make_key_with_predicate(subject, predicate)
|
|
}
|
|
|
|
/// Internal key creation with explicit predicate.
|
|
fn make_key_with_predicate(subject: &str, predicate: &str) -> Option<String> {
|
|
// Split on "://" to separate scheme from path
|
|
let path = subject.find("://").map(|i| &subject[i + 3..]).unwrap_or(subject);
|
|
|
|
// Get last two non-empty segments using rsplit (avoids Vec allocation)
|
|
let mut segments = path.rsplit('/').filter(|s| !s.is_empty());
|
|
|
|
let tail2 = segments.next()?;
|
|
let tail1 = segments.next()?;
|
|
|
|
Some(format!("{}/{}::{}", tail1, tail2, predicate))
|
|
}
|
|
|
|
/// Normalize a predicate using the given alias sets.
|
|
///
|
|
/// Returns the canonical form if found, otherwise the original predicate.
|
|
pub fn normalize_predicate<'a>(
|
|
predicate: &'a str,
|
|
aliases: &'a [PredicateAliasSet],
|
|
) -> &'a str {
|
|
for alias_set in aliases {
|
|
if let Some(canonical) = alias_set.normalize(predicate) {
|
|
return canonical;
|
|
}
|
|
}
|
|
predicate
|
|
}
|
|
|
|
/// Build a ConceptIndex with predicate alias normalization.
|
|
///
|
|
/// Predicates are normalized to their canonical form before indexing,
|
|
/// enabling semantic matching across equivalent predicates.
|
|
pub fn build_with_aliases(
|
|
assertions: &[Assertion],
|
|
predicate_aliases: &[PredicateAliasSet],
|
|
) -> Self {
|
|
let mut entries: HashMap<String, Vec<Assertion>> = HashMap::with_capacity(assertions.len());
|
|
|
|
for assertion in assertions {
|
|
let normalized_predicate =
|
|
Self::normalize_predicate(&assertion.predicate, predicate_aliases);
|
|
if let Some(key) =
|
|
Self::make_key_with_predicate(&assertion.subject, normalized_predicate)
|
|
{
|
|
entries.entry(key).or_default().push(assertion.clone());
|
|
}
|
|
}
|
|
|
|
Self { entries }
|
|
}
|
|
|
|
/// Look up assertions with predicate alias normalization.
|
|
///
|
|
/// The given predicate is normalized using the alias sets before lookup.
|
|
pub fn lookup_with_aliases(
|
|
&self,
|
|
subject: &str,
|
|
predicate: &str,
|
|
predicate_aliases: &[PredicateAliasSet],
|
|
) -> Option<&Vec<Assertion>> {
|
|
let normalized = Self::normalize_predicate(predicate, predicate_aliases);
|
|
let key = Self::make_key_with_predicate(subject, normalized)?;
|
|
self.entries.get(&key)
|
|
}
|
|
}
|