Major additions:
- Staged scanning modes (working tree, staged, committed) with git integration
- Drift detection for baseline vs. current state comparisons
- Hosted API handlers for policy CRUD operations via the StemeDB API
- stemedb-ontology crate with domain definitions and medical extractors
- Consumer health vertical UAT scenarios (GLP-1, gastroparesis, etc.)
- Aphoria development skill documentation

Code organization:
- Split large files into focused modules to stay under the 500-line limit
- Extracted config tests, episteme helpers/drift/aliases, and API helpers

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
405 lines
13 KiB
Rust
405 lines
13 KiB
Rust
//! Domain definitions for ontology-aware subject construction.
|
|
//!
|
|
//! A Domain defines:
|
|
//! - Entity types (Drug, Indication, Pathway, etc.)
|
|
//! - Predicate schemas (which predicates use which subject patterns)
|
|
//! - Source hierarchy (how to weight different source classes)
|
|
|
|
use std::collections::HashMap;
|
|
use stemedb_core::types::SourceClass;
|
|
|
|
/// A domain definition (vertical-specific ontology).
|
|
///
|
|
/// Domains are compiled-in for type safety. Each domain defines:
|
|
/// - What entities exist (Drug, Indication, Pathway, etc.)
|
|
/// - How predicates map to subject patterns
|
|
/// - Source class weighting for this vertical
|
|
#[derive(Debug, Clone)]
|
|
pub struct Domain {
|
|
/// Human-readable name of the domain.
|
|
pub name: String,
|
|
|
|
/// Description of what this domain covers.
|
|
pub description: String,
|
|
|
|
/// Entity types defined in this domain.
|
|
///
|
|
/// Key is the entity type name (e.g., "Drug"), value is the definition.
|
|
pub entity_types: HashMap<String, EntityType>,
|
|
|
|
/// Predicate schemas grouped by category.
|
|
///
|
|
/// Key is the category name (e.g., "efficacy", "safety"), value is the schema.
|
|
pub predicate_schemas: HashMap<String, PredicateSchema>,
|
|
|
|
/// Source hierarchy for this domain.
|
|
///
|
|
/// Ordered from highest authority (index 0) to lowest.
|
|
pub source_hierarchy: Vec<SourceTier>,
|
|
}
|
|
|
|
impl Domain {
|
|
/// Create a new domain with the given name.
|
|
pub fn new(name: impl Into<String>, description: impl Into<String>) -> Self {
|
|
Self {
|
|
name: name.into(),
|
|
description: description.into(),
|
|
entity_types: HashMap::new(),
|
|
predicate_schemas: HashMap::new(),
|
|
source_hierarchy: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Add an entity type to this domain.
|
|
pub fn with_entity_type(mut self, name: impl Into<String>, entity_type: EntityType) -> Self {
|
|
self.entity_types.insert(name.into(), entity_type);
|
|
self
|
|
}
|
|
|
|
/// Add a predicate schema to this domain.
|
|
pub fn with_predicate_schema(
|
|
mut self,
|
|
category: impl Into<String>,
|
|
schema: PredicateSchema,
|
|
) -> Self {
|
|
self.predicate_schemas.insert(category.into(), schema);
|
|
self
|
|
}
|
|
|
|
/// Set the source hierarchy for this domain.
|
|
pub fn with_source_hierarchy(mut self, hierarchy: Vec<SourceTier>) -> Self {
|
|
self.source_hierarchy = hierarchy;
|
|
self
|
|
}
|
|
|
|
/// Get a predicate schema by category name.
|
|
pub fn get_schema(&self, category: &str) -> Option<&PredicateSchema> {
|
|
self.predicate_schemas.get(category)
|
|
}
|
|
|
|
/// Get an entity type by name.
|
|
pub fn get_entity_type(&self, name: &str) -> Option<&EntityType> {
|
|
self.entity_types.get(name)
|
|
}
|
|
|
|
/// Find the schema that contains a specific predicate.
|
|
pub fn schema_for_predicate(&self, predicate: &str) -> Option<&PredicateSchema> {
|
|
self.predicate_schemas
|
|
.values()
|
|
.find(|schema| schema.predicates.contains(&predicate.to_string()))
|
|
}
|
|
|
|
/// Get all predicate names across all schemas.
|
|
pub fn all_predicates(&self) -> Vec<&str> {
|
|
self.predicate_schemas
|
|
.values()
|
|
.flat_map(|schema| schema.predicates.iter().map(String::as_str))
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
/// An entity type in the domain ontology.
|
|
///
|
|
/// Entity types represent the kinds of things that can be subjects or objects
|
|
/// in assertions. Examples: Drug, Indication, Pathway, Gene.
|
|
#[derive(Debug, Clone)]
|
|
pub struct EntityType {
|
|
/// Human-readable description of this entity type.
|
|
pub description: String,
|
|
|
|
/// Canonical naming pattern (e.g., "CamelCase", "lowercase_with_underscores").
|
|
pub naming_convention: NamingConvention,
|
|
|
|
/// Optional normalization table for aliases.
|
|
///
|
|
/// Maps common aliases to canonical names (e.g., "Ozempic" -> "Semaglutide").
|
|
pub aliases: HashMap<String, String>,
|
|
|
|
/// Whether this entity type is required for subject construction.
|
|
pub required: bool,
|
|
}
|
|
|
|
impl EntityType {
|
|
/// Create a new required entity type.
|
|
pub fn required(description: impl Into<String>) -> Self {
|
|
Self {
|
|
description: description.into(),
|
|
naming_convention: NamingConvention::CamelCase,
|
|
aliases: HashMap::new(),
|
|
required: true,
|
|
}
|
|
}
|
|
|
|
/// Create a new optional entity type.
|
|
pub fn optional(description: impl Into<String>) -> Self {
|
|
Self {
|
|
description: description.into(),
|
|
naming_convention: NamingConvention::CamelCase,
|
|
aliases: HashMap::new(),
|
|
required: false,
|
|
}
|
|
}
|
|
|
|
/// Set the naming convention for this entity type.
|
|
pub fn with_naming(mut self, convention: NamingConvention) -> Self {
|
|
self.naming_convention = convention;
|
|
self
|
|
}
|
|
|
|
/// Add an alias mapping.
|
|
pub fn with_alias(mut self, alias: impl Into<String>, canonical: impl Into<String>) -> Self {
|
|
self.aliases.insert(alias.into(), canonical.into());
|
|
self
|
|
}
|
|
|
|
/// Normalize a value using the alias table.
|
|
///
|
|
/// Returns the canonical name if an alias exists, otherwise returns the original.
|
|
pub fn normalize(&self, value: &str) -> String {
|
|
self.aliases.get(value).cloned().unwrap_or_else(|| value.to_string())
|
|
}
|
|
}
|
|
|
|
/// How values of an entity type are expected to be spelled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NamingConvention {
    /// CamelCase, such as "Type2Diabetes".
    CamelCase,
    /// lowercase_with_underscores, such as "type_2_diabetes".
    SnakeCase,
    /// UPPERCASE_WITH_UNDERSCORES, such as "TYPE_2_DIABETES".
    ScreamingSnakeCase,
    /// The value is taken as-is (no transformation).
    Verbatim,
}
|
|
|
|
/// A predicate schema defines how subjects are built for a category of predicates.
|
|
///
|
|
/// # Subject Pattern Syntax
|
|
///
|
|
/// The `subject_pattern` uses curly braces to reference entity types:
|
|
/// - `{Drug}` - replaced with the Drug entity value
|
|
/// - `{Drug}:{Indication}` - compound subject with colon separator
|
|
///
|
|
/// All referenced entity types must be provided when building a subject.
|
|
#[derive(Debug, Clone)]
|
|
pub struct PredicateSchema {
|
|
/// Description of this predicate category.
|
|
pub description: String,
|
|
|
|
/// Subject pattern template (e.g., "{Drug}:{Indication}").
|
|
///
|
|
/// Entity type names in curly braces are replaced with values.
|
|
pub subject_pattern: String,
|
|
|
|
/// List of predicates that use this schema.
|
|
pub predicates: Vec<String>,
|
|
|
|
/// Default lens for resolving conflicts in this category.
|
|
pub default_lens: DefaultLens,
|
|
|
|
/// Entity types required by this schema.
|
|
///
|
|
/// Extracted from `subject_pattern` for validation.
|
|
pub required_entities: Vec<String>,
|
|
}
|
|
|
|
impl PredicateSchema {
|
|
/// Create a new predicate schema.
|
|
pub fn new(description: impl Into<String>, subject_pattern: impl Into<String>) -> Self {
|
|
let pattern = subject_pattern.into();
|
|
let required_entities = Self::extract_entity_names(&pattern);
|
|
|
|
Self {
|
|
description: description.into(),
|
|
subject_pattern: pattern,
|
|
predicates: Vec::new(),
|
|
default_lens: DefaultLens::Recency,
|
|
required_entities,
|
|
}
|
|
}
|
|
|
|
/// Add predicates to this schema.
|
|
pub fn with_predicates(mut self, predicates: Vec<impl Into<String>>) -> Self {
|
|
self.predicates = predicates.into_iter().map(Into::into).collect();
|
|
self
|
|
}
|
|
|
|
/// Set the default lens for this schema.
|
|
pub fn with_default_lens(mut self, lens: DefaultLens) -> Self {
|
|
self.default_lens = lens;
|
|
self
|
|
}
|
|
|
|
/// Extract entity names from a subject pattern.
|
|
///
|
|
/// Pattern: "{Drug}:{Indication}" -> ["Drug", "Indication"]
|
|
fn extract_entity_names(pattern: &str) -> Vec<String> {
|
|
let mut names = Vec::new();
|
|
let mut in_brace = false;
|
|
let mut current = String::new();
|
|
|
|
for c in pattern.chars() {
|
|
match c {
|
|
'{' => {
|
|
in_brace = true;
|
|
current.clear();
|
|
}
|
|
'}' => {
|
|
if in_brace && !current.is_empty() {
|
|
names.push(current.clone());
|
|
}
|
|
in_brace = false;
|
|
current.clear();
|
|
}
|
|
_ if in_brace => {
|
|
current.push(c);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
names
|
|
}
|
|
}
|
|
|
|
/// The lens a predicate category uses by default.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DefaultLens {
    /// The most recent assertion wins.
    Recency,
    /// Consensus among sources.
    Consensus,
    /// The highest authority tier wins.
    Authority,
    /// Skeptic mode: show all conflicts.
    Skeptic,
    /// Per-tier breakdown with authority override.
    LayeredConsensus,
}
|
|
|
|
/// A tier in the source hierarchy.
|
|
#[derive(Debug, Clone)]
|
|
pub struct SourceTier {
|
|
/// The source class for this tier.
|
|
pub source_class: SourceClass,
|
|
|
|
/// Human-readable label for this tier.
|
|
pub label: String,
|
|
|
|
/// Examples of sources in this tier.
|
|
pub examples: Vec<String>,
|
|
|
|
/// Weight multiplier for this tier (1.0 = full weight).
|
|
pub weight: f32,
|
|
|
|
/// Decay half-life override (None = use SourceClass default).
|
|
pub decay_half_life_days: Option<u32>,
|
|
}
|
|
|
|
impl SourceTier {
|
|
/// Create a new source tier.
|
|
pub fn new(source_class: SourceClass, label: impl Into<String>) -> Self {
|
|
Self {
|
|
source_class,
|
|
label: label.into(),
|
|
examples: Vec::new(),
|
|
weight: source_class.authority_weight(),
|
|
decay_half_life_days: None,
|
|
}
|
|
}
|
|
|
|
/// Add example sources for this tier.
|
|
pub fn with_examples(mut self, examples: Vec<impl Into<String>>) -> Self {
|
|
self.examples = examples.into_iter().map(Into::into).collect();
|
|
self
|
|
}
|
|
|
|
/// Override the weight for this tier.
|
|
pub fn with_weight(mut self, weight: f32) -> Self {
|
|
self.weight = weight;
|
|
self
|
|
}
|
|
|
|
/// Override the decay half-life for this tier.
|
|
pub fn with_decay(mut self, days: u32) -> Self {
|
|
self.decay_half_life_days = Some(days);
|
|
self
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Entity types added through the builder can be looked up by name.
    #[test]
    fn test_domain_builder() {
        let drug = EntityType::required("A pharmaceutical compound");
        let indication = EntityType::required("A medical condition");

        let domain = Domain::new("Test", "A test domain")
            .with_entity_type("Drug", drug)
            .with_entity_type("Indication", indication);

        assert_eq!(domain.name, "Test");
        assert!(domain.get_entity_type("Drug").is_some());
        assert!(domain.get_entity_type("Unknown").is_none());
    }

    /// Aliases resolve to the canonical name; canonical names pass through.
    #[test]
    fn test_entity_type_aliases() {
        let entity = EntityType::required("A drug")
            .with_alias("Ozempic", "Semaglutide")
            .with_alias("Wegovy", "Semaglutide");

        for input in ["Ozempic", "Wegovy", "Semaglutide"] {
            assert_eq!(entity.normalize(input), "Semaglutide");
        }
    }

    /// A two-part pattern yields both entity names, in order.
    #[test]
    fn test_predicate_schema_extraction() {
        let schema = PredicateSchema::new("Efficacy predicates", "{Drug}:{Indication}");

        assert_eq!(schema.required_entities, vec!["Drug", "Indication"]);
    }

    /// A single placeholder yields a single entity name.
    #[test]
    fn test_predicate_schema_single_entity() {
        let schema = PredicateSchema::new("Safety predicates", "{Drug}");

        assert_eq!(schema.required_entities, vec!["Drug"]);
    }

    /// A three-part pattern yields all three entity names, in order.
    #[test]
    fn test_predicate_schema_complex_pattern() {
        let schema = PredicateSchema::new("Complex", "{Drug}:{Indication}:{Outcome}");

        assert_eq!(schema.required_entities, vec!["Drug", "Indication", "Outcome"]);
    }

    /// Schemas can be found both by category and by a predicate they list.
    #[test]
    fn test_domain_schema_lookup() {
        let efficacy_schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}")
            .with_predicates(vec!["hba1c_reduction", "weight_loss"]);
        let safety_schema = PredicateSchema::new("Safety", "{Drug}")
            .with_predicates(vec!["has_boxed_warning", "adverse_event_rate"]);

        let domain = Domain::new("Test", "Test domain")
            .with_predicate_schema("efficacy", efficacy_schema)
            .with_predicate_schema("safety", safety_schema);

        // Lookup by category
        let efficacy = domain.get_schema("efficacy").expect("efficacy schema");
        assert_eq!(efficacy.subject_pattern, "{Drug}:{Indication}");

        // Lookup by predicate
        let weight_schema = domain.schema_for_predicate("weight_loss").expect("weight_loss schema");
        assert_eq!(weight_schema.subject_pattern, "{Drug}:{Indication}");

        let warning_schema =
            domain.schema_for_predicate("has_boxed_warning").expect("warning schema");
        assert_eq!(warning_schema.subject_pattern, "{Drug}");
    }
}
|