stemedb/crates/stemedb-ontology/src/domain.rs
jordan 8f6506b70a feat: Aphoria scan modes + stemedb-ontology crate + consumer health UAT
Major additions:
- Staged scanning modes (working tree, staged, committed) with git integration
- Drift detection for baseline vs current state comparisons
- Hosted API handlers for policy CRUD operations via StemeDB API
- stemedb-ontology crate with domain definitions and medical extractors
- Consumer health vertical UAT scenarios (GLP-1, gastroparesis, etc.)
- Aphoria development skill documentation

Code organization:
- Split large files into focused modules to stay under 500-line limit
- Extracted config tests, episteme helpers/drift/aliases, API helpers

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 21:57:33 -07:00

405 lines
13 KiB
Rust

//! Domain definitions for ontology-aware subject construction.
//!
//! A Domain defines:
//! - Entity types (Drug, Indication, Pathway, etc.)
//! - Predicate schemas (which predicates use which subject patterns)
//! - Source hierarchy (how to weight different source classes)
use std::collections::HashMap;
use stemedb_core::types::SourceClass;
/// A domain definition (vertical-specific ontology).
///
/// Domains are compiled-in for type safety. Each domain defines:
/// - What entities exist (Drug, Indication, Pathway, etc.)
/// - How predicates map to subject patterns
/// - Source class weighting for this vertical
///
/// A `Domain` is plain data: every field is public, and values are normally
/// assembled with `Domain::new` followed by chained `with_*` builder calls.
#[derive(Debug, Clone)]
pub struct Domain {
/// Human-readable name of the domain.
pub name: String,
/// Description of what this domain covers.
pub description: String,
/// Entity types defined in this domain.
///
/// Key is the entity type name (e.g., "Drug"), value is the definition.
/// Being a `HashMap`, each name maps to exactly one definition and
/// iteration order is unspecified.
pub entity_types: HashMap<String, EntityType>,
/// Predicate schemas grouped by category.
///
/// Key is the category name (e.g., "efficacy", "safety"), value is the schema.
/// Being a `HashMap`, each category maps to exactly one schema and
/// iteration order is unspecified.
pub predicate_schemas: HashMap<String, PredicateSchema>,
/// Source hierarchy for this domain.
///
/// Ordered from highest authority (index 0) to lowest.
/// The ordering is a convention for whoever supplies the vector; nothing in
/// this type sorts or validates it.
pub source_hierarchy: Vec<SourceTier>,
}
impl Domain {
    /// Create an empty domain with the given name and description.
    ///
    /// The returned domain has no entity types, no predicate schemas and an
    /// empty source hierarchy; populate it with the `with_*` builder methods.
    pub fn new(name: impl Into<String>, description: impl Into<String>) -> Self {
        Self {
            name: name.into(),
            description: description.into(),
            entity_types: HashMap::new(),
            predicate_schemas: HashMap::new(),
            source_hierarchy: Vec::new(),
        }
    }

    /// Register an entity type under `name` and return the updated domain.
    ///
    /// If an entity type is already registered under the same name it is
    /// replaced.
    pub fn with_entity_type(mut self, name: impl Into<String>, entity_type: EntityType) -> Self {
        self.entity_types.insert(name.into(), entity_type);
        self
    }

    /// Register a predicate schema under `category` and return the updated domain.
    ///
    /// If a schema is already registered under the same category it is
    /// replaced.
    pub fn with_predicate_schema(
        mut self,
        category: impl Into<String>,
        schema: PredicateSchema,
    ) -> Self {
        self.predicate_schemas.insert(category.into(), schema);
        self
    }

    /// Set the source hierarchy for this domain, replacing any previous one.
    ///
    /// The vector is stored exactly as given; it is not sorted or validated.
    pub fn with_source_hierarchy(mut self, hierarchy: Vec<SourceTier>) -> Self {
        self.source_hierarchy = hierarchy;
        self
    }

    /// Get a predicate schema by category name.
    ///
    /// Returns `None` when no schema is registered under `category`.
    pub fn get_schema(&self, category: &str) -> Option<&PredicateSchema> {
        self.predicate_schemas.get(category)
    }

    /// Get an entity type by name.
    ///
    /// Returns `None` when no entity type is registered under `name`.
    pub fn get_entity_type(&self, name: &str) -> Option<&EntityType> {
        self.entity_types.get(name)
    }

    /// Find the schema that contains a specific predicate.
    ///
    /// The predicate name must match exactly. Returns `None` when no schema
    /// lists it. If several schemas list the same predicate, which one is
    /// returned is unspecified because the schemas live in a `HashMap`.
    pub fn schema_for_predicate(&self, predicate: &str) -> Option<&PredicateSchema> {
        // Compare as `&str` so the scan never allocates a temporary `String`.
        self.predicate_schemas
            .values()
            .find(|schema| schema.predicates.iter().any(|known| known.as_str() == predicate))
    }

    /// Get all predicate names across all schemas.
    ///
    /// Names are borrowed from the schemas. Predicates of one schema keep
    /// their relative order, but the order in which schemas are visited is
    /// unspecified, and a predicate listed by several schemas appears once
    /// per schema.
    pub fn all_predicates(&self) -> Vec<&str> {
        self.predicate_schemas
            .values()
            .flat_map(|schema| schema.predicates.iter().map(String::as_str))
            .collect()
    }
}
/// An entity type in the domain ontology.
///
/// Entity types represent the kinds of things that can be subjects or objects
/// in assertions. Examples: Drug, Indication, Pathway, Gene.
///
/// The type's own name is not stored here; in a `Domain` it is the key under
/// which the `EntityType` is registered.
#[derive(Debug, Clone)]
pub struct EntityType {
/// Human-readable description of this entity type.
pub description: String,
/// Canonical naming pattern (e.g., "CamelCase", "lowercase_with_underscores").
///
/// The `required`/`optional` constructors start with `NamingConvention::CamelCase`.
pub naming_convention: NamingConvention,
/// Optional normalization table for aliases.
///
/// Maps common aliases to canonical names (e.g., "Ozempic" -> "Semaglutide").
/// `normalize` looks values up by exact string match, so keys are
/// case-sensitive.
pub aliases: HashMap<String, String>,
/// Whether this entity type is required for subject construction.
///
/// NOTE(review): nothing in this file reads this flag; it is presumably
/// consumed by the subject-building code elsewhere.
pub required: bool,
}
impl EntityType {
    /// Build an entity type whose `required` flag is `true`.
    ///
    /// The new value uses `NamingConvention::CamelCase` and has an empty
    /// alias table; adjust either with `with_naming` / `with_alias`.
    pub fn required(description: impl Into<String>) -> Self {
        Self {
            description: description.into(),
            naming_convention: NamingConvention::CamelCase,
            aliases: HashMap::new(),
            required: true,
        }
    }

    /// Build an entity type whose `required` flag is `false`.
    ///
    /// Apart from the flag, the defaults are the same as for `required`.
    pub fn optional(description: impl Into<String>) -> Self {
        Self { required: false, ..Self::required(description) }
    }

    /// Replace the naming convention and return the updated entity type.
    pub fn with_naming(self, convention: NamingConvention) -> Self {
        Self { naming_convention: convention, ..self }
    }

    /// Map `alias` to `canonical` and return the updated entity type.
    ///
    /// Registering the same alias again overwrites the earlier mapping.
    pub fn with_alias(mut self, alias: impl Into<String>, canonical: impl Into<String>) -> Self {
        let (from, to) = (alias.into(), canonical.into());
        self.aliases.insert(from, to);
        self
    }

    /// Resolve `value` through the alias table.
    ///
    /// Returns the canonical name when `value` is a registered alias (exact,
    /// case-sensitive match); otherwise returns `value` unchanged as an owned
    /// `String`. The naming convention is not applied here.
    pub fn normalize(&self, value: &str) -> String {
        match self.aliases.get(value) {
            Some(canonical) => canonical.clone(),
            None => value.to_owned(),
        }
    }
}
/// Naming convention for entity values.
///
/// This enum only names a convention. Within this file it is stored on
/// `EntityType` but never used to transform a value (`EntityType::normalize`
/// consults the alias table only).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NamingConvention {
/// CamelCase (e.g., "Type2Diabetes")
CamelCase,
/// lowercase_with_underscores (e.g., "type_2_diabetes")
SnakeCase,
/// UPPERCASE_WITH_UNDERSCORES (e.g., "TYPE_2_DIABETES")
ScreamingSnakeCase,
/// As-is (no transformation)
Verbatim,
}
/// A predicate schema defines how subjects are built for a category of predicates.
///
/// # Subject Pattern Syntax
///
/// The `subject_pattern` uses curly braces to reference entity types:
/// - `{Drug}` - replaced with the Drug entity value
/// - `{Drug}:{Indication}` - compound subject with colon separator
///
/// All referenced entity types must be provided when building a subject.
/// (Subject construction itself is not implemented in this file; this type
/// only stores the pattern and the entity names found in it.)
#[derive(Debug, Clone)]
pub struct PredicateSchema {
/// Description of this predicate category.
pub description: String,
/// Subject pattern template (e.g., "{Drug}:{Indication}").
///
/// Entity type names in curly braces are replaced with values.
pub subject_pattern: String,
/// List of predicates that use this schema.
///
/// Empty after `PredicateSchema::new`; filled in by `with_predicates`.
pub predicates: Vec<String>,
/// Default lens for resolving conflicts in this category.
///
/// `PredicateSchema::new` starts with `DefaultLens::Recency`.
pub default_lens: DefaultLens,
/// Entity types required by this schema.
///
/// Extracted from `subject_pattern` for validation.
/// Computed once by `PredicateSchema::new`, in pattern order and without
/// de-duplication. Because the fields are public, changing
/// `subject_pattern` afterwards does not refresh this list.
pub required_entities: Vec<String>,
}
impl PredicateSchema {
    /// Build a schema from a description and a subject pattern.
    ///
    /// `required_entities` is derived from the pattern's `{Name}`
    /// placeholders. The schema starts with no predicates and with
    /// `DefaultLens::Recency` as its default lens.
    pub fn new(description: impl Into<String>, subject_pattern: impl Into<String>) -> Self {
        let subject_pattern: String = subject_pattern.into();
        let required_entities = Self::extract_entity_names(&subject_pattern);
        Self {
            description: description.into(),
            subject_pattern,
            predicates: Vec::new(),
            default_lens: DefaultLens::Recency,
            required_entities,
        }
    }

    /// Set the predicates covered by this schema.
    ///
    /// The given list replaces whatever predicates were set before; it is not
    /// appended to them.
    pub fn with_predicates(self, predicates: Vec<impl Into<String>>) -> Self {
        let predicates = predicates.into_iter().map(|name| name.into()).collect();
        Self { predicates, ..self }
    }

    /// Choose the default lens for this schema.
    pub fn with_default_lens(self, lens: DefaultLens) -> Self {
        Self { default_lens: lens, ..self }
    }

    /// Collect the placeholder names found in a subject pattern.
    ///
    /// Pattern: "{Drug}:{Indication}" -> ["Drug", "Indication"]
    ///
    /// A name is the text between a `{` and the next `}`. Scanning is lenient:
    /// a second `{` before the closing brace restarts the name, empty braces
    /// produce nothing, and unmatched braces are ignored. Names are returned
    /// in order of appearance, verbatim (no trimming, no de-duplication).
    fn extract_entity_names(pattern: &str) -> Vec<String> {
        let mut found = Vec::new();
        // Byte offset just past the most recent still-open `{`, if any.
        let mut name_start: Option<usize> = None;

        for (offset, ch) in pattern.char_indices() {
            match ch {
                '{' => name_start = Some(offset + ch.len_utf8()),
                '}' => {
                    // `take()` closes the brace whether or not a name is emitted.
                    if let Some(start) = name_start.take() {
                        if start < offset {
                            // Both offsets sit next to ASCII braces, so they
                            // are valid char boundaries.
                            found.push(pattern[start..offset].to_string());
                        }
                    }
                }
                _ => {}
            }
        }

        found
    }
}
/// Default lens to use for a predicate category.
///
/// This enum only names the strategy; the resolution logic itself is not in
/// this file. `PredicateSchema::new` selects `Recency` until
/// `PredicateSchema::with_default_lens` overrides it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DefaultLens {
/// Most recent assertion wins.
Recency,
/// Consensus among sources.
Consensus,
/// Highest authority tier wins.
Authority,
/// Show all conflicts (skeptic mode).
Skeptic,
/// Per-tier breakdown with authority override.
LayeredConsensus,
}
/// A tier in the source hierarchy.
///
/// Tiers are collected in `Domain::source_hierarchy`; a tier does not record
/// its own position in that list.
#[derive(Debug, Clone)]
pub struct SourceTier {
/// The source class for this tier.
pub source_class: SourceClass,
/// Human-readable label for this tier.
pub label: String,
/// Examples of sources in this tier.
///
/// Empty after `SourceTier::new`; filled in by `with_examples`.
pub examples: Vec<String>,
/// Weight multiplier for this tier (1.0 = full weight).
///
/// `SourceTier::new` initialises this from `source_class.authority_weight()`;
/// `with_weight` overrides it. No range check is applied in this file.
pub weight: f32,
/// Decay half-life override (None = use SourceClass default).
pub decay_half_life_days: Option<u32>,
}
impl SourceTier {
    /// Build a tier for `source_class` with the given label.
    ///
    /// The tier starts with no examples, no decay override, and a weight
    /// taken from `source_class.authority_weight()`.
    pub fn new(source_class: SourceClass, label: impl Into<String>) -> Self {
        let label = label.into();
        let weight = source_class.authority_weight();
        Self { source_class, label, examples: Vec::new(), weight, decay_half_life_days: None }
    }

    /// Set the example sources for this tier.
    ///
    /// The given list replaces any examples set before; it is not appended
    /// to them.
    pub fn with_examples(self, examples: Vec<impl Into<String>>) -> Self {
        let examples = examples.into_iter().map(|example| example.into()).collect();
        Self { examples, ..self }
    }

    /// Replace the tier's weight with `weight` (stored as given, unchecked).
    pub fn with_weight(self, weight: f32) -> Self {
        Self { weight, ..self }
    }

    /// Set a decay half-life override of `days` days for this tier.
    pub fn with_decay(self, days: u32) -> Self {
        Self { decay_half_life_days: Some(days), ..self }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for the domain builders, alias normalization, subject
    //! pattern parsing and schema lookups.
    use super::*;

    /// Builds the two-schema domain shared by the lookup tests.
    fn sample_domain() -> Domain {
        Domain::new("Test", "Test domain")
            .with_predicate_schema(
                "efficacy",
                PredicateSchema::new("Efficacy", "{Drug}:{Indication}")
                    .with_predicates(vec!["hba1c_reduction", "weight_loss"]),
            )
            .with_predicate_schema(
                "safety",
                PredicateSchema::new("Safety", "{Drug}")
                    .with_predicates(vec!["has_boxed_warning", "adverse_event_rate"]),
            )
    }

    #[test]
    fn test_domain_builder() {
        let domain = Domain::new("Test", "A test domain")
            .with_entity_type("Drug", EntityType::required("A pharmaceutical compound"))
            .with_entity_type("Indication", EntityType::required("A medical condition"));
        assert_eq!(domain.name, "Test");
        assert!(domain.get_entity_type("Drug").is_some());
        assert!(domain.get_entity_type("Unknown").is_none());
    }

    #[test]
    fn test_entity_type_aliases() {
        let entity = EntityType::required("A drug")
            .with_alias("Ozempic", "Semaglutide")
            .with_alias("Wegovy", "Semaglutide");
        assert_eq!(entity.normalize("Ozempic"), "Semaglutide");
        assert_eq!(entity.normalize("Wegovy"), "Semaglutide");
        assert_eq!(entity.normalize("Semaglutide"), "Semaglutide");
    }

    #[test]
    fn test_entity_type_alias_lookup_is_exact() {
        // Alias keys are matched verbatim, so a different case is not an alias.
        let entity = EntityType::required("A drug").with_alias("Ozempic", "Semaglutide");
        assert_eq!(entity.normalize("ozempic"), "ozempic");
    }

    #[test]
    fn test_entity_type_optional_and_naming() {
        let required = EntityType::required("A drug");
        assert!(required.required);
        assert_eq!(required.naming_convention, NamingConvention::CamelCase);

        let optional = EntityType::optional("A pathway").with_naming(NamingConvention::SnakeCase);
        assert!(!optional.required);
        assert_eq!(optional.naming_convention, NamingConvention::SnakeCase);
    }

    #[test]
    fn test_predicate_schema_extraction() {
        let schema = PredicateSchema::new("Efficacy predicates", "{Drug}:{Indication}");
        assert_eq!(schema.required_entities, vec!["Drug", "Indication"]);
    }

    #[test]
    fn test_predicate_schema_single_entity() {
        let schema = PredicateSchema::new("Safety predicates", "{Drug}");
        assert_eq!(schema.required_entities, vec!["Drug"]);
    }

    #[test]
    fn test_predicate_schema_complex_pattern() {
        let schema = PredicateSchema::new("Complex", "{Drug}:{Indication}:{Outcome}");
        assert_eq!(schema.required_entities, vec!["Drug", "Indication", "Outcome"]);
    }

    #[test]
    fn test_predicate_schema_incomplete_placeholders() {
        // Patterns without a complete, non-empty `{Name}` yield no entities.
        assert!(PredicateSchema::new("No placeholders", "Drug").required_entities.is_empty());
        assert!(PredicateSchema::new("Empty braces", "{}").required_entities.is_empty());
        assert!(PredicateSchema::new("Unclosed brace", "{Drug").required_entities.is_empty());
    }

    #[test]
    fn test_predicate_schema_defaults() {
        let schema = PredicateSchema::new("Safety predicates", "{Drug}");
        assert!(schema.predicates.is_empty());
        assert_eq!(schema.default_lens, DefaultLens::Recency);

        let schema = schema.with_default_lens(DefaultLens::Authority);
        assert_eq!(schema.default_lens, DefaultLens::Authority);
    }

    #[test]
    fn test_domain_schema_lookup() {
        let domain = sample_domain();

        // Lookup by category
        let efficacy = domain.get_schema("efficacy").expect("efficacy schema");
        assert_eq!(efficacy.subject_pattern, "{Drug}:{Indication}");

        // Lookup by predicate
        let weight_schema =
            domain.schema_for_predicate("weight_loss").expect("weight_loss schema");
        assert_eq!(weight_schema.subject_pattern, "{Drug}:{Indication}");
        let warning_schema =
            domain.schema_for_predicate("has_boxed_warning").expect("warning schema");
        assert_eq!(warning_schema.subject_pattern, "{Drug}");
    }

    #[test]
    fn test_domain_unknown_lookups() {
        let domain = sample_domain();
        assert!(domain.get_schema("pharmacokinetics").is_none());
        assert!(domain.schema_for_predicate("half_life").is_none());
        // Predicate names must match exactly; a prefix is not enough.
        assert!(domain.schema_for_predicate("weight").is_none());
    }

    #[test]
    fn test_domain_all_predicates() {
        // Schema iteration order is unspecified, so compare after sorting.
        let domain = sample_domain();
        let mut predicates = domain.all_predicates();
        predicates.sort_unstable();
        assert_eq!(
            predicates,
            vec!["adverse_event_rate", "has_boxed_warning", "hba1c_reduction", "weight_loss"]
        );
    }
}