//! Domain definitions for ontology-aware subject construction. //! //! A Domain defines: //! - Entity types (Drug, Indication, Pathway, etc.) //! - Predicate schemas (which predicates use which subject patterns) //! - Source hierarchy (how to weight different source classes) use std::collections::HashMap; use stemedb_core::types::SourceClass; /// A domain definition (vertical-specific ontology). /// /// Domains are compiled-in for type safety. Each domain defines: /// - What entities exist (Drug, Indication, Pathway, etc.) /// - How predicates map to subject patterns /// - Source class weighting for this vertical #[derive(Debug, Clone)] pub struct Domain { /// Human-readable name of the domain. pub name: String, /// Description of what this domain covers. pub description: String, /// Entity types defined in this domain. /// /// Key is the entity type name (e.g., "Drug"), value is the definition. pub entity_types: HashMap, /// Predicate schemas grouped by category. /// /// Key is the category name (e.g., "efficacy", "safety"), value is the schema. pub predicate_schemas: HashMap, /// Source hierarchy for this domain. /// /// Ordered from highest authority (index 0) to lowest. pub source_hierarchy: Vec, } impl Domain { /// Create a new domain with the given name. pub fn new(name: impl Into, description: impl Into) -> Self { Self { name: name.into(), description: description.into(), entity_types: HashMap::new(), predicate_schemas: HashMap::new(), source_hierarchy: Vec::new(), } } /// Add an entity type to this domain. pub fn with_entity_type(mut self, name: impl Into, entity_type: EntityType) -> Self { self.entity_types.insert(name.into(), entity_type); self } /// Add a predicate schema to this domain. pub fn with_predicate_schema( mut self, category: impl Into, schema: PredicateSchema, ) -> Self { self.predicate_schemas.insert(category.into(), schema); self } /// Set the source hierarchy for this domain. pub fn with_source_hierarchy(mut self, hierarchy: Vec) -> Self { self.source_hierarchy = hierarchy; self } /// Get a predicate schema by category name. pub fn get_schema(&self, category: &str) -> Option<&PredicateSchema> { self.predicate_schemas.get(category) } /// Get an entity type by name. pub fn get_entity_type(&self, name: &str) -> Option<&EntityType> { self.entity_types.get(name) } /// Find the schema that contains a specific predicate. pub fn schema_for_predicate(&self, predicate: &str) -> Option<&PredicateSchema> { self.predicate_schemas .values() .find(|schema| schema.predicates.contains(&predicate.to_string())) } /// Get all predicate names across all schemas. pub fn all_predicates(&self) -> Vec<&str> { self.predicate_schemas .values() .flat_map(|schema| schema.predicates.iter().map(String::as_str)) .collect() } } /// An entity type in the domain ontology. /// /// Entity types represent the kinds of things that can be subjects or objects /// in assertions. Examples: Drug, Indication, Pathway, Gene. #[derive(Debug, Clone)] pub struct EntityType { /// Human-readable description of this entity type. pub description: String, /// Canonical naming pattern (e.g., "CamelCase", "lowercase_with_underscores"). pub naming_convention: NamingConvention, /// Optional normalization table for aliases. /// /// Maps common aliases to canonical names (e.g., "Ozempic" -> "Semaglutide"). pub aliases: HashMap, /// Whether this entity type is required for subject construction. pub required: bool, } impl EntityType { /// Create a new required entity type. pub fn required(description: impl Into) -> Self { Self { description: description.into(), naming_convention: NamingConvention::CamelCase, aliases: HashMap::new(), required: true, } } /// Create a new optional entity type. pub fn optional(description: impl Into) -> Self { Self { description: description.into(), naming_convention: NamingConvention::CamelCase, aliases: HashMap::new(), required: false, } } /// Set the naming convention for this entity type. pub fn with_naming(mut self, convention: NamingConvention) -> Self { self.naming_convention = convention; self } /// Add an alias mapping. pub fn with_alias(mut self, alias: impl Into, canonical: impl Into) -> Self { self.aliases.insert(alias.into(), canonical.into()); self } /// Normalize a value using the alias table. /// /// Returns the canonical name if an alias exists, otherwise returns the original. pub fn normalize(&self, value: &str) -> String { self.aliases.get(value).cloned().unwrap_or_else(|| value.to_string()) } } /// Naming convention for entity values. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum NamingConvention { /// CamelCase (e.g., "Type2Diabetes") CamelCase, /// lowercase_with_underscores (e.g., "type_2_diabetes") SnakeCase, /// UPPERCASE_WITH_UNDERSCORES (e.g., "TYPE_2_DIABETES") ScreamingSnakeCase, /// As-is (no transformation) Verbatim, } /// A predicate schema defines how subjects are built for a category of predicates. /// /// # Subject Pattern Syntax /// /// The `subject_pattern` uses curly braces to reference entity types: /// - `{Drug}` - replaced with the Drug entity value /// - `{Drug}:{Indication}` - compound subject with colon separator /// /// All referenced entity types must be provided when building a subject. #[derive(Debug, Clone)] pub struct PredicateSchema { /// Description of this predicate category. pub description: String, /// Subject pattern template (e.g., "{Drug}:{Indication}"). /// /// Entity type names in curly braces are replaced with values. pub subject_pattern: String, /// List of predicates that use this schema. pub predicates: Vec, /// Default lens for resolving conflicts in this category. pub default_lens: DefaultLens, /// Entity types required by this schema. /// /// Extracted from `subject_pattern` for validation. pub required_entities: Vec, } impl PredicateSchema { /// Create a new predicate schema. pub fn new(description: impl Into, subject_pattern: impl Into) -> Self { let pattern = subject_pattern.into(); let required_entities = Self::extract_entity_names(&pattern); Self { description: description.into(), subject_pattern: pattern, predicates: Vec::new(), default_lens: DefaultLens::Recency, required_entities, } } /// Add predicates to this schema. pub fn with_predicates(mut self, predicates: Vec>) -> Self { self.predicates = predicates.into_iter().map(Into::into).collect(); self } /// Set the default lens for this schema. pub fn with_default_lens(mut self, lens: DefaultLens) -> Self { self.default_lens = lens; self } /// Extract entity names from a subject pattern. /// /// Pattern: "{Drug}:{Indication}" -> ["Drug", "Indication"] fn extract_entity_names(pattern: &str) -> Vec { let mut names = Vec::new(); let mut in_brace = false; let mut current = String::new(); for c in pattern.chars() { match c { '{' => { in_brace = true; current.clear(); } '}' => { if in_brace && !current.is_empty() { names.push(current.clone()); } in_brace = false; current.clear(); } _ if in_brace => { current.push(c); } _ => {} } } names } } /// Default lens to use for a predicate category. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DefaultLens { /// Most recent assertion wins. Recency, /// Consensus among sources. Consensus, /// Highest authority tier wins. Authority, /// Show all conflicts (skeptic mode). Skeptic, /// Per-tier breakdown with authority override. LayeredConsensus, } /// A tier in the source hierarchy. #[derive(Debug, Clone)] pub struct SourceTier { /// The source class for this tier. pub source_class: SourceClass, /// Human-readable label for this tier. pub label: String, /// Examples of sources in this tier. pub examples: Vec, /// Weight multiplier for this tier (1.0 = full weight). pub weight: f32, /// Decay half-life override (None = use SourceClass default). pub decay_half_life_days: Option, } impl SourceTier { /// Create a new source tier. pub fn new(source_class: SourceClass, label: impl Into) -> Self { Self { source_class, label: label.into(), examples: Vec::new(), weight: source_class.authority_weight(), decay_half_life_days: None, } } /// Add example sources for this tier. pub fn with_examples(mut self, examples: Vec>) -> Self { self.examples = examples.into_iter().map(Into::into).collect(); self } /// Override the weight for this tier. pub fn with_weight(mut self, weight: f32) -> Self { self.weight = weight; self } /// Override the decay half-life for this tier. pub fn with_decay(mut self, days: u32) -> Self { self.decay_half_life_days = Some(days); self } } #[cfg(test)] mod tests { use super::*; #[test] fn test_domain_builder() { let domain = Domain::new("Test", "A test domain") .with_entity_type("Drug", EntityType::required("A pharmaceutical compound")) .with_entity_type("Indication", EntityType::required("A medical condition")); assert_eq!(domain.name, "Test"); assert!(domain.get_entity_type("Drug").is_some()); assert!(domain.get_entity_type("Unknown").is_none()); } #[test] fn test_entity_type_aliases() { let entity = EntityType::required("A drug") .with_alias("Ozempic", "Semaglutide") .with_alias("Wegovy", "Semaglutide"); assert_eq!(entity.normalize("Ozempic"), "Semaglutide"); assert_eq!(entity.normalize("Wegovy"), "Semaglutide"); assert_eq!(entity.normalize("Semaglutide"), "Semaglutide"); } #[test] fn test_predicate_schema_extraction() { let schema = PredicateSchema::new("Efficacy predicates", "{Drug}:{Indication}"); assert_eq!(schema.required_entities, vec!["Drug", "Indication"]); } #[test] fn test_predicate_schema_single_entity() { let schema = PredicateSchema::new("Safety predicates", "{Drug}"); assert_eq!(schema.required_entities, vec!["Drug"]); } #[test] fn test_predicate_schema_complex_pattern() { let schema = PredicateSchema::new("Complex", "{Drug}:{Indication}:{Outcome}"); assert_eq!(schema.required_entities, vec!["Drug", "Indication", "Outcome"]); } #[test] fn test_domain_schema_lookup() { let domain = Domain::new("Test", "Test domain") .with_predicate_schema( "efficacy", PredicateSchema::new("Efficacy", "{Drug}:{Indication}") .with_predicates(vec!["hba1c_reduction", "weight_loss"]), ) .with_predicate_schema( "safety", PredicateSchema::new("Safety", "{Drug}") .with_predicates(vec!["has_boxed_warning", "adverse_event_rate"]), ); // Lookup by category let efficacy = domain.get_schema("efficacy").expect("efficacy schema"); assert_eq!(efficacy.subject_pattern, "{Drug}:{Indication}"); // Lookup by predicate let weight_schema = domain.schema_for_predicate("weight_loss").expect("weight_loss schema"); assert_eq!(weight_schema.subject_pattern, "{Drug}:{Indication}"); let warning_schema = domain.schema_for_predicate("has_boxed_warning").expect("warning schema"); assert_eq!(warning_schema.subject_pattern, "{Drug}"); } }