//! Subject builder for constructing canonical subject strings. //! //! The SubjectBuilder takes a predicate schema and entity values, then constructs //! the canonical subject string that ensures proper collision detection. use std::collections::HashMap; use thiserror::Error; use crate::domain::{Domain, PredicateSchema}; /// Errors that can occur during subject construction. #[derive(Debug, Error)] pub enum SubjectError { /// A required entity is missing from the provided values. #[error("Missing required entity: {0}")] MissingEntity(String), /// An entity value is empty. #[error("Empty value for entity: {0}")] EmptyValue(String), /// An entity value contains invalid characters. #[error("Invalid characters in entity '{entity}': '{value}' contains '{invalid}'")] InvalidCharacters { /// The entity type name. entity: String, /// The invalid value. value: String, /// The invalid character found. invalid: char, }, /// The subject pattern is malformed. #[error("Malformed subject pattern: {0}")] MalformedPattern(String), /// Unknown entity type referenced in pattern. #[error("Unknown entity type in pattern: {0}")] UnknownEntityType(String), } /// Builder for constructing canonical subject strings. /// /// # Example /// /// ```ignore /// use stemedb_ontology::{SubjectBuilder, PredicateSchema}; /// use std::collections::HashMap; /// /// let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); /// let mut entities = HashMap::new(); /// entities.insert("Drug".to_string(), "Semaglutide".to_string()); /// entities.insert("Indication".to_string(), "Type2Diabetes".to_string()); /// /// let subject = SubjectBuilder::build(&schema, &entities).unwrap(); /// assert_eq!(subject, "Semaglutide:Type2Diabetes"); /// ``` pub struct SubjectBuilder; impl SubjectBuilder { /// Build a subject string from a schema and entity values. /// /// # Arguments /// /// * `schema` - The predicate schema defining the subject pattern /// * `entities` - Map of entity type names to their values /// /// # Returns /// /// The constructed subject string, or an error if validation fails. /// /// # Validation /// /// - All entities referenced in the pattern must be present /// - Entity values must not be empty /// - Entity values must not contain the separator character ':' pub fn build( schema: &PredicateSchema, entities: &HashMap, ) -> Result { Self::build_with_separator(schema, entities, ':') } /// Build a subject string with a custom separator. /// /// # Arguments /// /// * `schema` - The predicate schema defining the subject pattern /// * `entities` - Map of entity type names to their values /// * `separator` - Character used to separate entity values (default ':') pub fn build_with_separator( schema: &PredicateSchema, entities: &HashMap, separator: char, ) -> Result { let mut result = String::new(); let mut in_brace = false; let mut current_entity = String::new(); for c in schema.subject_pattern.chars() { match c { '{' => { in_brace = true; current_entity.clear(); } '}' => { if !in_brace { return Err(SubjectError::MalformedPattern( "Unexpected '}' in pattern".to_string(), )); } in_brace = false; // Look up the entity value let value = entities .get(¤t_entity) .ok_or_else(|| SubjectError::MissingEntity(current_entity.clone()))?; // Validate the value if value.is_empty() { return Err(SubjectError::EmptyValue(current_entity.clone())); } if value.contains(separator) { return Err(SubjectError::InvalidCharacters { entity: current_entity.clone(), value: value.clone(), invalid: separator, }); } result.push_str(value); current_entity.clear(); } _ if in_brace => { current_entity.push(c); } _ => { result.push(c); } } } if in_brace { return Err(SubjectError::MalformedPattern("Unclosed '{' in pattern".to_string())); } Ok(result) } /// Build a subject with entity normalization from a domain. /// /// This version uses the domain's entity type definitions to normalize /// values (e.g., "Ozempic" -> "Semaglutide"). /// /// # Arguments /// /// * `domain` - The domain definition with entity types /// * `schema` - The predicate schema defining the subject pattern /// * `entities` - Map of entity type names to their values pub fn build_normalized( domain: &Domain, schema: &PredicateSchema, entities: &HashMap, ) -> Result { // Normalize each entity value let normalized: HashMap = entities .iter() .map(|(name, value)| { let normalized_value = domain .get_entity_type(name) .map(|et| et.normalize(value)) .unwrap_or_else(|| value.clone()); (name.clone(), normalized_value) }) .collect(); Self::build(schema, &normalized) } /// Validate that all required entities are present for a schema. /// /// # Arguments /// /// * `schema` - The predicate schema to validate against /// * `entities` - Map of entity type names to their values /// /// # Returns /// /// Ok(()) if all required entities are present and valid, otherwise an error. pub fn validate_entities( schema: &PredicateSchema, entities: &HashMap, ) -> Result<(), SubjectError> { for required in &schema.required_entities { match entities.get(required) { None => return Err(SubjectError::MissingEntity(required.clone())), Some(value) if value.is_empty() => { return Err(SubjectError::EmptyValue(required.clone())) } _ => {} } } Ok(()) } /// Extract entity values from an existing subject string. /// /// This is the inverse of `build` - given a subject and schema, extract /// the entity values. /// /// # Arguments /// /// * `schema` - The predicate schema that was used to build the subject /// * `subject` - The subject string to parse /// /// # Returns /// /// A map of entity names to their values, or an error if parsing fails. /// /// # Example /// /// ```ignore /// let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); /// let entities = SubjectBuilder::parse(&schema, "Semaglutide:Type2Diabetes").unwrap(); /// assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string())); /// ``` pub fn parse( schema: &PredicateSchema, subject: &str, ) -> Result, SubjectError> { let mut result = HashMap::new(); // Split by separator and match to entity names let parts: Vec<&str> = subject.split(':').collect(); let expected_count = schema.required_entities.len(); if parts.len() != expected_count { return Err(SubjectError::MalformedPattern(format!( "Expected {} parts in subject, got {}", expected_count, parts.len() ))); } for (i, entity_name) in schema.required_entities.iter().enumerate() { result.insert(entity_name.clone(), parts[i].to_string()); } Ok(result) } } /// Helper trait for building subjects from a predicate and entities. pub trait SubjectBuilderExt { /// Build a subject for the given predicate. fn build_subject_for_predicate( &self, predicate: &str, entities: &HashMap, ) -> Result; } impl SubjectBuilderExt for Domain { fn build_subject_for_predicate( &self, predicate: &str, entities: &HashMap, ) -> Result { let schema = self .schema_for_predicate(predicate) .ok_or_else(|| SubjectError::UnknownEntityType(predicate.to_string()))?; SubjectBuilder::build_normalized(self, schema, entities) } } #[cfg(test)] mod tests { use super::*; use crate::domain::{EntityType, PredicateSchema}; fn make_entities(pairs: &[(&str, &str)]) -> HashMap { pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect() } #[test] fn test_build_simple_subject() { let schema = PredicateSchema::new("Safety", "{Drug}"); let entities = make_entities(&[("Drug", "Semaglutide")]); let subject = SubjectBuilder::build(&schema, &entities).expect("build"); assert_eq!(subject, "Semaglutide"); } #[test] fn test_build_compound_subject() { let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "Type2Diabetes")]); let subject = SubjectBuilder::build(&schema, &entities).expect("build"); assert_eq!(subject, "Semaglutide:Type2Diabetes"); } #[test] fn test_build_triple_subject() { let schema = PredicateSchema::new("Mechanism", "{Drug}:{Target}:{Effect}"); let entities = make_entities(&[ ("Drug", "Semaglutide"), ("Target", "GLP1R"), ("Effect", "Activation"), ]); let subject = SubjectBuilder::build(&schema, &entities).expect("build"); assert_eq!(subject, "Semaglutide:GLP1R:Activation"); } #[test] fn test_missing_entity_error() { let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); let entities = make_entities(&[("Drug", "Semaglutide")]); let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail"); assert!(matches!(err, SubjectError::MissingEntity(name) if name == "Indication")); } #[test] fn test_empty_value_error() { let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "")]); let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail"); assert!(matches!(err, SubjectError::EmptyValue(name) if name == "Indication")); } #[test] fn test_invalid_characters_error() { let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); // Value contains separator character let entities = make_entities(&[("Drug", "Sema:glutide"), ("Indication", "T2D")]); let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail"); assert!(matches!(err, SubjectError::InvalidCharacters { entity, .. } if entity == "Drug")); } #[test] fn test_parse_simple_subject() { let schema = PredicateSchema::new("Safety", "{Drug}"); let entities = SubjectBuilder::parse(&schema, "Semaglutide").expect("parse"); assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string())); } #[test] fn test_parse_compound_subject() { let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); let entities = SubjectBuilder::parse(&schema, "Semaglutide:Type2Diabetes").expect("parse"); assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string())); assert_eq!(entities.get("Indication"), Some(&"Type2Diabetes".to_string())); } #[test] fn test_build_normalized() { let domain = Domain::new("Test", "Test domain").with_entity_type( "Drug", EntityType::required("A drug") .with_alias("Ozempic", "Semaglutide") .with_alias("Wegovy", "Semaglutide"), ); let schema = PredicateSchema::new("Safety", "{Drug}"); let entities = make_entities(&[("Drug", "Ozempic")]); let subject = SubjectBuilder::build_normalized(&domain, &schema, &entities).expect("build"); assert_eq!(subject, "Semaglutide"); } #[test] fn test_validate_entities() { let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}"); // Valid let valid = make_entities(&[("Drug", "Semaglutide"), ("Indication", "T2D")]); assert!(SubjectBuilder::validate_entities(&schema, &valid).is_ok()); // Missing let missing = make_entities(&[("Drug", "Semaglutide")]); assert!(SubjectBuilder::validate_entities(&schema, &missing).is_err()); // Empty let empty = make_entities(&[("Drug", "Semaglutide"), ("Indication", "")]); assert!(SubjectBuilder::validate_entities(&schema, &empty).is_err()); } #[test] fn test_malformed_pattern_unclosed() { let schema = PredicateSchema::new("Bad", "{Drug"); let entities = make_entities(&[("Drug", "Semaglutide")]); let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail"); assert!(matches!(err, SubjectError::MalformedPattern(_))); } #[test] fn test_custom_separator() { let schema = PredicateSchema::new("Test", "{Drug}/{Indication}"); let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "T2D")]); // Build with custom separator (but pattern still uses /) // Wait, the pattern defines the separator. Let's test that the separator validation works. let subject = SubjectBuilder::build_with_separator(&schema, &entities, '/').expect("build"); assert_eq!(subject, "Semaglutide/T2D"); } }