stemedb/crates/stemedb-ontology/src/subject.rs
jordan 8f6506b70a feat: Aphoria scan modes + stemedb-ontology crate + consumer health UAT
Major additions:
- Staged scanning modes (working tree, staged, committed) with git integration
- Drift detection for baseline vs current state comparisons
- Hosted API handlers for policy CRUD operations via StemeDB API
- stemedb-ontology crate with domain definitions and medical extractors
- Consumer health vertical UAT scenarios (GLP-1, gastroparesis, etc.)
- Aphoria development skill documentation

Code organization:
- Split large files into focused modules to stay under 500-line limit
- Extracted config tests, episteme helpers/drift/aliases, API helpers

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 21:57:33 -07:00

416 lines
14 KiB
Rust

//! Subject builder for constructing canonical subject strings.
//!
//! The SubjectBuilder takes a predicate schema and entity values, then constructs
//! the canonical subject string that ensures proper collision detection.
use std::collections::HashMap;
use thiserror::Error;
use crate::domain::{Domain, PredicateSchema};
/// Errors that can occur during subject construction.
///
/// Every variant carries enough context (entity name, offending value, or a
/// human-readable description) to produce a self-contained error message.
#[derive(Debug, Error)]
pub enum SubjectError {
/// A required entity is missing from the provided values.
///
/// The payload is the name of the entity that could not be found in the
/// entity map.
#[error("Missing required entity: {0}")]
MissingEntity(String),
/// An entity value is empty.
///
/// The payload is the name of the entity whose value was the empty string.
#[error("Empty value for entity: {0}")]
EmptyValue(String),
/// An entity value contains invalid characters.
///
/// In this file it is raised when a value contains the separator character
/// that the builder was asked to validate against.
#[error("Invalid characters in entity '{entity}': '{value}' contains '{invalid}'")]
InvalidCharacters {
/// The entity type name.
entity: String,
/// The invalid value.
value: String,
/// The invalid character found.
invalid: char,
},
/// The subject pattern is malformed.
///
/// The payload is a description of the problem. Besides brace errors in a
/// pattern, `SubjectBuilder::parse` also uses this variant when a subject
/// string has an unexpected number of parts.
#[error("Malformed subject pattern: {0}")]
MalformedPattern(String),
/// Unknown entity type referenced in pattern.
///
/// NOTE(review): within this file the only producer is
/// `build_subject_for_predicate`, which uses this variant when no schema
/// exists for a *predicate* name; the payload is then the predicate name.
#[error("Unknown entity type in pattern: {0}")]
UnknownEntityType(String),
}
/// Builder for constructing canonical subject strings.
///
/// This is a stateless unit type: all functionality is exposed as associated
/// functions (e.g. `SubjectBuilder::build`), so no instance is ever needed.
///
/// A subject pattern consists of `{EntityName}` placeholders and literal
/// text; each placeholder is replaced by the matching value from the entity
/// map and literal text is copied through unchanged.
///
/// # Example
///
/// ```ignore
/// use stemedb_ontology::{SubjectBuilder, PredicateSchema};
/// use std::collections::HashMap;
///
/// let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
/// let mut entities = HashMap::new();
/// entities.insert("Drug".to_string(), "Semaglutide".to_string());
/// entities.insert("Indication".to_string(), "Type2Diabetes".to_string());
///
/// let subject = SubjectBuilder::build(&schema, &entities).unwrap();
/// assert_eq!(subject, "Semaglutide:Type2Diabetes");
/// ```
pub struct SubjectBuilder;
impl SubjectBuilder {
    /// Build a subject string from a schema and entity values.
    ///
    /// Equivalent to [`SubjectBuilder::build_with_separator`] with `':'` as
    /// the separator.
    ///
    /// # Arguments
    ///
    /// * `schema` - The predicate schema defining the subject pattern
    /// * `entities` - Map of entity type names to their values
    ///
    /// # Returns
    ///
    /// The constructed subject string, or an error if validation fails.
    ///
    /// # Validation
    ///
    /// - All entities referenced in the pattern must be present
    /// - Entity values must not be empty
    /// - Entity values must not contain the separator character ':'
    pub fn build(
        schema: &PredicateSchema,
        entities: &HashMap<String, String>,
    ) -> Result<String, SubjectError> {
        Self::build_with_separator(schema, entities, ':')
    }

    /// Build a subject string, validating values against a custom separator.
    ///
    /// The pattern is scanned character by character: `{Name}` placeholders
    /// are replaced by the value stored under `Name` in `entities`, and every
    /// other character is copied to the output verbatim. Note that the
    /// separator that appears in the output comes from the pattern's literal
    /// text; the `separator` argument only selects which character is
    /// forbidden inside entity values.
    ///
    /// # Arguments
    ///
    /// * `schema` - The predicate schema defining the subject pattern
    /// * `entities` - Map of entity type names to their values
    /// * `separator` - Character that entity values must not contain
    ///
    /// # Errors
    ///
    /// * [`SubjectError::MalformedPattern`] - unbalanced braces: a `}` with no
    ///   opening `{`, a `{` nested inside another placeholder, or a `{` that
    ///   is never closed
    /// * [`SubjectError::MissingEntity`] - a placeholder has no entry in
    ///   `entities`
    /// * [`SubjectError::EmptyValue`] - a placeholder's value is empty
    /// * [`SubjectError::InvalidCharacters`] - a value contains `separator`
    pub fn build_with_separator(
        schema: &PredicateSchema,
        entities: &HashMap<String, String>,
        separator: char,
    ) -> Result<String, SubjectError> {
        // The output is usually about as long as the pattern; pre-size to
        // avoid repeated growth.
        let mut result = String::with_capacity(schema.subject_pattern.len());
        let mut in_brace = false;
        let mut current_entity = String::new();

        for c in schema.subject_pattern.chars() {
            match c {
                '{' => {
                    // A second '{' before the closing '}' would silently
                    // discard the partially read name; report it instead.
                    if in_brace {
                        return Err(SubjectError::MalformedPattern(
                            "Unexpected '{' in pattern".to_string(),
                        ));
                    }
                    in_brace = true;
                    current_entity.clear();
                }
                '}' => {
                    if !in_brace {
                        return Err(SubjectError::MalformedPattern(
                            "Unexpected '}' in pattern".to_string(),
                        ));
                    }
                    in_brace = false;

                    // Look up the entity value.
                    let value = entities
                        .get(&current_entity)
                        .ok_or_else(|| SubjectError::MissingEntity(current_entity.clone()))?;

                    // Validate the value before it becomes part of the subject.
                    if value.is_empty() {
                        return Err(SubjectError::EmptyValue(current_entity.clone()));
                    }
                    if value.contains(separator) {
                        return Err(SubjectError::InvalidCharacters {
                            entity: current_entity.clone(),
                            value: value.clone(),
                            invalid: separator,
                        });
                    }

                    result.push_str(value);
                    current_entity.clear();
                }
                // Inside a placeholder: accumulate the entity name.
                _ if in_brace => current_entity.push(c),
                // Outside a placeholder: literal text is copied through.
                _ => result.push(c),
            }
        }

        if in_brace {
            return Err(SubjectError::MalformedPattern("Unclosed '{' in pattern".to_string()));
        }

        Ok(result)
    }

    /// Build a subject with entity normalization from a domain.
    ///
    /// This version uses the domain's entity type definitions to normalize
    /// values (e.g., "Ozempic" -> "Semaglutide"). Values whose entity name
    /// has no entity type in the domain are passed through unchanged. The
    /// normalized values are then handed to [`SubjectBuilder::build`], so the
    /// same validation and errors apply.
    ///
    /// # Arguments
    ///
    /// * `domain` - The domain definition with entity types
    /// * `schema` - The predicate schema defining the subject pattern
    /// * `entities` - Map of entity type names to their values
    pub fn build_normalized(
        domain: &Domain,
        schema: &PredicateSchema,
        entities: &HashMap<String, String>,
    ) -> Result<String, SubjectError> {
        // Normalize each entity value, falling back to the raw value when the
        // domain does not know the entity type.
        let normalized: HashMap<String, String> = entities
            .iter()
            .map(|(name, value)| {
                let normalized_value = domain
                    .get_entity_type(name)
                    .map(|et| et.normalize(value))
                    .unwrap_or_else(|| value.clone());
                (name.clone(), normalized_value)
            })
            .collect();

        Self::build(schema, &normalized)
    }

    /// Validate that all required entities are present for a schema.
    ///
    /// Only presence and non-emptiness are checked here; separator
    /// validation happens in the `build*` functions.
    ///
    /// # Arguments
    ///
    /// * `schema` - The predicate schema to validate against
    /// * `entities` - Map of entity type names to their values
    ///
    /// # Returns
    ///
    /// Ok(()) if all required entities are present and non-empty, otherwise
    /// [`SubjectError::MissingEntity`] or [`SubjectError::EmptyValue`] for the
    /// first offending entity.
    pub fn validate_entities(
        schema: &PredicateSchema,
        entities: &HashMap<String, String>,
    ) -> Result<(), SubjectError> {
        for required in &schema.required_entities {
            match entities.get(required) {
                None => return Err(SubjectError::MissingEntity(required.clone())),
                Some(value) if value.is_empty() => {
                    return Err(SubjectError::EmptyValue(required.clone()))
                }
                Some(_) => {}
            }
        }
        Ok(())
    }

    /// Extract entity values from an existing subject string.
    ///
    /// This is the inverse of `build` - given a subject and schema, extract
    /// the entity values. Equivalent to
    /// [`SubjectBuilder::parse_with_separator`] with `':'` as the separator.
    ///
    /// # Arguments
    ///
    /// * `schema` - The predicate schema that was used to build the subject
    /// * `subject` - The subject string to parse
    ///
    /// # Returns
    ///
    /// A map of entity names to their values, or an error if parsing fails.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
    /// let entities = SubjectBuilder::parse(&schema, "Semaglutide:Type2Diabetes").unwrap();
    /// assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string()));
    /// ```
    pub fn parse(
        schema: &PredicateSchema,
        subject: &str,
    ) -> Result<HashMap<String, String>, SubjectError> {
        Self::parse_with_separator(schema, subject, ':')
    }

    /// Extract entity values from a subject string split on a custom separator.
    ///
    /// The subject is split on `separator` and the resulting parts are
    /// assigned, in order, to the names in `schema.required_entities`. The
    /// pattern itself is not consulted, so this assumes the subject was built
    /// from a pattern consisting of the required entities joined by
    /// `separator`, in that same order.
    ///
    /// # Arguments
    ///
    /// * `schema` - The predicate schema that was used to build the subject
    /// * `subject` - The subject string to parse
    /// * `separator` - Character separating the entity values in `subject`
    ///
    /// # Errors
    ///
    /// * [`SubjectError::MalformedPattern`] - the number of parts differs from
    ///   the number of required entities
    /// * [`SubjectError::EmptyValue`] - a part is empty; `build` never emits
    ///   empty values, so such a subject cannot be a valid round trip
    pub fn parse_with_separator(
        schema: &PredicateSchema,
        subject: &str,
        separator: char,
    ) -> Result<HashMap<String, String>, SubjectError> {
        // Split by separator and match to entity names.
        let parts: Vec<&str> = subject.split(separator).collect();
        let expected_count = schema.required_entities.len();

        if parts.len() != expected_count {
            return Err(SubjectError::MalformedPattern(format!(
                "Expected {} parts in subject, got {}",
                expected_count,
                parts.len()
            )));
        }

        let mut result = HashMap::with_capacity(expected_count);
        // Lengths were checked above, so zip pairs every name with a part.
        for (entity_name, part) in schema.required_entities.iter().zip(parts) {
            if part.is_empty() {
                return Err(SubjectError::EmptyValue(entity_name.clone()));
            }
            result.insert(entity_name.clone(), part.to_string());
        }

        Ok(result)
    }
}
/// Helper trait for building subjects from a predicate and entities.
///
/// Lets a type that knows which schema belongs to a predicate build the
/// subject in one call, instead of the caller looking up the schema first.
pub trait SubjectBuilderExt {
/// Build a subject for the given predicate.
///
/// # Arguments
///
/// * `predicate` - Name of the predicate whose schema should be used
/// * `entities` - Map of entity type names to their values
///
/// # Returns
///
/// The constructed subject string, or a `SubjectError` describing why it
/// could not be built.
fn build_subject_for_predicate(
&self,
predicate: &str,
entities: &HashMap<String, String>,
) -> Result<String, SubjectError>;
}
impl SubjectBuilderExt for Domain {
    /// Resolve the schema registered for `predicate` and build a normalized
    /// subject from it via `SubjectBuilder::build_normalized`.
    ///
    /// When the domain has no schema for `predicate`, the error is
    /// `SubjectError::UnknownEntityType` carrying the predicate name.
    fn build_subject_for_predicate(
        &self,
        predicate: &str,
        entities: &HashMap<String, String>,
    ) -> Result<String, SubjectError> {
        match self.schema_for_predicate(predicate) {
            Some(schema) => SubjectBuilder::build_normalized(self, schema, entities),
            None => Err(SubjectError::UnknownEntityType(predicate.to_string())),
        }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `SubjectBuilder`: building, validation errors, parsing,
    //! and alias normalization.

    use super::*;
    use crate::domain::{EntityType, PredicateSchema};

    /// Build an owned entity map from borrowed `(name, value)` pairs.
    fn make_entities(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect()
    }

    #[test]
    fn test_build_simple_subject() {
        let schema = PredicateSchema::new("Safety", "{Drug}");
        let entities = make_entities(&[("Drug", "Semaglutide")]);

        let subject = SubjectBuilder::build(&schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide");
    }

    #[test]
    fn test_build_compound_subject() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "Type2Diabetes")]);

        let subject = SubjectBuilder::build(&schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide:Type2Diabetes");
    }

    #[test]
    fn test_build_triple_subject() {
        let schema = PredicateSchema::new("Mechanism", "{Drug}:{Target}:{Effect}");
        let entities = make_entities(&[
            ("Drug", "Semaglutide"),
            ("Target", "GLP1R"),
            ("Effect", "Activation"),
        ]);

        let subject = SubjectBuilder::build(&schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide:GLP1R:Activation");
    }

    #[test]
    fn test_missing_entity_error() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::MissingEntity(name) if name == "Indication"));
    }

    #[test]
    fn test_empty_value_error() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::EmptyValue(name) if name == "Indication"));
    }

    #[test]
    fn test_invalid_characters_error() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        // Value contains separator character
        let entities = make_entities(&[("Drug", "Sema:glutide"), ("Indication", "T2D")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::InvalidCharacters { entity, .. } if entity == "Drug"));
    }

    #[test]
    fn test_parse_simple_subject() {
        let schema = PredicateSchema::new("Safety", "{Drug}");

        let entities = SubjectBuilder::parse(&schema, "Semaglutide").expect("parse");
        assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string()));
    }

    #[test]
    fn test_parse_compound_subject() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");

        let entities = SubjectBuilder::parse(&schema, "Semaglutide:Type2Diabetes").expect("parse");
        assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string()));
        assert_eq!(entities.get("Indication"), Some(&"Type2Diabetes".to_string()));
    }

    #[test]
    fn test_parse_wrong_part_count() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");

        // One part supplied where the schema requires two.
        let err = SubjectBuilder::parse(&schema, "Semaglutide").expect_err("should fail");
        assert!(matches!(err, SubjectError::MalformedPattern(_)));
    }

    #[test]
    fn test_build_normalized() {
        let domain = Domain::new("Test", "Test domain").with_entity_type(
            "Drug",
            EntityType::required("A drug")
                .with_alias("Ozempic", "Semaglutide")
                .with_alias("Wegovy", "Semaglutide"),
        );
        let schema = PredicateSchema::new("Safety", "{Drug}");
        let entities = make_entities(&[("Drug", "Ozempic")]);

        let subject = SubjectBuilder::build_normalized(&domain, &schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide");
    }

    #[test]
    fn test_validate_entities() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");

        // Valid
        let valid = make_entities(&[("Drug", "Semaglutide"), ("Indication", "T2D")]);
        assert!(SubjectBuilder::validate_entities(&schema, &valid).is_ok());

        // Missing
        let missing = make_entities(&[("Drug", "Semaglutide")]);
        assert!(SubjectBuilder::validate_entities(&schema, &missing).is_err());

        // Empty
        let empty = make_entities(&[("Drug", "Semaglutide"), ("Indication", "")]);
        assert!(SubjectBuilder::validate_entities(&schema, &empty).is_err());
    }

    #[test]
    fn test_malformed_pattern_unclosed() {
        let schema = PredicateSchema::new("Bad", "{Drug");
        let entities = make_entities(&[("Drug", "Semaglutide")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::MalformedPattern(_)));
    }

    #[test]
    fn test_custom_separator() {
        let schema = PredicateSchema::new("Test", "{Drug}/{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "T2D")]);

        // The '/' in the output comes from the pattern's literal text; the
        // separator argument only selects which character values may not contain.
        let subject = SubjectBuilder::build_with_separator(&schema, &entities, '/').expect("build");
        assert_eq!(subject, "Semaglutide/T2D");

        // A value containing the custom separator must be rejected.
        let bad = make_entities(&[("Drug", "Sema/glutide"), ("Indication", "T2D")]);
        let err =
            SubjectBuilder::build_with_separator(&schema, &bad, '/').expect_err("should fail");
        assert!(matches!(
            err,
            SubjectError::InvalidCharacters { entity, invalid: '/', .. } if entity == "Drug"
        ));
    }
}