// Major additions:
// - Staged scanning modes (working tree, staged, committed) with git integration
// - Drift detection for baseline vs current state comparisons
// - Hosted API handlers for policy CRUD operations via StemeDB API
// - stemedb-ontology crate with domain definitions and medical extractors
// - Consumer health vertical UAT scenarios (GLP-1, gastroparesis, etc.)
// - Aphoria development skill documentation
//
// Code organization:
// - Split large files into focused modules to stay under 500-line limit
// - Extracted config tests, episteme helpers/drift/aliases, API helpers
//
// Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
//
// File viewer metadata: 416 lines, 14 KiB, Rust
//! Subject builder for constructing canonical subject strings.
//!
//! The `SubjectBuilder` takes a predicate schema and entity values, then constructs
//! the canonical subject string that ensures proper collision detection.

use std::collections::HashMap;
use thiserror::Error;
use crate::domain::{Domain, PredicateSchema};
/// Errors that can occur during subject construction.
|
|
#[derive(Debug, Error)]
|
|
pub enum SubjectError {
|
|
/// A required entity is missing from the provided values.
|
|
#[error("Missing required entity: {0}")]
|
|
MissingEntity(String),
|
|
|
|
/// An entity value is empty.
|
|
#[error("Empty value for entity: {0}")]
|
|
EmptyValue(String),
|
|
|
|
/// An entity value contains invalid characters.
|
|
#[error("Invalid characters in entity '{entity}': '{value}' contains '{invalid}'")]
|
|
InvalidCharacters {
|
|
/// The entity type name.
|
|
entity: String,
|
|
/// The invalid value.
|
|
value: String,
|
|
/// The invalid character found.
|
|
invalid: char,
|
|
},
|
|
|
|
/// The subject pattern is malformed.
|
|
#[error("Malformed subject pattern: {0}")]
|
|
MalformedPattern(String),
|
|
|
|
/// Unknown entity type referenced in pattern.
|
|
#[error("Unknown entity type in pattern: {0}")]
|
|
UnknownEntityType(String),
|
|
}
/// Builder for constructing canonical subject strings.
///
/// This is a stateless unit type; all functionality is exposed through
/// associated functions.
///
/// # Example
///
/// ```ignore
/// use stemedb_ontology::{SubjectBuilder, PredicateSchema};
/// use std::collections::HashMap;
///
/// let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
/// let mut entities = HashMap::new();
/// entities.insert("Drug".to_string(), "Semaglutide".to_string());
/// entities.insert("Indication".to_string(), "Type2Diabetes".to_string());
///
/// let subject = SubjectBuilder::build(&schema, &entities).unwrap();
/// assert_eq!(subject, "Semaglutide:Type2Diabetes");
/// ```
pub struct SubjectBuilder;
impl SubjectBuilder {
|
|
/// Build a subject string from a schema and entity values.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `schema` - The predicate schema defining the subject pattern
|
|
/// * `entities` - Map of entity type names to their values
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// The constructed subject string, or an error if validation fails.
|
|
///
|
|
/// # Validation
|
|
///
|
|
/// - All entities referenced in the pattern must be present
|
|
/// - Entity values must not be empty
|
|
/// - Entity values must not contain the separator character ':'
|
|
pub fn build(
|
|
schema: &PredicateSchema,
|
|
entities: &HashMap<String, String>,
|
|
) -> Result<String, SubjectError> {
|
|
Self::build_with_separator(schema, entities, ':')
|
|
}
|
|
|
|
/// Build a subject string with a custom separator.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `schema` - The predicate schema defining the subject pattern
|
|
/// * `entities` - Map of entity type names to their values
|
|
/// * `separator` - Character used to separate entity values (default ':')
|
|
pub fn build_with_separator(
|
|
schema: &PredicateSchema,
|
|
entities: &HashMap<String, String>,
|
|
separator: char,
|
|
) -> Result<String, SubjectError> {
|
|
let mut result = String::new();
|
|
let mut in_brace = false;
|
|
let mut current_entity = String::new();
|
|
|
|
for c in schema.subject_pattern.chars() {
|
|
match c {
|
|
'{' => {
|
|
in_brace = true;
|
|
current_entity.clear();
|
|
}
|
|
'}' => {
|
|
if !in_brace {
|
|
return Err(SubjectError::MalformedPattern(
|
|
"Unexpected '}' in pattern".to_string(),
|
|
));
|
|
}
|
|
in_brace = false;
|
|
|
|
// Look up the entity value
|
|
let value = entities
|
|
.get(¤t_entity)
|
|
.ok_or_else(|| SubjectError::MissingEntity(current_entity.clone()))?;
|
|
|
|
// Validate the value
|
|
if value.is_empty() {
|
|
return Err(SubjectError::EmptyValue(current_entity.clone()));
|
|
}
|
|
|
|
if value.contains(separator) {
|
|
return Err(SubjectError::InvalidCharacters {
|
|
entity: current_entity.clone(),
|
|
value: value.clone(),
|
|
invalid: separator,
|
|
});
|
|
}
|
|
|
|
result.push_str(value);
|
|
current_entity.clear();
|
|
}
|
|
_ if in_brace => {
|
|
current_entity.push(c);
|
|
}
|
|
_ => {
|
|
result.push(c);
|
|
}
|
|
}
|
|
}
|
|
|
|
if in_brace {
|
|
return Err(SubjectError::MalformedPattern("Unclosed '{' in pattern".to_string()));
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Build a subject with entity normalization from a domain.
|
|
///
|
|
/// This version uses the domain's entity type definitions to normalize
|
|
/// values (e.g., "Ozempic" -> "Semaglutide").
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `domain` - The domain definition with entity types
|
|
/// * `schema` - The predicate schema defining the subject pattern
|
|
/// * `entities` - Map of entity type names to their values
|
|
pub fn build_normalized(
|
|
domain: &Domain,
|
|
schema: &PredicateSchema,
|
|
entities: &HashMap<String, String>,
|
|
) -> Result<String, SubjectError> {
|
|
// Normalize each entity value
|
|
let normalized: HashMap<String, String> = entities
|
|
.iter()
|
|
.map(|(name, value)| {
|
|
let normalized_value = domain
|
|
.get_entity_type(name)
|
|
.map(|et| et.normalize(value))
|
|
.unwrap_or_else(|| value.clone());
|
|
(name.clone(), normalized_value)
|
|
})
|
|
.collect();
|
|
|
|
Self::build(schema, &normalized)
|
|
}
|
|
|
|
/// Validate that all required entities are present for a schema.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `schema` - The predicate schema to validate against
|
|
/// * `entities` - Map of entity type names to their values
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// Ok(()) if all required entities are present and valid, otherwise an error.
|
|
pub fn validate_entities(
|
|
schema: &PredicateSchema,
|
|
entities: &HashMap<String, String>,
|
|
) -> Result<(), SubjectError> {
|
|
for required in &schema.required_entities {
|
|
match entities.get(required) {
|
|
None => return Err(SubjectError::MissingEntity(required.clone())),
|
|
Some(value) if value.is_empty() => {
|
|
return Err(SubjectError::EmptyValue(required.clone()))
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Extract entity values from an existing subject string.
|
|
///
|
|
/// This is the inverse of `build` - given a subject and schema, extract
|
|
/// the entity values.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `schema` - The predicate schema that was used to build the subject
|
|
/// * `subject` - The subject string to parse
|
|
///
|
|
/// # Returns
|
|
///
|
|
/// A map of entity names to their values, or an error if parsing fails.
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```ignore
|
|
/// let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
|
|
/// let entities = SubjectBuilder::parse(&schema, "Semaglutide:Type2Diabetes").unwrap();
|
|
/// assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string()));
|
|
/// ```
|
|
pub fn parse(
|
|
schema: &PredicateSchema,
|
|
subject: &str,
|
|
) -> Result<HashMap<String, String>, SubjectError> {
|
|
let mut result = HashMap::new();
|
|
|
|
// Split by separator and match to entity names
|
|
let parts: Vec<&str> = subject.split(':').collect();
|
|
let expected_count = schema.required_entities.len();
|
|
|
|
if parts.len() != expected_count {
|
|
return Err(SubjectError::MalformedPattern(format!(
|
|
"Expected {} parts in subject, got {}",
|
|
expected_count,
|
|
parts.len()
|
|
)));
|
|
}
|
|
|
|
for (i, entity_name) in schema.required_entities.iter().enumerate() {
|
|
result.insert(entity_name.clone(), parts[i].to_string());
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
}
/// Helper trait for building subjects from a predicate and entities.
|
|
pub trait SubjectBuilderExt {
|
|
/// Build a subject for the given predicate.
|
|
fn build_subject_for_predicate(
|
|
&self,
|
|
predicate: &str,
|
|
entities: &HashMap<String, String>,
|
|
) -> Result<String, SubjectError>;
|
|
}
impl SubjectBuilderExt for Domain {
|
|
fn build_subject_for_predicate(
|
|
&self,
|
|
predicate: &str,
|
|
entities: &HashMap<String, String>,
|
|
) -> Result<String, SubjectError> {
|
|
let schema = self
|
|
.schema_for_predicate(predicate)
|
|
.ok_or_else(|| SubjectError::UnknownEntityType(predicate.to_string()))?;
|
|
|
|
SubjectBuilder::build_normalized(self, schema, entities)
|
|
}
|
|
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{EntityType, PredicateSchema};

    /// Build an owned entity map from borrowed `(name, value)` pairs.
    fn make_entities(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect()
    }

    #[test]
    fn test_build_simple_subject() {
        let schema = PredicateSchema::new("Safety", "{Drug}");
        let entities = make_entities(&[("Drug", "Semaglutide")]);

        let subject = SubjectBuilder::build(&schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide");
    }

    #[test]
    fn test_build_compound_subject() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "Type2Diabetes")]);

        let subject = SubjectBuilder::build(&schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide:Type2Diabetes");
    }

    #[test]
    fn test_build_triple_subject() {
        let schema = PredicateSchema::new("Mechanism", "{Drug}:{Target}:{Effect}");
        let entities = make_entities(&[
            ("Drug", "Semaglutide"),
            ("Target", "GLP1R"),
            ("Effect", "Activation"),
        ]);

        let subject = SubjectBuilder::build(&schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide:GLP1R:Activation");
    }

    #[test]
    fn test_missing_entity_error() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::MissingEntity(name) if name == "Indication"));
    }

    #[test]
    fn test_empty_value_error() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::EmptyValue(name) if name == "Indication"));
    }

    #[test]
    fn test_invalid_characters_error() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        // Value contains separator character
        let entities = make_entities(&[("Drug", "Sema:glutide"), ("Indication", "T2D")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::InvalidCharacters { entity, .. } if entity == "Drug"));
    }

    #[test]
    fn test_parse_simple_subject() {
        let schema = PredicateSchema::new("Safety", "{Drug}");
        let entities = SubjectBuilder::parse(&schema, "Semaglutide").expect("parse");

        assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string()));
    }

    #[test]
    fn test_parse_compound_subject() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");
        let entities = SubjectBuilder::parse(&schema, "Semaglutide:Type2Diabetes").expect("parse");

        assert_eq!(entities.get("Drug"), Some(&"Semaglutide".to_string()));
        assert_eq!(entities.get("Indication"), Some(&"Type2Diabetes".to_string()));
    }

    #[test]
    fn test_parse_part_count_mismatch() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");

        // Only one part supplied for a two-entity schema.
        let err = SubjectBuilder::parse(&schema, "Semaglutide").expect_err("should fail");
        assert!(matches!(err, SubjectError::MalformedPattern(_)));
    }

    #[test]
    fn test_build_normalized() {
        let domain = Domain::new("Test", "Test domain").with_entity_type(
            "Drug",
            EntityType::required("A drug")
                .with_alias("Ozempic", "Semaglutide")
                .with_alias("Wegovy", "Semaglutide"),
        );

        let schema = PredicateSchema::new("Safety", "{Drug}");
        let entities = make_entities(&[("Drug", "Ozempic")]);

        let subject = SubjectBuilder::build_normalized(&domain, &schema, &entities).expect("build");
        assert_eq!(subject, "Semaglutide");
    }

    #[test]
    fn test_validate_entities() {
        let schema = PredicateSchema::new("Efficacy", "{Drug}:{Indication}");

        // Valid
        let valid = make_entities(&[("Drug", "Semaglutide"), ("Indication", "T2D")]);
        assert!(SubjectBuilder::validate_entities(&schema, &valid).is_ok());

        // Missing
        let missing = make_entities(&[("Drug", "Semaglutide")]);
        assert!(SubjectBuilder::validate_entities(&schema, &missing).is_err());

        // Empty
        let empty = make_entities(&[("Drug", "Semaglutide"), ("Indication", "")]);
        assert!(SubjectBuilder::validate_entities(&schema, &empty).is_err());
    }

    #[test]
    fn test_malformed_pattern_unclosed() {
        let schema = PredicateSchema::new("Bad", "{Drug");
        let entities = make_entities(&[("Drug", "Semaglutide")]);

        let err = SubjectBuilder::build(&schema, &entities).expect_err("should fail");
        assert!(matches!(err, SubjectError::MalformedPattern(_)));
    }

    #[test]
    fn test_custom_separator() {
        let schema = PredicateSchema::new("Test", "{Drug}/{Indication}");
        let entities = make_entities(&[("Drug", "Semaglutide"), ("Indication", "T2D")]);

        // The literal '/' in the output comes from the pattern itself; the
        // `separator` argument only controls which character is forbidden
        // inside entity values. Neither value contains '/', so this succeeds.
        let subject = SubjectBuilder::build_with_separator(&schema, &entities, '/').expect("build");
        assert_eq!(subject, "Semaglutide/T2D");
    }
}