//! Claim validation against domain schemas. //! //! Validates that claims conform to the domain ontology before ingestion. use std::collections::HashMap; use thiserror::Error; use crate::domain::{Domain, PredicateSchema}; /// Errors that can occur during claim validation. #[derive(Debug, Error)] pub enum ValidationError { /// The predicate is not defined in the domain. #[error("Unknown predicate: '{0}' not in domain '{1}'")] UnknownPredicate(String, String), /// The subject doesn't match the expected pattern. #[error("Subject '{subject}' doesn't match pattern '{pattern}' for predicate '{predicate}'")] SubjectMismatch { /// The actual subject string. subject: String, /// The expected pattern. pattern: String, /// The predicate name. predicate: String, }, /// A required entity is missing from the subject. #[error("Subject missing required entity '{entity}' for predicate '{predicate}'")] MissingEntity { /// The missing entity name. entity: String, /// The predicate name. predicate: String, }, /// The confidence score is out of range. #[error("Confidence {0} out of range [0.0, 1.0]")] ConfidenceOutOfRange(f32), /// The object value type doesn't match expected type. #[error("Object type mismatch: expected {expected}, got {actual}")] ObjectTypeMismatch { /// The expected type. expected: String, /// The actual type received. actual: String, }, /// Multiple validation errors occurred. #[error("Multiple validation errors: {}", .0.join("; "))] Multiple(Vec), } /// Validator for claims against a domain ontology. #[derive(Debug)] pub struct Validator<'a> { domain: &'a Domain, strict_mode: bool, } impl<'a> Validator<'a> { /// Create a new validator for the given domain. pub fn new(domain: &'a Domain) -> Self { Self { domain, strict_mode: false } } /// Enable strict mode (unknown predicates are errors instead of warnings). pub fn strict(mut self) -> Self { self.strict_mode = true; self } /// Validate a claim's predicate and subject against the domain. /// /// # Arguments /// /// * `predicate` - The predicate name /// * `subject` - The subject string /// * `confidence` - The confidence score (0.0 to 1.0) /// /// # Returns /// /// Ok if valid, or a ValidationError describing what's wrong. pub fn validate( &self, predicate: &str, subject: &str, confidence: f32, ) -> Result<(), ValidationError> { // Validate confidence first if !(0.0..=1.0).contains(&confidence) { return Err(ValidationError::ConfidenceOutOfRange(confidence)); } // Find the schema for this predicate let schema = match self.domain.schema_for_predicate(predicate) { Some(s) => s, None if self.strict_mode => { return Err(ValidationError::UnknownPredicate( predicate.to_string(), self.domain.name.clone(), )); } None => { // Non-strict: warn but allow tracing::warn!( predicate = predicate, domain = self.domain.name, "Unknown predicate, skipping subject validation" ); return Ok(()); } }; // Validate subject matches pattern self.validate_subject(subject, schema, predicate) } /// Validate just the subject against a schema. fn validate_subject( &self, subject: &str, schema: &PredicateSchema, predicate: &str, ) -> Result<(), ValidationError> { // Count separators in subject let subject_parts: Vec<&str> = subject.split(':').collect(); let expected_parts = schema.required_entities.len(); if subject_parts.len() != expected_parts { return Err(ValidationError::SubjectMismatch { subject: subject.to_string(), pattern: schema.subject_pattern.clone(), predicate: predicate.to_string(), }); } // Check for empty parts for (i, part) in subject_parts.iter().enumerate() { if part.is_empty() { return Err(ValidationError::MissingEntity { entity: schema .required_entities .get(i) .cloned() .unwrap_or_else(|| format!("part_{}", i)), predicate: predicate.to_string(), }); } } Ok(()) } /// Validate a batch of claims. /// /// Returns a map of claim index to validation error. pub fn validate_batch( &self, claims: &[(String, String, f32)], // (predicate, subject, confidence) ) -> HashMap { let mut errors = HashMap::new(); for (i, (predicate, subject, confidence)) in claims.iter().enumerate() { if let Err(e) = self.validate(predicate, subject, *confidence) { errors.insert(i, e); } } errors } /// Check if a predicate is known in the domain. pub fn is_known_predicate(&self, predicate: &str) -> bool { self.domain.schema_for_predicate(predicate).is_some() } /// Get the expected subject pattern for a predicate. pub fn expected_pattern(&self, predicate: &str) -> Option<&str> { self.domain.schema_for_predicate(predicate).map(|s| s.subject_pattern.as_str()) } } #[cfg(test)] mod tests { use super::*; use crate::domain::{Domain, EntityType, PredicateSchema}; fn test_domain() -> Domain { Domain::new("Pharma", "Test pharmaceutical domain") .with_entity_type("Drug", EntityType::required("A pharmaceutical compound")) .with_entity_type("Indication", EntityType::required("A medical condition")) .with_predicate_schema( "efficacy", PredicateSchema::new("Efficacy predicates", "{Drug}:{Indication}") .with_predicates(vec!["hba1c_reduction", "weight_loss"]), ) .with_predicate_schema( "safety", PredicateSchema::new("Safety predicates", "{Drug}") .with_predicates(vec!["has_boxed_warning", "adverse_event_rate"]), ) } #[test] fn test_valid_efficacy_claim() { let domain = test_domain(); let validator = Validator::new(&domain); let result = validator.validate("hba1c_reduction", "Semaglutide:Type2Diabetes", 0.95); assert!(result.is_ok()); } #[test] fn test_valid_safety_claim() { let domain = test_domain(); let validator = Validator::new(&domain); let result = validator.validate("has_boxed_warning", "Semaglutide", 0.99); assert!(result.is_ok()); } #[test] fn test_subject_mismatch_too_few_parts() { let domain = test_domain(); let validator = Validator::new(&domain); // Efficacy requires Drug:Indication, but we only provided Drug let result = validator.validate("hba1c_reduction", "Semaglutide", 0.95); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), ValidationError::SubjectMismatch { .. })); } #[test] fn test_subject_mismatch_too_many_parts() { let domain = test_domain(); let validator = Validator::new(&domain); // Safety requires just Drug, but we provided Drug:Indication let result = validator.validate("has_boxed_warning", "Semaglutide:T2D", 0.95); assert!(result.is_err()); } #[test] fn test_confidence_out_of_range_high() { let domain = test_domain(); let validator = Validator::new(&domain); let result = validator.validate("has_boxed_warning", "Semaglutide", 1.5); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), ValidationError::ConfidenceOutOfRange(_))); } #[test] fn test_confidence_out_of_range_negative() { let domain = test_domain(); let validator = Validator::new(&domain); let result = validator.validate("has_boxed_warning", "Semaglutide", -0.1); assert!(result.is_err()); } #[test] fn test_unknown_predicate_strict() { let domain = test_domain(); let validator = Validator::new(&domain).strict(); let result = validator.validate("unknown_predicate", "Semaglutide", 0.5); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), ValidationError::UnknownPredicate(_, _))); } #[test] fn test_unknown_predicate_nonstrict() { let domain = test_domain(); let validator = Validator::new(&domain); // non-strict // Should pass even with unknown predicate let result = validator.validate("unknown_predicate", "Semaglutide", 0.5); assert!(result.is_ok()); } #[test] fn test_empty_subject_part() { let domain = test_domain(); let validator = Validator::new(&domain); // Empty indication part let result = validator.validate("hba1c_reduction", "Semaglutide:", 0.95); assert!(result.is_err()); assert!(matches!(result.unwrap_err(), ValidationError::MissingEntity { .. })); } #[test] fn test_validate_batch() { let domain = test_domain(); let validator = Validator::new(&domain); let claims = vec![ ("hba1c_reduction".to_string(), "Semaglutide:T2D".to_string(), 0.95), ("has_boxed_warning".to_string(), "Semaglutide".to_string(), 0.99), ("hba1c_reduction".to_string(), "BadSubject".to_string(), 0.5), // Will fail ("has_boxed_warning".to_string(), "Drug".to_string(), 1.5), // Confidence will fail ]; let errors = validator.validate_batch(&claims); assert_eq!(errors.len(), 2); // Claims 2 and 3 should fail assert!(errors.contains_key(&2)); assert!(errors.contains_key(&3)); } #[test] fn test_is_known_predicate() { let domain = test_domain(); let validator = Validator::new(&domain); assert!(validator.is_known_predicate("hba1c_reduction")); assert!(validator.is_known_predicate("has_boxed_warning")); assert!(!validator.is_known_predicate("unknown")); } #[test] fn test_expected_pattern() { let domain = test_domain(); let validator = Validator::new(&domain); assert_eq!(validator.expected_pattern("hba1c_reduction"), Some("{Drug}:{Indication}")); assert_eq!(validator.expected_pattern("has_boxed_warning"), Some("{Drug}")); assert_eq!(validator.expected_pattern("unknown"), None); } }