stemedb/applications/aphoria/src/research/quality.rs
jordan a734be3a0d feat: Phase 7 Content Defense + code structure refactoring
Content Defense (Phase 7):
- Add SimilarityIndex with MinHash/LSH for near-duplicate detection
- Add QuarantineStore for flagged assertions awaiting admin review
- Add CircuitBreakerStore for per-agent circuit breaker state
- Add ContentDefenseLayer for ingestion pipeline integration
- Add API endpoints for quarantine and circuit breaker management
- Add research module with gap detection and documentation fetching

Code Structure Improvements:
- Extract research CLI commands to research_commands.rs
- Extract API routers to routers.rs module
- Extract key_codec extraction functions to separate module
- Extract test modules to separate files across multiple crates
- All files now under 500 line limit per pre-commit hook

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 12:44:05 -07:00

469 lines
14 KiB
Rust

//! Quality validation for researched claims.
//!
//! Ensures that claims extracted from research meet quality standards before
//! being ingested into the corpus. High-quality data is critical for Aphoria's
//! accuracy - false positives erode trust.
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};
use super::researcher::ResearchedClaim;
/// Quality validation report for a set of researched claims.
///
/// Built by `QualityValidator::validate`. The counters describe the batch as a
/// whole; `claim_results` holds the per-claim detail.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityReport {
/// Overall quality score (0.0 to 1.0): the fraction of claims that passed,
/// or 0.0 when the batch is empty.
pub overall_score: f32,
/// Number of claims that passed validation (with or without warnings).
pub passed: usize,
/// Number of claims that failed validation.
pub failed: usize,
/// Number of claims that passed with warnings.
pub warnings: usize,
/// Per-claim validation results, in the same order as the input claims.
pub claim_results: Vec<ClaimValidationResult>,
/// Source attribution score (0.0 to 1.0): mean of the per-claim source
/// scores, or 0.0 when the batch is empty.
pub source_attribution_score: f32,
/// Normative language score (0.0 to 1.0): mean of the per-claim description
/// scores, or 0.0 when the batch is empty.
pub normative_language_score: f32,
/// Consistency score (0.0 to 1.0): 1.0 when no claims sharing a subject and
/// predicate disagree on their value.
pub consistency_score: f32,
}
/// Validation result for a single claim.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClaimValidationResult {
/// Subject of the claim (copied from the validated claim).
pub subject: String,
/// Whether the claim passed validation: true when no issues were found, or
/// when the penalised confidence still meets the validator's minimum.
pub passed: bool,
/// Confidence in this claim's quality: the claim's own confidence after the
/// validation penalties were applied, capped at 1.0.
pub confidence: f32,
/// Validation issues found. May be non-empty even when `passed` is true.
pub issues: Vec<ValidationIssue>,
/// Validation warnings (non-fatal).
pub warnings: Vec<String>,
}
/// A validation issue recorded against a claim.
///
/// Every issue lowers the claim's confidence. A claim that has issues fails
/// only when its lowered confidence drops below the validator's minimum, so a
/// passing claim can still carry issues.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationIssue {
/// Issue category.
pub category: IssueCategory,
/// Human-readable description.
pub description: String,
/// Severity (higher = worse). The validator in this file assigns 2 or 3.
pub severity: u8,
}
/// Categories of validation issues.
///
/// NOTE(review): the per-claim checks in this file only produce
/// `MalformedSubject`, `SourceAttribution`, `InsufficientDescription`,
/// `NormativeLanguage` and `VagueContent`; the remaining variants are
/// presumably emitted elsewhere — confirm before relying on them.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum IssueCategory {
/// Missing or invalid source attribution.
SourceAttribution,
/// Claim lacks normative language (MUST, SHOULD, etc.).
NormativeLanguage,
/// Claim is too vague or generic.
VagueContent,
/// Claim conflicts with existing corpus.
Conflict,
/// Subject path is malformed.
MalformedSubject,
/// Value is invalid or ambiguous.
InvalidValue,
/// Description is missing or too short.
InsufficientDescription,
/// Duplicate of existing claim.
Duplicate,
}
/// Validator for researched claims.
///
/// Holds the thresholds used when checking claims. Build one with
/// [`QualityValidator::new`], [`QualityValidator::strict`],
/// [`QualityValidator::lenient`] or [`Default::default`].
///
/// The type is `Debug` so it can be logged or embedded in other `Debug`
/// types, and `Clone` so a configured validator can be shared cheaply.
#[derive(Debug, Clone)]
pub struct QualityValidator {
    /// Minimum confidence threshold for accepting claims.
    min_confidence: f32,
    /// Minimum description length.
    min_description_len: usize,
    /// Whether to allow claims without explicit normative language.
    allow_implicit_normative: bool,
}
impl Default for QualityValidator {
fn default() -> Self {
Self { min_confidence: 0.7, min_description_len: 20, allow_implicit_normative: false }
}
}
impl QualityValidator {
/// Create a new validator with custom settings.
pub fn new(min_confidence: f32) -> Self {
Self { min_confidence, ..Default::default() }
}
/// Create a strict validator (higher thresholds).
///
/// Uses a 0.85 confidence threshold and a minimum description length of 40,
/// and requires explicit normative language.
pub fn strict() -> Self {
    let min_confidence = 0.85;
    let min_description_len = 40;
    let allow_implicit_normative = false;
    Self { min_confidence, min_description_len, allow_implicit_normative }
}
/// Create a lenient validator (lower thresholds).
///
/// Uses a 0.5 confidence threshold and a minimum description length of 10,
/// and downgrades missing normative language from an issue to a warning.
pub fn lenient() -> Self {
    let min_confidence = 0.5;
    let min_description_len = 10;
    let allow_implicit_normative = true;
    Self { min_confidence, min_description_len, allow_implicit_normative }
}
/// Validate a batch of researched claims.
pub fn validate(&self, claims: &[ResearchedClaim]) -> QualityReport {
let mut claim_results = Vec::with_capacity(claims.len());
let mut passed = 0;
let mut failed = 0;
let mut warnings = 0;
let mut source_scores = Vec::new();
let mut normative_scores = Vec::new();
for claim in claims {
let result = self.validate_claim(claim);
if result.passed {
passed += 1;
if !result.warnings.is_empty() {
warnings += 1;
}
} else {
failed += 1;
}
// Track component scores
source_scores.push(self.score_source_attribution(claim));
normative_scores.push(self.score_normative_language(&claim.description));
claim_results.push(result);
}
let total = claims.len();
let overall_score = if total > 0 { passed as f32 / total as f32 } else { 0.0 };
let source_attribution_score = if source_scores.is_empty() {
0.0
} else {
source_scores.iter().sum::<f32>() / source_scores.len() as f32
};
let normative_language_score = if normative_scores.is_empty() {
0.0
} else {
normative_scores.iter().sum::<f32>() / normative_scores.len() as f32
};
// Consistency score: check for conflicting claims
let consistency_score = self.score_consistency(claims);
info!(
total,
passed,
failed,
warnings,
overall_score,
source_attribution_score,
normative_language_score,
consistency_score,
"Quality validation complete"
);
QualityReport {
overall_score,
passed,
failed,
warnings,
claim_results,
source_attribution_score,
normative_language_score,
consistency_score,
}
}
/// Validate a single claim.
///
/// Starts from the claim's own confidence and multiplies in a penalty for
/// each problem found:
///
/// - malformed subject: issue (severity 3), confidence x0.5
/// - missing source URL: issue (severity 2), confidence x0.7
/// - source not on the authoritative list: warning only, confidence x0.9
/// - description shorter than `min_description_len` characters: issue
///   (severity 2), confidence x0.8
/// - no normative language: issue (severity 2), confidence x0.8; when
///   `allow_implicit_normative` is set this is a warning with no penalty
/// - vague content: issue (severity 2), confidence x0.7
///
/// The claim passes when no issues were recorded, or when the penalised
/// confidence is still at least `min_confidence`. The returned confidence is
/// capped at 1.0.
fn validate_claim(&self, claim: &ResearchedClaim) -> ClaimValidationResult {
    let mut issues = Vec::new();
    let mut validation_warnings = Vec::new();
    let mut confidence = claim.confidence;

    // Check subject path format
    if !self.is_valid_subject(&claim.subject) {
        issues.push(ValidationIssue {
            category: IssueCategory::MalformedSubject,
            description: format!("Subject path is malformed: {}", claim.subject),
            severity: 3,
        });
        confidence *= 0.5;
    }

    // Check source attribution
    if claim.source_url.is_empty() {
        issues.push(ValidationIssue {
            category: IssueCategory::SourceAttribution,
            description: "Missing source URL".to_string(),
            severity: 2,
        });
        confidence *= 0.7;
    } else if !self.is_authoritative_source(&claim.source_url) {
        validation_warnings
            .push(format!("Source may not be authoritative: {}", claim.source_url));
        confidence *= 0.9;
    }

    // Check description quality. The length is counted in characters rather
    // than bytes so the threshold and the "chars" figure in the message agree
    // for non-ASCII descriptions.
    let description_chars = claim.description.chars().count();
    if description_chars < self.min_description_len {
        issues.push(ValidationIssue {
            category: IssueCategory::InsufficientDescription,
            description: format!(
                "Description too short ({} chars, min {})",
                description_chars, self.min_description_len
            ),
            severity: 2,
        });
        confidence *= 0.8;
    }

    // Check normative language
    let has_normative = self.has_normative_language(&claim.description);
    if !has_normative && !self.allow_implicit_normative {
        issues.push(ValidationIssue {
            category: IssueCategory::NormativeLanguage,
            description: "Description lacks normative language (MUST, SHOULD, etc.)"
                .to_string(),
            severity: 2,
        });
        confidence *= 0.8;
    } else if !has_normative {
        validation_warnings.push("Implicit normative statement (no MUST/SHOULD)".to_string());
    }

    // Check for vague content
    if self.is_vague_content(&claim.description) {
        issues.push(ValidationIssue {
            category: IssueCategory::VagueContent,
            description: "Content is too vague or generic".to_string(),
            severity: 2,
        });
        confidence *= 0.7;
    }

    // Determine pass/fail.
    // NOTE(review): with `||` a claim that has no issues passes regardless of
    // its confidence, and a claim with issues passes while its confidence stays
    // at or above the threshold. Confirm this is the intended rule (as opposed
    // to requiring both conditions).
    let passed = issues.is_empty() || confidence >= self.min_confidence;
    if !passed {
        debug!(
            subject = %claim.subject,
            confidence,
            issues = issues.len(),
            "Claim failed validation"
        );
    }

    ClaimValidationResult {
        subject: claim.subject.clone(),
        passed,
        confidence: confidence.min(1.0),
        issues,
        warnings: validation_warnings,
    }
}
/// Check if a subject path is valid.
///
/// A valid subject has the form `scheme://segment/segment[/...]`: a non-empty
/// scheme before the first `://`, followed by at least two non-empty
/// `/`-separated path segments. Empty segments (from leading, trailing or
/// doubled slashes) are not counted.
fn is_valid_subject(&self, subject: &str) -> bool {
    // Must have scheme://path format
    let (scheme, path) = match subject.split_once("://") {
        Some(parts) => parts,
        None => return false,
    };
    // A bare "://a/b" carries no scheme, so it is not a well-formed subject.
    if scheme.is_empty() {
        return false;
    }
    // Must have at least 2 path segments
    path.split('/').filter(|segment| !segment.is_empty()).count() >= 2
}
/// Check if a source URL is from an authoritative domain.
///
/// The decision is made on the URL's host, not on the raw string, so a
/// trusted name that merely appears in the path, query, or as a prefix of an
/// unrelated host (`https://evil.example/nist.gov`,
/// `https://nist.gov.evil.example`) is not accepted. A host matches a listed
/// domain when it is that domain or one of its subdomains; the comparison is
/// case-insensitive. Entries written as `domain/prefix` additionally require
/// the URL path to start with `/prefix`. Input without a scheme is treated as
/// `host/path`.
fn is_authoritative_source(&self, url: &str) -> bool {
    // Each entry is either a bare domain or `domain/path-prefix`.
    const AUTHORITATIVE_SOURCES: &[&str] = &[
        "rfc-editor.org",
        "ietf.org",
        "owasp.org",
        "nist.gov",
        "w3.org",
        "postgresql.org",
        "redis.io",
        "docs.rs",
        "go.dev",
        "python.org",
        "rust-lang.org",
        "apache.org",
        "microsoft.com/docs",
        "aws.amazon.com/docs",
        "cloud.google.com/docs",
        "developer.mozilla.org",
    ];

    // Drop the scheme when there is one.
    let after_scheme = match url.split_once("://") {
        Some((_, rest)) => rest,
        None => url,
    };
    // The authority component ends at the first path, query or fragment delimiter.
    let authority_len = after_scheme
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let (authority, path) = after_scheme.split_at(authority_len);
    // Strip an optional `userinfo@` prefix and `:port` suffix to isolate the host.
    let host_and_port = match authority.rsplit_once('@') {
        Some((_, rest)) => rest,
        None => authority,
    };
    let host = match host_and_port.split_once(':') {
        Some((name, _)) => name,
        None => host_and_port,
    };
    // Host names are case-insensitive and may carry a trailing root dot.
    let host = host.trim_end_matches('.').to_ascii_lowercase();

    AUTHORITATIVE_SOURCES.iter().any(|entry| {
        let (domain, path_prefix) = match entry.split_once('/') {
            Some((domain, prefix)) => (domain, Some(prefix)),
            None => (*entry, None),
        };
        // Accept the domain itself or any subdomain ("www.nist.gov"), but not
        // a different host that merely ends with the same letters ("notnist.gov").
        let host_matches = match host.strip_suffix(domain) {
            Some(label_prefix) => label_prefix.is_empty() || label_prefix.ends_with('.'),
            None => false,
        };
        let path_matches = match path_prefix {
            Some(prefix) => path.strip_prefix('/').unwrap_or("").starts_with(prefix),
            None => true,
        };
        host_matches && path_matches
    })
}
/// Check if text contains normative language.
///
/// Looks for the keywords MUST, SHALL, SHOULD, REQUIRED, RECOMMENDED and the
/// phrase MAY NOT, ignoring case. A keyword only counts when it stands as a
/// whole word, so ordinary words that happen to contain one ("shallow",
/// "mustard", "shoulder") are not mistaken for normative statements. The
/// contracted negations ("mustn't", "shouldn't") are still recognised.
fn has_normative_language(&self, text: &str) -> bool {
    const NORMATIVE_KEYWORDS: &[&str] =
        &["MUST", "SHALL", "SHOULD", "REQUIRED", "RECOMMENDED", "MAY NOT"];

    /// True when `keyword` occurs in `upper` bounded by non-alphanumeric
    /// characters (or the string edges), optionally followed by `N'T`.
    fn contains_keyword(upper: &str, keyword: &str) -> bool {
        upper.match_indices(keyword).any(|(start, hit)| {
            let before = upper[..start].chars().next_back();
            let rest = &upper[start + hit.len()..];
            let starts_word = !matches!(before, Some(c) if c.is_alphanumeric());
            let ends_word = rest.starts_with("N'T")
                || !matches!(rest.chars().next(), Some(c) if c.is_alphanumeric());
            starts_word && ends_word
        })
    }

    let upper = text.to_uppercase();
    NORMATIVE_KEYWORDS.iter().any(|keyword| contains_keyword(&upper, keyword))
}
/// Check if content is too vague.
///
/// Counts how many distinct hedging phrases occur in the text (ignoring
/// case). Three or more always mark the text as vague; one or two do so only
/// when the text is shorter than 50 bytes.
fn is_vague_content(&self, text: &str) -> bool {
    const VAGUE_PHRASES: &[&str] = &[
        "should be configured",
        "it depends",
        "varies",
        "may or may not",
        "could be",
        "might be",
        "typically",
        "usually",
        "often",
        "sometimes",
        "in some cases",
    ];

    let lowered = text.to_lowercase();
    let hits = VAGUE_PHRASES.iter().filter(|phrase| lowered.contains(**phrase)).count();

    match hits {
        0 => false,
        // A little hedging is tolerated unless the text is very short.
        1 | 2 => text.len() < 50,
        _ => true,
    }
}
/// Score source attribution (0.0 to 1.0).
fn score_source_attribution(&self, claim: &ResearchedClaim) -> f32 {
if claim.source_url.is_empty() {
return 0.0;
}
let mut score: f32 = 0.5; // Base score for having a URL
if self.is_authoritative_source(&claim.source_url) {
score += 0.3;
}
if !claim.source_section.is_empty() {
score += 0.1;
}
if claim.source_url.starts_with("https://") {
score += 0.1;
}
score.min(1.0)
}
/// Score normative language (0.0 to 1.0).
///
/// The first matching tier wins, checked case-insensitively:
///
/// - MUST, SHALL or REQUIRED: 1.0
/// - SHOULD or RECOMMENDED: 0.8
/// - MAY NOT: 0.7
/// - MAY: 0.5
/// - "recommended" or "best practice" anywhere in the text: 0.4
/// - nothing of the above: 0.2
///
/// Keywords in the first four tiers must stand as whole words, so "shallow"
/// does not score as SHALL and "maybe" does not score as MAY. The contracted
/// negations ("mustn't", "shouldn't") are still recognised.
fn score_normative_language(&self, text: &str) -> f32 {
    /// True when `keyword` occurs in `upper` bounded by non-alphanumeric
    /// characters (or the string edges), optionally followed by `N'T`.
    fn contains_keyword(upper: &str, keyword: &str) -> bool {
        upper.match_indices(keyword).any(|(start, hit)| {
            let before = upper[..start].chars().next_back();
            let rest = &upper[start + hit.len()..];
            let starts_word = !matches!(before, Some(c) if c.is_alphanumeric());
            let ends_word = rest.starts_with("N'T")
                || !matches!(rest.chars().next(), Some(c) if c.is_alphanumeric());
            starts_word && ends_word
        })
    }

    let upper = text.to_uppercase();
    let has_any =
        |keywords: &[&str]| keywords.iter().any(|keyword| contains_keyword(&upper, keyword));

    // Strong normative = higher score
    if has_any(&["MUST", "SHALL", "REQUIRED"]) {
        return 1.0;
    }
    if has_any(&["SHOULD", "RECOMMENDED"]) {
        return 0.8;
    }
    if has_any(&["MAY NOT"]) {
        return 0.7;
    }
    if has_any(&["MAY"]) {
        return 0.5;
    }

    // Implicit recommendations. A stand-alone "recommended" already returned
    // 0.8 above, so the substring check here only catches it inside a longer
    // word. The lowercase copy is built once for both checks.
    let lower = text.to_lowercase();
    if lower.contains("recommended") || lower.contains("best practice") {
        return 0.4;
    }

    0.2
}
/// Score consistency among claims (0.0 to 1.0).
///
/// Claims are grouped by `subject::predicate`. Within a group, every claim
/// after the first is compared with the first one; each differing value
/// counts as one conflict and is logged with `warn!`. The score is
/// `1 - conflicts / claims.len()`, floored at 0.0. Batches with fewer than two
/// claims always score 1.0.
fn score_consistency(&self, claims: &[ResearchedClaim]) -> f32 {
    use std::collections::HashMap;

    if claims.len() < 2 {
        return 1.0;
    }

    // Bucket together the claims that talk about the same subject+predicate.
    let mut grouped: HashMap<String, Vec<&ResearchedClaim>> = HashMap::new();
    for claim in claims {
        grouped
            .entry(format!("{}::{}", claim.subject, claim.predicate))
            .or_default()
            .push(claim);
    }

    let mut conflicts = 0;
    for (key, group) in &grouped {
        // The first claim of a group is the reference; a group of one has
        // nothing to compare against.
        if let Some((reference, others)) = group.split_first() {
            for other in others {
                if other.value != reference.value {
                    warn!(key, "Conflicting claims detected");
                    conflicts += 1;
                }
            }
        }
    }

    match conflicts {
        0 => 1.0,
        n => (1.0 - (n as f32 / claims.len() as f32)).max(0.0),
    }
}
/// Filter claims to only those that passed validation.
pub fn filter_passed(&self, claims: Vec<ResearchedClaim>) -> Vec<ResearchedClaim> {
let report = self.validate(&claims);
claims
.into_iter()
.zip(report.claim_results.iter())
.filter(|(_, result)| result.passed)
.map(|(claim, _)| claim)
.collect()
}
}
// Unit tests for this module are kept in the separate file `quality_tests.rs`
// and are compiled only for test builds.
#[cfg(test)]
#[path = "quality_tests.rs"]
mod tests;