//! Promotion pipeline for converting learned patterns to declarative extractors. //! //! Orchestrates the full promotion flow: candidates → regex generation → validation → YAML output. use std::path::PathBuf; use tracing::{debug, info, warn}; use uuid::Uuid; use super::regex_gen::RegexGenerator; use super::types::{PromotionCandidate, PromotionStats, ValidationResult}; use super::validator::ExtractorValidator; use super::writer::YamlWriter; use crate::config::PromotionConfig; use crate::learning::{LearnedPattern, PatternStore}; use crate::llm::GeminiClient; use crate::AphoriaError; /// The promotion pipeline orchestrates pattern-to-extractor conversion. pub struct PromotionPipeline<'a, S: PatternStore> { /// Pattern store for fetching candidates. store: &'a S, /// LLM client for regex generation. client: Option<&'a GeminiClient>, /// Configuration for promotion thresholds. config: &'a PromotionConfig, /// Validator for testing generated extractors. validator: ExtractorValidator, /// YAML writer for output. writer: Option, } impl<'a, S: PatternStore> PromotionPipeline<'a, S> { /// Create a new promotion pipeline. /// /// If `output_dir` is None, uses the default `.aphoria/extractors/learned/`. pub fn new( store: &'a S, client: Option<&'a GeminiClient>, config: &'a PromotionConfig, output_dir: Option, ) -> Result { let writer = if let Some(dir) = output_dir { Some(YamlWriter::new(dir)?) } else { None }; Ok(Self { store, client, config, validator: ExtractorValidator::default(), writer }) } /// Get patterns eligible for promotion. /// /// Returns patterns that meet the configured thresholds for project count /// and confidence. pub fn get_candidates(&self) -> Vec { self.store.get_promotion_candidates(self.config.min_projects, self.config.min_confidence) } /// Generate a promotion candidate from a learned pattern. /// /// Uses the LLM to generate a regex pattern and validates it. pub fn generate_candidate( &self, pattern: &LearnedPattern, ) -> Result { let client = self.client.ok_or_else(|| { AphoriaError::Promotion("LLM client not configured for regex generation".to_string()) })?; // Generate extractor definition using LLM let generator = RegexGenerator::new(client); let extractor_def = generator.generate(pattern)?; // Validate the generated extractor let validation = self.validator.validate(&extractor_def, pattern)?; Ok(PromotionCandidate::new(pattern.clone(), extractor_def, validation)) } /// Promote a candidate by writing it to YAML and marking the pattern as promoted. /// /// Returns the path to the written YAML file. pub fn promote(&self, candidate: &PromotionCandidate) -> Result { // Check if candidate is ready if !candidate.is_ready() { return Err(AphoriaError::Promotion(format!( "Candidate {} is not ready for promotion: validation={}, performance={}", candidate.pattern_id(), candidate.validation.passed, candidate.validation.performance_ok ))); } // Get or create writer let writer = if let Some(ref w) = self.writer { w } else { return Err(AphoriaError::Promotion("YAML writer not configured".to_string())); }; // Check if already exists if writer.exists(candidate.extractor_name()) { return Err(AphoriaError::Promotion(format!( "Extractor '{}' already exists", candidate.extractor_name() ))); } // Write YAML file let path = writer.write(&candidate.extractor_def, &candidate.pattern)?; // Mark pattern as promoted self.store.mark_promoted(&candidate.pattern_id(), candidate.extractor_name())?; info!( pattern_id = %candidate.pattern_id(), extractor = %candidate.extractor_name(), path = %path.display(), "Pattern promoted to extractor" ); Ok(path) } /// Process all eligible patterns and return promotion candidates. /// /// Generates and validates extractors for each eligible pattern. /// Does not actually promote (write YAML) - use `promote()` for that. pub fn process_all(&self) -> Vec> { let patterns = self.get_candidates(); debug!(count = patterns.len(), "Processing promotion candidates"); patterns.iter().map(|pattern| self.generate_candidate(pattern)).collect() } /// Auto-promote all ready candidates. /// /// Only runs if `auto_promote` is enabled in config. /// Returns the number of patterns promoted and any errors. pub fn auto_promote_all(&self) -> (usize, Vec) { if !self.config.auto_promote { warn!("auto_promote is disabled in config"); return (0, vec![]); } let candidates = self.process_all(); let mut promoted = 0; let mut errors = Vec::new(); for result in candidates { match result { Ok(candidate) if candidate.is_ready() => match self.promote(&candidate) { Ok(_) => promoted += 1, Err(e) => errors.push(e), }, Ok(candidate) => { debug!( pattern_id = %candidate.pattern_id(), "Candidate not ready for auto-promotion" ); } Err(e) => errors.push(e), } } (promoted, errors) } /// Get statistics about the promotion pipeline. pub fn stats(&self) -> PromotionStats { let all_patterns: Vec = self.store.get_promotion_candidates(0, 0.0); // Get all patterns let eligible = self.get_candidates(); let promoted: Vec<_> = all_patterns.iter().filter(|p| p.promoted).collect(); let avg_confidence = if eligible.is_empty() { 0.0 } else { eligible.iter().map(|p| p.avg_confidence).sum::() / eligible.len() as f32 }; let avg_projects = if eligible.is_empty() { 0.0 } else { eligible.iter().map(|p| p.project_count() as f32).sum::() / eligible.len() as f32 }; PromotionStats { total_patterns: all_patterns.len(), eligible_patterns: eligible.len(), promoted_patterns: promoted.len(), pending_review: eligible.len().saturating_sub(promoted.len()), avg_confidence, avg_projects, } } /// Promote a specific pattern by ID. pub fn promote_by_id(&self, pattern_id: &Uuid) -> Result { // Find the pattern let candidates = self.get_candidates(); let pattern = candidates.iter().find(|p| &p.id == pattern_id).ok_or_else(|| { AphoriaError::Promotion(format!("Pattern {} not found in candidates", pattern_id)) })?; // Generate and validate let candidate = self.generate_candidate(pattern)?; // Promote self.promote(&candidate) } /// Validate a pattern without promoting it. /// /// Returns the validation result for inspection. pub fn validate_pattern( &self, pattern: &LearnedPattern, ) -> Result { let client = self.client.ok_or_else(|| { AphoriaError::Promotion("LLM client not configured for regex generation".to_string()) })?; let generator = RegexGenerator::new(client); let extractor_def = generator.generate(pattern)?; self.validator.validate(&extractor_def, pattern) } } #[cfg(test)] mod tests { use super::*; use crate::config::PromotionConfig; use crate::learning::{ClaimTemplate, LocalPatternStore, ValueType}; use crate::types::Language; use chrono::Utc; use tempfile::TempDir; fn create_test_store(temp: &TempDir) -> LocalPatternStore { LocalPatternStore::new(temp.path()).expect("create store") } fn create_eligible_pattern() -> LearnedPattern { let mut pattern = LearnedPattern::new( "verify_ssl = false", "verify_ssl = ", ClaimTemplate::new("ssl/verify", "enabled", ValueType::Boolean, "SSL verification"), Language::Python, "project1", 0.9, ); // Add enough projects to meet threshold for i in 2..=6 { pattern.record_observation(format!("project{}", i), 0.85, Utc::now()); } pattern } #[test] fn test_pipeline_creation() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig::default(); let pipeline = PromotionPipeline::new(&store, None, &config, Some(temp.path().to_path_buf())); assert!(pipeline.is_ok()); } #[test] fn test_get_candidates_empty() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig::default(); let pipeline = PromotionPipeline::new(&store, None, &config, None).expect("create pipeline"); let candidates = pipeline.get_candidates(); assert!(candidates.is_empty()); } #[test] fn test_get_candidates_with_eligible() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig::default(); // Add eligible pattern let pattern = create_eligible_pattern(); store.record_pattern(&pattern, None).expect("record"); let pipeline = PromotionPipeline::new(&store, None, &config, None).expect("create pipeline"); let candidates = pipeline.get_candidates(); assert_eq!(candidates.len(), 1); } #[test] fn test_stats_empty_store() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig::default(); let pipeline = PromotionPipeline::new(&store, None, &config, None).expect("create pipeline"); let stats = pipeline.stats(); assert_eq!(stats.total_patterns, 0); assert_eq!(stats.eligible_patterns, 0); assert_eq!(stats.promoted_patterns, 0); } #[test] fn test_stats_with_patterns() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig::default(); // Add eligible pattern let pattern = create_eligible_pattern(); store.record_pattern(&pattern, None).expect("record"); // Add non-eligible pattern (not enough projects) let small_pattern = LearnedPattern::new( "test = true", "test = ", ClaimTemplate::new("test", "value", ValueType::Boolean, "Test"), Language::Rust, "project1", 0.9, ); store.record_pattern(&small_pattern, None).expect("record"); let pipeline = PromotionPipeline::new(&store, None, &config, None).expect("create pipeline"); let stats = pipeline.stats(); assert_eq!(stats.eligible_patterns, 1); assert_eq!(stats.pending_review, 1); } #[test] fn test_generate_candidate_requires_client() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig::default(); let pattern = create_eligible_pattern(); let pipeline = PromotionPipeline::new(&store, None, &config, None).expect("create pipeline"); let result = pipeline.generate_candidate(&pattern); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("LLM client not configured")); } #[test] fn test_auto_promote_disabled() { let temp = TempDir::new().expect("temp dir"); let store = create_test_store(&temp); let config = PromotionConfig { auto_promote: false, ..Default::default() }; let pipeline = PromotionPipeline::new(&store, None, &config, None).expect("create pipeline"); let (promoted, errors) = pipeline.auto_promote_all(); assert_eq!(promoted, 0); assert!(errors.is_empty()); } }