//! Pattern storage for learned patterns. //! //! Provides persistent storage for patterns learned from LLM extraction, //! enabling pattern tracking across scans and promotion to declarative extractors. use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; use std::sync::RwLock; use chrono::Utc; use uuid::Uuid; use crate::error::AphoriaError; use crate::types::Language; use super::normalizer::are_patterns_similar; use super::types::LearnedPattern; #[cfg(test)] #[path = "store_tests.rs"] mod store_tests; /// Trait for pattern storage implementations. /// /// Enables both local file-based storage and future hosted storage options. pub trait PatternStore: Send + Sync { /// Record a pattern learned from LLM extraction. /// /// If a similar pattern already exists, it will be updated with /// the new observation. Otherwise, a new pattern is created. /// /// If `max_patterns` is set and the limit would be exceeded, /// the oldest non-promoted pattern is removed first. fn record_pattern( &self, pattern: &LearnedPattern, max_patterns: Option, ) -> Result<(), AphoriaError>; /// Find an existing pattern similar to the given normalized pattern. /// /// Returns the most similar pattern above the threshold, if any. fn find_similar( &self, normalized: &str, language: Language, threshold: f32, ) -> Option; /// Get patterns that meet promotion criteria. /// /// Returns patterns seen in at least `min_projects` projects /// with average confidence >= `min_confidence`. fn get_promotion_candidates( &self, min_projects: usize, min_confidence: f32, ) -> Vec; /// Mark a pattern as promoted to a declarative extractor. fn mark_promoted(&self, id: &Uuid, extractor_name: &str) -> Result<(), AphoriaError>; /// Remove patterns not seen in `max_age_days` days. /// /// Returns the number of patterns pruned. fn prune_stale(&self, max_age_days: u32) -> Result; /// Get the total number of stored patterns. fn pattern_count(&self) -> usize; /// Get all stored patterns. fn get_all_patterns(&self) -> Vec; /// Get a specific pattern by ID. fn get_pattern_by_id(&self, id: &Uuid) -> Option; } /// Local JSON-backed pattern store. /// /// Stores patterns in `~/.aphoria/learning/patterns.json` with /// in-memory caching and write-through persistence. pub struct LocalPatternStore { /// Path to the JSON storage file. path: PathBuf, /// In-memory cache of patterns, keyed by ID. cache: RwLock>, } impl LocalPatternStore { /// Create a new local pattern store. /// /// Creates the storage directory if it doesn't exist. pub fn new(store_dir: &Path) -> Result { let path = store_dir.join("patterns.json"); // Ensure directory exists if let Some(parent) = path.parent() { fs::create_dir_all(parent).map_err(|e| { AphoriaError::LearningStore(format!("Failed to create learning directory: {}", e)) })?; } // Load existing patterns if file exists let cache = if path.exists() { let content = fs::read_to_string(&path).map_err(|e| { AphoriaError::LearningStore(format!("Failed to read patterns file: {}", e)) })?; let patterns: Vec = serde_json::from_str(&content).map_err(|e| { AphoriaError::LearningStore(format!("Failed to parse patterns file: {}", e)) })?; let map: HashMap = patterns.into_iter().map(|p| (p.id, p)).collect(); RwLock::new(map) } else { RwLock::new(HashMap::new()) }; Ok(Self { path, cache }) } /// Persist the cache to disk. fn persist(&self) -> Result<(), AphoriaError> { let cache = self.cache.read().map_err(|e| { AphoriaError::LearningStore(format!("Failed to acquire read lock: {}", e)) })?; let patterns: Vec<&LearnedPattern> = cache.values().collect(); let content = serde_json::to_string_pretty(&patterns).map_err(|e| { AphoriaError::LearningStore(format!("Failed to serialize patterns: {}", e)) })?; fs::write(&self.path, content).map_err(|e| { AphoriaError::LearningStore(format!("Failed to write patterns file: {}", e)) })?; Ok(()) } } impl PatternStore for LocalPatternStore { fn record_pattern( &self, pattern: &LearnedPattern, max_patterns: Option, ) -> Result<(), AphoriaError> { // Hold write lock only for cache mutation, then release before disk I/O { let mut cache = self.cache.write().map_err(|e| { AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e)) })?; // If at capacity, remove oldest non-promoted pattern before adding new one if let Some(max) = max_patterns { // Only evict if we're adding a new pattern (not updating existing) if !cache.contains_key(&pattern.id) && cache.len() >= max { // Find oldest non-promoted pattern let oldest_id = cache .values() .filter(|p| !p.promoted) .min_by_key(|p| p.last_seen) .map(|p| p.id); if let Some(id) = oldest_id { cache.remove(&id); } } } cache.insert(pattern.id, pattern.clone()); // Write lock released here when `cache` goes out of scope } // Persist happens outside write lock to reduce contention. // persist() acquires a read lock internally. self.persist() } fn find_similar( &self, normalized: &str, language: Language, threshold: f32, ) -> Option { let cache = self.cache.read().ok()?; // Find the most similar pattern for this language let mut best_match: Option<(f32, &LearnedPattern)> = None; for pattern in cache.values() { // Must be same language if pattern.language != language { continue; } // Skip promoted patterns if pattern.promoted { continue; } if let Some(similarity) = are_patterns_similar(&pattern.normalized_pattern, normalized, threshold) { match best_match { None => best_match = Some((similarity, pattern)), Some((best_sim, _)) if similarity > best_sim => { best_match = Some((similarity, pattern)); } _ => {} } } } best_match.map(|(_, p)| p.clone()) } fn get_promotion_candidates( &self, min_projects: usize, min_confidence: f32, ) -> Vec { let cache = match self.cache.read() { Ok(c) => c, Err(_) => return vec![], }; cache .values() .filter(|p| p.is_promotion_candidate(min_projects, min_confidence)) .cloned() .collect() } fn mark_promoted(&self, id: &Uuid, extractor_name: &str) -> Result<(), AphoriaError> { let mut cache = self.cache.write().map_err(|e| { AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e)) })?; if let Some(pattern) = cache.get_mut(id) { pattern.promoted = true; pattern.promoted_to = Some(extractor_name.to_string()); } drop(cache); self.persist() } fn prune_stale(&self, max_age_days: u32) -> Result { let mut cache = self.cache.write().map_err(|e| { AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e)) })?; let cutoff = Utc::now() - chrono::Duration::days(max_age_days as i64); let initial_count = cache.len(); cache.retain(|_, pattern| { // Keep promoted patterns regardless of age pattern.promoted || pattern.last_seen >= cutoff }); let pruned = initial_count - cache.len(); drop(cache); if pruned > 0 { self.persist()?; } Ok(pruned) } fn pattern_count(&self) -> usize { self.cache.read().map(|c| c.len()).unwrap_or(0) } fn get_all_patterns(&self) -> Vec { self.cache.read().map(|c| c.values().cloned().collect()).unwrap_or_default() } fn get_pattern_by_id(&self, id: &Uuid) -> Option { self.cache.read().ok()?.get(id).cloned() } } /// Get the default learning store directory. pub fn learning_store_dir() -> PathBuf { if let Some(home) = dirs::home_dir() { home.join(".aphoria").join("learning") } else { PathBuf::from(".aphoria/learning") } }