Implement structured approval workflows for pattern promotion with full audit trails for SOC 2 compliance. Core Components: - governance/types.rs: ApprovalRequest, ApprovalStatus, ApprovalDecision - governance/workflow.rs: ApprovalWorkflow, ApprovalStage with escalation - governance/store.rs: JSONL persistence for requests and decisions - governance/state_machine.rs: Approval state transitions with auto-advance - governance/audit.rs: AuditTrail with JSON/CSV/Markdown export CLI Commands: - aphoria governance pending/approve/reject/escalate/status/create - aphoria audit trail/export/summary Integration: - Pipeline gate blocks promotion until governance approval - Auto-creates approval requests when governance enabled - Evidence-based auto-approval for high-confidence patterns Also includes: - Phase 11-13: Evidence, Lifecycle, Scope modules - 62+ governance-specific tests (946 total passing) - Clippy clean with -D warnings - Refactored cli.rs into submodules (governance, lifecycle, scope, etc.) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
295 lines
9.2 KiB
Rust
295 lines
9.2 KiB
Rust
//! Pattern storage for learned patterns.
|
|
//!
|
|
//! Provides persistent storage for patterns learned from LLM extraction,
|
|
//! enabling pattern tracking across scans and promotion to declarative extractors.
|
|
|
|
use std::collections::HashMap;
|
|
use std::fs;
|
|
use std::path::{Path, PathBuf};
|
|
use std::sync::RwLock;
|
|
|
|
use chrono::Utc;
|
|
use uuid::Uuid;
|
|
|
|
use crate::error::AphoriaError;
|
|
use crate::types::Language;
|
|
|
|
use super::normalizer::are_patterns_similar;
|
|
use super::types::LearnedPattern;
|
|
|
|
#[cfg(test)]
|
|
#[path = "store_tests.rs"]
|
|
mod store_tests;
|
|
|
|
/// Trait for pattern storage implementations.
|
|
///
|
|
/// Enables both local file-based storage and future hosted storage options.
|
|
pub trait PatternStore: Send + Sync {
|
|
/// Record a pattern learned from LLM extraction.
|
|
///
|
|
/// If a similar pattern already exists, it will be updated with
|
|
/// the new observation. Otherwise, a new pattern is created.
|
|
///
|
|
/// If `max_patterns` is set and the limit would be exceeded,
|
|
/// the oldest non-promoted pattern is removed first.
|
|
fn record_pattern(
|
|
&self,
|
|
pattern: &LearnedPattern,
|
|
max_patterns: Option<usize>,
|
|
) -> Result<(), AphoriaError>;
|
|
|
|
/// Find an existing pattern similar to the given normalized pattern.
|
|
///
|
|
/// Returns the most similar pattern above the threshold, if any.
|
|
fn find_similar(
|
|
&self,
|
|
normalized: &str,
|
|
language: Language,
|
|
threshold: f32,
|
|
) -> Option<LearnedPattern>;
|
|
|
|
/// Get patterns that meet promotion criteria.
|
|
///
|
|
/// Returns patterns seen in at least `min_projects` projects
|
|
/// with average confidence >= `min_confidence`.
|
|
fn get_promotion_candidates(
|
|
&self,
|
|
min_projects: usize,
|
|
min_confidence: f32,
|
|
) -> Vec<LearnedPattern>;
|
|
|
|
/// Mark a pattern as promoted to a declarative extractor.
|
|
fn mark_promoted(&self, id: &Uuid, extractor_name: &str) -> Result<(), AphoriaError>;
|
|
|
|
/// Remove patterns not seen in `max_age_days` days.
|
|
///
|
|
/// Returns the number of patterns pruned.
|
|
fn prune_stale(&self, max_age_days: u32) -> Result<usize, AphoriaError>;
|
|
|
|
/// Get the total number of stored patterns.
|
|
fn pattern_count(&self) -> usize;
|
|
|
|
/// Get all stored patterns.
|
|
fn get_all_patterns(&self) -> Vec<LearnedPattern>;
|
|
|
|
/// Get a specific pattern by ID.
|
|
fn get_pattern_by_id(&self, id: &Uuid) -> Option<LearnedPattern>;
|
|
}
|
|
|
|
/// Local JSON-backed pattern store.
|
|
///
|
|
/// Stores patterns in `~/.aphoria/learning/patterns.json` with
|
|
/// in-memory caching and write-through persistence.
|
|
pub struct LocalPatternStore {
|
|
/// Path to the JSON storage file.
|
|
path: PathBuf,
|
|
|
|
/// In-memory cache of patterns, keyed by ID.
|
|
cache: RwLock<HashMap<Uuid, LearnedPattern>>,
|
|
}
|
|
|
|
impl LocalPatternStore {
|
|
/// Create a new local pattern store.
|
|
///
|
|
/// Creates the storage directory if it doesn't exist.
|
|
pub fn new(store_dir: &Path) -> Result<Self, AphoriaError> {
|
|
let path = store_dir.join("patterns.json");
|
|
|
|
// Ensure directory exists
|
|
if let Some(parent) = path.parent() {
|
|
fs::create_dir_all(parent).map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to create learning directory: {}", e))
|
|
})?;
|
|
}
|
|
|
|
// Load existing patterns if file exists
|
|
let cache = if path.exists() {
|
|
let content = fs::read_to_string(&path).map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to read patterns file: {}", e))
|
|
})?;
|
|
|
|
let patterns: Vec<LearnedPattern> = serde_json::from_str(&content).map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to parse patterns file: {}", e))
|
|
})?;
|
|
|
|
let map: HashMap<Uuid, LearnedPattern> =
|
|
patterns.into_iter().map(|p| (p.id, p)).collect();
|
|
RwLock::new(map)
|
|
} else {
|
|
RwLock::new(HashMap::new())
|
|
};
|
|
|
|
Ok(Self { path, cache })
|
|
}
|
|
|
|
/// Persist the cache to disk.
|
|
fn persist(&self) -> Result<(), AphoriaError> {
|
|
let cache = self.cache.read().map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to acquire read lock: {}", e))
|
|
})?;
|
|
|
|
let patterns: Vec<&LearnedPattern> = cache.values().collect();
|
|
let content = serde_json::to_string_pretty(&patterns).map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to serialize patterns: {}", e))
|
|
})?;
|
|
|
|
fs::write(&self.path, content).map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to write patterns file: {}", e))
|
|
})?;
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl PatternStore for LocalPatternStore {
|
|
fn record_pattern(
|
|
&self,
|
|
pattern: &LearnedPattern,
|
|
max_patterns: Option<usize>,
|
|
) -> Result<(), AphoriaError> {
|
|
// Hold write lock only for cache mutation, then release before disk I/O
|
|
{
|
|
let mut cache = self.cache.write().map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e))
|
|
})?;
|
|
|
|
// If at capacity, remove oldest non-promoted pattern before adding new one
|
|
if let Some(max) = max_patterns {
|
|
// Only evict if we're adding a new pattern (not updating existing)
|
|
if !cache.contains_key(&pattern.id) && cache.len() >= max {
|
|
// Find oldest non-promoted pattern
|
|
let oldest_id = cache
|
|
.values()
|
|
.filter(|p| !p.promoted)
|
|
.min_by_key(|p| p.last_seen)
|
|
.map(|p| p.id);
|
|
|
|
if let Some(id) = oldest_id {
|
|
cache.remove(&id);
|
|
}
|
|
}
|
|
}
|
|
|
|
cache.insert(pattern.id, pattern.clone());
|
|
// Write lock released here when `cache` goes out of scope
|
|
}
|
|
|
|
// Persist happens outside write lock to reduce contention.
|
|
// persist() acquires a read lock internally.
|
|
self.persist()
|
|
}
|
|
|
|
fn find_similar(
|
|
&self,
|
|
normalized: &str,
|
|
language: Language,
|
|
threshold: f32,
|
|
) -> Option<LearnedPattern> {
|
|
let cache = self.cache.read().ok()?;
|
|
|
|
// Find the most similar pattern for this language
|
|
let mut best_match: Option<(f32, &LearnedPattern)> = None;
|
|
|
|
for pattern in cache.values() {
|
|
// Must be same language
|
|
if pattern.language != language {
|
|
continue;
|
|
}
|
|
|
|
// Skip promoted patterns
|
|
if pattern.promoted {
|
|
continue;
|
|
}
|
|
|
|
if let Some(similarity) =
|
|
are_patterns_similar(&pattern.normalized_pattern, normalized, threshold)
|
|
{
|
|
match best_match {
|
|
None => best_match = Some((similarity, pattern)),
|
|
Some((best_sim, _)) if similarity > best_sim => {
|
|
best_match = Some((similarity, pattern));
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
best_match.map(|(_, p)| p.clone())
|
|
}
|
|
|
|
fn get_promotion_candidates(
|
|
&self,
|
|
min_projects: usize,
|
|
min_confidence: f32,
|
|
) -> Vec<LearnedPattern> {
|
|
let cache = match self.cache.read() {
|
|
Ok(c) => c,
|
|
Err(_) => return vec![],
|
|
};
|
|
|
|
cache
|
|
.values()
|
|
.filter(|p| p.is_promotion_candidate(min_projects, min_confidence))
|
|
.cloned()
|
|
.collect()
|
|
}
|
|
|
|
fn mark_promoted(&self, id: &Uuid, extractor_name: &str) -> Result<(), AphoriaError> {
|
|
let mut cache = self.cache.write().map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e))
|
|
})?;
|
|
|
|
if let Some(pattern) = cache.get_mut(id) {
|
|
pattern.promoted = true;
|
|
pattern.promoted_to = Some(extractor_name.to_string());
|
|
}
|
|
|
|
drop(cache);
|
|
self.persist()
|
|
}
|
|
|
|
fn prune_stale(&self, max_age_days: u32) -> Result<usize, AphoriaError> {
|
|
let mut cache = self.cache.write().map_err(|e| {
|
|
AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e))
|
|
})?;
|
|
|
|
let cutoff = Utc::now() - chrono::Duration::days(max_age_days as i64);
|
|
let initial_count = cache.len();
|
|
|
|
cache.retain(|_, pattern| {
|
|
// Keep promoted patterns regardless of age
|
|
pattern.promoted || pattern.last_seen >= cutoff
|
|
});
|
|
|
|
let pruned = initial_count - cache.len();
|
|
drop(cache);
|
|
|
|
if pruned > 0 {
|
|
self.persist()?;
|
|
}
|
|
|
|
Ok(pruned)
|
|
}
|
|
|
|
fn pattern_count(&self) -> usize {
|
|
self.cache.read().map(|c| c.len()).unwrap_or(0)
|
|
}
|
|
|
|
fn get_all_patterns(&self) -> Vec<LearnedPattern> {
|
|
self.cache.read().map(|c| c.values().cloned().collect()).unwrap_or_default()
|
|
}
|
|
|
|
fn get_pattern_by_id(&self, id: &Uuid) -> Option<LearnedPattern> {
|
|
self.cache.read().ok()?.get(id).cloned()
|
|
}
|
|
}
|
|
|
|
/// Get the default learning store directory.
|
|
pub fn learning_store_dir() -> PathBuf {
|
|
if let Some(home) = dirs::home_dir() {
|
|
home.join(".aphoria").join("learning")
|
|
} else {
|
|
PathBuf::from(".aphoria/learning")
|
|
}
|
|
}
|