stemedb/applications/aphoria/src/learning/store.rs
jordan 8af9b48ac7 feat: Complete Aphoria Phase 14 - Governance Workflows
Implement structured approval workflows for pattern promotion with full
audit trails for SOC 2 compliance.

Core Components:
- governance/types.rs: ApprovalRequest, ApprovalStatus, ApprovalDecision
- governance/workflow.rs: ApprovalWorkflow, ApprovalStage with escalation
- governance/store.rs: JSONL persistence for requests and decisions
- governance/state_machine.rs: Approval state transitions with auto-advance
- governance/audit.rs: AuditTrail with JSON/CSV/Markdown export

CLI Commands:
- aphoria governance pending/approve/reject/escalate/status/create
- aphoria audit trail/export/summary

Integration:
- Pipeline gate blocks promotion until governance approval
- Auto-creates approval requests when governance enabled
- Evidence-based auto-approval for high-confidence patterns

Also includes:
- Phase 11-13: Evidence, Lifecycle, Scope modules
- 62+ governance-specific tests (946 total passing)
- Clippy clean with -D warnings
- Refactored cli.rs into submodules (governance, lifecycle, scope, etc.)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 05:16:26 -07:00

295 lines
9.2 KiB
Rust

//! Pattern storage for learned patterns.
//!
//! Provides persistent storage for patterns learned from LLM extraction,
//! enabling pattern tracking across scans and promotion to declarative extractors.
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::RwLock;
use chrono::Utc;
use uuid::Uuid;
use crate::error::AphoriaError;
use crate::types::Language;
use super::normalizer::are_patterns_similar;
use super::types::LearnedPattern;
#[cfg(test)]
#[path = "store_tests.rs"]
mod store_tests;
/// Trait for pattern storage implementations.
///
/// Enables both local file-based storage and future hosted storage options.
///
/// Every method takes `&self`, and the `Send + Sync` supertraits mean a store
/// may be shared between threads; implementations that mutate state therefore
/// need interior mutability (the local store in this file uses an `RwLock`).
pub trait PatternStore: Send + Sync {
/// Record a pattern learned from LLM extraction.
///
/// If a similar pattern already exists, it will be updated with
/// the new observation. Otherwise, a new pattern is created.
///
/// If `max_patterns` is set and the limit would be exceeded,
/// the oldest non-promoted pattern is removed first.
///
/// NOTE(review): the local implementation in this file keys only on
/// `pattern.id` and overwrites the entry with that ID; it performs no
/// similarity merging itself, so callers presumably look up an existing
/// pattern via `find_similar` first — confirm against the call sites.
///
/// # Errors
///
/// Returns an [`AphoriaError`] when the pattern cannot be stored.
fn record_pattern(
&self,
pattern: &LearnedPattern,
max_patterns: Option<usize>,
) -> Result<(), AphoriaError>;
/// Find an existing pattern similar to the given normalized pattern.
///
/// Returns the most similar pattern above the threshold, if any.
/// Only patterns recorded for `language` are considered.
fn find_similar(
&self,
normalized: &str,
language: Language,
threshold: f32,
) -> Option<LearnedPattern>;
/// Get patterns that meet promotion criteria.
///
/// Returns patterns seen in at least `min_projects` projects
/// with average confidence >= `min_confidence`.
fn get_promotion_candidates(
&self,
min_projects: usize,
min_confidence: f32,
) -> Vec<LearnedPattern>;
/// Mark a pattern as promoted to a declarative extractor.
///
/// `extractor_name` is the name of the extractor the pattern was promoted to.
///
/// # Errors
///
/// Returns an [`AphoriaError`] when the updated state cannot be stored.
fn mark_promoted(&self, id: &Uuid, extractor_name: &str) -> Result<(), AphoriaError>;
/// Remove patterns not seen in `max_age_days` days.
///
/// Returns the number of patterns pruned.
///
/// # Errors
///
/// Returns an [`AphoriaError`] when the pruned state cannot be stored.
fn prune_stale(&self, max_age_days: u32) -> Result<usize, AphoriaError>;
/// Get the total number of stored patterns.
fn pattern_count(&self) -> usize;
/// Get all stored patterns.
///
/// Patterns are returned as owned clones; no ordering is promised by this trait.
fn get_all_patterns(&self) -> Vec<LearnedPattern>;
/// Get a specific pattern by ID.
///
/// Returns `None` when no pattern with that ID is stored.
fn get_pattern_by_id(&self, id: &Uuid) -> Option<LearnedPattern>;
}
/// Local JSON-backed pattern store.
///
/// Stores patterns in a `patterns.json` file inside the directory handed to
/// [`LocalPatternStore::new`] (typically `~/.aphoria/learning/`, the location
/// returned by [`learning_store_dir`]), with in-memory caching and
/// write-through persistence: every mutating operation updates the cache and
/// then rewrites the file.
pub struct LocalPatternStore {
/// Path to the JSON storage file.
path: PathBuf,
/// In-memory cache of patterns, keyed by ID.
///
/// Guarded by an `RwLock` so the store can be mutated through `&self`.
cache: RwLock<HashMap<Uuid, LearnedPattern>>,
}
impl LocalPatternStore {
    /// Create a new local pattern store backed by `store_dir/patterns.json`.
    ///
    /// Creates the storage directory if it doesn't exist. When the patterns
    /// file is present its contents are loaded into the in-memory cache; when
    /// it is absent the store starts out empty.
    ///
    /// # Errors
    ///
    /// Returns [`AphoriaError::LearningStore`] when the directory cannot be
    /// created, or when the patterns file exists but cannot be read or parsed.
    pub fn new(store_dir: &Path) -> Result<Self, AphoriaError> {
        let path = store_dir.join("patterns.json");

        // Ensure directory exists
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).map_err(|e| {
                AphoriaError::LearningStore(format!("Failed to create learning directory: {}", e))
            })?;
        }

        // Read directly instead of probing with `exists()` first: that avoids a
        // check-then-use race, and only a genuinely missing file is treated as
        // an empty store. Any other I/O failure is reported to the caller.
        let patterns: Vec<LearnedPattern> = match fs::read_to_string(&path) {
            Ok(content) => serde_json::from_str(&content).map_err(|e| {
                AphoriaError::LearningStore(format!("Failed to parse patterns file: {}", e))
            })?,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Vec::new(),
            Err(e) => {
                return Err(AphoriaError::LearningStore(format!(
                    "Failed to read patterns file: {}",
                    e
                )));
            }
        };

        let map: HashMap<Uuid, LearnedPattern> =
            patterns.into_iter().map(|p| (p.id, p)).collect();

        Ok(Self { path, cache: RwLock::new(map) })
    }

    /// Persist the cache to disk.
    ///
    /// The cache is serialized as a pretty-printed JSON array while a read
    /// lock is held. The JSON is first written to a uniquely named temporary
    /// file next to the target and then renamed over it, so a failed or
    /// interrupted write cannot leave a truncated `patterns.json` behind.
    ///
    /// # Errors
    ///
    /// Returns [`AphoriaError::LearningStore`] when the lock is poisoned,
    /// serialization fails, or the file cannot be written or renamed.
    fn persist(&self) -> Result<(), AphoriaError> {
        // Process-wide counter so concurrent persists never share a temp file.
        static TMP_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

        let cache = self.cache.read().map_err(|e| {
            AphoriaError::LearningStore(format!("Failed to acquire read lock: {}", e))
        })?;

        let patterns: Vec<&LearnedPattern> = cache.values().collect();
        let content = serde_json::to_string_pretty(&patterns).map_err(|e| {
            AphoriaError::LearningStore(format!("Failed to serialize patterns: {}", e))
        })?;

        // e.g. `patterns.json.<pid>.<seq>.tmp`, in the same directory as the
        // target so the rename stays on one filesystem.
        let seq = TMP_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        let tmp_path =
            self.path.with_extension(format!("json.{}.{}.tmp", std::process::id(), seq));

        fs::write(&tmp_path, content)
            .and_then(|()| fs::rename(&tmp_path, &self.path))
            .map_err(|e| {
                // Best-effort cleanup; the original error is what matters.
                let _ = fs::remove_file(&tmp_path);
                AphoriaError::LearningStore(format!("Failed to write patterns file: {}", e))
            })
    }
}
impl PatternStore for LocalPatternStore {
    /// Insert `pattern` into the cache under its ID (replacing any entry with
    /// the same ID) and write the cache to disk.
    ///
    /// When `max_patterns` is set, the pattern's ID is not yet cached, and the
    /// cache already holds at least that many entries, the non-promoted
    /// pattern with the oldest `last_seen` is evicted first. Promoted patterns
    /// are never evicted, so if every cached pattern is promoted the cache
    /// grows past the limit.
    ///
    /// # Errors
    ///
    /// Returns [`AphoriaError::LearningStore`] when the lock is poisoned or
    /// persisting fails.
    fn record_pattern(
        &self,
        pattern: &LearnedPattern,
        max_patterns: Option<usize>,
    ) -> Result<(), AphoriaError> {
        // Hold write lock only for cache mutation, then release before disk I/O
        {
            let mut cache = self.cache.write().map_err(|e| {
                AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e))
            })?;

            // If at capacity, remove oldest non-promoted pattern before adding new one
            if let Some(max) = max_patterns {
                // Only evict if we're adding a new pattern (not updating existing)
                if !cache.contains_key(&pattern.id) && cache.len() >= max {
                    // Find oldest non-promoted pattern
                    let oldest_id = cache
                        .values()
                        .filter(|p| !p.promoted)
                        .min_by_key(|p| p.last_seen)
                        .map(|p| p.id);
                    if let Some(id) = oldest_id {
                        cache.remove(&id);
                    }
                }
            }

            cache.insert(pattern.id, pattern.clone());
            // Write lock released here when `cache` goes out of scope
        }

        // Persist happens outside write lock to reduce contention.
        // persist() acquires a read lock internally.
        self.persist()
    }

    /// Return a clone of the non-promoted pattern for `language` that
    /// `are_patterns_similar` scores highest against `normalized`.
    ///
    /// Returns `None` when no pattern qualifies or the lock is poisoned. On
    /// equal scores the pattern encountered first wins; since the cache is a
    /// `HashMap`, that order is unspecified.
    fn find_similar(
        &self,
        normalized: &str,
        language: Language,
        threshold: f32,
    ) -> Option<LearnedPattern> {
        let cache = self.cache.read().ok()?;

        // Find the most similar pattern for this language
        let mut best_match: Option<(f32, &LearnedPattern)> = None;
        for pattern in cache.values() {
            // Must be same language
            if pattern.language != language {
                continue;
            }
            // Skip promoted patterns
            if pattern.promoted {
                continue;
            }
            if let Some(similarity) =
                are_patterns_similar(&pattern.normalized_pattern, normalized, threshold)
            {
                match best_match {
                    None => best_match = Some((similarity, pattern)),
                    Some((best_sim, _)) if similarity > best_sim => {
                        best_match = Some((similarity, pattern));
                    }
                    _ => {}
                }
            }
        }

        best_match.map(|(_, p)| p.clone())
    }

    /// Return clones of every cached pattern for which
    /// `is_promotion_candidate(min_projects, min_confidence)` holds.
    ///
    /// Returns an empty vector when the lock is poisoned.
    fn get_promotion_candidates(
        &self,
        min_projects: usize,
        min_confidence: f32,
    ) -> Vec<LearnedPattern> {
        let cache = match self.cache.read() {
            Ok(c) => c,
            Err(_) => return vec![],
        };
        cache
            .values()
            .filter(|p| p.is_promotion_candidate(min_projects, min_confidence))
            .cloned()
            .collect()
    }

    /// Flag the pattern with the given ID as promoted, record
    /// `extractor_name` as its target, and persist the cache.
    ///
    /// An unknown `id` is not treated as an error: nothing is modified, but
    /// the cache is still written to disk.
    ///
    /// # Errors
    ///
    /// Returns [`AphoriaError::LearningStore`] when the lock is poisoned or
    /// persisting fails.
    fn mark_promoted(&self, id: &Uuid, extractor_name: &str) -> Result<(), AphoriaError> {
        let mut cache = self.cache.write().map_err(|e| {
            AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e))
        })?;
        if let Some(pattern) = cache.get_mut(id) {
            pattern.promoted = true;
            pattern.promoted_to = Some(extractor_name.to_string());
        }
        // Release the write lock before persist() takes its read lock.
        drop(cache);
        self.persist()
    }

    /// Remove non-promoted patterns whose `last_seen` is more than
    /// `max_age_days` days in the past, returning how many were removed.
    ///
    /// Promoted patterns are kept regardless of age. The cache is only
    /// written to disk when at least one pattern was removed.
    ///
    /// # Errors
    ///
    /// Returns [`AphoriaError::LearningStore`] when the lock is poisoned or
    /// persisting fails.
    fn prune_stale(&self, max_age_days: u32) -> Result<usize, AphoriaError> {
        // Compute the cutoff with checked arithmetic *before* taking the lock.
        // Plain `DateTime - Duration` panics when the result is outside the
        // representable date range, and a panic while holding the write guard
        // would poison the lock for every later call. An unrepresentably old
        // cutoff means no stored timestamp can be older, so nothing is stale.
        let max_age = chrono::Duration::days(i64::from(max_age_days));
        let cutoff = match Utc::now().checked_sub_signed(max_age) {
            Some(cutoff) => cutoff,
            None => return Ok(0),
        };

        let mut cache = self.cache.write().map_err(|e| {
            AphoriaError::LearningStore(format!("Failed to acquire write lock: {}", e))
        })?;

        let initial_count = cache.len();
        cache.retain(|_, pattern| {
            // Keep promoted patterns regardless of age
            pattern.promoted || pattern.last_seen >= cutoff
        });
        let pruned = initial_count - cache.len();

        // Release the write lock before persist() takes its read lock.
        drop(cache);

        if pruned > 0 {
            self.persist()?;
        }
        Ok(pruned)
    }

    /// Number of cached patterns; `0` when the lock is poisoned.
    fn pattern_count(&self) -> usize {
        self.cache.read().map(|c| c.len()).unwrap_or(0)
    }

    /// Clones of all cached patterns in unspecified order; empty when the
    /// lock is poisoned.
    fn get_all_patterns(&self) -> Vec<LearnedPattern> {
        self.cache.read().map(|c| c.values().cloned().collect()).unwrap_or_default()
    }

    /// Clone of the pattern stored under `id`; `None` when it is absent or
    /// the lock is poisoned.
    fn get_pattern_by_id(&self, id: &Uuid) -> Option<LearnedPattern> {
        self.cache.read().ok()?.get(id).cloned()
    }
}
/// Get the default learning store directory.
///
/// Resolves to `.aphoria/learning` beneath the home directory reported by
/// `dirs::home_dir()`. When no home directory is available, the relative path
/// `.aphoria/learning` is returned instead.
pub fn learning_store_dir() -> PathBuf {
    match dirs::home_dir() {
        Some(home_dir) => home_dir.join(".aphoria").join("learning"),
        None => PathBuf::from(".aphoria/learning"),
    }
}