stemedb/applications/aphoria/src/community/pattern_syncer.rs
jordan 8af9b48ac7 feat: Complete Aphoria Phase 14 - Governance Workflows
Implement structured approval workflows for pattern promotion with full
audit trails for SOC 2 compliance.

Core Components:
- governance/types.rs: ApprovalRequest, ApprovalStatus, ApprovalDecision
- governance/workflow.rs: ApprovalWorkflow, ApprovalStage with escalation
- governance/store.rs: JSONL persistence for requests and decisions
- governance/state_machine.rs: Approval state transitions with auto-advance
- governance/audit.rs: AuditTrail with JSON/CSV/Markdown export

CLI Commands:
- aphoria governance pending/approve/reject/escalate/status/create
- aphoria audit trail/export/summary

Integration:
- Pipeline gate blocks promotion until governance approval
- Auto-creates approval requests when governance enabled
- Evidence-based auto-approval for high-confidence patterns

Also includes:
- Phase 11-13: Evidence, Lifecycle, Scope modules
- 62+ governance-specific tests (946 total passing)
- Clippy clean with -D warnings
- Refactored cli.rs into submodules (governance, lifecycle, scope, etc.)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 05:16:26 -07:00

304 lines
11 KiB
Rust

//! Pattern syncer for cross-project learning.
//!
//! Handles uploading learned patterns to the hosted server after anonymization.
use tracing::{info, instrument};
use crate::community::{SharedClaimTemplate, SharedPattern};
use crate::config::CrossProjectConfig;
use crate::error::AphoriaError;
use crate::hosted::{HostedClient, PushPatternsResponse};
use crate::learning::{LearnedPattern, PatternStore};
/// Uploads locally learned patterns to the hosted server.
///
/// Candidate patterns are filtered against the cross-project eligibility
/// settings, converted into the anonymized `SharedPattern` representation,
/// and then pushed through the hosted client.
pub struct PatternSyncer<'a> {
    /// Client used to push the anonymized patterns to the hosted server.
    client: &'a HostedClient,
    /// Cross-project sharing settings: minimum thresholds and subject exclusions.
    config: &'a CrossProjectConfig,
}
impl<'a> PatternSyncer<'a> {
    /// Create a new pattern syncer.
    ///
    /// Both the hosted `client` and the cross-project `config` are borrowed
    /// for the lifetime of the syncer.
    pub fn new(client: &'a HostedClient, config: &'a CrossProjectConfig) -> Self {
        Self { client, config }
    }

    /// Select the learned patterns that are eligible for sharing.
    ///
    /// Asks the store for promotion candidates using the configured
    /// `min_local_projects` / `min_local_confidence` thresholds, then drops
    /// patterns that are already promoted or whose subject is excluded.
    /// The store's ordering is preserved.
    ///
    /// Kept separate from the anonymizing conversion so that callers that
    /// only need a count do not pay for hashing and string cloning.
    fn eligible_patterns<S: PatternStore>(&self, store: &S) -> Vec<LearnedPattern> {
        let mut candidates = store.get_promotion_candidates(
            self.config.min_local_projects,
            self.config.min_local_confidence,
        );
        candidates.retain(|p| !p.promoted && self.passes_subject_filters(p));
        candidates
    }

    /// Get patterns eligible for sharing from the store.
    ///
    /// Filters by:
    /// - Not already promoted
    /// - Meets minimum local project count
    /// - Meets minimum local confidence
    /// - Not in exclude list
    ///
    /// Every surviving pattern is returned in its anonymized
    /// `SharedPattern` form.
    pub fn get_shareable_patterns<S: PatternStore>(&self, store: &S) -> Vec<SharedPattern> {
        self.eligible_patterns(store)
            .iter()
            .map(|pattern| self.to_shared_pattern(pattern))
            .collect()
    }

    /// Check if a pattern passes subject exclusion filters.
    ///
    /// Returns `true` when the pattern's subject template is NOT excluded
    /// by the cross-project configuration.
    fn passes_subject_filters(&self, pattern: &LearnedPattern) -> bool {
        let subject = &pattern.claim_template.subject_template;
        !self.config.is_subject_excluded(subject)
    }

    /// Convert a LearnedPattern to an anonymized SharedPattern.
    ///
    /// Privacy: Does NOT include `example_code` or `project_hashes`; only
    /// the normalized pattern, its claim template, the language, and
    /// aggregate statistics (project count, occurrences, average
    /// confidence) are copied over.
    fn to_shared_pattern(&self, pattern: &LearnedPattern) -> SharedPattern {
        SharedPattern {
            pattern_hash: compute_pattern_hash(&pattern.normalized_pattern, &pattern.language),
            normalized_pattern: pattern.normalized_pattern.clone(),
            claim_template: SharedClaimTemplate::new(
                &pattern.claim_template.subject_template,
                &pattern.claim_template.predicate,
                pattern.claim_template.value_type.to_string(),
            ),
            language: pattern.language.to_string(),
            project_count: pattern.project_count(),
            occurrences: pattern.occurrences,
            avg_confidence: pattern.avg_confidence,
        }
    }

    /// Sync all eligible patterns to the hosted server.
    ///
    /// Returns the server response with counts of accepted, merged, and
    /// deduplicated patterns. When nothing is eligible, no request is made
    /// and a default (empty) response is returned.
    ///
    /// # Errors
    ///
    /// Propagates any `AphoriaError` returned by the hosted client's
    /// `push_patterns` call.
    #[instrument(skip(self, store), fields(project = %self.client.project_id()))]
    pub fn sync<S: PatternStore>(&self, store: &S) -> Result<PushPatternsResponse, AphoriaError> {
        let patterns = self.get_shareable_patterns(store);
        if patterns.is_empty() {
            info!("No patterns eligible for sharing");
            return Ok(PushPatternsResponse::default());
        }
        info!(count = patterns.len(), "Syncing patterns to hosted server");
        self.client.push_patterns(patterns)
    }

    /// Get the count of patterns that would be synced (for preview).
    ///
    /// Applies the same eligibility filters as `get_shareable_patterns`
    /// but skips the anonymizing conversion, since only the count is needed.
    pub fn preview_count<S: PatternStore>(&self, store: &S) -> usize {
        self.eligible_patterns(store).len()
    }
}
/// Compute the BLAKE3 digest of `(normalized_pattern, language)` used for deduplication.
///
/// The hashed input is the pattern bytes, a `:` separator, and the
/// language's string form, in that order. Identical (pattern, language)
/// pairs therefore always produce the same digest, which lets the server
/// deduplicate submissions without seeing any source code.
///
/// Returns the digest hex-encoded as a `String`.
pub fn compute_pattern_hash(pattern: &str, language: &crate::types::Language) -> String {
    let language_name = language.to_string();
    let digest = blake3::Hasher::new()
        .update(pattern.as_bytes())
        .update(b":")
        .update(language_name.as_bytes())
        .finalize();
    hex::encode(digest.as_bytes())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::learning::{ClaimTemplate, ValueType};
    use crate::types::Language;

    /// In-memory `PatternStore` backed by a fixed list of patterns.
    ///
    /// Mutating operations are no-ops; only the read paths return data.
    struct MockPatternStore {
        patterns: Vec<LearnedPattern>,
    }

    impl MockPatternStore {
        fn new(patterns: Vec<LearnedPattern>) -> Self {
            Self { patterns }
        }
    }

    impl PatternStore for MockPatternStore {
        fn record_pattern(
            &self,
            _pattern: &LearnedPattern,
            _max_patterns: Option<usize>,
        ) -> Result<(), AphoriaError> {
            Ok(())
        }

        fn find_similar(
            &self,
            _normalized: &str,
            _language: Language,
            _threshold: f32,
        ) -> Option<LearnedPattern> {
            None
        }

        fn get_promotion_candidates(
            &self,
            min_projects: usize,
            min_confidence: f32,
        ) -> Vec<LearnedPattern> {
            self.patterns
                .iter()
                .filter(|p| p.is_promotion_candidate(min_projects, min_confidence))
                .cloned()
                .collect()
        }

        fn mark_promoted(
            &self,
            _id: &uuid::Uuid,
            _extractor_name: &str,
        ) -> Result<(), AphoriaError> {
            Ok(())
        }

        fn prune_stale(&self, _max_age_days: u32) -> Result<usize, AphoriaError> {
            Ok(0)
        }

        fn pattern_count(&self) -> usize {
            self.patterns.len()
        }

        fn get_all_patterns(&self) -> Vec<LearnedPattern> {
            self.patterns.clone()
        }

        fn get_pattern_by_id(&self, id: &uuid::Uuid) -> Option<LearnedPattern> {
            self.patterns.iter().find(|p| p.id == *id).cloned()
        }
    }

    /// Build a test pattern for `subject` seen in `project_count` projects.
    ///
    /// `LearnedPattern::new` already registers the first project
    /// ("project1"), so only projects 2..=project_count are added here.
    /// Starting the loop at 1 would re-insert "project1" and leave the
    /// pattern one project short of the requested count.
    fn create_test_pattern(
        subject: &str,
        project_count: usize,
        confidence: f32,
        promoted: bool,
    ) -> LearnedPattern {
        let template = ClaimTemplate::new(subject, "version", ValueType::Text, "Test pattern");
        let mut pattern = LearnedPattern::new(
            "test code",
            "const X = <string>",
            template,
            Language::Rust,
            "project1",
            confidence,
        );
        // Add the remaining projects (the first one was registered by `new`).
        for i in 2..=project_count {
            pattern.project_hashes.insert(format!("project{}", i));
        }
        pattern.promoted = promoted;
        pattern
    }

    #[test]
    fn test_create_test_pattern_project_count() {
        // The helper must yield exactly the requested number of distinct projects.
        assert_eq!(create_test_pattern("tls/version", 1, 0.9, false).project_count(), 1);
        assert_eq!(create_test_pattern("tls/version", 5, 0.9, false).project_count(), 5);
    }

    #[test]
    fn test_compute_pattern_hash() {
        let hash1 = compute_pattern_hash("const X = <string>", &Language::Rust);
        let hash2 = compute_pattern_hash("const X = <string>", &Language::Rust);
        let hash3 = compute_pattern_hash("const X = <string>", &Language::Python);
        let hash4 = compute_pattern_hash("const Y = <number>", &Language::Rust);
        // Same input = same hash
        assert_eq!(hash1, hash2);
        // Different language = different hash
        assert_ne!(hash1, hash3);
        // Different pattern = different hash
        assert_ne!(hash1, hash4);
        // Hash should be 64 hex characters
        assert_eq!(hash1.len(), 64);
    }

    #[test]
    fn test_subject_exclusion() {
        // Note: is_subject_excluded uses simple prefix matching with starts_with
        let config = CrossProjectConfig {
            exclude_subjects: vec![
                "code://rust/internal/".to_string(),
                "vendor://acme/".to_string(),
            ],
            min_local_projects: 1,
            min_local_confidence: 0.5,
            ..Default::default()
        };
        // Create patterns (unused but kept for documentation of intent)
        let _internal = create_test_pattern("code://rust/internal/auth", 5, 0.9, false);
        let _vendor = create_test_pattern("vendor://acme/secret", 5, 0.9, false);
        let _public = create_test_pattern("code://rust/tls/version", 5, 0.9, false);
        // We need a hosted client to create the syncer - use a test fixture approach
        // Since we can't easily create a HostedClient without actual config,
        // we test the filter logic directly
        assert!(config.is_subject_excluded("code://rust/internal/auth"));
        assert!(config.is_subject_excluded("vendor://acme/secret"));
        assert!(!config.is_subject_excluded("code://rust/tls/version"));
    }

    #[test]
    fn test_promoted_patterns_excluded() {
        let promoted = create_test_pattern("tls/version", 5, 0.9, true);
        let not_promoted = create_test_pattern("db/pool_size", 5, 0.9, false);
        let store = MockPatternStore::new(vec![promoted, not_promoted]);
        // Get candidates (promoted should be filtered by the store itself)
        let candidates = store.get_promotion_candidates(3, 0.8);
        // Promoted pattern should be filtered out by is_promotion_candidate
        assert_eq!(candidates.len(), 1);
        assert!(!candidates[0].promoted);
    }

    #[test]
    fn test_to_shared_pattern_anonymization() {
        let template =
            ClaimTemplate::new("tls/min_version", "version", ValueType::Text, "TLS version");
        let mut pattern = LearnedPattern::new(
            "const TLS_MIN_VERSION = \"1.2\"", // This should NOT be shared
            "const TLS_MIN_VERSION = <string>",
            template,
            Language::Rust,
            "secret-project-hash", // This should NOT be shared
            0.9,
        );
        pattern.project_hashes.insert("another-secret-hash".to_string());
        // Create syncer with a mock - testing the conversion logic directly
        // Since we need a HostedClient, we test the SharedPattern structure
        let shared = SharedPattern {
            pattern_hash: compute_pattern_hash(&pattern.normalized_pattern, &pattern.language),
            normalized_pattern: pattern.normalized_pattern.clone(),
            claim_template: SharedClaimTemplate::new(
                &pattern.claim_template.subject_template,
                &pattern.claim_template.predicate,
                pattern.claim_template.value_type.to_string(),
            ),
            language: pattern.language.to_string(),
            project_count: pattern.project_count(),
            occurrences: pattern.occurrences,
            avg_confidence: pattern.avg_confidence,
        };
        // Verify anonymization - no example_code or project_hashes
        assert_eq!(shared.normalized_pattern, "const TLS_MIN_VERSION = <string>");
        assert_eq!(shared.project_count, 2);
        assert_eq!(shared.occurrences, 1);
        assert!((shared.avg_confidence - 0.9).abs() < 0.001);
        // Verify the pattern_hash computation
        assert_eq!(shared.pattern_hash.len(), 64);
    }
}