//! Pattern syncer for cross-project learning.
//!
//! Handles uploading learned patterns to the hosted server after anonymization.

use tracing::{info, instrument};

use crate::community::{SharedClaimTemplate, SharedPattern};
use crate::config::CrossProjectConfig;
use crate::error::AphoriaError;
use crate::hosted::{HostedClient, PushPatternsResponse};
use crate::learning::{LearnedPattern, PatternStore};

/// Syncs learned patterns to the hosted server.
///
/// Filters patterns by eligibility criteria, converts them to the
/// anonymized `SharedPattern` format, and pushes to the server.
pub struct PatternSyncer<'a> {
    /// Client used to push patterns and to label tracing spans with the project id.
    client: &'a HostedClient,
    /// Thresholds and subject exclusions that decide what may be shared.
    config: &'a CrossProjectConfig,
}

impl<'a> PatternSyncer<'a> {
    /// Create a new pattern syncer that borrows the client and configuration.
    pub fn new(client: &'a HostedClient, config: &'a CrossProjectConfig) -> Self {
        Self { client, config }
    }

    /// Get patterns eligible for sharing from the store, already anonymized.
    ///
    /// A pattern is returned only when it:
    /// - is reported by the store as a promotion candidate for
    ///   `config.min_local_projects` / `config.min_local_confidence`,
    /// - is not already promoted,
    /// - has a subject that is not excluded by the configuration.
    pub fn get_shareable_patterns<S: PatternStore>(&self, store: &S) -> Vec<SharedPattern> {
        store
            .get_promotion_candidates(
                self.config.min_local_projects,
                self.config.min_local_confidence,
            )
            .into_iter()
            // Checked again here so sharing does not rely on the store's own filtering.
            .filter(|p| !p.promoted)
            .filter(|p| self.passes_subject_filters(p))
            .map(|p| self.to_shared_pattern(&p))
            .collect()
    }

    /// Check if a pattern passes subject exclusion filters.
    ///
    /// Returns `true` when the pattern's subject template is NOT excluded.
    fn passes_subject_filters(&self, pattern: &LearnedPattern) -> bool {
        let subject = &pattern.claim_template.subject_template;
        !self.config.is_subject_excluded(subject)
    }

    /// Convert a `LearnedPattern` to an anonymized `SharedPattern`.
    ///
    /// Privacy: does NOT include `example_code` or `project_hashes`; only the
    /// number of projects (`project_count()`) is carried over.
    fn to_shared_pattern(&self, pattern: &LearnedPattern) -> SharedPattern {
        SharedPattern {
            pattern_hash: compute_pattern_hash(&pattern.normalized_pattern, &pattern.language),
            normalized_pattern: pattern.normalized_pattern.clone(),
            claim_template: SharedClaimTemplate::new(
                &pattern.claim_template.subject_template,
                &pattern.claim_template.predicate,
                pattern.claim_template.value_type.to_string(),
            ),
            language: pattern.language.to_string(),
            project_count: pattern.project_count(),
            occurrences: pattern.occurrences,
            avg_confidence: pattern.avg_confidence,
        }
    }

    /// Sync all eligible patterns to the hosted server.
    ///
    /// Returns the server response with counts of accepted, merged, and
    /// deduplicated patterns. When nothing is eligible, the client is not
    /// called and a default response is returned.
    ///
    /// # Errors
    ///
    /// Propagates whatever error `HostedClient::push_patterns` returns.
    // NOTE(review): the error type is reconstructed as `AphoriaError` (the only
    // error type this module imports) — confirm against `push_patterns`.
    #[instrument(skip(self, store), fields(project = %self.client.project_id()))]
    pub fn sync<S: PatternStore>(&self, store: &S) -> Result<PushPatternsResponse, AphoriaError> {
        let patterns = self.get_shareable_patterns(store);

        if patterns.is_empty() {
            info!("No patterns eligible for sharing");
            return Ok(PushPatternsResponse::default());
        }

        info!(count = patterns.len(), "Syncing patterns to hosted server");
        self.client.push_patterns(patterns)
    }

    /// Get the count of patterns that would be synced (for preview).
    ///
    /// Applies exactly the same filtering as [`Self::sync`] but sends nothing.
    pub fn preview_count<S: PatternStore>(&self, store: &S) -> usize {
        self.get_shareable_patterns(store).len()
    }
}

/// Compute BLAKE3 hash of (normalized_pattern, language) for deduplication.
///
/// This hash uniquely identifies a pattern across organizations,
/// enabling server-side deduplication without revealing source code.
pub fn compute_pattern_hash(pattern: &str, language: &crate::types::Language) -> String { let mut hasher = blake3::Hasher::new(); hasher.update(pattern.as_bytes()); hasher.update(b":"); hasher.update(language.to_string().as_bytes()); hex::encode(hasher.finalize().as_bytes()) } #[cfg(test)] mod tests { use super::*; use crate::learning::{ClaimTemplate, ValueType}; use crate::types::Language; /// Mock pattern store for testing struct MockPatternStore { patterns: Vec, } impl MockPatternStore { fn new(patterns: Vec) -> Self { Self { patterns } } } impl PatternStore for MockPatternStore { fn record_pattern( &self, _pattern: &LearnedPattern, _max_patterns: Option, ) -> Result<(), AphoriaError> { Ok(()) } fn find_similar( &self, _normalized: &str, _language: Language, _threshold: f32, ) -> Option { None } fn get_promotion_candidates( &self, min_projects: usize, min_confidence: f32, ) -> Vec { self.patterns .iter() .filter(|p| p.is_promotion_candidate(min_projects, min_confidence)) .cloned() .collect() } fn mark_promoted( &self, _id: &uuid::Uuid, _extractor_name: &str, ) -> Result<(), AphoriaError> { Ok(()) } fn prune_stale(&self, _max_age_days: u32) -> Result { Ok(0) } fn pattern_count(&self) -> usize { self.patterns.len() } fn get_all_patterns(&self) -> Vec { self.patterns.clone() } fn get_pattern_by_id(&self, id: &uuid::Uuid) -> Option { self.patterns.iter().find(|p| p.id == *id).cloned() } } fn create_test_pattern( subject: &str, project_count: usize, confidence: f32, promoted: bool, ) -> LearnedPattern { let template = ClaimTemplate::new(subject, "version", ValueType::Text, "Test pattern"); let mut pattern = LearnedPattern::new( "test code", "const X = ", template, Language::Rust, "project1", confidence, ); // Add more projects for i in 1..project_count { pattern.project_hashes.insert(format!("project{}", i)); } pattern.promoted = promoted; pattern } #[test] fn test_compute_pattern_hash() { let hash1 = compute_pattern_hash("const X = ", &Language::Rust); let hash2 = 
compute_pattern_hash("const X = ", &Language::Rust); let hash3 = compute_pattern_hash("const X = ", &Language::Python); let hash4 = compute_pattern_hash("const Y = ", &Language::Rust); // Same input = same hash assert_eq!(hash1, hash2); // Different language = different hash assert_ne!(hash1, hash3); // Different pattern = different hash assert_ne!(hash1, hash4); // Hash should be 64 hex characters assert_eq!(hash1.len(), 64); } #[test] fn test_subject_exclusion() { // Note: is_subject_excluded uses simple prefix matching with starts_with let config = CrossProjectConfig { exclude_subjects: vec![ "code://rust/internal/".to_string(), "vendor://acme/".to_string(), ], min_local_projects: 1, min_local_confidence: 0.5, ..Default::default() }; // Create patterns (unused but kept for documentation of intent) let _internal = create_test_pattern("code://rust/internal/auth", 5, 0.9, false); let _vendor = create_test_pattern("vendor://acme/secret", 5, 0.9, false); let _public = create_test_pattern("code://rust/tls/version", 5, 0.9, false); // We need a hosted client to create the syncer - use a test fixture approach // Since we can't easily create a HostedClient without actual config, // we test the filter logic directly assert!(config.is_subject_excluded("code://rust/internal/auth")); assert!(config.is_subject_excluded("vendor://acme/secret")); assert!(!config.is_subject_excluded("code://rust/tls/version")); } #[test] fn test_promoted_patterns_excluded() { let promoted = create_test_pattern("tls/version", 5, 0.9, true); let not_promoted = create_test_pattern("db/pool_size", 5, 0.9, false); let store = MockPatternStore::new(vec![promoted, not_promoted]); // Get candidates (promoted should be filtered by the store itself) let candidates = store.get_promotion_candidates(3, 0.8); // Promoted pattern should be filtered out by is_promotion_candidate assert_eq!(candidates.len(), 1); assert!(!candidates[0].promoted); } #[test] fn test_to_shared_pattern_anonymization() { let template 
= ClaimTemplate::new("tls/min_version", "version", ValueType::Text, "TLS version"); let mut pattern = LearnedPattern::new( "const TLS_MIN_VERSION = \"1.2\"", // This should NOT be shared "const TLS_MIN_VERSION = ", template, Language::Rust, "secret-project-hash", // This should NOT be shared 0.9, ); pattern.project_hashes.insert("another-secret-hash".to_string()); // Create syncer with a mock - testing the conversion logic directly // Since we need a HostedClient, we test the SharedPattern structure let shared = SharedPattern { pattern_hash: compute_pattern_hash(&pattern.normalized_pattern, &pattern.language), normalized_pattern: pattern.normalized_pattern.clone(), claim_template: SharedClaimTemplate::new( &pattern.claim_template.subject_template, &pattern.claim_template.predicate, pattern.claim_template.value_type.to_string(), ), language: pattern.language.to_string(), project_count: pattern.project_count(), occurrences: pattern.occurrences, avg_confidence: pattern.avg_confidence, }; // Verify anonymization - no example_code or project_hashes assert_eq!(shared.normalized_pattern, "const TLS_MIN_VERSION = "); assert_eq!(shared.project_count, 2); assert_eq!(shared.occurrences, 1); assert!((shared.avg_confidence - 0.9).abs() < 0.001); // Verify the pattern_hash computation assert_eq!(shared.pattern_hash.len(), 64); } }