use regex::Regex; use stemedb_core::types::ObjectValue; use super::{Extractor, build_claim}; use crate::types::{Language, Observation}; /// Detects when Option fields are set to None (unbounded configuration). /// /// This extractor identifies configuration fields that use Option types /// and are explicitly set to None in their Default implementation, which /// often indicates unbounded behavior (e.g., unlimited retries, redirects). /// /// # Examples /// /// Detects patterns like: /// ```rust /// pub struct Config { /// pub max_redirects: Option, // ← Field declaration /// } /// /// impl Default for Config { /// fn default() -> Self { /// Self { /// max_redirects: None, // ← None assignment (unbounded!) /// } /// } /// } /// ``` /// /// Creates observation: /// ``` /// concept_path: "httpclient/max_redirects" /// predicate: "configured" /// value: false // Not configured (allows unbounded) /// ``` pub struct OptionBoundsExtractor { /// Matches: pub field_name: Option field_pattern: Regex, /// Matches: field_name: None none_pattern: Regex, } impl OptionBoundsExtractor { /// Create a new OptionBoundsExtractor. #[allow(clippy::expect_used)] pub fn new() -> Self { Self { field_pattern: Regex::new(r"pub\s+(\w+):\s*Option<(?:usize|u32|u64|i32|i64|Duration)>") .expect("valid regex"), none_pattern: Regex::new(r"(\w+):\s*None") .expect("valid regex"), } } fn extract_field_names(&self, content: &str) -> Vec { self.field_pattern .captures_iter(content) .map(|cap| cap[1].to_string()) .collect() } fn find_none_assignments(&self, content: &str) -> Vec<(String, usize)> { content.lines() .enumerate() .filter_map(|(idx, line)| { self.none_pattern.captures(line).map(|cap| { (cap[1].to_string(), idx + 1) }) }) .collect() } } impl Default for OptionBoundsExtractor { fn default() -> Self { Self::new() } } impl Extractor for OptionBoundsExtractor { fn name(&self) -> &str { "option_bounds" } fn languages(&self) -> &[Language] { &[Language::Rust] } fn extract( &self, path_segments: &[String], content: &str, _language: Language, file: &str, ) -> Vec { let mut observations = Vec::new(); // Find all Option fields in struct declarations let option_fields = self.extract_field_names(content); // Find all None assignments in Default impl let none_assignments = self.find_none_assignments(content); // Match field names: if an Option field is set to None, it's unbounded for (field_name, line_num) in none_assignments { if option_fields.contains(&field_name) { // This is an Option field set to None - unbounded! observations.push(build_claim( path_segments, &[&field_name], "configured", ObjectValue::Boolean(false), // Not configured (unbounded) file, line_num, &format!("{}: None", field_name), 0.95, // High confidence &format!("{} is unbounded (allows None)", field_name), )); } } observations } fn screening_patterns(&self) -> Vec<&str> { vec!["Option<", "None"] // Only run if file has Option types and None } fn verifiable_predicates(&self) -> Vec<(&str, &str)> { vec![ ("max_redirects", "configured"), ("max_retries", "configured"), ("max_connections", "configured"), ("max_lifetime", "configured"), ("idle_timeout", "configured"), ("pool_size", "configured"), ] } } #[cfg(test)] mod tests { use super::*; #[test] fn test_detects_none_assignment() { let content = r#" pub struct Config { pub max_redirects: Option, } impl Default for Config { fn default() -> Self { Self { max_redirects: None, } } } "#; let extractor = OptionBoundsExtractor::new(); let obs = extractor.extract( &["httpclient".to_string(), "config".to_string()], content, Language::Rust, "config.rs", ); assert_eq!(obs.len(), 1); assert_eq!(obs[0].predicate, "configured"); assert_eq!(obs[0].value, ObjectValue::Boolean(false)); assert!(obs[0].concept_path.contains("max_redirects")); } #[test] fn test_detects_multiple_none_assignments() { let content = r#" pub struct Config { pub max_redirects: Option, pub max_retries: Option, } impl Default for Config { fn default() -> Self { Self { max_redirects: None, max_retries: None, } } } "#; let extractor = OptionBoundsExtractor::new(); let obs = extractor.extract(&[], content, Language::Rust, "config.rs"); assert_eq!(obs.len(), 2); assert!(obs.iter().any(|o| o.concept_path.contains("max_redirects"))); assert!(obs.iter().any(|o| o.concept_path.contains("max_retries"))); } #[test] fn test_ignores_non_option_fields() { let content = r#" pub struct Config { pub timeout: u64, } impl Default for Config { fn default() -> Self { Self { timeout: 30, } } } "#; let extractor = OptionBoundsExtractor::new(); let obs = extractor.extract(&[], content, Language::Rust, "config.rs"); assert_eq!(obs.len(), 0); // Should not detect non-Option fields } #[test] fn test_ignores_some_assignments() { let content = r#" pub struct Config { pub max_redirects: Option, } impl Default for Config { fn default() -> Self { Self { max_redirects: Some(10), } } } "#; let extractor = OptionBoundsExtractor::new(); let obs = extractor.extract(&[], content, Language::Rust, "config.rs"); assert_eq!(obs.len(), 0); // Should not detect Some(_) assignments } #[test] fn test_screening_patterns() { let extractor = OptionBoundsExtractor::new(); let patterns = extractor.screening_patterns(); assert!(patterns.contains(&"Option<")); assert!(patterns.contains(&"None")); } #[test] fn test_verifiable_predicates() { let extractor = OptionBoundsExtractor::new(); let predicates = extractor.verifiable_predicates(); assert!(predicates.contains(&("max_redirects", "configured"))); assert!(predicates.contains(&("max_retries", "configured"))); } }