//! Timeout configuration extractor. //! //! Detects timeout values that are misconfigured (zero/infinite, //! too low, or too high) which can cause availability issues. use regex::Regex; use stemedb_core::types::ObjectValue; use super::Extractor; use crate::types::{Observation, Language}; /// Configuration for timeout extraction thresholds. #[derive(Debug, Clone)] pub struct TimeoutThresholds { /// Minimum reasonable timeout in milliseconds. pub min_reasonable_ms: u64, /// Maximum reasonable timeout in milliseconds. pub max_reasonable_ms: u64, } impl Default for TimeoutThresholds { fn default() -> Self { Self { min_reasonable_ms: 1000, max_reasonable_ms: 300_000 } } } /// Extractor for timeout configuration values. pub struct TimeoutConfigExtractor { /// Zero/infinite timeout patterns zero_timeout: Regex, /// Numeric timeout patterns (captures the value) numeric_timeout: Regex, /// Duration patterns (Rust/Go style, reserved for future use) #[allow(dead_code)] duration_timeout: Regex, /// Configuration thresholds thresholds: TimeoutThresholds, } impl Default for TimeoutConfigExtractor { fn default() -> Self { Self::new(TimeoutThresholds::default()) } } impl TimeoutConfigExtractor { /// Create a new timeout extractor with the given thresholds. /// /// # Panics /// Panics if any regex pattern is invalid (programmer error). #[allow(clippy::expect_used)] pub fn new(thresholds: TimeoutThresholds) -> Self { Self { zero_timeout: Regex::new( r"(?i)timeout\s*[:=]\s*(0|None|null|nil|infinity|Inf|never|\-1)", ) .expect("valid regex"), numeric_timeout: Regex::new(r"(?i)timeout\s*[:=]\s*(\d+)").expect("valid regex"), duration_timeout: Regex::new( r"(?i)(?:Duration::from_(?:secs|millis|nanos)|time\.(?:Second|Millisecond)|timeout)\s*[:=\(]\s*(\d+)", ) .expect("valid regex"), thresholds, } } #[allow(clippy::too_many_arguments)] fn extract_claim( &self, path_segments: &[String], file: &str, line: usize, matched_text: &str, context: &str, value: f64, description: &str, ) -> Observation { let mut concept_path = path_segments.to_vec(); concept_path.push(context.to_string()); concept_path.push("timeout".to_string()); Observation { concept_path: format!("code://{}", concept_path.join("/")), predicate: "config_value".to_string(), value: ObjectValue::Number(value), file: file.to_string(), line, matched_text: matched_text.to_string(), confidence: 1.0, description: description.to_string(), } } fn detect_context(&self, line: &str) -> &str { let lower = line.to_lowercase(); if lower.contains("http") || lower.contains("client") || lower.contains("request") { "http" } else if lower.contains("db") || lower.contains("database") || lower.contains("sql") { "database" } else if lower.contains("redis") || lower.contains("cache") || lower.contains("memcache") { "cache" } else if lower.contains("grpc") || lower.contains("rpc") { "rpc" } else { "general" } } fn estimate_milliseconds(&self, value: u64, line: &str) -> u64 { // Strip comments before analyzing let code_part = line.split("//").next().unwrap_or(line); let code_part = code_part.split('#').next().unwrap_or(code_part); let lower = code_part.to_lowercase(); // Explicit unit markers in code (not comments) if lower.contains("from_secs") || lower.contains("_secs") { return value * 1000; } if lower.contains("from_millis") || lower.contains("millisecond") || lower.contains("_ms") { return value; } if lower.contains("from_nanos") || lower.contains("nanosecond") { return value / 1_000_000; } // Heuristics based on magnitude if value > 1_000_000 { // Likely nanoseconds value / 1_000_000 } else if value > 1000 && value < 1_000_000 { // Likely milliseconds value } else if value < 100 { // Likely seconds value * 1000 } else { // Default: assume milliseconds value } } } impl Extractor for TimeoutConfigExtractor { fn name(&self) -> &str { "timeout_config" } fn languages(&self) -> &[Language] { &[ Language::Rust, Language::Go, Language::Python, Language::TypeScript, Language::JavaScript, Language::Yaml, Language::Toml, Language::Json, ] } fn extract( &self, path_segments: &[String], content: &str, _language: Language, file: &str, ) -> Vec { let mut claims = Vec::new(); for (line_idx, line) in content.lines().enumerate() { let line_num = line_idx + 1; let context = self.detect_context(line); // Zero/infinite timeout detection if let Some(matched) = self.zero_timeout.find(line) { claims.push(self.extract_claim( path_segments, file, line_num, matched.as_str(), context, 0.0, "Timeout is disabled (infinite wait)", )); continue; } // Numeric timeout detection if let Some(captures) = self.numeric_timeout.captures(line) { if let Some(value_match) = captures.get(1) { if let Ok(value) = value_match.as_str().parse::() { let ms = self.estimate_milliseconds(value, line); if ms > 0 && ms < self.thresholds.min_reasonable_ms { claims.push(self.extract_claim( path_segments, file, line_num, captures.get(0).map(|m| m.as_str()).unwrap_or(""), context, ms as f64, &format!( "Timeout {}ms is below minimum reasonable {}ms", ms, self.thresholds.min_reasonable_ms ), )); } else if ms > self.thresholds.max_reasonable_ms { claims.push(self.extract_claim( path_segments, file, line_num, captures.get(0).map(|m| m.as_str()).unwrap_or(""), context, ms as f64, &format!( "Timeout {}ms exceeds maximum reasonable {}ms", ms, self.thresholds.max_reasonable_ms ), )); } } } } } claims } fn screening_patterns(&self) -> Vec<&str> { vec![ r"(?i)timeout", r"(?i)read_timeout|write_timeout|connect_timeout", r"(?i)request_timeout|idle_timeout|keep_alive", ] } } #[cfg(test)] mod tests { use super::*; #[test] fn test_zero_timeout_detection() { let extractor = TimeoutConfigExtractor::default(); let content = r#" client.timeout = 0 "#; let claims = extractor.extract(&["rust".to_string()], content, Language::Rust, "src/http.rs"); assert_eq!(claims.len(), 1); assert!(claims[0].description.contains("disabled")); } #[test] fn test_nil_timeout_detection() { let extractor = TimeoutConfigExtractor::default(); let content = r#" timeout: nil "#; let claims = extractor.extract(&["go".to_string()], content, Language::Go, "config.go"); assert_eq!(claims.len(), 1); } #[test] fn test_unreasonably_low_timeout() { let extractor = TimeoutConfigExtractor::default(); let content = r#" http_client.timeout = 100 // 100ms "#; let claims = extractor.extract(&["rust".to_string()], content, Language::Rust, "src/http.rs"); assert_eq!(claims.len(), 1); assert!(claims[0].description.contains("below minimum")); } #[test] fn test_unreasonably_high_timeout() { let extractor = TimeoutConfigExtractor::default(); let content = r#" db_timeout = 600000 // 10 minutes "#; let claims = extractor.extract(&["python".to_string()], content, Language::Python, "config.py"); assert_eq!(claims.len(), 1); assert!(claims[0].description.contains("exceeds maximum")); } #[test] fn test_reasonable_timeout_no_claims() { let extractor = TimeoutConfigExtractor::default(); let content = r#" timeout = 30000 // 30 seconds "#; let claims = extractor.extract(&["rust".to_string()], content, Language::Rust, "src/http.rs"); assert!(claims.is_empty(), "Expected no claims for reasonable 30000ms timeout"); } #[test] fn test_context_detection() { let extractor = TimeoutConfigExtractor::default(); let content_http = "http_client.timeout = 0"; let claims = extractor.extract(&["rust".to_string()], content_http, Language::Rust, "src/http.rs"); assert!(claims[0].concept_path.contains("http")); let content_db = "database_timeout = 0"; let claims = extractor.extract(&["rust".to_string()], content_db, Language::Rust, "src/db.rs"); assert!(claims[0].concept_path.contains("database")); } }