stemedb/applications/aphoria/src/extractors/timeout_config.rs
jml 3b5f88b4f0 feat(aphoria): implement claims architecture (A1-A5) with verify engine, corpus, coverage, and explain
Complete Aphoria claims system overhaul:
- A1: Rename ExtractedClaim to Observation (extractors produce observations, not claims)
- A2: Add AuthoredClaim with full provenance, invariants, and authority tiers
- A3: Verify engine comparing observations against authored claims, CLI + formatters
- A4: Corpus as first-class assertions with predicate indexing, authority lens, trust packs
- A5: Coverage analysis, explain/docs generation, self-audit extractor, claim suggester skill

Also includes: 42 extractors updated for Observation type, verifiable_predicates trait,
conflict detection with comparison modes, claims TOML persistence, Grafana dashboard,
backup/restore scripts, and comprehensive test coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 09:11:47 +00:00

324 lines
10 KiB
Rust

//! Timeout configuration extractor.
//!
//! Detects timeout values that are misconfigured (zero/infinite,
//! too low, or too high) which can cause availability issues.
use regex::Regex;
use stemedb_core::types::ObjectValue;
use super::Extractor;
use crate::types::{Observation, Language};
/// Bounds separating plausible timeout values from suspicious ones.
///
/// Both limits are expressed in milliseconds.
#[derive(Debug, Clone)]
pub struct TimeoutThresholds {
    /// Smallest timeout, in milliseconds, still regarded as reasonable.
    pub min_reasonable_ms: u64,
    /// Largest timeout, in milliseconds, still regarded as reasonable.
    pub max_reasonable_ms: u64,
}

impl Default for TimeoutThresholds {
    /// Stock limits: one second at the low end, five minutes at the high end.
    fn default() -> Self {
        const ONE_SECOND_MS: u64 = 1_000;
        const FIVE_MINUTES_MS: u64 = 5 * 60 * ONE_SECOND_MS;

        TimeoutThresholds {
            min_reasonable_ms: ONE_SECOND_MS,
            max_reasonable_ms: FIVE_MINUTES_MS,
        }
    }
}
/// Line-oriented scanner that reports suspicious `timeout` settings.
pub struct TimeoutConfigExtractor {
    /// Matches `timeout` assigned (via `:` or `=`) a zero or "no timeout" sentinel.
    zero_timeout: Regex,
    /// Matches `timeout` assigned an unsigned integer; capture group 1 holds the digits.
    numeric_timeout: Regex,
    /// Matches Rust/Go style duration expressions.
    /// Compiled but not consulted yet; kept for future use.
    #[allow(dead_code)]
    duration_timeout: Regex,
    /// Limits deciding whether a numeric timeout is reported.
    thresholds: TimeoutThresholds,
}
impl Default for TimeoutConfigExtractor {
fn default() -> Self {
Self::new(TimeoutThresholds::default())
}
}
impl TimeoutConfigExtractor {
    /// Create a new timeout extractor with the given thresholds.
    ///
    /// # Panics
    /// Panics if any regex pattern is invalid (programmer error).
    // The patterns are fixed literals, so `expect` can only fire on a programmer error.
    #[allow(clippy::expect_used)]
    pub fn new(thresholds: TimeoutThresholds) -> Self {
        Self {
            zero_timeout: Regex::new(
                r"(?i)timeout\s*[:=]\s*(0|None|null|nil|infinity|Inf|never|\-1)",
            )
            .expect("valid regex"),
            numeric_timeout: Regex::new(r"(?i)timeout\s*[:=]\s*(\d+)").expect("valid regex"),
            duration_timeout: Regex::new(
                r"(?i)(?:Duration::from_(?:secs|millis|nanos)|time\.(?:Second|Millisecond)|timeout)\s*[:=\(]\s*(\d+)",
            )
            .expect("valid regex"),
            thresholds,
        }
    }

    /// Assemble an [`Observation`] for a timeout finding.
    ///
    /// The concept path is `code://<path_segments...>/<context>/timeout`, the
    /// predicate is always `config_value`, and the confidence is fixed at `1.0`.
    ///
    /// * `path_segments` - leading segments of the concept path.
    /// * `file` / `line` - location of the finding (`line` is passed through as-is).
    /// * `matched_text` - the source text that triggered the finding.
    /// * `context` - subsystem label, as produced by `detect_context`.
    /// * `value` - numeric value stored in the observation.
    /// * `description` - human-readable explanation of the finding.
    // Every argument maps one-to-one onto an `Observation` field; bundling them
    // into a struct would only move the noise to the call sites.
    #[allow(clippy::too_many_arguments)]
    fn extract_claim(
        &self,
        path_segments: &[String],
        file: &str,
        line: usize,
        matched_text: &str,
        context: &str,
        value: f64,
        description: &str,
    ) -> Observation {
        let mut concept_path = path_segments.to_vec();
        concept_path.push(context.to_string());
        concept_path.push("timeout".to_string());
        Observation {
            concept_path: format!("code://{}", concept_path.join("/")),
            predicate: "config_value".to_string(),
            value: ObjectValue::Number(value),
            file: file.to_string(),
            line,
            matched_text: matched_text.to_string(),
            confidence: 1.0,
            description: description.to_string(),
        }
    }

    /// Classify a source line into a coarse subsystem label.
    ///
    /// The whole line (comments included) is lower-cased and searched for
    /// keywords; the first group that matches wins, in this order: `http`,
    /// `database`, `cache`, `rpc`. Lines matching none yield `general`.
    ///
    /// Only string literals are returned, so the result is `'static` and does
    /// not keep `self` or `line` borrowed.
    fn detect_context(&self, line: &str) -> &'static str {
        let lower = line.to_lowercase();
        if lower.contains("http") || lower.contains("client") || lower.contains("request") {
            "http"
        } else if lower.contains("db") || lower.contains("database") || lower.contains("sql") {
            "database"
        } else if lower.contains("redis") || lower.contains("cache") || lower.contains("memcache") {
            "cache"
        } else if lower.contains("grpc") || lower.contains("rpc") {
            "rpc"
        } else {
            "general"
        }
    }

    /// Best-effort conversion of a raw timeout number to milliseconds.
    ///
    /// Unit markers found in the code part of `line` take precedence; text
    /// after `//` or `#` is treated as a comment and ignored. Without a marker
    /// the unit is guessed from the magnitude of `value`:
    ///
    /// * above 1,000,000 - nanoseconds,
    /// * below 100 - seconds,
    /// * anything else - milliseconds.
    ///
    /// Conversions from seconds saturate at `u64::MAX` instead of overflowing.
    fn estimate_milliseconds(&self, value: u64, line: &str) -> u64 {
        const MS_PER_SEC: u64 = 1000;
        const NANOS_PER_MS: u64 = 1_000_000;

        // Strip comments before analyzing
        let code_part = line.split("//").next().unwrap_or(line);
        let code_part = code_part.split('#').next().unwrap_or(code_part);
        let lower = code_part.to_lowercase();

        // Explicit unit markers in code (not comments)
        if lower.contains("from_secs") || lower.contains("_secs") {
            // `value` comes from arbitrary source text, so a plain multiply
            // could overflow (panic in debug builds, wrap in release builds).
            return value.saturating_mul(MS_PER_SEC);
        }
        if lower.contains("from_millis") || lower.contains("millisecond") || lower.contains("_ms") {
            return value;
        }
        if lower.contains("from_nanos") || lower.contains("nanosecond") {
            return value / NANOS_PER_MS;
        }

        // Heuristics based on magnitude
        if value > NANOS_PER_MS {
            // Likely nanoseconds
            value / NANOS_PER_MS
        } else if value < 100 {
            // Likely seconds; cannot overflow because value < 100.
            value * MS_PER_SEC
        } else {
            // 100..=1_000_000: assume milliseconds
            value
        }
    }
}
/// Returns `true` when a numeric sentinel match (`0` or `-1`) is merely the
/// leading part of a longer number, e.g. the `0` in `0.5` or the `-1` in `-15`.
///
/// `matched` is the text matched by the zero/sentinel pattern and `rest` is
/// whatever follows it on the line. Keyword sentinels (`None`, `nil`, ...) are
/// never rejected here. A fraction made only of zeros (`0.0`) or a bare
/// trailing period still counts as the sentinel itself.
fn is_prefix_of_longer_number(matched: &str, rest: &str) -> bool {
    if !matched.ends_with(|c: char| c.is_ascii_digit()) {
        return false;
    }
    let mut following = rest.chars();
    match following.next() {
        // More integer digits follow: `05`, `-15`.
        Some(c) if c.is_ascii_digit() => true,
        // A fractional part with any non-zero digit: `0.5`, `0.05`.
        Some('.') => following
            .take_while(|c| c.is_ascii_digit())
            .any(|c| c != '0'),
        _ => false,
    }
}

impl Extractor for TimeoutConfigExtractor {
    /// Stable identifier of this extractor.
    fn name(&self) -> &str {
        "timeout_config"
    }

    /// Languages and config formats this extractor is declared for.
    fn languages(&self) -> &[Language] {
        &[
            Language::Rust,
            Language::Go,
            Language::Python,
            Language::TypeScript,
            Language::JavaScript,
            Language::Yaml,
            Language::Toml,
            Language::Json,
        ]
    }

    /// Scan `content` line by line and report suspicious timeout settings.
    ///
    /// Each line yields at most one observation:
    ///
    /// 1. A zero / "no timeout" sentinel is reported with value `0.0`. A `0`
    ///    or `-1` that is only the start of a longer number (`0.5`, `-15`) is
    ///    not treated as a sentinel.
    /// 2. Otherwise the first integer timeout on the line is converted to
    ///    milliseconds and reported when it is non-zero and below the
    ///    configured minimum, or above the configured maximum.
    ///
    /// Line numbers in the returned observations are 1-based. The `_language`
    /// argument is ignored.
    fn extract(
        &self,
        path_segments: &[String],
        content: &str,
        _language: Language,
        file: &str,
    ) -> Vec<Observation> {
        let mut claims = Vec::new();
        let min_ms = self.thresholds.min_reasonable_ms;
        let max_ms = self.thresholds.max_reasonable_ms;

        for (line_idx, line) in content.lines().enumerate() {
            let line_num = line_idx + 1;

            // Zero/infinite timeout detection. Skip matches where the pattern
            // only caught the prefix of a real value such as `timeout = 0.5`.
            let sentinel = self
                .zero_timeout
                .find_iter(line)
                .find(|m| !is_prefix_of_longer_number(m.as_str(), &line[m.end()..]));
            if let Some(matched) = sentinel {
                claims.push(self.extract_claim(
                    path_segments,
                    file,
                    line_num,
                    matched.as_str(),
                    // Context detection allocates, so it runs only for reported lines.
                    self.detect_context(line),
                    0.0,
                    "Timeout is disabled (infinite wait)",
                ));
                continue;
            }

            // Numeric timeout detection
            let captures = match self.numeric_timeout.captures(line) {
                Some(captures) => captures,
                None => continue,
            };
            // Digits that do not fit in a u64 are ignored rather than reported.
            let value = match captures.get(1).and_then(|m| m.as_str().parse::<u64>().ok()) {
                Some(value) => value,
                None => continue,
            };

            let ms = self.estimate_milliseconds(value, line);
            let description = if ms > 0 && ms < min_ms {
                format!("Timeout {}ms is below minimum reasonable {}ms", ms, min_ms)
            } else if ms > max_ms {
                format!("Timeout {}ms exceeds maximum reasonable {}ms", ms, max_ms)
            } else {
                // Within bounds (or rounded down to zero): nothing to report.
                continue;
            };

            claims.push(self.extract_claim(
                path_segments,
                file,
                line_num,
                captures.get(0).map_or("", |m| m.as_str()),
                self.detect_context(line),
                // Precision loss only occurs above 2^53 ms, far beyond any real timeout.
                ms as f64,
                &description,
            ));
        }
        claims
    }

    /// Regex patterns offered for pre-screening content before `extract` runs.
    fn screening_patterns(&self) -> Vec<&str> {
        vec![
            r"(?i)timeout",
            r"(?i)read_timeout|write_timeout|connect_timeout",
            r"(?i)request_timeout|idle_timeout|keep_alive",
        ]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Run a default-configured extractor over `content` with a single path segment.
    fn observe(segment: &str, content: &str, language: Language, file: &str) -> Vec<Observation> {
        let segments = [segment.to_string()];
        TimeoutConfigExtractor::default().extract(&segments, content, language, file)
    }

    #[test]
    fn test_zero_timeout_detection() {
        let found = observe("rust", "\nclient.timeout = 0\n", Language::Rust, "src/http.rs");
        assert_eq!(found.len(), 1);
        assert!(found[0].description.contains("disabled"));
    }

    #[test]
    fn test_nil_timeout_detection() {
        let found = observe("go", "\ntimeout: nil\n", Language::Go, "config.go");
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_unreasonably_low_timeout() {
        let source = "\nhttp_client.timeout = 100 // 100ms\n";
        let found = observe("rust", source, Language::Rust, "src/http.rs");
        assert_eq!(found.len(), 1);
        assert!(found[0].description.contains("below minimum"));
    }

    #[test]
    fn test_unreasonably_high_timeout() {
        let source = "\ndb_timeout = 600000 // 10 minutes\n";
        let found = observe("python", source, Language::Python, "config.py");
        assert_eq!(found.len(), 1);
        assert!(found[0].description.contains("exceeds maximum"));
    }

    #[test]
    fn test_reasonable_timeout_no_claims() {
        let source = "\ntimeout = 30000 // 30 seconds\n";
        let found = observe("rust", source, Language::Rust, "src/http.rs");
        assert!(found.is_empty(), "Expected no claims for reasonable 30000ms timeout");
    }

    #[test]
    fn test_context_detection() {
        // An HTTP-flavoured line lands under the `http` context.
        let http = observe("rust", "http_client.timeout = 0", Language::Rust, "src/http.rs");
        assert!(http[0].concept_path.contains("http"));

        // A database-flavoured line lands under the `database` context.
        let db = observe("rust", "database_timeout = 0", Language::Rust, "src/db.rs");
        assert!(db[0].concept_path.contains("database"));
    }
}