//! SQL injection vulnerability extractor. //! //! Detects patterns where SQL queries are constructed using string interpolation //! rather than parameterized queries, which leads to SQL injection vulnerabilities. use regex::Regex; use stemedb_core::types::ObjectValue; use super::Extractor; use crate::types::{Observation, Language}; /// Extractor for SQL injection vulnerabilities. /// /// Detects patterns indicating unsafe SQL query construction: /// - String interpolation/concatenation in SQL queries /// - format! macros with SQL keywords /// - f-strings with SQL in Python /// - Template literals with SQL in JavaScript pub struct SqlInjectionExtractor { // Rust patterns rust_format_sql: Regex, rust_concat_sql: Regex, // Go patterns go_sprintf_sql: Regex, go_concat_sql: Regex, // Python patterns python_fstring_sql: Regex, python_format_sql: Regex, python_percent_sql: Regex, // JavaScript/TypeScript patterns js_template_sql: Regex, js_concat_sql: Regex, } impl Default for SqlInjectionExtractor { fn default() -> Self { Self::new() } } impl SqlInjectionExtractor { /// Create a new SQL injection extractor with compiled regexes. /// /// # Panics /// Panics if any regex pattern is invalid (programmer error). #[allow(clippy::expect_used)] pub fn new() -> Self { Self { // Rust: format! with SQL keywords rust_format_sql: Regex::new( r#"format!\s*\(\s*["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE|FROM)[^"']*\{[^}]*\}"#, ) .expect("valid regex"), // Rust: string concatenation with SQL rust_concat_sql: Regex::new( r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)\s+.*["']\s*\+\s*"#, ) .expect("valid regex"), // Go: fmt.Sprintf with SQL (matches Sprintf followed by SQL keywords with format verbs) go_sprintf_sql: Regex::new( r#"(?:fmt\.)?Sprintf\s*\([^)]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^)]*%[sdvq]"#, ) .expect("valid regex"), // Go: string concatenation with SQL go_concat_sql: Regex::new( r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)\s+.*["']\s*\+"#, ) .expect("valid regex"), // Python: f-strings with SQL python_fstring_sql: Regex::new( r#"f["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^"']*\{[^}]+\}"#, ) .expect("valid regex"), // Python: .format() with SQL python_format_sql: Regex::new( r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^"']*\{[^}]*\}["']\.format"#, ) .expect("valid regex"), // Python: % formatting with SQL python_percent_sql: Regex::new( r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^"']*%[sd]["']\s*%"#, ) .expect("valid regex"), // JavaScript: template literals with SQL js_template_sql: Regex::new( r#"`[^`]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^`]*\$\{[^}]+\}"#, ) .expect("valid regex"), // JavaScript: string concatenation with SQL js_concat_sql: Regex::new( r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)\s+.*["']\s*\+"#, ) .expect("valid regex"), } } fn check_pattern( &self, content: &str, pattern: &Regex, path_segments: &[String], file: &str, description: &str, ) -> Vec { let mut claims = Vec::new(); for (line_idx, line) in content.lines().enumerate() { if let Some(matched) = pattern.find(line) { let mut concept_path = path_segments.to_vec(); concept_path.push("db".to_string()); concept_path.push("query".to_string()); concept_path.push("construction".to_string()); claims.push(Observation { concept_path: format!("code://{}", concept_path.join("/")), predicate: "construction".to_string(), value: ObjectValue::Text("interpolated".to_string()), file: file.to_string(), line: line_idx + 1, matched_text: matched.as_str().to_string(), confidence: 0.9, // High confidence but allow for edge cases description: description.to_string(), }); } } claims } } impl Extractor for SqlInjectionExtractor { fn name(&self) -> &str { "sql_injection" } fn languages(&self) -> &[Language] { &[ Language::Rust, Language::Go, Language::Python, Language::TypeScript, Language::JavaScript, ] } fn extract( &self, path_segments: &[String], content: &str, language: Language, file: &str, ) -> Vec { let mut claims = Vec::new(); match language { Language::Rust => { claims.extend(self.check_pattern( content, &self.rust_format_sql, path_segments, file, "SQL query uses format! macro with interpolation (SQL injection risk)", )); claims.extend(self.check_pattern( content, &self.rust_concat_sql, path_segments, file, "SQL query uses string concatenation (SQL injection risk)", )); } Language::Go => { claims.extend(self.check_pattern( content, &self.go_sprintf_sql, path_segments, file, "SQL query uses fmt.Sprintf interpolation (SQL injection risk)", )); claims.extend(self.check_pattern( content, &self.go_concat_sql, path_segments, file, "SQL query uses string concatenation (SQL injection risk)", )); } Language::Python => { claims.extend(self.check_pattern( content, &self.python_fstring_sql, path_segments, file, "SQL query uses f-string interpolation (SQL injection risk)", )); claims.extend(self.check_pattern( content, &self.python_format_sql, path_segments, file, "SQL query uses .format() interpolation (SQL injection risk)", )); claims.extend(self.check_pattern( content, &self.python_percent_sql, path_segments, file, "SQL query uses % formatting (SQL injection risk)", )); } Language::TypeScript | Language::JavaScript => { claims.extend(self.check_pattern( content, &self.js_template_sql, path_segments, file, "SQL query uses template literal interpolation (SQL injection risk)", )); claims.extend(self.check_pattern( content, &self.js_concat_sql, path_segments, file, "SQL query uses string concatenation (SQL injection risk)", )); } _ => {} } claims } fn screening_patterns(&self) -> Vec<&str> { vec![ r"(?i)format!.*SELECT|format!.*INSERT|format!.*UPDATE|format!.*DELETE", r"(?i)Sprintf.*SELECT|Sprintf.*INSERT|Sprintf.*UPDATE", r#"(?i)f".*SELECT|f".*INSERT|f".*UPDATE|f".*DELETE"#, r"(?i)\.format\(.*SELECT|\.format\(.*INSERT", r"(?i)%.*SELECT|%.*INSERT", r"(?i)\+.*SELECT|\+.*INSERT|\+.*UPDATE", ] } } #[cfg(test)] mod tests { use super::*; #[test] fn test_rust_format_sql() { let extractor = SqlInjectionExtractor::new(); let content = r#" let query = format!("SELECT * FROM users WHERE id = {}", user_id); "#; let claims = extractor.extract(&["rust".to_string()], content, Language::Rust, "src/db.rs"); assert_eq!(claims.len(), 1); assert_eq!(claims[0].value, ObjectValue::Text("interpolated".to_string())); } #[test] fn test_go_sprintf_sql() { let extractor = SqlInjectionExtractor::new(); let content = r#" query := fmt.Sprintf("SELECT * FROM users WHERE name = '%s'", name) "#; let claims = extractor.extract(&["go".to_string()], content, Language::Go, "db.go"); assert_eq!(claims.len(), 1); assert_eq!(claims[0].value, ObjectValue::Text("interpolated".to_string())); } #[test] fn test_python_fstring_sql() { let extractor = SqlInjectionExtractor::new(); let content = r#" query = f"SELECT * FROM users WHERE id = {user_id}" "#; let claims = extractor.extract(&["python".to_string()], content, Language::Python, "db.py"); assert_eq!(claims.len(), 1); assert_eq!(claims[0].value, ObjectValue::Text("interpolated".to_string())); } #[test] fn test_python_format_sql() { let extractor = SqlInjectionExtractor::new(); let content = r#" query = "DELETE FROM users WHERE id = {}".format(user_id) "#; let claims = extractor.extract(&["python".to_string()], content, Language::Python, "db.py"); assert_eq!(claims.len(), 1); } #[test] fn test_js_template_literal_sql() { let extractor = SqlInjectionExtractor::new(); let content = r#" const query = `SELECT * FROM users WHERE email = '${email}'`; "#; let claims = extractor.extract(&["js".to_string()], content, Language::JavaScript, "db.js"); assert_eq!(claims.len(), 1); assert_eq!(claims[0].value, ObjectValue::Text("interpolated".to_string())); } #[test] fn test_no_false_positives_parameterized() { let extractor = SqlInjectionExtractor::new(); // Proper parameterized query in Rust (sqlx style) let content = r#" let users = sqlx::query!("SELECT * FROM users WHERE id = $1", user_id) .fetch_all(&pool) .await?; "#; let claims = extractor.extract(&["rust".to_string()], content, Language::Rust, "src/db.rs"); // Should not flag parameterized queries assert!(claims.is_empty()); } #[test] fn test_no_false_positives_prepared() { let extractor = SqlInjectionExtractor::new(); // Prepared statement in Python let content = r#" cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,)) "#; let claims = extractor.extract(&["python".to_string()], content, Language::Python, "db.py"); // Should not flag prepared statements assert!(claims.is_empty()); } }