stemedb/applications/aphoria/src/extractors/sql_injection.rs
jml 3b5f88b4f0 feat(aphoria): implement claims architecture (A1-A5) with verify engine, corpus, coverage, and explain
Complete Aphoria claims system overhaul:
- A1: Rename ExtractedClaim to Observation (extractors produce observations, not claims)
- A2: Add AuthoredClaim with full provenance, invariants, and authority tiers
- A3: Verify engine comparing observations against authored claims, CLI + formatters
- A4: Corpus as first-class assertions with predicate indexing, authority lens, trust packs
- A5: Coverage analysis, explain/docs generation, self-audit extractor, claim suggester skill

Also includes: 42 extractors updated for Observation type, verifiable_predicates trait,
conflict detection with comparison modes, claims TOML persistence, Grafana dashboard,
backup/restore scripts, and comprehensive test coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 09:11:47 +00:00

349 lines
12 KiB
Rust

//! SQL injection vulnerability extractor.
//!
//! Detects patterns where SQL queries are constructed using string interpolation
//! rather than parameterized queries, which leads to SQL injection vulnerabilities.
use regex::Regex;
use stemedb_core::types::ObjectValue;
use super::Extractor;
use crate::types::{Observation, Language};
/// Extractor for SQL injection vulnerabilities.
///
/// Detects patterns indicating unsafe SQL query construction:
/// - String interpolation/concatenation in SQL queries
/// - format! macros with SQL keywords
/// - f-strings with SQL in Python
/// - Template literals with SQL in JavaScript
///
/// Each field holds one pre-compiled pattern; all of them are built once in
/// `new()` and are applied line by line to the scanned source.
pub struct SqlInjectionExtractor {
// Rust patterns
/// `format!` invocation whose string literal contains a SQL keyword
/// followed by a `{...}` placeholder.
rust_format_sql: Regex,
/// Quoted literal containing a SQL keyword, followed by a `+` operator.
rust_concat_sql: Regex,
// Go patterns
/// `Sprintf` / `fmt.Sprintf` call containing a SQL keyword followed by a
/// `%s`, `%d`, `%v` or `%q` verb.
go_sprintf_sql: Regex,
/// Quoted literal containing a SQL keyword, followed by a `+` operator.
go_concat_sql: Regex,
// Python patterns
/// f-string containing a SQL keyword followed by a `{...}` replacement field.
python_fstring_sql: Regex,
/// Quoted literal containing a SQL keyword and a `{...}` field, with
/// `.format` called on it.
python_format_sql: Regex,
/// Quoted literal containing a SQL keyword and a `%s`/`%d` verb, with the
/// `%` operator applied to it.
python_percent_sql: Regex,
// JavaScript/TypeScript patterns
/// Backtick template literal containing a SQL keyword followed by a
/// `${...}` substitution.
js_template_sql: Regex,
/// Quoted literal containing a SQL keyword, followed by a `+` operator.
js_concat_sql: Regex,
}
impl Default for SqlInjectionExtractor {
fn default() -> Self {
Self::new()
}
}
impl SqlInjectionExtractor {
    /// Create a new SQL injection extractor with compiled regexes.
    ///
    /// SQL keywords are matched case-sensitively (upper-case only). This is
    /// deliberate: words such as "delete", "update" or "where" are common in
    /// ordinary messages, so a case-insensitive match would flag strings like
    /// `format!("failed to delete {}", name)`.
    ///
    /// The interpolation patterns scan up to the literal's *own* closing
    /// delimiter. Quotes of the other kind inside the literal therefore do not
    /// hide the placeholder, which matters because the most common injection
    /// shape wraps the interpolated value in SQL quotes
    /// (`... WHERE name = '{}'`).
    ///
    /// # Panics
    /// Panics if any regex pattern is invalid (programmer error).
    // Every pattern is a literal in this function, so `expect` can only fire on
    // a mistake made while editing this file, never on runtime input.
    #[allow(clippy::expect_used)]
    pub fn new() -> Self {
        Self {
            // Rust: format! with SQL keywords.
            // Rust string literals are always double-quoted, so only `"` ends
            // the literal; a `'` inside it (e.g. `name = '{}'`) is plain text.
            rust_format_sql: Regex::new(
                r#"format!\s*\(\s*"[^"]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE|FROM)[^"]*\{[^}]*\}"#,
            )
            .expect("valid regex"),
            // Rust: string concatenation with SQL
            rust_concat_sql: Regex::new(
                r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)\s+.*["']\s*\+\s*"#,
            )
            .expect("valid regex"),
            // Go: fmt.Sprintf with SQL (matches Sprintf followed by SQL keywords with format verbs)
            // NOTE(review): `[^)]*` means a `)` anywhere between `Sprintf(` and
            // the verb (e.g. `COUNT(*)` or an INSERT column list) prevents a
            // match. Left unchanged here; worth a follow-up.
            go_sprintf_sql: Regex::new(
                r#"(?:fmt\.)?Sprintf\s*\([^)]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^)]*%[sdvq]"#,
            )
            .expect("valid regex"),
            // Go: string concatenation with SQL
            go_concat_sql: Regex::new(
                r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)\s+.*["']\s*\+"#,
            )
            .expect("valid regex"),
            // Python: f-strings with SQL.
            // One alternative per delimiter so the other quote kind may appear
            // inside the literal (`f"... WHERE name = '{name}'"`).
            python_fstring_sql: Regex::new(
                r#"f(?:"[^"]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^"]*|'[^']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^']*)\{[^}]+\}"#,
            )
            .expect("valid regex"),
            // Python: .format() with SQL.
            // Text may follow the last `{...}` field before the closing
            // delimiter (`"... name = '{}'".format(name)`).
            python_format_sql: Regex::new(
                r#"(?:"[^"]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^"]*\{[^}]*\}[^"]*"|'[^']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^']*\{[^}]*\}[^']*')\.format"#,
            )
            .expect("valid regex"),
            // Python: % formatting with SQL.
            // The closing delimiter must be followed by the `%` operator, so a
            // driver-level placeholder passed as a separate argument
            // (`execute("... = %s", (id,))`) is not flagged.
            python_percent_sql: Regex::new(
                r#"(?:"[^"]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^"]*%[sd][^"]*"|'[^']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^']*%[sd][^']*')\s*%"#,
            )
            .expect("valid regex"),
            // JavaScript: template literals with SQL
            js_template_sql: Regex::new(
                r#"`[^`]*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)[^`]*\$\{[^}]+\}"#,
            )
            .expect("valid regex"),
            // JavaScript: string concatenation with SQL
            js_concat_sql: Regex::new(
                r#"["'][^"']*(?:SELECT|INSERT|UPDATE|DELETE|WHERE)\s+.*["']\s*\+"#,
            )
            .expect("valid regex"),
        }
    }

    /// Apply one pattern to every line of `content`.
    ///
    /// Produces at most one observation per line (the first match on that
    /// line). Each observation carries the 1-based line number, the matched
    /// text, the given `file` and `description`, and the concept path
    /// `code://<path_segments...>/db/query/construction`.
    fn check_pattern(
        &self,
        content: &str,
        pattern: &Regex,
        path_segments: &[String],
        file: &str,
        description: &str,
    ) -> Vec<Observation> {
        // The concept path is identical for every hit, so build it once
        // instead of re-allocating the segment vector per matching line.
        let mut segments: Vec<&str> = path_segments.iter().map(String::as_str).collect();
        segments.extend(["db", "query", "construction"]);
        let concept_path = format!("code://{}", segments.join("/"));

        content
            .lines()
            .enumerate()
            .filter_map(|(line_idx, line)| {
                let matched = pattern.find(line)?;
                Some(Observation {
                    concept_path: concept_path.clone(),
                    predicate: "construction".to_string(),
                    value: ObjectValue::Text("interpolated".to_string()),
                    file: file.to_string(),
                    // `enumerate` is 0-based; reported line numbers are 1-based.
                    line: line_idx + 1,
                    matched_text: matched.as_str().to_string(),
                    confidence: 0.9, // High confidence but allow for edge cases
                    description: description.to_string(),
                })
            })
            .collect()
    }
}
impl Extractor for SqlInjectionExtractor {
    /// Returns the extractor's name, `sql_injection`.
    fn name(&self) -> &str {
        "sql_injection"
    }

    /// Languages for which `extract` has dedicated patterns.
    fn languages(&self) -> &[Language] {
        &[
            Language::Rust,
            Language::Go,
            Language::Python,
            Language::TypeScript,
            Language::JavaScript,
        ]
    }

    /// Run every pattern registered for `language` over `content`.
    ///
    /// Observations are grouped by pattern, in the order the patterns are
    /// listed below, and by ascending line within each pattern. Languages
    /// without patterns yield an empty vector.
    fn extract(
        &self,
        path_segments: &[String],
        content: &str,
        language: Language,
        file: &str,
    ) -> Vec<Observation> {
        // (pattern, description) pairs to apply, in reporting order.
        let checks = match language {
            Language::Rust => vec![
                (
                    &self.rust_format_sql,
                    "SQL query uses format! macro with interpolation (SQL injection risk)",
                ),
                (
                    &self.rust_concat_sql,
                    "SQL query uses string concatenation (SQL injection risk)",
                ),
            ],
            Language::Go => vec![
                (
                    &self.go_sprintf_sql,
                    "SQL query uses fmt.Sprintf interpolation (SQL injection risk)",
                ),
                (
                    &self.go_concat_sql,
                    "SQL query uses string concatenation (SQL injection risk)",
                ),
            ],
            Language::Python => vec![
                (
                    &self.python_fstring_sql,
                    "SQL query uses f-string interpolation (SQL injection risk)",
                ),
                (
                    &self.python_format_sql,
                    "SQL query uses .format() interpolation (SQL injection risk)",
                ),
                (
                    &self.python_percent_sql,
                    "SQL query uses % formatting (SQL injection risk)",
                ),
            ],
            Language::TypeScript | Language::JavaScript => vec![
                (
                    &self.js_template_sql,
                    "SQL query uses template literal interpolation (SQL injection risk)",
                ),
                (
                    &self.js_concat_sql,
                    "SQL query uses string concatenation (SQL injection risk)",
                ),
            ],
            _ => Vec::new(),
        };

        checks
            .into_iter()
            .flat_map(|(pattern, description)| {
                self.check_pattern(content, pattern, path_segments, file, description)
            })
            .collect()
    }

    /// Regex sources returned to the caller as screening patterns.
    ///
    /// NOTE(review): this list is maintained by hand and does not line up with
    /// the regexes compiled in `new()`: no entry contains a backtick (template
    /// literals), the `.format(`, `%` and `+` entries expect the keyword
    /// *after* the operator whereas the extraction regexes expect the SQL
    /// literal first, and `WHERE` is never listed. If callers use these as a
    /// pre-filter before `extract`, some detectable lines would be skipped —
    /// confirm against the `Extractor` trait contract.
    fn screening_patterns(&self) -> Vec<&str> {
        vec![
            r"(?i)format!.*SELECT|format!.*INSERT|format!.*UPDATE|format!.*DELETE",
            r"(?i)Sprintf.*SELECT|Sprintf.*INSERT|Sprintf.*UPDATE",
            r#"(?i)f".*SELECT|f".*INSERT|f".*UPDATE|f".*DELETE"#,
            r"(?i)\.format\(.*SELECT|\.format\(.*INSERT",
            r"(?i)%.*SELECT|%.*INSERT",
            r"(?i)\+.*SELECT|\+.*INSERT|\+.*UPDATE",
        ]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh extractor over `content`, treating it as a file named
    /// `file` written in `language`, with `segment` as the only path segment.
    fn observe(segment: &str, content: &str, language: Language, file: &str) -> Vec<Observation> {
        SqlInjectionExtractor::new().extract(&[segment.to_string()], content, language, file)
    }

    /// The value every observation from this extractor is expected to carry.
    fn interpolated() -> ObjectValue {
        ObjectValue::Text("interpolated".to_string())
    }

    #[test]
    fn test_rust_format_sql() {
        let content = r#"
    let query = format!("SELECT * FROM users WHERE id = {}", user_id);
    "#;
        let claims = observe("rust", content, Language::Rust, "src/db.rs");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_go_sprintf_sql() {
        let content = r#"
    query := fmt.Sprintf("SELECT * FROM users WHERE name = '%s'", name)
    "#;
        let claims = observe("go", content, Language::Go, "db.go");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_python_fstring_sql() {
        let content = r#"
    query = f"SELECT * FROM users WHERE id = {user_id}"
    "#;
        let claims = observe("python", content, Language::Python, "db.py");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_python_format_sql() {
        let content = r#"
    query = "DELETE FROM users WHERE id = {}".format(user_id)
    "#;
        let claims = observe("python", content, Language::Python, "db.py");
        assert_eq!(claims.len(), 1);
    }

    #[test]
    fn test_python_percent_sql() {
        let content = r#"
    query = "SELECT * FROM users WHERE id = %s" % user_id
    "#;
        let claims = observe("python", content, Language::Python, "db.py");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_js_template_literal_sql() {
        let content = r#"
    const query = `SELECT * FROM users WHERE email = '${email}'`;
    "#;
        let claims = observe("js", content, Language::JavaScript, "db.js");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_go_concat_sql() {
        let content = r#"
    query := "SELECT * FROM users WHERE id = " + id
    "#;
        let claims = observe("go", content, Language::Go, "db.go");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_js_concat_sql() {
        let content = r#"
    const query = "SELECT * FROM users WHERE id = " + id;
    "#;
        let claims = observe("js", content, Language::JavaScript, "db.js");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].value, interpolated());
    }

    #[test]
    fn test_observation_location_metadata() {
        // The raw string starts with a newline, so the statement is on line 2.
        let content = r#"
    let query = format!("SELECT * FROM users WHERE id = {}", user_id);
    "#;
        let claims = observe("rust", content, Language::Rust, "src/db.rs");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].line, 2);
        assert_eq!(claims[0].file, "src/db.rs");
        assert_eq!(claims[0].concept_path, "code://rust/db/query/construction");
        assert_eq!(claims[0].predicate, "construction");
    }

    #[test]
    fn test_no_false_positives_parameterized() {
        // Proper parameterized query in Rust (sqlx style)
        let content = r#"
    let users = sqlx::query!("SELECT * FROM users WHERE id = $1", user_id)
        .fetch_all(&pool)
        .await?;
    "#;
        let claims = observe("rust", content, Language::Rust, "src/db.rs");
        // Should not flag parameterized queries
        assert!(claims.is_empty());
    }

    #[test]
    fn test_no_false_positives_prepared() {
        // Prepared statement in Python
        let content = r#"
    cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,))
    "#;
        let claims = observe("python", content, Language::Python, "db.py");
        // Should not flag prepared statements
        assert!(claims.is_empty());
    }

    #[test]
    fn test_no_false_positives_dbapi_placeholder() {
        // `%s` here is a driver placeholder: the values travel as a separate
        // argument and the `%` operator is never applied to the literal.
        let content = r#"
    cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
    "#;
        let claims = observe("python", content, Language::Python, "db.py");
        assert!(claims.is_empty());
    }
}