Add Language::C variant with file detection (.c, Makefile, CMakeLists.txt) and integration across prompts, regex_gen, and path_mapper. Simplify README and guides to be more concise and scannable. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
206 lines
6.5 KiB
Rust
206 lines
6.5 KiB
Rust
//! LLM prompt templates, language conversion utilities, and JSON extraction from model responses.
|
|
|
|
use crate::types::Language;
|
|
|
|
/// Default system prompt when no vocabulary is provided.
///
/// The prompt tells the model to act as a security code analyzer, lists the
/// categories to focus on and the fields required for each claim, and asks for a
/// JSON-only reply (`{"claims": []}` when nothing is found).
|
|
pub const DEFAULT_SYSTEM_PROMPT: &str = r#"You are a security code analyzer. Extract security-relevant claims from the provided code.
|
|
|
|
Focus on:
|
|
- TLS/SSL configuration (verification, minimum versions, cipher suites)
|
|
- Authentication settings (password policies, session management, MFA)
|
|
- Cryptography (algorithms, key sizes, modes, IVs)
|
|
- Input validation (SQL injection, command injection, XSS)
|
|
- API security (rate limiting, CORS, CSRF)
|
|
- Secrets management (hardcoded credentials, API keys)
|
|
- Configuration issues (debug modes, verbose errors)
|
|
|
|
For each claim found, provide:
|
|
- subject: A normalized concept path (e.g., "tls/cert_verification", "auth/password_min_length")
|
|
- predicate: The aspect being claimed (e.g., "enabled", "min_length", "algorithm")
|
|
- value: The actual value found
|
|
- value_type: One of "text", "number", "boolean"
|
|
- line: Line number where found (1-indexed)
|
|
- matched_text: The exact code that contains this claim (single line)
|
|
- confidence: How confident you are (0.0-1.0)
|
|
- description: Brief explanation of the security implications
|
|
|
|
Respond with JSON only, no markdown:
|
|
{
|
|
"claims": [
|
|
{
|
|
"subject": "tls/cert_verification",
|
|
"predicate": "enabled",
|
|
"value": false,
|
|
"value_type": "boolean",
|
|
"line": 42,
|
|
"matched_text": "verify=False",
|
|
"confidence": 0.95,
|
|
"description": "TLS certificate verification disabled, vulnerable to MITM attacks"
|
|
}
|
|
]
|
|
}
|
|
|
|
If no security claims are found, return: {"claims": []}"#;
|
|
|
|
/// Convert Language enum to a concept path prefix.
|
|
pub fn language_to_prefix(language: Language) -> &'static str {
|
|
match language {
|
|
Language::Rust => "rust",
|
|
Language::Go => "go",
|
|
Language::Python => "python",
|
|
Language::JavaScript => "javascript",
|
|
Language::TypeScript => "typescript",
|
|
Language::C => "c",
|
|
Language::Cpp => "cpp",
|
|
Language::Java => "java",
|
|
Language::Php => "php",
|
|
Language::Ruby => "ruby",
|
|
Language::CSharp => "csharp",
|
|
Language::Toml => "toml",
|
|
Language::Yaml => "yaml",
|
|
Language::Json => "json",
|
|
Language::Ini => "ini",
|
|
Language::Properties => "properties",
|
|
Language::Docker => "docker",
|
|
Language::Dotenv => "env",
|
|
Language::CargoManifest => "cargo",
|
|
Language::GoMod => "gomod",
|
|
Language::NpmManifest => "npm",
|
|
Language::PythonManifest => "python",
|
|
Language::Terraform => "terraform",
|
|
Language::Unknown => "unknown",
|
|
}
|
|
}
|
|
|
|
/// Convert Language enum to human-readable name.
|
|
pub fn language_to_name(language: Language) -> &'static str {
|
|
match language {
|
|
Language::Rust => "Rust",
|
|
Language::Go => "Go",
|
|
Language::Python => "Python",
|
|
Language::JavaScript => "JavaScript",
|
|
Language::TypeScript => "TypeScript",
|
|
Language::C => "C",
|
|
Language::Cpp => "C++",
|
|
Language::Java => "Java",
|
|
Language::Php => "PHP",
|
|
Language::Ruby => "Ruby",
|
|
Language::CSharp => "C#",
|
|
Language::Toml => "TOML",
|
|
Language::Yaml => "YAML",
|
|
Language::Json => "JSON",
|
|
Language::Ini => "INI",
|
|
Language::Properties => "Properties",
|
|
Language::Docker => "Dockerfile",
|
|
Language::Dotenv => "Environment file",
|
|
Language::CargoManifest => "Cargo manifest",
|
|
Language::GoMod => "Go module",
|
|
Language::NpmManifest => "NPM manifest",
|
|
Language::PythonManifest => "Python manifest",
|
|
Language::Terraform => "Terraform",
|
|
Language::Unknown => "Unknown",
|
|
}
|
|
}
|
|
|
|
/// Convert Language enum to file extension for code block.
|
|
pub fn language_to_extension(language: Language) -> &'static str {
|
|
match language {
|
|
Language::Rust => "rust",
|
|
Language::Go => "go",
|
|
Language::Python => "python",
|
|
Language::JavaScript => "javascript",
|
|
Language::TypeScript => "typescript",
|
|
Language::C => "c",
|
|
Language::Cpp => "cpp",
|
|
Language::Java => "java",
|
|
Language::Php => "php",
|
|
Language::Ruby => "ruby",
|
|
Language::CSharp => "csharp",
|
|
Language::Toml => "toml",
|
|
Language::Yaml => "yaml",
|
|
Language::Json => "json",
|
|
Language::Ini => "ini",
|
|
Language::Properties => "properties",
|
|
Language::Docker => "dockerfile",
|
|
Language::Dotenv => "env",
|
|
Language::CargoManifest => "toml",
|
|
Language::GoMod => "go",
|
|
Language::NpmManifest => "json",
|
|
Language::PythonManifest => "toml",
|
|
Language::Terraform => "hcl",
|
|
Language::Unknown => "",
|
|
}
|
|
}
|
|
|
|
/// Extract the JSON payload from a model response.
///
/// The response is trimmed and then matched against these shapes, in order:
///
/// 1. It already starts with `{`: returned as-is.
/// 2. It contains a `` ```json `` fenced block: the block's trimmed content.
/// 3. It contains any other fenced block: the trimmed content after the
///    opening fence line (which may carry a language identifier).
/// 4. It contains a `{ ... }` span surrounded by prose, or an opening fence that
///    was never closed: the span from the first `{` to the last `}`.
///
/// If none of these apply, the trimmed response is returned unchanged, so the
/// caller's JSON parser reports the failure. A reply that starts with `[` is
/// never reduced by step 4, so a top-level array stays intact.
///
/// The returned slice always borrows from `response`; nothing is allocated.
pub fn extract_json(response: &str) -> &str {
    let trimmed = response.trim();

    // Fast path: the model followed instructions and sent a bare object.
    if trimmed.starts_with('{') {
        return trimmed;
    }

    // Explicitly tagged block: ```json ... ```
    if let Some((_, rest)) = trimmed.split_once("```json") {
        if let Some((body, _)) = rest.split_once("```") {
            return body.trim();
        }
    }

    // Untagged or differently tagged block: drop the rest of the fence line
    // (the language identifier, if any) before looking for the closing fence.
    if let Some((_, rest)) = trimmed.split_once("```") {
        let content = rest.split_once('\n').map_or(rest, |(_, tail)| tail);
        if let Some((body, _)) = content.split_once("```") {
            return body.trim();
        }
    }

    // No complete fence. Models sometimes wrap the object in prose or forget to
    // close the fence; recover the outermost `{ ... }` span in that case.
    if !trimmed.starts_with('[') {
        if let (Some(open), Some(close)) = (trimmed.find('{'), trimmed.rfind('}')) {
            if open < close {
                // Both delimiters are single-byte ASCII, so the inclusive
                // range always ends on a char boundary.
                return &trimmed[open..=close];
            }
        }
    }

    trimmed
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// The payload every extraction test expects to get back.
    const EMPTY_CLAIMS: &str = r#"{"claims": []}"#;

    /// A bare JSON object is returned untouched.
    #[test]
    fn test_extract_json_plain() {
        assert_eq!(extract_json(EMPTY_CLAIMS), EMPTY_CLAIMS);
    }

    /// A `json`-tagged fence surrounded by prose yields only the fenced payload.
    #[test]
    fn test_extract_json_markdown_code_block() {
        let reply = r#"Here's the analysis:

```json
{"claims": []}
```

That's all I found."#;
        let extracted = extract_json(reply);
        assert_eq!(extracted, EMPTY_CLAIMS);
    }

    /// A fence without a language tag is handled the same way.
    #[test]
    fn test_extract_json_generic_code_block() {
        let reply = r#"```
{"claims": []}
```"#;
        let extracted = extract_json(reply);
        assert_eq!(extracted, EMPTY_CLAIMS);
    }

    /// Spot-check a few language-to-prefix mappings.
    #[test]
    fn test_language_to_prefix() {
        let expectations = [
            (Language::Rust, "rust"),
            (Language::Python, "python"),
            (Language::Go, "go"),
        ];
        for (language, prefix) in expectations {
            assert_eq!(language_to_prefix(language), prefix);
        }
    }
}
|