stemedb/applications/aphoria/src/llm/prompts.rs
jordan 28fc3b5391 feat(aphoria): add C language support and streamline documentation
Add Language::C variant with file detection (.c, Makefile, CMakeLists.txt)
and integration across prompts, regex_gen, and path_mapper. Simplify
README and guides to be more concise and scannable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 03:02:33 -07:00

206 lines
6.5 KiB
Rust

//! LLM prompt templates and language conversion utilities.
use crate::types::Language;
/// Default system prompt when no vocabulary is provided.
///
/// The prompt asks the model to act as a security code analyzer, lists the
/// categories of security-relevant settings to look for, and describes the
/// fields expected for every claim: `subject`, `predicate`, `value`,
/// `value_type`, `line`, `matched_text`, `confidence` and `description`.
///
/// The model is instructed to reply with JSON only (no markdown), shaped as an
/// object with a single `claims` array, and to return `{"claims": []}` when
/// nothing is found.
pub const DEFAULT_SYSTEM_PROMPT: &str = r#"You are a security code analyzer. Extract security-relevant claims from the provided code.
Focus on:
- TLS/SSL configuration (verification, minimum versions, cipher suites)
- Authentication settings (password policies, session management, MFA)
- Cryptography (algorithms, key sizes, modes, IVs)
- Input validation (SQL injection, command injection, XSS)
- API security (rate limiting, CORS, CSRF)
- Secrets management (hardcoded credentials, API keys)
- Configuration issues (debug modes, verbose errors)
For each claim found, provide:
- subject: A normalized concept path (e.g., "tls/cert_verification", "auth/password_min_length")
- predicate: The aspect being claimed (e.g., "enabled", "min_length", "algorithm")
- value: The actual value found
- value_type: One of "text", "number", "boolean"
- line: Line number where found (1-indexed)
- matched_text: The exact code that contains this claim (single line)
- confidence: How confident you are (0.0-1.0)
- description: Brief explanation of the security implications
Respond with JSON only, no markdown:
{
"claims": [
{
"subject": "tls/cert_verification",
"predicate": "enabled",
"value": false,
"value_type": "boolean",
"line": 42,
"matched_text": "verify=False",
"confidence": 0.95,
"description": "TLS certificate verification disabled, vulnerable to MITM attacks"
}
]
}
If no security claims are found, return: {"claims": []}"#;
/// Returns the lowercase concept-path prefix associated with `language`.
///
/// `Language::Python` and `Language::PythonManifest` both yield `"python"`;
/// every other variant has its own distinct prefix. `Language::Unknown`
/// yields `"unknown"`.
pub fn language_to_prefix(language: Language) -> &'static str {
    match language {
        // Programming languages.
        Language::C => "c",
        Language::Cpp => "cpp",
        Language::CSharp => "csharp",
        Language::Go => "go",
        Language::Java => "java",
        Language::JavaScript => "javascript",
        Language::Php => "php",
        Language::Python | Language::PythonManifest => "python",
        Language::Ruby => "ruby",
        Language::Rust => "rust",
        Language::TypeScript => "typescript",

        // Configuration and data formats.
        Language::Dotenv => "env",
        Language::Ini => "ini",
        Language::Json => "json",
        Language::Properties => "properties",
        Language::Toml => "toml",
        Language::Yaml => "yaml",

        // Build, packaging and infrastructure files.
        Language::CargoManifest => "cargo",
        Language::Docker => "docker",
        Language::GoMod => "gomod",
        Language::NpmManifest => "npm",
        Language::Terraform => "terraform",

        Language::Unknown => "unknown",
    }
}
/// Returns the human-readable display name for `language`.
///
/// The names use conventional capitalisation (for example `"C++"`, `"C#"`,
/// `"PHP"`); `Language::Unknown` is rendered as `"Unknown"`.
pub fn language_to_name(language: Language) -> &'static str {
    match language {
        // Programming languages.
        Language::C => "C",
        Language::Cpp => "C++",
        Language::CSharp => "C#",
        Language::Go => "Go",
        Language::Java => "Java",
        Language::JavaScript => "JavaScript",
        Language::Php => "PHP",
        Language::Python => "Python",
        Language::Ruby => "Ruby",
        Language::Rust => "Rust",
        Language::TypeScript => "TypeScript",

        // Configuration and data formats.
        Language::Dotenv => "Environment file",
        Language::Ini => "INI",
        Language::Json => "JSON",
        Language::Properties => "Properties",
        Language::Toml => "TOML",
        Language::Yaml => "YAML",

        // Build, packaging and infrastructure files.
        Language::CargoManifest => "Cargo manifest",
        Language::Docker => "Dockerfile",
        Language::GoMod => "Go module",
        Language::NpmManifest => "NPM manifest",
        Language::PythonManifest => "Python manifest",
        Language::Terraform => "Terraform",

        Language::Unknown => "Unknown",
    }
}
/// Returns the tag used to label a code block holding `language` content.
///
/// Despite the function name, the returned values are language identifiers
/// such as `"python"`, `"dockerfile"` or `"hcl"` rather than literal file
/// extensions. The manifest variants share a tag with another variant:
/// `CargoManifest` and `PythonManifest` map to `"toml"`, `NpmManifest` to
/// `"json"` and `GoMod` to `"go"`. `Language::Unknown` yields an empty string.
pub fn language_to_extension(language: Language) -> &'static str {
    match language {
        // Programming languages.
        Language::C => "c",
        Language::Cpp => "cpp",
        Language::CSharp => "csharp",
        Language::Go | Language::GoMod => "go",
        Language::Java => "java",
        Language::JavaScript => "javascript",
        Language::Php => "php",
        Language::Python => "python",
        Language::Ruby => "ruby",
        Language::Rust => "rust",
        Language::TypeScript => "typescript",

        // Configuration and data formats, including manifests written in them.
        Language::Dotenv => "env",
        Language::Ini => "ini",
        Language::Json | Language::NpmManifest => "json",
        Language::Properties => "properties",
        Language::Toml | Language::CargoManifest | Language::PythonManifest => "toml",
        Language::Yaml => "yaml",

        // Infrastructure files.
        Language::Docker => "dockerfile",
        Language::Terraform => "hcl",

        Language::Unknown => "",
    }
}
/// Extract the JSON payload from an LLM response.
///
/// Models do not always honour a "JSON only" instruction, so the trimmed
/// response is examined in the following order and the first match wins:
///
/// 1. It starts with `{`: it is treated as a JSON object. Anything after the
///    last `}` (for example a trailing remark) is dropped.
/// 2. It contains a Markdown code fence (three backticks) tagged `json` that
///    is later closed: the trimmed text between the two fences is returned.
/// 3. It contains any other closed code fence: the rest of the opening line
///    (the optional language tag) is skipped and the trimmed text up to the
///    closing fence is returned.
/// 4. Otherwise, if it contains a `{` followed later by a `}`, the slice from
///    the first `{` to the last `}` is returned. This recovers an object that
///    is wrapped in prose or sits inside a fence that was never closed.
///    Responses starting with `[` or `"` are exempt so that a JSON array or
///    string is never cut down to an inner object.
/// 5. Failing all of the above, the trimmed response is returned unchanged.
///
/// The returned slice always borrows from `response`; nothing is allocated and
/// no JSON validation is performed.
pub fn extract_json(response: &str) -> &str {
    const FENCE: &str = "```";
    const JSON_FENCE: &str = "```json";

    /// Slice of `text` from its first `{` to its last `}`, when they occur in
    /// that order.
    fn object_span(text: &str) -> Option<&str> {
        let open = text.find('{')?;
        let close = text.rfind('}')?;
        // Both delimiters are single-byte ASCII, so the inclusive range always
        // lands on character boundaries.
        (open < close).then(|| &text[open..=close])
    }

    let trimmed = response.trim();

    // Already a JSON object: only strip whatever trails the final brace. For
    // well-formed JSON the final brace is the last character, so this is a
    // no-op in the common case.
    if trimmed.starts_with('{') {
        return object_span(trimmed).unwrap_or(trimmed);
    }

    // Explicitly tagged JSON code block.
    if let Some((_, after_marker)) = trimmed.split_once(JSON_FENCE) {
        if let Some((body, _)) = after_marker.split_once(FENCE) {
            return body.trim();
        }
    }

    // Generic code block.
    if let Some((_, after_marker)) = trimmed.split_once(FENCE) {
        // Skip the remainder of the opening line (language identifier, if any).
        let content = after_marker
            .split_once('\n')
            .map_or(after_marker, |(_, rest)| rest);
        if let Some((body, _)) = content.split_once(FENCE) {
            return body.trim();
        }
    }

    // A top-level JSON array or string may legitimately contain braces; leave
    // those responses exactly as they are.
    if trimmed.starts_with(|c: char| c == '[' || c == '"') {
        return trimmed;
    }

    // Last resort: an object embedded in prose or in an unterminated fence.
    object_span(trimmed).unwrap_or(trimmed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// JSON payload every `extract_json` test expects to get back.
    const EMPTY_CLAIMS: &str = r#"{"claims": []}"#;

    #[test]
    fn test_extract_json_plain() {
        // A bare JSON object must come back exactly as it went in.
        assert_eq!(extract_json(EMPTY_CLAIMS), EMPTY_CLAIMS);
    }

    #[test]
    fn test_extract_json_markdown_code_block() {
        // JSON inside a `json`-tagged fence, with prose before and after it.
        let response =
            "Here's the analysis:\n```json\n{\"claims\": []}\n```\nThat's all I found.";
        assert_eq!(extract_json(response), EMPTY_CLAIMS);
    }

    #[test]
    fn test_extract_json_generic_code_block() {
        // JSON inside a fence that carries no language tag.
        let response = "```\n{\"claims\": []}\n```";
        assert_eq!(extract_json(response), EMPTY_CLAIMS);
    }

    #[test]
    fn test_language_to_prefix() {
        let rust_prefix = language_to_prefix(Language::Rust);
        let python_prefix = language_to_prefix(Language::Python);
        let go_prefix = language_to_prefix(Language::Go);

        assert_eq!(rust_prefix, "rust");
        assert_eq!(python_prefix, "python");
        assert_eq!(go_prefix, "go");
    }
}