stemedb/applications/aphoria/src/llm/prompt.rs
jordan 157dbbb9eb feat: Complete Aphoria Phase 8-9 + UAT suite (90/90 tests passing)
## Phase 8: Enterprise Extractor Improvements 
- 14 security extractors (TLS, JWT, SQL injection, XSS, etc.)
- 10 framework-specific extractors (Spring, Django, Rails, etc.)
- Config file security detection (YAML, TOML)

## Phase 9: Autonomous Extractor Generation 
- Shadow mode executor with TP/FP tracking
- Graduation pipeline with confidence thresholds
- Auto-rollback on regression detection
- Cross-project pattern syncing

## UAT Suite Complete (14 scripts, 90 tests)
- test-core-detection.sh (6 tests)
- test-declarative-extractors.sh (5 tests)
- test-domain-frameworks.sh (5 tests)
- test-domain-unreal.sh (3 tests)
- test-llm-extraction.sh (6 tests)
- test-eval-harness.sh (5 tests)
- test-cross-language.sh (3 tests)
- test-precommit-performance.sh (4 tests)
- test-output-formats.sh (8 tests)
- test-drift-detection.sh (6 tests)
- test-exit-codes.sh (12 tests)
+ 3 more scripts

## Other Changes
- Updated roadmap to mark Phase 8-9 complete
- Added .gitignore entries for build artifacts
- Updated pre-commit: 800 line limit, exclude tests/data/cmd

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 22:50:55 -07:00

160 lines
5.6 KiB
Rust

//! Dynamic prompt builder with ontology vocabulary injection.
//!
//! Builds system prompts that constrain LLM output to use authority-compatible
//! concept paths, ensuring conflict detection works correctly.
use crate::llm::ontology::OntologyVocabulary;
/// System prompt template with vocabulary placeholder.
///
/// The text contains the literal marker `{vocabulary_section}` under the
/// "VALID CONCEPT VOCABULARY" heading; `build_system_prompt` and
/// `build_system_prompt_from_section` substitute the rendered vocabulary
/// table for that marker via `str::replace`.
///
/// Besides the vocabulary, the template spells out:
/// - the claim extraction rules (subject path, predicate, value type, and a
///   minimum confidence of 0.7),
/// - three worked examples, and
/// - the expected JSON output shape (`{"claims": [...]}`), including the
///   empty-result form `{"claims": []}`.
///
/// Written as a raw string (`r#"..."#`) so the embedded JSON double quotes do
/// not need escaping. Every character inside the literal is sent to the model
/// verbatim, so edits here change runtime behavior.
const SYSTEM_PROMPT_TEMPLATE: &str = r#"You are a security code analyzer. Extract security-relevant claims from the provided code.
CRITICAL INSTRUCTION: You MUST use ONLY the concept paths listed in the VALID CONCEPT VOCABULARY table below.
Do NOT invent new paths. If the code doesn't match any known concept, return an empty claims array.
## VALID CONCEPT VOCABULARY
{vocabulary_section}
## CLAIM EXTRACTION RULES
1. **Subject Path**: MUST be EXACTLY one of the leaf paths from the table above
2. **Predicate**: MUST EXACTLY match the predicate for that concept from the table
3. **Value Type**: Use the value type specified in the table (boolean, text, number)
4. **Confidence**: Only report claims with confidence >= 0.7
## EXAMPLES
### Example 1: Python with verify=False
Code: `requests.get(url, verify=False)`
If vocabulary contains `tls/cert_verification | enabled | boolean`:
```json
{"subject": "tls/cert_verification", "predicate": "enabled", "value": false, "value_type": "boolean"}
```
### Example 2: Hardcoded API key
Code: `API_KEY = "sk-live-abc123"`
If vocabulary contains `secrets/api_key | hardcoded | boolean`:
```json
{"subject": "secrets/api_key", "predicate": "hardcoded", "value": true, "value_type": "boolean"}
```
### Example 3: JWT with algorithm none
Code: `algorithms: ['HS256', 'none']`
If vocabulary contains `jwt/algorithms | allows_none | boolean`:
```json
{"subject": "jwt/algorithms", "predicate": "allows_none", "value": true, "value_type": "boolean"}
```
## OUTPUT FORMAT
For each security claim found, provide:
- subject: A leaf path from the vocabulary table (MUST match exactly)
- predicate: The predicate for that concept (MUST match exactly)
- value: The actual value found in the code
- value_type: One of "text", "number", "boolean" (must match the concept's expected type)
- line: Line number where found (1-indexed)
- matched_text: The exact code snippet containing this claim (single line)
- confidence: How confident you are (0.0-1.0)
- description: Brief explanation of the security implications
Respond with JSON only, no markdown code blocks:
{
"claims": [
{
"subject": "tls/cert_verification",
"predicate": "enabled",
"value": false,
"value_type": "boolean",
"line": 42,
"matched_text": "verify=False",
"confidence": 0.95,
"description": "TLS certificate verification disabled, vulnerable to MITM attacks"
}
]
}
If no security claims matching the vocabulary are found, return: {"claims": []}"#;
/// Produce the full system prompt for a given ontology vocabulary.
///
/// The vocabulary is rendered with `OntologyVocabulary::to_prompt_section`
/// and the result is substituted for the `{vocabulary_section}` marker in
/// `SYSTEM_PROMPT_TEMPLATE`.
///
/// Returns the completed prompt as an owned `String`.
pub fn build_system_prompt(vocabulary: &OntologyVocabulary) -> String {
    SYSTEM_PROMPT_TEMPLATE.replace("{vocabulary_section}", &vocabulary.to_prompt_section())
}
/// Produce the full system prompt from an already-rendered vocabulary section.
///
/// `vocabulary_section` is inserted verbatim in place of the
/// `{vocabulary_section}` marker in `SYSTEM_PROMPT_TEMPLATE`. Use this when
/// the vocabulary text was computed ahead of time or obtained from somewhere
/// other than an `OntologyVocabulary`.
///
/// Returns the completed prompt as an owned `String`.
pub fn build_system_prompt_from_section(vocabulary_section: &str) -> String {
    // Marker text that the template reserves for the vocabulary table.
    const PLACEHOLDER: &str = "{vocabulary_section}";
    SYSTEM_PROMPT_TEMPLATE.replace(PLACEHOLDER, vocabulary_section)
}
#[cfg(test)]
mod tests {
    use super::*;
    use stemedb_core::types::{Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SourceClass};

    /// Create a fixture `Assertion` for the given subject, predicate and value.
    ///
    /// All remaining fields are filled with fixed placeholder values; the
    /// source metadata is a small JSON object serialized to bytes.
    fn make_test_assertion(subject: &str, predicate: &str, value: ObjectValue) -> Assertion {
        // Serialization failure is tolerated: `.ok()` turns it into `None`.
        let metadata_bytes = serde_json::to_vec(&serde_json::json!({
            "description": "Test description",
            "source": "test",
        }))
        .ok();

        Assertion {
            subject: subject.to_owned(),
            predicate: predicate.to_owned(),
            object: value,
            parent_hash: None,
            source_hash: [0u8; 32],
            source_class: SourceClass::Clinical,
            visual_hash: None,
            epoch: None,
            source_metadata: metadata_bytes,
            lifecycle: LifecycleStage::Approved,
            signatures: Vec::new(),
            confidence: 1.0,
            timestamp: 0,
            hlc_timestamp: HlcTimestamp::default(),
            vector: None,
        }
    }

    /// The prompt built from a vocabulary must carry both the vocabulary
    /// entries and the fixed instruction text of the template.
    #[test]
    fn test_build_system_prompt() {
        let tls_assertion = make_test_assertion(
            "rfc://5246/tls/cert_verification",
            "enabled",
            ObjectValue::Boolean(true),
        );
        let rate_limit_assertion = make_test_assertion(
            "owasp://rate_limit/enabled",
            "enabled",
            ObjectValue::Boolean(true),
        );
        let assertions = vec![tls_assertion, rate_limit_assertion];

        let vocab = OntologyVocabulary::from_assertions(&assertions);
        let prompt = build_system_prompt(&vocab);

        let expected_fragments = [
            // Vocabulary entries are included.
            "tls/cert_verification",
            "rate_limit/enabled",
            // Critical instruction is present.
            "CRITICAL INSTRUCTION",
            "MUST use ONLY the concept paths",
            // Output format instructions are present.
            "Respond with JSON only",
        ];
        for fragment in expected_fragments {
            assert!(prompt.contains(fragment));
        }
    }

    /// A raw section string must be embedded verbatim alongside the fixed
    /// instruction text.
    #[test]
    fn test_build_system_prompt_from_section() {
        let section = "| test/path | enabled | boolean | true | Test |";
        let prompt = build_system_prompt_from_section(section);

        for fragment in ["test/path", "CRITICAL INSTRUCTION"] {
            assert!(prompt.contains(fragment));
        }
    }
}