stemedb/applications/aphoria/src/report/sarif.rs
jml e73bf3c4b7 feat(aphoria): add --show-claims flag to display all extracted claims
Implements the --show-claims feature requested by users who need to verify
extractors are working correctly and debug false negatives.

Changes:
- Add `claims: Option<Vec<ExtractedClaim>>` field to ScanResult
- Add `--show-claims` CLI flag to scan command
- Add `show_claims: bool` parameter to ScanArgs
- Populate claims in scanner when flag is set (sorted by file, then line)
- Display claims in all output formats:
  * Table: New "Extracted Claims" section with concept/value/file/line/confidence
  * JSON: Top-level `claims` array with full claim details
  * Markdown: "## Extracted Claims" section with table
  * SARIF: Informational-level results (level: "note") for IDE integration

User outcome:
- `aphoria scan . --show-claims` displays all claims (not just conflicts)
- Users can verify extractors detected their code patterns
- Users can debug false negatives by seeing what WAS extracted
- Builds trust through transparency

Quality:
- Zero breaking changes (opt-in flag, backward compatible)
- All tests passing (943 passed)
- Clippy clean (no warnings)
- Manual testing verified all 4 output formats

Addresses user feedback from /home/jml/Workspace/maxwell/.aphoria/.notes-for-aphoria-team

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 00:39:54 +00:00

508 lines
20 KiB
Rust

//! SARIF output format for CI integration.
//!
//! SARIF (Static Analysis Results Interchange Format) v2.1.0 is supported by:
//! - GitHub Code Scanning
//! - GitLab SAST
//! - Azure DevOps
//!
//! Reference: <https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html>
use super::{object_value_display, verdict_label, ReportFormatter};
use crate::types::{ScanResult, Verdict};
/// SARIF report formatter for CI integration.
pub struct SarifReport;
impl ReportFormatter for SarifReport {
fn format(&self, result: &ScanResult) -> String {
// Build SARIF rules from unique conflict types
let mut rules = Vec::new();
let mut rule_indices: std::collections::HashMap<String, usize> =
std::collections::HashMap::new();
for conflict in &result.conflicts {
let rule_id = format!("aphoria/{}", extract_rule_id(&conflict.claim.concept_path));
if !rule_indices.contains_key(&rule_id) {
let idx = rules.len();
rule_indices.insert(rule_id.clone(), idx);
let level = match conflict.verdict {
Verdict::Block => "error",
Verdict::Flag | Verdict::Drift => "warning",
Verdict::Pass | Verdict::Ack => "note",
};
// Generate help URI based on RFC citation if available
let help_uri = conflict
.conflicts
.first()
.and_then(|s| s.rfc_citation.as_ref())
.map(|citation| {
if citation.starts_with("RFC ") {
let rfc_num = citation.strip_prefix("RFC ").unwrap_or("");
format!("https://www.rfc-editor.org/rfc/rfc{}", rfc_num)
} else if citation.starts_with("OWASP") {
"https://owasp.org/www-project-top-ten/".to_string()
} else {
format!(
"https://github.com/orchard9/aphoria/rules/{}",
extract_rule_id(&conflict.claim.concept_path)
)
}
})
.unwrap_or_else(|| {
format!(
"https://github.com/orchard9/aphoria/rules/{}",
extract_rule_id(&conflict.claim.concept_path)
)
});
rules.push(serde_json::json!({
"id": rule_id,
"shortDescription": {
"text": conflict.claim.description,
},
"defaultConfiguration": {
"level": level,
},
"helpUri": help_uri,
}));
}
}
// Build SARIF results
let results: Vec<serde_json::Value> = result
.conflicts
.iter()
.map(|conflict| {
let rule_id = format!("aphoria/{}", extract_rule_id(&conflict.claim.concept_path));
let rule_index = rule_indices.get(&rule_id).copied().unwrap_or(0);
let level = match conflict.verdict {
Verdict::Block => "error",
Verdict::Flag | Verdict::Drift => "warning",
Verdict::Pass | Verdict::Ack => "note",
};
// Build message with authoritative source details
let source_details: Vec<String> = conflict
.conflicts
.iter()
.map(|s| {
let mut detail = format!(
"{:?} (Tier {}): {}",
s.source_class,
s.source_class.tier(),
object_value_display(&s.value)
);
// Include policy source info if available
if let Some(ps) = &s.policy_source {
let signer = ps.signer_name.as_deref().unwrap_or(&ps.issuer_hex);
detail.push_str(&format!(
" [Source: {} v{} ({})",
ps.pack_name, ps.pack_version, signer
));
if let Some(contact) = &ps.contact {
detail.push_str(&format!(", Contact: {}", contact));
}
detail.push(']');
}
detail
})
.collect();
let message = format!(
"{}\nYour code: {} = {}\nAuthoritative: {}",
conflict.claim.description,
conflict.claim.predicate,
object_value_display(&conflict.claim.value),
source_details.join("; ")
);
serde_json::json!({
"ruleId": rule_id,
"ruleIndex": rule_index,
"level": level,
"message": {
"text": message,
},
"locations": [{
"physicalLocation": {
"artifactLocation": {
"uri": conflict.claim.file,
"uriBaseId": "%SRCROOT%",
},
"region": {
"startLine": conflict.claim.line,
}
}
}],
"properties": {
"conflict_score": conflict.conflict_score,
"verdict": verdict_label(conflict.verdict),
}
})
})
.collect();
// Add drift rules and results
for drift in &result.drifts {
let rule_id = format!("aphoria/drift/{}", extract_rule_id(&drift.claim.concept_path));
if !rule_indices.contains_key(&rule_id) {
let idx = rules.len();
rule_indices.insert(rule_id.clone(), idx);
rules.push(serde_json::json!({
"id": rule_id,
"shortDescription": {
"text": format!("Value drift detected for {}", drift.claim.concept_path),
},
"defaultConfiguration": {
"level": "warning",
},
"helpUri": "https://github.com/orchard9/aphoria/docs/drift",
}));
}
}
// Add drift results
let drift_results: Vec<serde_json::Value> = result
.drifts
.iter()
.map(|drift| {
let rule_id = format!("aphoria/drift/{}", extract_rule_id(&drift.claim.concept_path));
let rule_index = rule_indices.get(&rule_id).copied().unwrap_or(0);
let message = format!(
"Value changed from prior observation.\nCurrent: {}\nPrior: {} (recorded at {}:{})",
object_value_display(&drift.claim.value),
object_value_display(&drift.prior.value),
drift.prior.file,
drift.prior.line
);
serde_json::json!({
"ruleId": rule_id,
"ruleIndex": rule_index,
"level": "warning",
"message": {
"text": message,
},
"locations": [{
"physicalLocation": {
"artifactLocation": {
"uri": drift.claim.file,
"uriBaseId": "%SRCROOT%",
},
"region": {
"startLine": drift.claim.line,
}
}
}],
"properties": {
"verdict": verdict_label(drift.verdict),
"prior_value": object_value_display(&drift.prior.value),
"prior_timestamp": drift.prior.timestamp,
}
})
})
.collect();
// Add deprecated usage rules and results
for usage in &result.deprecated_usages {
let rule_id = format!("aphoria/deprecated/{}", usage.pattern_name);
if !rule_indices.contains_key(&rule_id) {
let idx = rules.len();
rule_indices.insert(rule_id.clone(), idx);
let level = match usage.severity() {
"OVERDUE" => "error",
"URGENT" => "warning",
_ => "note",
};
rules.push(serde_json::json!({
"id": rule_id,
"shortDescription": {
"text": format!("Deprecated pattern: {}", usage.pattern_name),
},
"fullDescription": {
"text": usage.reason.clone(),
},
"defaultConfiguration": {
"level": level,
},
"helpUri": usage.migration_guide.clone().unwrap_or_else(|| {
"https://github.com/orchard9/aphoria/docs/deprecation".to_string()
}),
}));
}
}
// Add deprecated usage results
let deprecated_results: Vec<serde_json::Value> = result
.deprecated_usages
.iter()
.map(|usage| {
let rule_id = format!("aphoria/deprecated/{}", usage.pattern_name);
let rule_index = rule_indices.get(&rule_id).copied().unwrap_or(0);
let level = match usage.severity() {
"OVERDUE" => "error",
"URGENT" => "warning",
_ => "note",
};
let mut message = format!(
"Deprecated pattern '{}' detected.\nReason: {}",
usage.pattern_name, usage.reason
);
if let Some(ref replacement) = usage.superseded_by {
message.push_str(&format!("\nReplace with: {}", replacement));
}
if let Some(days) = usage.days_until_sunset {
if days < 0 {
message.push_str(&format!("\nSunset: OVERDUE by {} days", -days));
} else {
message.push_str(&format!("\nSunset: {} days remaining", days));
}
}
serde_json::json!({
"ruleId": rule_id,
"ruleIndex": rule_index,
"level": level,
"message": {
"text": message,
},
"locations": [{
"physicalLocation": {
"artifactLocation": {
"uri": usage.file_path,
"uriBaseId": "%SRCROOT%",
},
"region": {
"startLine": usage.line,
}
}
}],
"properties": {
"pattern_id": usage.pattern_id.to_string(),
"severity": usage.severity(),
"days_until_sunset": usage.days_until_sunset,
}
})
})
.collect();
// Add claims if present (as informational-level results)
let claims_results: Vec<serde_json::Value> = if let Some(claims) = &result.claims {
// Add a single rule for all claims
if !claims.is_empty() && !rule_indices.contains_key("aphoria/claim") {
let idx = rules.len();
rule_indices.insert("aphoria/claim".to_string(), idx);
rules.push(serde_json::json!({
"id": "aphoria/claim",
"shortDescription": {
"text": "Extracted claim (no conflict detected)",
},
"defaultConfiguration": {
"level": "note",
},
"helpUri": "https://github.com/orchard9/aphoria/docs/claims",
}));
}
claims
.iter()
.map(|claim| {
let rule_index = rule_indices.get("aphoria/claim").copied().unwrap_or(0);
let message = format!(
"{}\n{} = {}",
claim.description,
claim.predicate,
object_value_display(&claim.value)
);
serde_json::json!({
"ruleId": "aphoria/claim",
"ruleIndex": rule_index,
"level": "note",
"message": {
"text": message,
},
"locations": [{
"physicalLocation": {
"artifactLocation": {
"uri": claim.file,
"uriBaseId": "%SRCROOT%",
},
"region": {
"startLine": claim.line,
}
}
}],
"properties": {
"concept_path": claim.concept_path,
"confidence": claim.confidence,
}
})
})
.collect()
} else {
Vec::new()
};
// Combine all results
let mut all_results = results;
all_results.extend(drift_results);
all_results.extend(deprecated_results);
all_results.extend(claims_results);
let sarif = serde_json::json!({
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/main/sarif-2.1/schema/sarif-schema-2.1.0.json",
"version": "2.1.0",
"runs": [{
"tool": {
"driver": {
"name": "aphoria",
"version": env!("CARGO_PKG_VERSION"),
"informationUri": "https://github.com/orchard9/aphoria",
"rules": rules,
}
},
"results": all_results,
"invocations": [{
"executionSuccessful": true,
"properties": {
"scan_id": result.scan_id,
"files_scanned": result.files_scanned,
"claims_extracted": result.claims_extracted,
"drifts_detected": result.drift_count(),
"deprecated_usages": result.deprecated_usage_count(),
}
}]
}]
});
serde_json::to_string_pretty(&sarif).unwrap_or_else(|_| sarif.to_string())
}
}
/// Extract a rule ID from a concept path.
///
/// The scheme (everything up to the first `://`) and the next two `/`-separated
/// segments (language and project) are dropped; the rest is returned unchanged.
///
/// e.g., `code://rust/myapp/tls/cert_verification` -> `tls/cert_verification`
///
/// Fallbacks:
/// * no `://` in the input: the input is returned as-is;
/// * fewer than three segments after the scheme: everything after the scheme
///   is returned.
///
/// Only the first `://` is treated as the scheme separator, so a tail that itself
/// contains `://` is preserved in full.
fn extract_rule_id(concept_path: &str) -> String {
    match concept_path.split_once("://") {
        // `splitn(3, '/')` yields language, project, and the untouched remainder.
        Some((_scheme, after_scheme)) => after_scheme
            .splitn(3, '/')
            .nth(2)
            .unwrap_or(after_scheme)
            .to_string(),
        None => concept_path.to_string(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{ConflictResult, ConflictingSource, ExtractedClaim};
    use stemedb_core::types::{ObjectValue, SourceClass};

    /// Format `scan` with [`SarifReport`] and parse the output back into JSON.
    fn render(scan: &ScanResult) -> serde_json::Value {
        let output = SarifReport.format(scan);
        serde_json::from_str(&output).expect("valid json")
    }

    /// A scan holding a single blocking conflict: TLS verification disabled in
    /// `src/client.rs:23`, contradicted by an RFC 5246 source.
    fn scan_with_blocking_tls_conflict() -> ScanResult {
        let claim = ExtractedClaim {
            concept_path: "code://rust/testproject/tls/cert_verification".to_string(),
            predicate: "enabled".to_string(),
            value: ObjectValue::Boolean(false),
            file: "src/client.rs".to_string(),
            line: 23,
            matched_text: "danger_accept_invalid_certs(true)".to_string(),
            confidence: 1.0,
            description: "TLS certificate verification disabled".to_string(),
        };
        let authority = ConflictingSource {
            path: "rfc://5246/tls/cert_verification".to_string(),
            source_class: SourceClass::Regulatory,
            value: ObjectValue::Boolean(true),
            confidence: 1.0,
            rfc_citation: Some("RFC 5246".to_string()),
            policy_source: None,
        };
        let conflict = ConflictResult {
            claim,
            conflicts: vec![authority],
            conflict_score: 0.92,
            verdict: Verdict::Block,
            acknowledged: None,
            trace: None,
        };

        ScanResult {
            project: "testproject".to_string(),
            scan_id: "scan-789".to_string(),
            files_scanned: 42,
            claims_extracted: 5,
            conflicts: vec![conflict],
            drifts: vec![],
            format: "sarif".to_string(),
            debug: false,
            observations_recorded: 0,
            timing: None,
            claims: None,
            deprecated_usages: vec![],
        }
    }

    #[test]
    fn test_sarif_structure() {
        let parsed = render(&scan_with_blocking_tls_conflict());
        let run = &parsed["runs"][0];

        // Envelope and tool identity.
        assert_eq!(parsed["version"], "2.1.0");
        assert_eq!(run["tool"]["driver"]["name"], "aphoria");

        // Exactly one rule, named after the concept path tail.
        let rules = run["tool"]["driver"]["rules"].as_array().expect("rules array");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0]["id"], "aphoria/tls/cert_verification");

        // Exactly one result: an error located at the claim's file and line.
        let results = run["results"].as_array().expect("results array");
        assert_eq!(results.len(), 1);
        let finding = &results[0];
        assert_eq!(finding["level"], "error");

        let location = &finding["locations"][0]["physicalLocation"];
        assert_eq!(location["artifactLocation"]["uri"], "src/client.rs");
        assert_eq!(location["region"]["startLine"], 23);
    }

    #[test]
    fn test_sarif_empty() {
        let empty_scan = ScanResult::stub(&std::path::PathBuf::from("."), "sarif");
        let parsed = render(&empty_scan);

        assert_eq!(parsed["version"], "2.1.0");
        let result_count = parsed["runs"][0]["results"].as_array().map(|items| items.len());
        assert_eq!(result_count, Some(0));
    }

    #[test]
    fn test_extract_rule_id() {
        // (concept path, expected rule id)
        let cases = [
            ("code://rust/myapp/tls/cert_verification", "tls/cert_verification"),
            ("code://go/myapp/jwt/audience_validation", "jwt/audience_validation"),
            ("simple", "simple"),
        ];
        for (concept_path, expected) in cases.iter() {
            assert_eq!(extract_rule_id(concept_path), *expected);
        }
    }
}