Claims now flow through StemeDB's append-only knowledge graph instead of mutable TOML files. This resolves all 6 critical claim-bypass code paths:

- Bridge: lossless AuthoredClaim ↔ Assertion round-trip (comparison, status, lifecycle mapping)
- LocalEpisteme: ingest_authored_claim() and fetch_authored_claims() with AUTHORED_CLAIM predicate index
- EpistemeClaimStore: ClaimStore trait backed by StemeDB (append-only delete via deprecation)
- CLI handlers: all claim commands read/write through StemeDB
- Scanner: loads claims from StemeDB with auto-migration fallback to TOML
- Export: new `aphoria claims export` serializes StemeDB claims to TOML/JSON

Also cleans up dead code (EpistemeConfig.url), renames ingest_claims→ingest_observations, fixes ClaimFilter.authority_tier type, adds Draft variant to ClaimStatus, and fixes pre-existing clippy warnings (too_many_arguments, filter_next→rfind).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
252 lines
9.9 KiB
Rust
252 lines
9.9 KiB
Rust
//! Integration tests for wiki corpus import.
|
|
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
|
|
use aphoria::community::PatternAggregator;
|
|
use aphoria::corpus::{import_from_wiki, WikiParser};
|
|
use aphoria::{import_corpus_from_wiki, AphoriaConfig, PatternAggregate};
|
|
use stemedb_storage::{GenericPredicateIndexStore, HybridStore, PredicateIndexStore};
|
|
use tempfile::TempDir;
|
|
|
|
#[tokio::test]
|
|
async fn test_import_from_wiki_basic() {
|
|
// Get wiki fixtures path
|
|
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
let wiki_path = manifest_dir.join("tests/fixtures/wiki");
|
|
|
|
let timestamp = 1706832000;
|
|
let patterns = import_from_wiki(&wiki_path, timestamp).await.expect("import_from_wiki");
|
|
|
|
// Should extract patterns from markdown files
|
|
assert!(!patterns.is_empty(), "Expected patterns to be extracted from wiki files");
|
|
|
|
// Check pattern structure
|
|
for pattern in &patterns {
|
|
assert!(pattern.subject.starts_with("code://*/"), "Subject should be wildcarded");
|
|
assert!(!pattern.predicate.is_empty(), "Predicate should not be empty");
|
|
assert_eq!(pattern.project_count, 1, "Bootstrap count should be 1");
|
|
assert_eq!(pattern.observation_count, 1, "Observation count should be 1");
|
|
assert_eq!(pattern.first_seen, timestamp);
|
|
assert_eq!(pattern.last_seen, timestamp);
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_wiki_pattern_to_storage() {
|
|
// Create temporary storage
|
|
let temp_dir = TempDir::new().expect("tempdir");
|
|
let store_path = temp_dir.path().join("store");
|
|
std::fs::create_dir_all(&store_path).expect("create store dir");
|
|
|
|
let hybrid_store = Arc::new(HybridStore::open(&store_path).expect("open hybrid store"));
|
|
let predicate_index = Arc::new(GenericPredicateIndexStore::new(hybrid_store.clone()));
|
|
|
|
// Import patterns from wiki
|
|
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
let wiki_path = manifest_dir.join("tests/fixtures/wiki");
|
|
let timestamp = 1706832000;
|
|
let patterns = import_from_wiki(&wiki_path, timestamp).await.expect("import_from_wiki");
|
|
|
|
assert!(!patterns.is_empty(), "Should have patterns");
|
|
|
|
// Store patterns using PatternAggregator
|
|
let aggregator = PatternAggregator::new(hybrid_store.clone(), predicate_index.clone());
|
|
let hashes = aggregator.add_patterns(&patterns).await.expect("add_patterns");
|
|
|
|
assert_eq!(hashes.len(), patterns.len(), "All patterns should be stored");
|
|
|
|
// Query patterns back from storage
|
|
let query_result =
|
|
predicate_index.get_by_predicate("pattern_aggregate").await.expect("get_by_predicate");
|
|
|
|
assert_eq!(query_result.len(), patterns.len(), "Should retrieve all stored patterns");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_wiki_parser_extracts_tls_patterns() {
|
|
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
let wiki_path = manifest_dir.join("tests/fixtures/wiki");
|
|
let timestamp = 1706832000;
|
|
let patterns = import_from_wiki(&wiki_path, timestamp).await.expect("import_from_wiki");
|
|
|
|
// Find TLS pattern (parser extracts "tls" from "TLS certificate verification")
|
|
let tls_pattern = patterns.iter().find(|p| p.subject.contains("tls"));
|
|
|
|
assert!(tls_pattern.is_some(), "Should extract TLS pattern");
|
|
|
|
if let Some(pattern) = tls_pattern {
|
|
assert_eq!(pattern.predicate, "enabled", "Predicate should be 'enabled'");
|
|
// Value should be Boolean(true) since "MUST be enabled"
|
|
match &pattern.value {
|
|
aphoria::community::CommunityObjectValue::Boolean(b) => {
|
|
assert!(*b, "TLS should be enabled");
|
|
}
|
|
_ => panic!("Expected Boolean value"),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_wiki_parser_extracts_authentication_patterns() {
|
|
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
let wiki_path = manifest_dir.join("tests/fixtures/wiki");
|
|
let timestamp = 1706832000;
|
|
let patterns = import_from_wiki(&wiki_path, timestamp).await.expect("import_from_wiki");
|
|
|
|
// Find JWT pattern (parser extracts "jwt" from "JWT authentication")
|
|
let jwt_pattern = patterns.iter().find(|p| p.subject.contains("jwt"));
|
|
|
|
assert!(jwt_pattern.is_some(), "Should extract JWT pattern");
|
|
|
|
// Find password hashing pattern
|
|
let password_pattern = patterns.iter().find(|p| p.subject.contains("password"));
|
|
|
|
assert!(password_pattern.is_some(), "Should extract password hashing pattern");
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_wiki_import_deduplication() {
|
|
// Create temporary storage
|
|
let temp_dir = TempDir::new().expect("tempdir");
|
|
let store_path = temp_dir.path().join("store");
|
|
std::fs::create_dir_all(&store_path).expect("create store dir");
|
|
|
|
let hybrid_store = Arc::new(HybridStore::open(&store_path).expect("open hybrid store"));
|
|
let predicate_index = Arc::new(GenericPredicateIndexStore::new(hybrid_store.clone()));
|
|
let aggregator = PatternAggregator::new(hybrid_store.clone(), predicate_index.clone());
|
|
|
|
// Import patterns twice
|
|
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
let wiki_path = manifest_dir.join("tests/fixtures/wiki");
|
|
let timestamp = 1706832000;
|
|
|
|
let patterns1 = import_from_wiki(&wiki_path, timestamp).await.expect("import_from_wiki");
|
|
aggregator.add_patterns(&patterns1).await.expect("add_patterns first");
|
|
|
|
let patterns2 = import_from_wiki(&wiki_path, timestamp).await.expect("import_from_wiki");
|
|
aggregator.add_patterns(&patterns2).await.expect("add_patterns second");
|
|
|
|
// Query patterns - should have entries for both imports
|
|
// (deduplication happens via content-addressed subject)
|
|
let query_result =
|
|
predicate_index.get_by_predicate("pattern_aggregate").await.expect("get_by_predicate");
|
|
|
|
// Both imports should create distinct assertions since they have different timestamps
|
|
// or same content-addressed hashes would overwrite
|
|
assert!(
|
|
query_result.len() >= patterns1.len(),
|
|
"Should have at least as many patterns as first import"
|
|
);
|
|
}
|
|
|
|
/// Two aggregates with the same subject, predicate and value but different counts
/// and timestamps must hash to the same content address.
///
/// NOTE(review): the hash is recomputed locally with `blake3` rather than obtained
/// from library code, so this documents the expected scheme instead of exercising
/// the production hashing path — confirm that is intentional.
#[test]
fn test_wiki_pattern_content_addressed_subject() {
    use aphoria::community::CommunityObjectValue;

    // Hashes `subject:predicate:` followed by a single byte standing in for Boolean(true).
    // Counts and timestamps are deliberately left out of the digest.
    let content_hash = |aggregate: &PatternAggregate| -> String {
        let digest = blake3::Hasher::new()
            .update(aggregate.subject.as_bytes())
            .update(b":")
            .update(aggregate.predicate.as_bytes())
            .update(b":")
            .update(&[1u8]) // Boolean(true)
            .finalize();
        hex::encode(digest.as_bytes())
    };

    let bootstrap = PatternAggregate {
        subject: "code://*/tls/cert".to_string(),
        predicate: "enabled".to_string(),
        value: CommunityObjectValue::Boolean(true),
        project_count: 1,
        observation_count: 1,
        first_seen: 1000,
        last_seen: 2000,
    };

    // Same identity fields, different counts and last-seen stamp.
    let aggregated = PatternAggregate {
        subject: "code://*/tls/cert".to_string(),
        predicate: "enabled".to_string(),
        value: CommunityObjectValue::Boolean(true),
        project_count: 5,
        observation_count: 10,
        first_seen: 1000,
        last_seen: 3000,
    };

    let bootstrap_hash = content_hash(&bootstrap);
    let aggregated_hash = content_hash(&aggregated);

    assert_eq!(bootstrap_hash, aggregated_hash, "Same pattern should have same content hash");
}
|
|
|
|
/// Exercises `WikiParser::parse` on boundary inputs: authority lines at and just
/// beyond the five-line window, an empty document, text without any pattern, and a
/// pattern sentence that continues on the next line.
///
/// Nothing here awaits, so this is a plain synchronous `#[test]` and does not start
/// an async runtime.
#[test]
fn test_wiki_parser_edge_cases() {
    let parser = WikiParser::new().expect("parser");

    // Authority within 5 lines after pattern (boundary condition):
    // pattern at line 0, authority at line 5 (within range [0..6)).
    let content = "TLS MUST be enabled.\n\n\n\n\nAuthority: RFC 5246";
    let patterns = parser.parse(content).expect("parse");
    assert_eq!(patterns.len(), 1);
    assert!(patterns[0].authority.is_some(), "Should find authority within 5 lines after pattern");

    // Authority beyond 5 lines after pattern:
    // pattern at line 0, authority at line 6 (beyond range [0..6)).
    let content = "TLS MUST be enabled.\n\n\n\n\n\nAuthority: RFC 5246";
    let patterns = parser.parse(content).expect("parse");
    assert_eq!(patterns.len(), 1);
    assert!(
        patterns[0].authority.is_none(),
        "Should NOT find authority beyond 5 lines after pattern"
    );

    // Empty file.
    let patterns = parser.parse("").expect("parse");
    assert_eq!(patterns.len(), 0);

    // No patterns.
    let content = "This is just regular markdown text.";
    let patterns = parser.parse(content).expect("parse");
    assert_eq!(patterns.len(), 0);

    // Multi-line pattern (continuation).
    let content = "TLS certificate verification MUST be enabled\nacross all connections.";
    let patterns = parser.parse(content).expect("parse");
    assert_eq!(patterns.len(), 1);
    assert!(patterns[0].subject.contains("tls"));
}
|
|
|
|
#[tokio::test]
|
|
async fn test_wiki_import_duplicate_patterns() {
|
|
use tempfile::TempDir;
|
|
|
|
// Test: Same pattern in multiple files - import_corpus_from_wiki returns extraction count
|
|
let temp_dir = TempDir::new().expect("tempdir");
|
|
let wiki_dir = temp_dir.path().join("wiki");
|
|
std::fs::create_dir_all(&wiki_dir).expect("create wiki dir");
|
|
|
|
// Write two files with identical patterns
|
|
std::fs::write(wiki_dir.join("file1.md"), "## TLS\nTLS MUST be enabled.\nAuthority: RFC 5246")
|
|
.expect("write file1");
|
|
|
|
std::fs::write(wiki_dir.join("file2.md"), "## TLS\nTLS MUST be enabled.\nAuthority: RFC 5246")
|
|
.expect("write file2");
|
|
|
|
let config = AphoriaConfig::default();
|
|
let count = import_corpus_from_wiki(&wiki_dir, &config).await.expect("import");
|
|
|
|
// Returns number of patterns extracted (2), not number stored (1 after deduplication)
|
|
// Deduplication happens at storage layer via content-addressed subject
|
|
assert_eq!(count, 2, "Should extract 2 patterns (one from each file)");
|
|
}
|