stemedb/applications/aphoria/src/episteme/local/store.rs
jml cce54358d2 feat(aphoria): add git commit tracking + comprehensive documentation
**Git Commit Tracking**
- Automatically capture git commit hash when claims/observations are ingested
- Store in assertion metadata for temporal context and audit trails
- Graceful degradation in non-git environments
- Solves double-commit problem by capturing hash at ingestion time

**Implementation**
- walker/git.rs: get_current_commit_hash() utility function
- bridge.rs: Accept optional git_commit parameter in all conversion functions
- episteme/local: Store project_root, capture git hash during ingestion
- 5 new tests for git hash tracking + metadata validation
- All 1162 aphoria tests passing

**Documentation Overhaul**
- README: Added Observations vs Claims distinction, git tracking, dashboard
- CLI Reference: New sections for git integration and ignore/exclusion system
- Comprehensive ignore documentation: .aphoriaignore, inline comments, 4 methods
- Enhanced verification engine docs with matching capabilities
- DOCUMENTATION_UPDATES.md: Complete audit summary

**Dashboard Separation**
- Moved Aphoria-specific UI from stemedb-dashboard to aphoria-dashboard
- Clean separation of concerns: StemeDB for core, Aphoria for security
- Added dashboard documentation and setup guides

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-08 18:36:46 +00:00

284 lines
10 KiB
Rust

//! Storage operations for LocalEpisteme.
//!
//! Handles ingestion of claims, observations, and authoritative assertions,
//! and fetching stored assertions back through the predicate index.
use stemedb_core::types::Assertion;
use stemedb_ingest::serialize_assertion;
use stemedb_storage::PredicateIndexStore;
use tracing::{debug, info, instrument, warn};
use crate::bridge::{claim_to_assertion, observation_to_assertion};
use crate::types::{predicates, Observation};
use crate::walker::git::get_current_commit_hash;
use crate::AphoriaError;
use super::super::corpus::current_timestamp;
use super::LocalEpisteme;
impl LocalEpisteme {
/// Ingest a batch of extracted claims into Episteme.
///
/// Each claim is converted to an assertion with `claim_to_assertion` (passing
/// the store's signing key, a shared timestamp, and the current git commit
/// hash when one can be determined), serialized, and appended to the WAL.
/// The WAL is then synced and the ingestor is asked to process pending
/// records. Finally, claims whose predicate is `predicates::ACKNOWLEDGED`
/// and claims whose `file` is `"aphoria_bless"` are added to their
/// respective predicate indexes.
///
/// An empty batch returns `Ok(0)` immediately, matching
/// `ingest_observations`, so no git lookup, WAL sync, or ingestor run is
/// performed when there is nothing to write.
///
/// Returns the number of claims appended to the WAL.
///
/// # Errors
///
/// Returns [`AphoriaError::Storage`] when serialization, the WAL append, the
/// WAL sync, or ingestor processing fails, or when a serialized record is
/// shorter than its 8-byte header. Predicate-index failures are only logged
/// as warnings and do not fail the call.
#[instrument(skip(self, claims), fields(claim_count = claims.len()))]
pub async fn ingest_claims(&self, claims: &[Observation]) -> Result<usize, AphoriaError> {
    if claims.is_empty() {
        return Ok(0);
    }

    let timestamp = current_timestamp();
    let mut ingested = 0;

    // Capture current git commit hash once for the whole batch.
    let git_commit = get_current_commit_hash(&self.project_root);
    if let Some(ref hash) = git_commit {
        debug!(git_commit = %hash, "Captured git commit for claim ingestion");
    }

    // Hashes collected here are indexed only after the WAL has been synced
    // and processed.
    let mut acknowledged_claims = Vec::new();
    let mut blessed_claims = Vec::new();

    for claim in claims {
        let assertion =
            claim_to_assertion(claim, &self.signing_key, timestamp, git_commit.as_deref());

        // Serialize and write to WAL
        let record_bytes = serialize_assertion(&assertion)
            .map_err(|e| AphoriaError::Storage(format!("Failed to serialize claim: {e}")))?;

        // Compute hash for predicate indexing (same as Ingestor uses): the
        // 8-byte header is skipped. A checked slice turns a malformed record
        // into an error instead of a panic.
        let payload = record_bytes.get(8..).ok_or_else(|| {
            AphoriaError::Storage(
                "Serialized claim is shorter than the 8-byte record header".to_string(),
            )
        })?;
        let hash = *blake3::hash(payload).as_bytes();

        // Hold the journal lock only for the append itself.
        {
            let mut journal = self.journal.lock().await;
            journal.append(record_bytes).map_err(|e| {
                AphoriaError::Storage(format!("Failed to append claim to WAL: {e}"))
            })?;
        }

        // Track acknowledged claims for predicate index update
        if claim.predicate == predicates::ACKNOWLEDGED {
            acknowledged_claims.push(hash);
        }
        // Track blessed claims (created via `bless` command) for predicate index
        if claim.file == "aphoria_bless" {
            blessed_claims.push(hash);
        }

        debug!(
            concept_path = %claim.concept_path,
            predicate = %claim.predicate,
            "Ingested claim"
        );
        ingested += 1;
    }

    // Sync WAL
    {
        let mut journal = self.journal.lock().await;
        journal
            .force_sync()
            .map_err(|e| AphoriaError::Storage(format!("Failed to sync claims WAL: {e}")))?;
    }

    // Wait for ingestion to process
    self.ingestor.process_pending().await.map_err(|e| {
        AphoriaError::Storage(format!("Failed to process claims ingestion: {e}"))
    })?;

    // Update predicate index for acknowledged claims (best effort)
    for hash in acknowledged_claims {
        if let Err(e) = self
            .predicate_index_store
            .add_to_predicate_index(predicates::ACKNOWLEDGED, &hash)
            .await
        {
            warn!(hash = %hex::encode(hash), error = %e, "Failed to add to predicate index");
        }
    }

    // Update predicate index for blessed claims (best effort)
    for hash in blessed_claims {
        if let Err(e) =
            self.predicate_index_store.add_to_predicate_index(predicates::BLESSED, &hash).await
        {
            warn!(hash = %hex::encode(hash), error = %e, "Failed to add to blessed index");
        }
    }

    info!(ingested, "Ingested claims into Episteme");
    Ok(ingested)
}
/// Ingest code claims as Tier 4 (Community) observations.
///
/// Used for claims that have no authority conflict — these become "project memory"
/// that persists across commits and enables future drift detection.
///
/// Each observation is converted with `observation_to_assertion` (passing the
/// store's signing key, a shared timestamp, and the current git commit hash
/// when one can be determined), serialized, and appended to the WAL. The WAL
/// is then synced and the ingestor processes pending records. Only after that
/// are the record hashes added to the `predicates::OBSERVATION` index, so the
/// index is not populated for a batch whose append, sync, or processing
/// failed. This matches the ordering used by `ingest_claims` and
/// `ingest_authoritative`.
///
/// Returns the number of observations successfully ingested. An empty batch
/// returns `Ok(0)` without doing any work.
///
/// # Errors
///
/// Returns [`AphoriaError::Storage`] when serialization, the WAL append, the
/// WAL sync, or ingestor processing fails, or when a serialized record is
/// shorter than its 8-byte header. Predicate-index failures are only logged
/// as warnings and do not fail the call.
#[instrument(skip(self, observations), fields(count = observations.len()))]
pub async fn ingest_observations(
    &self,
    observations: &[Observation],
) -> Result<usize, AphoriaError> {
    if observations.is_empty() {
        return Ok(0);
    }

    let timestamp = current_timestamp();

    // Capture current git commit hash once for the whole batch.
    let git_commit = get_current_commit_hash(&self.project_root);
    if let Some(ref hash) = git_commit {
        debug!(git_commit = %hash, "Captured git commit for observation ingestion");
    }

    // Hashes of appended records; indexed after the WAL is synced and processed.
    let mut hashes = Vec::with_capacity(observations.len());

    for claim in observations {
        let assertion =
            observation_to_assertion(claim, &self.signing_key, timestamp, git_commit.as_deref());

        // Serialize and write to WAL
        let record_bytes = serialize_assertion(&assertion).map_err(|e| {
            AphoriaError::Storage(format!("Failed to serialize observation: {e}"))
        })?;

        // Compute hash for predicate indexing, skipping the 8-byte header.
        // A checked slice turns a malformed record into an error instead of
        // a panic.
        let payload = record_bytes.get(8..).ok_or_else(|| {
            AphoriaError::Storage(
                "Serialized observation is shorter than the 8-byte record header".to_string(),
            )
        })?;
        let hash = *blake3::hash(payload).as_bytes();

        // Hold the journal lock only for the append itself.
        {
            let mut journal = self.journal.lock().await;
            journal.append(record_bytes).map_err(|e| {
                AphoriaError::Storage(format!("Failed to append observation to WAL: {e}"))
            })?;
        }
        hashes.push(hash);

        debug!(
            concept_path = %claim.concept_path,
            predicate = %claim.predicate,
            "Ingested observation"
        );
    }

    // Sync WAL
    {
        let mut journal = self.journal.lock().await;
        journal.force_sync().map_err(|e| {
            AphoriaError::Storage(format!("Failed to sync observations WAL: {e}"))
        })?;
    }

    // Wait for ingestion to process
    self.ingestor.process_pending().await.map_err(|e| {
        AphoriaError::Storage(format!("Failed to process observations ingestion: {e}"))
    })?;

    // Add to predicate index for "observation" queries (best effort)
    for hash in &hashes {
        if let Err(e) = self
            .predicate_index_store
            .add_to_predicate_index(predicates::OBSERVATION, hash)
            .await
        {
            warn!(hash = %hex::encode(hash), error = %e, "Failed to add to observation index");
        }
    }

    let count = hashes.len();
    info!(count, "Ingested observations as Tier 4 (project memory)");
    Ok(count)
}
/// Ingest authoritative assertions (RFC, OWASP, etc.).
///
/// Writes assertions to WAL and adds them to the AUTHORITATIVE predicate index
/// so they are discoverable by `fetch_authoritative_assertions()` during scans.
///
/// The assertions are serialized as given (no conversion or git commit
/// capture happens here). Index updates run only after the WAL has been
/// synced and the ingestor has processed pending records.
///
/// Returns the number of assertions appended to the WAL. An empty batch
/// returns `Ok(0)` without syncing the WAL or running the ingestor, matching
/// `ingest_observations`.
///
/// # Errors
///
/// Returns [`AphoriaError::Storage`] when serialization, the WAL append, the
/// WAL sync, or ingestor processing fails, or when a serialized record is
/// shorter than its 8-byte header. Predicate-index failures are only logged
/// as warnings and do not fail the call.
#[instrument(skip(self, assertions), fields(count = assertions.len()))]
pub async fn ingest_authoritative(
    &self,
    assertions: &[Assertion],
) -> Result<usize, AphoriaError> {
    if assertions.is_empty() {
        return Ok(0);
    }

    let mut hashes = Vec::with_capacity(assertions.len());

    for assertion in assertions {
        let record_bytes = serialize_assertion(assertion).map_err(|e| {
            AphoriaError::Storage(format!(
                "Failed to serialize authoritative assertion '{}': {e}",
                assertion.subject
            ))
        })?;

        // Compute hash for predicate indexing (skip 8-byte header, same as
        // Ingestor). A checked slice turns a malformed record into an error
        // instead of a panic.
        let payload = record_bytes.get(8..).ok_or_else(|| {
            AphoriaError::Storage(format!(
                "Serialized authoritative assertion '{}' is shorter than the 8-byte record header",
                assertion.subject
            ))
        })?;
        let hash = *blake3::hash(payload).as_bytes();

        // Hold the journal lock only for the append itself.
        {
            let mut journal = self.journal.lock().await;
            journal.append(record_bytes).map_err(|e| {
                AphoriaError::Storage(format!(
                    "Failed to append authoritative assertion to WAL: {e}"
                ))
            })?;
        }
        hashes.push(hash);
    }

    // Sync and process
    {
        let mut journal = self.journal.lock().await;
        journal.force_sync().map_err(|e| {
            AphoriaError::Storage(format!("Failed to sync authoritative WAL: {e}"))
        })?;
    }
    self.ingestor.process_pending().await.map_err(|e| {
        AphoriaError::Storage(format!("Failed to process authoritative ingestion: {e}"))
    })?;

    // Add all assertions to the AUTHORITATIVE predicate index
    // This mirrors the pattern from policy_ops.rs import_policy()
    for hash in &hashes {
        if let Err(e) = self
            .predicate_index_store
            .add_to_predicate_index(predicates::AUTHORITATIVE, hash)
            .await
        {
            warn!(hash = %hex::encode(hash), error = %e, "Failed to add to authoritative index");
        }
    }

    // Every appended record contributed exactly one hash.
    let ingested = hashes.len();
    info!(ingested, "Ingested authoritative assertions");
    Ok(ingested)
}
/// Return every assertion indexed under `predicates::ACKNOWLEDGED`.
///
/// Intended for policy export.
///
/// # Errors
///
/// Propagates the error from the underlying predicate-index lookup.
pub async fn fetch_acknowledgments(&self) -> Result<Vec<Assertion>, AphoriaError> {
    let predicate = predicates::ACKNOWLEDGED;
    self.fetch_assertions_by_predicate(predicate).await
}
/// Return every assertion indexed under `predicates::BLESSED`
/// (authoritative patterns).
///
/// Intended for policy export.
///
/// # Errors
///
/// Propagates the error from the underlying predicate-index lookup.
pub async fn fetch_blessed_assertions(&self) -> Result<Vec<Assertion>, AphoriaError> {
    let predicate = predicates::BLESSED;
    self.fetch_assertions_by_predicate(predicate).await
}
/// Return every assertion indexed under `predicates::AUTHORITATIVE`.
///
/// These are the assertions used for conflict detection during scans:
/// those imported from Trust Packs via `policy import`, as well as those
/// written through `ingest_authoritative`, which adds to the same index.
///
/// # Errors
///
/// Propagates the error from the underlying predicate-index lookup.
pub async fn fetch_authoritative_assertions(&self) -> Result<Vec<Assertion>, AphoriaError> {
    let predicate = predicates::AUTHORITATIVE;
    self.fetch_assertions_by_predicate(predicate).await
}
/// Fetch assertions by predicate from the predicate index.
async fn fetch_assertions_by_predicate(
&self,
predicate: &str,
) -> Result<Vec<Assertion>, AphoriaError> {
let hashes = self.predicate_index_store.get_by_predicate(predicate).await.map_err(|e| {
AphoriaError::Storage(format!(
"Failed to fetch predicate index for '{}': {e}",
predicate
))
})?;
let mut assertions = Vec::new();
for hash in hashes {
if let Some(assertion) = self.load_assertion_by_hash(&hash).await {
assertions.push(assertion);
}
}
info!(predicate, count = assertions.len(), "Fetched assertions by predicate");
Ok(assertions)
}
}