fix(aphoria): deduplicate authored claims by ID in StemeDB queries

When a claim is updated, deprecated, or superseded, a new assertion is
appended (append-only). Without dedup, fetch_authored_claims() returned
all versions, causing stale active copies to appear alongside the latest.

fetch_authored_claims() now deduplicates with a HashMap keyed by claim ID,
keeping only the version with the highest assertion timestamp. All callers
(scanner, CLI, ClaimStore, export) get correct results automatically.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jordan 2026-02-12 02:32:35 -07:00
parent 422e2d4416
commit afbeed2358

View File

@ -254,15 +254,37 @@ impl LocalEpisteme {
/// ///
/// Uses the `AUTHORED_CLAIM` predicate index to find assertions, /// Uses the `AUTHORED_CLAIM` predicate index to find assertions,
/// then converts back to `AuthoredClaim` via `assertion_to_authored_claim()`. /// then converts back to `AuthoredClaim` via `assertion_to_authored_claim()`.
///
/// Deduplicates by claim ID: when a claim is updated, deprecated, or superseded,
/// a new assertion is appended (append-only). This method keeps only the most
/// recently ingested version of each claim (by assertion timestamp).
#[allow(dead_code)] // Used by EpistemeClaimStore (T4) and scanner (T6) #[allow(dead_code)] // Used by EpistemeClaimStore (T4) and scanner (T6)
#[instrument(skip(self))] #[instrument(skip(self))]
pub async fn fetch_authored_claims(&self) -> Result<Vec<AuthoredClaim>, AphoriaError> { pub async fn fetch_authored_claims(&self) -> Result<Vec<AuthoredClaim>, AphoriaError> {
let assertions = self.fetch_assertions_by_predicate(predicates::AUTHORED_CLAIM).await?; let assertions = self.fetch_assertions_by_predicate(predicates::AUTHORED_CLAIM).await?;
let mut claims = Vec::with_capacity(assertions.len()); // Deduplicate by claim ID, keeping the most recently ingested version.
// Each update/deprecate/supersede creates a new assertion with a newer timestamp.
let mut claims_by_id: std::collections::HashMap<String, (AuthoredClaim, u64)> =
std::collections::HashMap::new();
for assertion in &assertions { for assertion in &assertions {
match assertion_to_authored_claim(assertion) { match assertion_to_authored_claim(assertion) {
Ok(claim) => claims.push(claim), Ok(claim) => {
let id = claim.id.clone();
let timestamp = assertion.timestamp;
match claims_by_id.entry(id) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((claim, timestamp));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
if timestamp > e.get().1 {
e.insert((claim, timestamp));
}
}
}
}
Err(e) => { Err(e) => {
warn!( warn!(
subject = %assertion.subject, subject = %assertion.subject,
@ -273,7 +295,8 @@ impl LocalEpisteme {
} }
} }
info!(count = claims.len(), "Fetched authored claims from StemeDB"); let claims: Vec<AuthoredClaim> = claims_by_id.into_values().map(|(c, _)| c).collect();
info!(count = claims.len(), "Fetched authored claims from StemeDB (deduplicated)");
Ok(claims) Ok(claims)
} }