feat: add source content to source registry, signed assertions, feed endpoint, dashboard enhancements

- Add `content: Option<String>` to SourceRecord with rkyv schema evolution
  (LegacySourceRecord compat deserializer for backward compatibility)
- Add MAX_SOURCE_CONTENT_LEN (1MB) limit with API validation
- Strip content from list responses, include in single-source GET
- Update Go SDK RegisterSourceRequest with Content field
- FCM pipeline extracts PDF text via pdftotext and passes to registration
- Dashboard impact panel fetches and displays source content with expand/collapse
- Add feed endpoint, dashboard feed panel, and signed assertion support
- Update data-structures.md, API docs, and storage docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jordan 2026-02-19 21:54:27 -07:00
parent 58594bc7b9
commit ad07a75d0a
85 changed files with 2651 additions and 348 deletions

View File

@ -1,6 +1,6 @@
# API Surface # API Surface
**Last Updated:** 2026-02-03 **Last Updated:** 2026-02-19
**Confidence:** High **Confidence:** High
## Summary ## Summary
@ -41,10 +41,10 @@ Episteme exposes an HTTP API via `axum` with auto-generated OpenAPI 3.1 document
| `GET` | `/metrics` | Prometheus metrics (Phase 8B) | ✅ Implemented | | `GET` | `/metrics` | Prometheus metrics (Phase 8B) | ✅ Implemented |
| `GET` | `/api-docs/openapi.json` | OpenAPI 3.1 spec | ✅ Implemented | | `GET` | `/api-docs/openapi.json` | OpenAPI 3.1 spec | ✅ Implemented |
| `GET` | `/swagger-ui` | Interactive API docs | ✅ Implemented | | `GET` | `/swagger-ui` | Interactive API docs | ✅ Implemented |
| `POST` | `/v1/sources` | Register source with human-readable metadata | ✅ Implemented | | `POST` | `/v1/sources` | Register source with metadata and optional content | ✅ Implemented |
| `GET` | `/v1/sources/{hash}` | Get source record by hash | ✅ Implemented | | `GET` | `/v1/sources/{hash}` | Get source record by hash (includes content) | ✅ Implemented |
| `PATCH` | `/v1/sources/{hash}/status` | Update source status (deprecate/quarantine) | ✅ Implemented | | `PATCH` | `/v1/sources/{hash}/status` | Update source status (deprecate/quarantine) | ✅ Implemented |
| `GET` | `/v1/sources` | List/search sources (filter by tier or query) | ✅ Implemented | | `GET` | `/v1/sources` | List/search sources (content stripped for performance) | ✅ Implemented |
### Cluster Gateway Endpoints (stemedb-cluster) ### Cluster Gateway Endpoints (stemedb-cluster)

View File

@ -1,6 +1,6 @@
# SDK - Go Client Libraries # SDK - Go Client Libraries
**Last Updated:** 2026-02-01 **Last Updated:** 2026-02-19
**Confidence:** High **Confidence:** High
## Summary ## Summary

View File

@ -1,6 +1,6 @@
# Storage # Storage
**Last Updated:** 2026-01-31 **Last Updated:** 2026-02-19
**Confidence:** High **Confidence:** High
## Summary ## Summary
@ -91,6 +91,16 @@ let value: MyType = deserialize(&bytes)?;
This provides unified error handling across all store implementations (VoteStore, IndexStore, TrustRankStore, AuditStore, TrustPackStore, QuotaStore). This provides unified error handling across all store implementations (VoteStore, IndexStore, TrustRankStore, AuditStore, TrustPackStore, QuotaStore).
For types with schema evolution (rkyv compat), use the dedicated compat functions:
```rust
use crate::serde_helpers::deserialize_source_record_compat;
let record: SourceRecord = deserialize_source_record_compat(&bytes)?;
```
Available compat deserializers: `deserialize_source_record_compat` (SourceRecord). For assertions, use `stemedb_core::serde::deserialize_assertion_compat` directly.
## Write Path ## Write Path
``` ```

View File

@ -146,6 +146,7 @@ fn claim_to_assertion_with_tier(
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(), source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![signature_entry], signatures: vec![signature_entry],
confidence: claim.confidence, confidence: claim.confidence,
@ -235,6 +236,7 @@ pub fn authored_claim_to_assertion(
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(), source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle, lifecycle,
signatures: vec![signature_entry], signatures: vec![signature_entry],
confidence: 1.0, // Authored claims have full confidence confidence: 1.0, // Authored claims have full confidence

View File

@ -79,7 +79,7 @@ impl StemeDBPatternStore {
return Ok(None); return Ok(None);
}; };
let assertion = stemedb_core::serde::deserialize::<Assertion>(&bytes).map_err(|e| { let assertion = stemedb_core::serde::deserialize_assertion_compat(&bytes).map_err(|e| {
AphoriaError::Storage(format!( AphoriaError::Storage(format!(
"Failed to deserialize assertion {}: {}", "Failed to deserialize assertion {}: {}",
hex::encode(hash), hex::encode(hash),
@ -389,6 +389,7 @@ impl PatternAggregator {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: Some(metadata_bytes), source_metadata: Some(metadata_bytes),
narrative: None,
lifecycle: stemedb_core::types::LifecycleStage::Approved, lifecycle: stemedb_core::types::LifecycleStage::Approved,
signatures: vec![], // Bootstrap patterns are unsigned (no signing key available) signatures: vec![], // Bootstrap patterns are unsigned (no signing key available)
confidence: 1.0, // Pattern aggregates are high confidence confidence: 1.0, // Pattern aggregates are high confidence

View File

@ -114,6 +114,7 @@ pub fn create_authoritative_assertion_with_metadata(
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&metadata).ok(), source_metadata: serde_json::to_vec(&metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![signature_entry], signatures: vec![signature_entry],
confidence: 1.0, confidence: 1.0,
@ -170,6 +171,7 @@ pub fn create_authoritative_assertion(
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(), source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![signature_entry], signatures: vec![signature_entry],
confidence: 1.0, confidence: 1.0,

View File

@ -342,7 +342,7 @@ impl LocalEpisteme {
let assertion_key = stemedb_storage::key_codec::assertion_key(&subject, &hash_hex); let assertion_key = stemedb_storage::key_codec::assertion_key(&subject, &hash_hex);
self.store.get(&assertion_key).await.ok().flatten().and_then(|bytes| { self.store.get(&assertion_key).await.ok().flatten().and_then(|bytes| {
stemedb_core::serde::deserialize::<Assertion>(&bytes) stemedb_core::serde::deserialize_assertion_compat(&bytes)
.map_err(|e| warn!(hash = %hash_hex, error = %e, "Failed to deserialize")) .map_err(|e| warn!(hash = %hash_hex, error = %e, "Failed to deserialize"))
.ok() .ok()
}) })

View File

@ -854,6 +854,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: Some(b"{\"file\":\"test.rs\"}".to_vec()), source_metadata: Some(b"{\"file\":\"test.rs\"}".to_vec()),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
agent_id: [2u8; 32], agent_id: [2u8; 32],

View File

@ -438,6 +438,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(), source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![], signatures: vec![],
confidence: 1.0, confidence: 1.0,

View File

@ -255,6 +255,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(), source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![], signatures: vec![],
confidence: 1.0, confidence: 1.0,

View File

@ -109,6 +109,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(), source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![], signatures: vec![],
confidence: 1.0, confidence: 1.0,

View File

@ -47,7 +47,7 @@ impl ClaimCache {
pub fn save(&self, claims: &[AuthoredClaim], remote_url: &str) -> Result<(), AphoriaError> { pub fn save(&self, claims: &[AuthoredClaim], remote_url: &str) -> Result<(), AphoriaError> {
let now = SystemTime::now() let now = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH) .duration_since(SystemTime::UNIX_EPOCH)
.map_err(|e| AphoriaError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))? .map_err(|e| AphoriaError::Io(std::io::Error::other(e)))?
.as_secs(); .as_secs();
let cache = ClaimCacheFile { let cache = ClaimCacheFile {

View File

@ -179,9 +179,7 @@ impl RemoteClaimStore {
} }
} }
Err(last_error.unwrap_or_else(|| { Err(last_error.unwrap_or_else(|| AphoriaError::Hosted("Max retries exceeded".to_string())))
AphoriaError::Hosted("Max retries exceeded".to_string())
}))
} }
/// Perform the actual HTTP request. /// Perform the actual HTTP request.
@ -211,8 +209,8 @@ impl RemoteClaimStore {
http_request.call() http_request.call()
}; };
let response = response let response =
.map_err(|e| AphoriaError::Hosted(format!("HTTP request failed: {e}")))?; response.map_err(|e| AphoriaError::Hosted(format!("HTTP request failed: {e}")))?;
if response.status() >= 200 && response.status() < 300 { if response.status() >= 200 && response.status() < 300 {
let body = response let body = response
@ -230,8 +228,7 @@ impl ClaimStore for RemoteClaimStore {
fn save_claim(&self, claim: &AuthoredClaim) -> Result<(), AphoriaError> { fn save_claim(&self, claim: &AuthoredClaim) -> Result<(), AphoriaError> {
let request = CreateClaimRequest { claim: claim_to_dto(claim) }; let request = CreateClaimRequest { claim: claim_to_dto(claim) };
let response: CreateClaimResponse = let response: CreateClaimResponse = self.request("POST", "/v1/claims", Some(&request))?;
self.request("POST", "/v1/claims", Some(&request))?;
if response.stored { if response.stored {
info!(claim_id = %claim.id, "Claim stored remotely"); info!(claim_id = %claim.id, "Claim stored remotely");
@ -324,10 +321,9 @@ impl RemoteClaimStore {
warn!(operation, "Remote unreachable, using cached claims"); warn!(operation, "Remote unreachable, using cached claims");
fallback() fallback()
} }
OfflineFallback::Fail => Err(AphoriaError::Hosted(format!( OfflineFallback::Fail => {
"{}: remote unreachable", Err(AphoriaError::Hosted(format!("{}: remote unreachable", operation)))
operation }
))),
OfflineFallback::Queue => { OfflineFallback::Queue => {
warn!(operation, "Remote unreachable, queue not implemented (using cache)"); warn!(operation, "Remote unreachable, queue not implemented (using cache)");
fallback() fallback()
@ -425,14 +421,11 @@ fn is_network_error(err: &AphoriaError) -> bool {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::config::types::hosted::SyncMode; use crate::SyncMode;
#[test] #[test]
fn test_remote_store_requires_url() { fn test_remote_store_requires_url() {
let config = HostedConfig { let config = HostedConfig { url: None, ..Default::default() };
url: None,
..Default::default()
};
let result = RemoteClaimStore::new(&config); let result = RemoteClaimStore::new(&config);
assert!(result.is_err()); assert!(result.is_err());
@ -474,6 +467,7 @@ mod tests {
let config = HostedConfig { let config = HostedConfig {
url: Some("https://example.com".to_string()), url: Some("https://example.com".to_string()),
project_id: Some("test-project".to_string()), project_id: Some("test-project".to_string()),
team_id: None,
api_key_env: "TEST_API_KEY".to_string(), api_key_env: "TEST_API_KEY".to_string(),
sync_mode: SyncMode::RemoteOnly, sync_mode: SyncMode::RemoteOnly,
offline_fallback: OfflineFallback::Skip, offline_fallback: OfflineFallback::Skip,

View File

@ -98,8 +98,8 @@ impl ReportFormatter for JsonReport {
// Add tier-aware verdict if available // Add tier-aware verdict if available
if let Some(ref tier_verdict) = conflict.tier_verdict { if let Some(ref tier_verdict) = conflict.tier_verdict {
conflict_json["tier_verdict"] = serde_json::to_value(tier_verdict) conflict_json["tier_verdict"] =
.unwrap_or(serde_json::Value::Null); serde_json::to_value(tier_verdict).unwrap_or(serde_json::Value::Null);
} }
// Add primary tier if available // Add primary tier if available

View File

@ -87,7 +87,9 @@ impl TierAwareVerdict {
/// Returns a human-readable string describing the tier-aware verdict. /// Returns a human-readable string describing the tier-aware verdict.
pub fn display(&self) -> String { pub fn display(&self) -> String {
match self { match self {
TierAwareVerdict::SingleTier { tier_name, verdict, sources, max_confidence, .. } => { TierAwareVerdict::SingleTier {
tier_name, verdict, sources, max_confidence, ..
} => {
format!( format!(
"{} {} - {} source{}, max confidence {:.2}", "{} {} - {} source{}, max confidence {:.2}",
verdict.symbol(), verdict.symbol(),
@ -159,12 +161,7 @@ impl TierAwareVerdict {
}) })
.collect(); .collect();
Self::MultiTier { Self::MultiTier { primary_tier, primary_verdict, tier_verdicts, conflict_score }
primary_tier,
primary_verdict,
tier_verdicts,
conflict_score,
}
} }
} }
@ -250,8 +247,7 @@ mod tests {
}, },
); );
let verdict = let verdict = TierAwareVerdict::from_multi_tier(&tier_breakdown, 1, Verdict::Block, 0.92);
TierAwareVerdict::from_multi_tier(&tier_breakdown, 1, Verdict::Block, 0.92);
assert_eq!(verdict.effective_verdict(), Verdict::Block); assert_eq!(verdict.effective_verdict(), Verdict::Block);
assert_eq!(verdict.primary_tier(), 1); assert_eq!(verdict.primary_tier(), 1);

View File

@ -235,11 +235,7 @@ impl fmt::Display for ConflictResult {
writeln!(f, " {} {}", verdict_str, self.claim.concept_path)?; writeln!(f, " {} {}", verdict_str, self.claim.concept_path)?;
} }
writeln!( writeln!(f, " Concept: {}", self.claim.concept_path)?;
f,
" Concept: {}",
self.claim.concept_path
)?;
writeln!( writeln!(
f, f,
" Your code: {} ({}: L{})", " Your code: {} ({}: L{})",

View File

@ -1,12 +1,23 @@
import { Header } from "@/components/layout/header"; import { Header } from "@/components/layout/header";
import { LayeredQueryResults } from "@/components/layered"; import { LayeredQueryResults } from "@/components/layered";
export default function LayeredPage() { interface LayeredPageProps {
searchParams: Promise<{ subject?: string; predicate?: string }>;
}
export default async function LayeredPage({ searchParams }: LayeredPageProps) {
const params = await searchParams;
const initialSubject = params.subject;
const initialPredicate = params.predicate;
return ( return (
<> <>
<Header title="Layered Consensus" /> <Header title="Layered Consensus" />
<div className="p-6"> <div className="p-6">
<LayeredQueryResults /> <LayeredQueryResults
initialSubject={initialSubject}
initialPredicate={initialPredicate}
/>
</div> </div>
</> </>
); );

View File

@ -1,12 +1,26 @@
"use client";
import { useSearchParams } from "next/navigation";
import { Suspense } from "react";
import { Header } from "@/components/layout/header"; import { Header } from "@/components/layout/header";
import { QueryResults } from "@/components/skeptic"; import { QueryResults } from "@/components/skeptic";
/**
 * Reads the `subject` and `predicate` query-string parameters from the
 * current URL and hands them to `QueryResults` as its initial values.
 * A parameter that is absent from the URL is forwarded as `undefined`
 * (not `null`).
 */
function SkepticContent() {
  const query = useSearchParams();
  // URLSearchParams.get returns null for a missing key; the props expect undefined.
  const readParam = (key: string) => query.get(key) ?? undefined;

  return (
    <QueryResults
      initialSubject={readParam("subject")}
      initialPredicate={readParam("predicate")}
    />
  );
}
export default function SkepticPage() { export default function SkepticPage() {
return ( return (
<> <>
<Header title="Skeptic Query" /> <Header title="Skeptic Query" />
<div className="p-6"> <div className="p-6">
<QueryResults /> <Suspense fallback={<div className="text-sm text-muted-foreground">Loading...</div>}>
<SkepticContent />
</Suspense>
</div> </div>
</> </>
); );

View File

@ -29,14 +29,14 @@ export function AuditPanel({ initialFilters }: AuditPanelProps) {
try { try {
const client = new StemeDBClient(); const client = new StemeDBClient();
// Convert time range to from/to timestamps // Convert time range to from/to timestamps (Unix seconds — backend uses seconds, not ms)
let fromTs: number | undefined; let fromTs: number | undefined;
let toTs: number | undefined; let toTs: number | undefined;
if (currentFilters.timeRange !== "all") { if (currentFilters.timeRange !== "all") {
const now = Date.now(); const nowSecs = Math.floor(Date.now() / 1000);
const rangeMs = TIME_RANGES_MS[currentFilters.timeRange as TimeRangeKey] ?? TIME_RANGES_MS["24h"]; const rangeSecs = Math.floor((TIME_RANGES_MS[currentFilters.timeRange as TimeRangeKey] ?? TIME_RANGES_MS["24h"]) / 1000);
fromTs = now - rangeMs; fromTs = nowSecs - rangeSecs;
toTs = now; toTs = nowSecs;
} }
const data = await client.auditQueries({ const data = await client.auditQueries({

View File

@ -1,6 +1,7 @@
"use client"; "use client";
import { useState } from "react"; import { useState, useCallback } from "react";
import Link from "next/link";
import type { AuditEntry } from "@/lib/api/types"; import type { AuditEntry } from "@/lib/api/types";
import { formatTime, formatDate } from "@/lib/format"; import { formatTime, formatDate } from "@/lib/format";
import { ResultBadge } from "./result-badge"; import { ResultBadge } from "./result-badge";
@ -10,6 +11,36 @@ interface AuditRowProps {
entry: AuditEntry; entry: AuditEntry;
} }
/**
 * Button that shows the first 12 characters of a hash and copies the full
 * hash to the clipboard when clicked.
 *
 * @param hash  Full hash string; used for the clipboard text and the tooltip.
 * @param label Optional prefix for the tooltip, rendered as `${label}: ${hash}`.
 *
 * After a successful copy the button text is replaced by "Copied!" for 1.5 s.
 * If the copy cannot be performed the button is left unchanged.
 */
function CopyableHash({ hash, label }: { hash: string; label?: string }) {
  const [copied, setCopied] = useState(false);

  const handleCopy = useCallback(
    (e: React.MouseEvent) => {
      // Keep the click from reaching click handlers on ancestor elements.
      e.stopPropagation();

      // The async Clipboard API is only exposed in secure contexts; without
      // this guard the handler would throw on plain-HTTP deployments.
      const clipboard = navigator.clipboard;
      if (!clipboard) return;

      clipboard
        .writeText(hash)
        .then(() => {
          setCopied(true);
          setTimeout(() => setCopied(false), 1500);
        })
        .catch(() => {
          // Write rejected (e.g. permission denied or document not focused).
          // Deliberately best-effort: do not show "Copied!" and do not let
          // the rejection surface as an unhandled promise rejection.
        });
    },
    [hash]
  );

  return (
    <button
      type="button"
      onClick={handleCopy}
      title={label ? `${label}: ${hash}` : hash}
      className="font-mono text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
    >
      {copied ? (
        <span className="text-green-600 dark:text-green-400">Copied!</span>
      ) : (
        hash.slice(0, 12)
      )}
    </button>
  );
}
export function AuditRow({ entry }: AuditRowProps) { export function AuditRow({ entry }: AuditRowProps) {
const [expanded, setExpanded] = useState(false); const [expanded, setExpanded] = useState(false);
@ -30,6 +61,15 @@ export function AuditRow({ entry }: AuditRowProps) {
? `${entry.agent_id.slice(0, 8)}...` ? `${entry.agent_id.slice(0, 8)}...`
: "-"; : "-";
// Build cross-navigation URLs when subject is present
const hasSubject = Boolean(entry.params.subject);
const crossNavParams = hasSubject
? new URLSearchParams({
subject: entry.params.subject!,
...(entry.params.predicate ? { predicate: entry.params.predicate } : {}),
}).toString()
: null;
return ( return (
<div <div
className={`rounded-lg border border-border transition-colors hover:bg-muted/50 ${ className={`rounded-lg border border-border transition-colors hover:bg-muted/50 ${
@ -84,7 +124,8 @@ export function AuditRow({ entry }: AuditRowProps) {
{/* Expanded details */} {/* Expanded details */}
{expanded && ( {expanded && (
<div className="px-4 pb-3 pt-0 border-t border-border mt-0"> <div className="px-4 pb-3 pt-0 border-t border-border mt-0">
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2"> <div className="bg-muted/50 rounded-md p-3 mt-3 space-y-3">
{/* Metadata grid */}
<div className="grid grid-cols-2 gap-2 text-xs"> <div className="grid grid-cols-2 gap-2 text-xs">
<div> <div>
<span className="text-muted-foreground">Query ID:</span> <span className="text-muted-foreground">Query ID:</span>
@ -107,16 +148,51 @@ export function AuditRow({ entry }: AuditRowProps) {
<span className="ml-2">{entry.contributing_assertions.length}</span> <span className="ml-2">{entry.contributing_assertions.length}</span>
</div> </div>
</div> </div>
{/* Contributing assertions */}
{entry.contributing_assertions.length > 0 && ( {entry.contributing_assertions.length > 0 && (
<div className="text-xs"> <div className="text-xs space-y-1">
<span className="text-muted-foreground">Top contributors:</span> <div className="grid grid-cols-3 gap-2 text-muted-foreground font-medium pb-1 border-b border-border/50">
<div className="mt-1 space-y-1"> <span>Assertion Hash</span>
<span>Source Hash</span>
<span>Lifecycle / Weight</span>
</div>
{entry.contributing_assertions.slice(0, 3).map((ca) => ( {entry.contributing_assertions.slice(0, 3).map((ca) => (
<div key={ca.assertion_hash} className="font-mono text-muted-foreground"> <div
{ca.assertion_hash.slice(0, 12)}... (weight: {(ca.weight * 100).toFixed(0)}%) key={ca.assertion_hash}
className="grid grid-cols-3 gap-2 items-center py-0.5"
>
<CopyableHash hash={ca.assertion_hash} label="Assertion hash" />
<CopyableHash hash={ca.source_hash} label="Source hash" />
<span className="text-muted-foreground">
<span className="px-1.5 py-0.5 rounded bg-muted text-foreground mr-1">
{ca.lifecycle}
</span>
{(ca.weight * 100).toFixed(0)}%
</span>
</div> </div>
))} ))}
</div> </div>
)}
{/* Cross-navigation links */}
{hasSubject && crossNavParams && (
<div
className="flex items-center gap-3 pt-2 border-t border-border"
onClick={(e) => e.stopPropagation()}
>
<Link
href={`/skeptic?${crossNavParams}`}
className="text-xs text-blue-600 dark:text-blue-400 hover:underline px-2 py-1 rounded bg-muted"
>
View in Skeptic
</Link>
<Link
href={`/layered?${crossNavParams}`}
className="text-xs text-blue-600 dark:text-blue-400 hover:underline px-2 py-1 rounded bg-muted"
>
View in Layered
</Link>
</div> </div>
)} )}
</div> </div>

View File

@ -1,6 +1,7 @@
"use client"; "use client";
import { useState } from "react"; import { useState } from "react";
import Link from "next/link";
import type { AssertionObject } from "@/lib/api/types"; import type { AssertionObject } from "@/lib/api/types";
import { formatRelativeTime, formatUnixDateTime } from "@/lib/format"; import { formatRelativeTime, formatUnixDateTime } from "@/lib/format";
import { Badge } from "@/components/ui/badge"; import { Badge } from "@/components/ui/badge";
@ -26,6 +27,10 @@ function formatValue(obj: { type: string; value: string | number | boolean }): s
return str.length > 60 ? `${str.slice(0, 57)}...` : str; return str.length > 60 ? `${str.slice(0, 57)}...` : str;
} }
/**
 * Builds the Skeptic page URL for an assertion, carrying its subject and
 * predicate as percent-encoded query parameters.
 */
function investigateHref(entry: AssertionObject): string {
  const subject = encodeURIComponent(entry.subject);
  const predicate = encodeURIComponent(entry.predicate);
  return "/skeptic?subject=" + subject + "&predicate=" + predicate;
}
export function FeedRow({ entry }: FeedRowProps) { export function FeedRow({ entry }: FeedRowProps) {
const [expanded, setExpanded] = useState(false); const [expanded, setExpanded] = useState(false);
@ -34,11 +39,13 @@ export function FeedRow({ entry }: FeedRowProps) {
return ( return (
<div <div
className="rounded-lg border border-border transition-colors hover:bg-muted/50 cursor-pointer" className="rounded-lg border border-border transition-colors"
onClick={() => setExpanded(!expanded)}
> >
{/* Main row */} {/* Main row */}
<div className="grid grid-cols-2 sm:grid-cols-5 gap-2 sm:gap-4 px-4 py-3 items-center"> <div
className="grid grid-cols-2 sm:grid-cols-5 gap-2 sm:gap-4 px-4 py-3 items-center cursor-pointer hover:bg-muted/50 rounded-t-lg"
onClick={() => setExpanded(!expanded)}
>
{/* Time */} {/* Time */}
<div className="text-sm" title={formatUnixDateTime(entry.timestamp)}> <div className="text-sm" title={formatUnixDateTime(entry.timestamp)}>
<span className="font-medium">{formatRelativeTime(entry.timestamp)}</span> <span className="font-medium">{formatRelativeTime(entry.timestamp)}</span>
@ -66,20 +73,36 @@ export function FeedRow({ entry }: FeedRowProps) {
<span className="text-foreground">{formatValue(entry.object)}</span> <span className="text-foreground">{formatValue(entry.object)}</span>
</div> </div>
{/* Source Class */} {/* Source Class + Investigate icon */}
<div className="flex items-center justify-between gap-2"> <div className="flex items-center justify-between gap-2">
<Badge variant="outline" className={cn("text-xs", badgeColor)}> <Badge variant="outline" className={cn("text-xs", badgeColor)}>
{entry.source_class} {entry.source_class}
</Badge> </Badge>
<div className="flex items-center gap-1">
<Link
href={investigateHref(entry)}
className="text-muted-foreground hover:text-primary transition-colors p-1"
title="Investigate in Skeptic"
onClick={(e) => e.stopPropagation()}
>
<svg className="h-3.5 w-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<circle cx="11" cy="11" r="8" />
<path d="m21 21-4.3-4.3" />
</svg>
</Link>
<span className="text-xs text-muted-foreground"> <span className="text-xs text-muted-foreground">
{expanded ? "\u25B2" : "\u25BC"} {expanded ? "\u25B2" : "\u25BC"}
</span> </span>
</div> </div>
</div> </div>
</div>
{/* Expanded details */} {/* Expanded details */}
{expanded && ( {expanded && (
<div className="px-4 pb-3 pt-0 border-t border-border mt-0"> <div
className="px-4 pb-3 pt-0 border-t border-border mt-0"
onClick={(e) => e.stopPropagation()}
>
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2"> <div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2">
<div className="grid grid-cols-2 gap-2 text-xs"> <div className="grid grid-cols-2 gap-2 text-xs">
<div> <div>
@ -116,6 +139,29 @@ export function FeedRow({ entry }: FeedRowProps) {
</div> </div>
</div> </div>
)} )}
{/* Narrative */}
{entry.narrative && (
<div className="text-xs border-t border-border pt-2">
<span className="text-muted-foreground">Narrative:</span>
<p className="mt-1 text-foreground whitespace-pre-wrap leading-relaxed">
{entry.narrative}
</p>
</div>
)}
{/* Investigate link */}
<div className="border-t border-border pt-2 flex justify-end">
<Link
href={investigateHref(entry)}
className="text-xs text-primary hover:underline inline-flex items-center gap-1"
onClick={(e) => e.stopPropagation()}
>
<svg className="h-3 w-3" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<circle cx="11" cy="11" r="8" />
<path d="m21 21-4.3-4.3" />
</svg>
Investigate in Skeptic
</Link>
</div>
</div> </div>
</div> </div>
)} )}

View File

@ -1,6 +1,6 @@
"use client"; "use client";
import { useState, useCallback } from "react"; import { useState, useCallback, useEffect, useRef } from "react";
import { StemeDBClient, type LayeredResponse, ApiError } from "@/lib/api"; import { StemeDBClient, type LayeredResponse, ApiError } from "@/lib/api";
import { QueryForm, type QueryParams, EmptyState, ErrorState } from "@/components/skeptic"; import { QueryForm, type QueryParams, EmptyState, ErrorState } from "@/components/skeptic";
import { LayeredLoadingSkeleton } from "./layered-loading-skeleton"; import { LayeredLoadingSkeleton } from "./layered-loading-skeleton";
@ -12,8 +12,14 @@ type QueryState =
| { status: "success"; data: LayeredResponse; params: QueryParams } | { status: "success"; data: LayeredResponse; params: QueryParams }
| { status: "error"; error: string; params: QueryParams }; | { status: "error"; error: string; params: QueryParams };
export function LayeredQueryResults() { interface LayeredQueryResultsProps {
initialSubject?: string;
initialPredicate?: string;
}
export function LayeredQueryResults({ initialSubject, initialPredicate }: LayeredQueryResultsProps) {
const [state, setState] = useState<QueryState>({ status: "idle" }); const [state, setState] = useState<QueryState>({ status: "idle" });
const hasAutoQueried = useRef(false);
const executeQuery = useCallback(async (params: QueryParams) => { const executeQuery = useCallback(async (params: QueryParams) => {
setState({ status: "loading", params }); setState({ status: "loading", params });
@ -33,6 +39,18 @@ export function LayeredQueryResults() {
} }
}, []); }, []);
// Auto-execute query when initial subject+predicate are provided (e.g., from audit trail links)
useEffect(() => {
if (initialSubject && initialPredicate && !hasAutoQueried.current) {
hasAutoQueried.current = true;
executeQuery({
subject: initialSubject,
predicate: initialPredicate,
includeSourceMetadata: true,
});
}
}, [initialSubject, initialPredicate, executeQuery]);
const handleRetry = useCallback(() => { const handleRetry = useCallback(() => {
if (state.status === "error") { if (state.status === "error") {
executeQuery(state.params); executeQuery(state.params);
@ -48,7 +66,12 @@ export function LayeredQueryResults() {
<h2 className="text-lg font-medium text-card-foreground mb-4"> <h2 className="text-lg font-medium text-card-foreground mb-4">
Layered Consensus Query Layered Consensus Query
</h2> </h2>
<QueryForm onSubmit={executeQuery} isLoading={isLoading} /> <QueryForm
onSubmit={executeQuery}
isLoading={isLoading}
initialSubject={initialSubject}
initialPredicate={initialPredicate}
/>
</div> </div>
{/* Results Section */} {/* Results Section */}

View File

@ -112,6 +112,11 @@ export function LayeredResultsView({ data }: LayeredResultsViewProps) {
<p className="text-xs text-muted-foreground mt-1"> <p className="text-xs text-muted-foreground mt-1">
Confidence: {(data.overall_winner.confidence * 100).toFixed(0)}% Confidence: {(data.overall_winner.confidence * 100).toFixed(0)}%
</p> </p>
{data.overall_winner.narrative && (
<p className="text-sm text-muted-foreground mt-2 whitespace-pre-wrap leading-relaxed border-t border-primary/20 pt-2">
{data.overall_winner.narrative}
</p>
)}
</div> </div>
)} )}

View File

@ -1,7 +1,10 @@
"use client"; "use client";
import { useState, useEffect } from "react";
import Link from "next/link";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import type { LayeredTier } from "@/lib/api/types"; import type { LayeredTier, SourceRecordDto } from "@/lib/api/types";
import { StemeDBClient } from "@/lib/api";
import { SourceTierBadge, ConflictGauge, tierLabels, type SourceTier } from "@/components/skeptic"; import { SourceTierBadge, ConflictGauge, tierLabels, type SourceTier } from "@/components/skeptic";
function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested" { function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested" {
@ -10,6 +13,17 @@ function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested"
return "Contested"; return "Contested";
} }
/**
 * Formats a Unix timestamp given in seconds as a date-time string in the
 * runtime's default locale (numeric year, short month, numeric day,
 * two-digit hour and minute).
 */
function formatTimestamp(unixSeconds: number): string {
  // Date expects milliseconds since the epoch.
  const millis = unixSeconds * 1000;
  const displayOptions: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  };
  return new Date(millis).toLocaleString(undefined, displayOptions);
}
interface TierAccordionProps { interface TierAccordionProps {
tier: LayeredTier; tier: LayeredTier;
isExpanded: boolean; isExpanded: boolean;
@ -21,6 +35,20 @@ export function TierAccordion({ tier, isExpanded, onToggle }: TierAccordionProps
const tierLabel = tierLabels[safeTier] || tier.source_class; const tierLabel = tierLabels[safeTier] || tier.source_class;
const conflictStatus = getConflictStatus(tier.conflict_score); const conflictStatus = getConflictStatus(tier.conflict_score);
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(null);
const [sourceLoading, setSourceLoading] = useState(false);
// Hash of the source whose lookup most recently failed. Without this, a
// failed request would flip `sourceLoading` back to false with `sourceRecord`
// still null, re-triggering the effect and refetching in an endless loop.
const [failedSourceHash, setFailedSourceHash] = useState<string | null>(null);

// Fetch the winner's source record the first time the tier is expanded.
// Skipped while a request is in flight, once a record has been loaded, or
// when the lookup for this same source hash has already failed.
useEffect(() => {
  if (!isExpanded || !tier.winner || sourceRecord || sourceLoading) return;
  const sourceHash = tier.winner.source_hash;
  if (failedSourceHash === sourceHash) return;
  setSourceLoading(true);
  const client = new StemeDBClient();
  client
    .getSource(sourceHash)
    .then(setSourceRecord)
    // Best-effort lookup: on failure, remember the hash so we do not retry,
    // and leave `sourceRecord` null.
    .catch(() => setFailedSourceHash(sourceHash))
    .finally(() => setSourceLoading(false));
}, [isExpanded, tier.winner, sourceRecord, sourceLoading, failedSourceHash]);
return ( return (
<div className="border border-border rounded-lg overflow-hidden"> <div className="border border-border rounded-lg overflow-hidden">
<button <button
@ -99,12 +127,105 @@ export function TierAccordion({ tier, isExpanded, onToggle }: TierAccordionProps
</div> </div>
<div> <div>
<span className="text-muted-foreground">Source</span> <span className="text-muted-foreground">Source</span>
{sourceLoading ? (
<p className="font-mono text-xs text-muted-foreground animate-pulse">
Loading...
</p>
) : sourceRecord ? (
<p className="font-medium text-foreground truncate" title={sourceRecord.label}>
{sourceRecord.label}
</p>
) : (
<p className="font-mono text-xs text-foreground truncate" title={tier.winner.source_hash}> <p className="font-mono text-xs text-foreground truncate" title={tier.winner.source_hash}>
{tier.winner.source_hash.slice(0, 12)}... {tier.winner.source_hash.slice(0, 12)}...
</p> </p>
)}
</div> </div>
</div> </div>
{/* Assertion timestamp */}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
<div className="col-span-2">
<span className="text-muted-foreground">Asserted at</span>
<p className="font-medium text-foreground">
{formatTimestamp(tier.winner.timestamp)}
</p>
</div>
</div>
{/* Narrative */}
{tier.winner.narrative && (
<div className="text-sm">
<span className="text-muted-foreground">Narrative</span>
<p className="mt-1 text-foreground whitespace-pre-wrap leading-relaxed">
{tier.winner.narrative}
</p>
</div>
)}
{/* Source registry details */}
{sourceLoading && (
<div className="rounded border border-border bg-muted/30 p-2">
<p className="text-xs text-muted-foreground animate-pulse">
Loading source details...
</p>
</div>
)}
{!sourceLoading && sourceRecord && (
<div className="rounded border border-border bg-muted/30 p-2 space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
Source Registry
</span>
<Link
href="/sources"
className="text-xs text-blue-600 dark:text-blue-400 hover:underline"
>
View in Source Registry
</Link>
</div>
<div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
<div>
<span className="text-muted-foreground">Label</span>
<p className="font-medium text-foreground">{sourceRecord.label}</p>
</div>
<div>
<span className="text-muted-foreground">Status</span>
<p className="font-medium text-foreground capitalize">{sourceRecord.status}</p>
</div>
{sourceRecord.url && (
<div className="col-span-2">
<span className="text-muted-foreground">URL</span>
<p className="font-mono text-foreground truncate" title={sourceRecord.url}>
<a
href={sourceRecord.url}
target="_blank"
rel="noopener noreferrer"
className="text-blue-600 dark:text-blue-400 hover:underline"
>
{sourceRecord.url}
</a>
</p>
</div>
)}
{sourceRecord.notes && (
<div className="col-span-2">
<span className="text-muted-foreground">Notes</span>
<p className="text-foreground leading-relaxed">{sourceRecord.notes}</p>
</div>
)}
<div>
<span className="text-muted-foreground">Created</span>
<p className="text-foreground">{formatTimestamp(sourceRecord.created_at)}</p>
</div>
<div>
<span className="text-muted-foreground">Updated</span>
<p className="text-foreground">{formatTimestamp(sourceRecord.updated_at)}</p>
</div>
</div>
</div>
)}
{/* Assertion hash */} {/* Assertion hash */}
<div className="pt-2 border-t border-border"> <div className="pt-2 border-t border-border">
<span className="text-xs text-muted-foreground">Assertion: </span> <span className="text-xs text-muted-foreground">Assertion: </span>

View File

@ -1,7 +1,10 @@
"use client"; "use client";
import { useState, useEffect } from "react";
import Link from "next/link";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import type { ClaimSummary } from "@/lib/api/types"; import type { ClaimSummary, SourceRecordDto } from "@/lib/api/types";
import { StemeDBClient } from "@/lib/api";
import { SourceTierBadge } from "./source-tier-badge"; import { SourceTierBadge } from "./source-tier-badge";
import { WeightBar } from "./weight-bar"; import { WeightBar } from "./weight-bar";
import { HashDisplay } from "./hash-display"; import { HashDisplay } from "./hash-display";
@ -33,6 +36,23 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
: "active") as SourceStatus; : "active") as SourceStatus;
const valueStr = formatValue(claim.value); const valueStr = formatValue(claim.value);
// Fetch full source record when expanded
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(null);
const [sourceLoading, setSourceLoading] = useState(false);
// Hash of the source whose lookup last failed. Remembering it stops the
// effect from re-issuing the same failing request every time
// `sourceLoading` drops back to false.
const [failedSourceHash, setFailedSourceHash] = useState<string | null>(null);

useEffect(() => {
  const hash = claim.source.source_hash;
  if (!isExpanded || sourceRecord || sourceLoading) return;
  if (failedSourceHash === hash) return;
  setSourceLoading(true);
  const client = new StemeDBClient();
  client
    .getSource(hash)
    .then(setSourceRecord)
    .catch(() => {
      // Source may not be in registry — that's fine; record the miss so we
      // do not retry in a loop.
      setFailedSourceHash(hash);
    })
    .finally(() => setSourceLoading(false));
}, [isExpanded, claim.source.source_hash, sourceRecord, sourceLoading, failedSourceHash]);
return ( return (
<div <div
className={cn( className={cn(
@ -88,6 +108,19 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
{/* Expanded details */} {/* Expanded details */}
{isExpanded && ( {isExpanded && (
<div className="px-3 pb-3 space-y-4 border-t border-border pt-3"> <div className="px-3 pb-3 space-y-4 border-t border-border pt-3">
{/* Full value */}
<div className="space-y-1">
<div className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
Value
</div>
<p className="text-sm text-foreground whitespace-pre-wrap break-words leading-relaxed">
{valueStr}
</p>
<div className="text-xs text-muted-foreground">
Type: <code className="bg-muted px-1 py-0.5 rounded">{claim.value.type}</code>
</div>
</div>
{/* Source info */} {/* Source info */}
<div className="space-y-1"> <div className="space-y-1">
<div className="text-xs font-medium text-muted-foreground uppercase tracking-wide"> <div className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
@ -98,7 +131,7 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
<span className={statusColors[status]}> <span className={statusColors[status]}>
{statusIcons[status]} {status} {statusIcons[status]} {status}
</span> </span>
<span className="text-muted-foreground"></span> <span className="text-muted-foreground">&middot;</span>
<span className="text-muted-foreground"> <span className="text-muted-foreground">
{tierLabel} (T{tier}) {tierLabel} (T{tier})
</span> </span>
@ -113,6 +146,33 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
{sourceUrl} {sourceUrl}
</a> </a>
)} )}
{/* Source registry details (fetched) */}
{sourceLoading && (
<div className="text-xs text-muted-foreground animate-pulse mt-1">
Loading source details...
</div>
)}
{sourceRecord && (
<div className="mt-2 rounded border border-border bg-muted/30 p-2 space-y-1">
{sourceRecord.notes && (
<p className="text-xs text-muted-foreground whitespace-pre-wrap">
{sourceRecord.notes}
</p>
)}
<div className="flex items-center gap-3 text-[10px] text-muted-foreground">
<span>Created: {new Date(sourceRecord.created_at).toLocaleDateString()}</span>
{sourceRecord.updated_at !== sourceRecord.created_at && (
<span>Updated: {new Date(sourceRecord.updated_at).toLocaleDateString()}</span>
)}
</div>
<Link
href={`/sources`}
className="text-[10px] text-blue-600 dark:text-blue-400 hover:underline"
>
View in Source Registry &rarr;
</Link>
</div>
)}
</div> </div>
{/* Supporting agents */} {/* Supporting agents */}

View File

@ -1,9 +1,10 @@
"use client"; "use client";
import { useState } from "react"; import { useState, useEffect, useRef, useCallback } from "react";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input"; import { Input } from "@/components/ui/input";
import { DatePicker } from "@/components/ui/date-picker"; import { DatePicker } from "@/components/ui/date-picker";
import { StemeDBClient } from "@/lib/api";
export interface QueryParams { export interface QueryParams {
subject: string; subject: string;
@ -15,22 +16,162 @@ export interface QueryParams {
interface QueryFormProps { interface QueryFormProps {
onSubmit: (params: QueryParams) => void; onSubmit: (params: QueryParams) => void;
isLoading: boolean; isLoading: boolean;
initialSubject?: string;
initialPredicate?: string;
} }
export function QueryForm({ onSubmit, isLoading }: QueryFormProps) { export function QueryForm({ onSubmit, isLoading, initialSubject, initialPredicate }: QueryFormProps) {
const [subject, setSubject] = useState(""); const [subject, setSubject] = useState(initialSubject ?? "");
const [predicate, setPredicate] = useState(""); const [predicate, setPredicate] = useState(initialPredicate ?? "");
const [includeSourceMetadata, setIncludeSourceMetadata] = useState(true); const [includeSourceMetadata, setIncludeSourceMetadata] = useState(true);
const [asOfDate, setAsOfDate] = useState<Date | undefined>(undefined); const [asOfDate, setAsOfDate] = useState<Date | undefined>(undefined);
// Autocomplete state
const [subjectSuggestions, setSubjectSuggestions] = useState<string[]>([]);
const [predicateSuggestions, setPredicateSuggestions] = useState<string[]>([]);
const [showSubjectDropdown, setShowSubjectDropdown] = useState(false);
const [showPredicateDropdown, setShowPredicateDropdown] = useState(false);
const [activeSubjectIndex, setActiveSubjectIndex] = useState(-1);
const [activePredicateIndex, setActivePredicateIndex] = useState(-1);
const subjectRef = useRef<HTMLDivElement>(null);
const predicateRef = useRef<HTMLDivElement>(null);
const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
// Sync initial values when they change (e.g., from URL params)
useEffect(() => {
if (initialSubject !== undefined) setSubject(initialSubject);
}, [initialSubject]);
useEffect(() => {
if (initialPredicate !== undefined) setPredicate(initialPredicate);
}, [initialPredicate]);
// Fetch subject suggestions with debounce (200 ms).
// Every call cancels the pending timer and installs a new one. The timer
// handle doubles as a request token: once a later keystroke has replaced it,
// a slow response for the older query is discarded instead of overwriting
// the suggestions that belong to the newer query.
const fetchSubjects = useCallback((query: string) => {
  if (debounceRef.current) clearTimeout(debounceRef.current);
  const timer: ReturnType<typeof setTimeout> = setTimeout(async () => {
    if (!query.trim()) {
      setSubjectSuggestions([]);
      setShowSubjectDropdown(false);
      return;
    }
    // True once a newer call has superseded this one.
    const isStale = () => debounceRef.current !== timer;
    try {
      const client = new StemeDBClient();
      const resp = await client.listSubjects(query, 20);
      if (isStale()) return;
      setSubjectSuggestions(resp.subjects);
      setShowSubjectDropdown(resp.subjects.length > 0);
      setActiveSubjectIndex(-1);
    } catch {
      if (isStale()) return;
      // Autocomplete is best-effort: on failure simply show no suggestions.
      setSubjectSuggestions([]);
      setShowSubjectDropdown(false);
    }
  }, 200);
  debounceRef.current = timer;
}, []);
// Load the predicates known for a subject once that subject has been picked.
// A blank subject or a failed request leaves the suggestion list empty.
const fetchPredicates = useCallback(async (subj: string) => {
  const hasSubject = subj.trim().length > 0;
  if (hasSubject) {
    try {
      const api = new StemeDBClient();
      const { predicates } = await api.listPredicates(subj);
      setPredicateSuggestions(predicates);
    } catch {
      setPredicateSuggestions([]);
    }
    return;
  }
  setPredicateSuggestions([]);
}, []);
// Close dropdowns on click outside
useEffect(() => {
function handleClickOutside(e: MouseEvent) {
if (subjectRef.current && !subjectRef.current.contains(e.target as Node)) {
setShowSubjectDropdown(false);
}
if (predicateRef.current && !predicateRef.current.contains(e.target as Node)) {
setShowPredicateDropdown(false);
}
}
document.addEventListener("mousedown", handleClickOutside);
return () => document.removeEventListener("mousedown", handleClickOutside);
}, []);
// Typing in the subject box: store the text, drop predicate suggestions that
// belonged to the previous subject, and schedule a debounced subject lookup.
const handleSubjectChange = (value: string) => {
  setSubject(value);
  setPredicateSuggestions([]);
  fetchSubjects(value);
};
// Picking a subject from the dropdown: commit it, dismiss the dropdown, clear
// the keyboard highlight, and load that subject's predicates.
const selectSubject = (value: string) => {
  setShowSubjectDropdown(false);
  setActiveSubjectIndex(-1);
  setSubject(value);
  fetchPredicates(value);
};
// Typing in the predicate box: store the text and, when predicate suggestions
// have been loaded, open the dropdown with no item highlighted. Filtering of
// those suggestions happens locally at render time.
const handlePredicateChange = (value: string) => {
  setPredicate(value);
  const hasSuggestions = predicateSuggestions.length > 0;
  if (!hasSuggestions) return;
  setShowPredicateDropdown(true);
  setActivePredicateIndex(-1);
};
// Picking a predicate from the dropdown: commit it, dismiss the dropdown and
// clear the keyboard highlight.
const selectPredicate = (value: string) => {
  setShowPredicateDropdown(false);
  setActivePredicateIndex(-1);
  setPredicate(value);
};
// Predicate suggestions narrowed to those containing the typed text,
// compared case-insensitively.
const predicateNeedle = predicate.toLowerCase();
const filteredPredicates = predicateSuggestions.filter(
  (candidate) => candidate.toLowerCase().includes(predicateNeedle)
);
// Keyboard navigation for the subject dropdown. Does nothing unless the
// dropdown is open and has entries. Arrow keys move the highlight (clamped to
// the list bounds), Enter commits the highlighted entry, Escape closes.
const handleSubjectKeyDown = (e: React.KeyboardEvent) => {
  const dropdownActive = showSubjectDropdown && subjectSuggestions.length > 0;
  if (!dropdownActive) return;

  const lastIndex = subjectSuggestions.length - 1;
  switch (e.key) {
    case "ArrowDown":
      e.preventDefault();
      setActiveSubjectIndex((current) => Math.min(current + 1, lastIndex));
      break;
    case "ArrowUp":
      e.preventDefault();
      setActiveSubjectIndex((current) => Math.max(current - 1, 0));
      break;
    case "Enter":
      // Without a highlighted entry, Enter falls through to form submission.
      if (activeSubjectIndex >= 0) {
        e.preventDefault();
        selectSubject(subjectSuggestions[activeSubjectIndex]);
      }
      break;
    case "Escape":
      setShowSubjectDropdown(false);
      break;
    default:
      break;
  }
};
// Keyboard navigation for the predicate dropdown, operating on the locally
// filtered list. Does nothing unless the dropdown is open and has entries.
// Arrow keys move the highlight (clamped to the list bounds), Enter commits
// the highlighted entry, Escape closes.
const handlePredicateKeyDown = (e: React.KeyboardEvent) => {
  const dropdownActive = showPredicateDropdown && filteredPredicates.length > 0;
  if (!dropdownActive) return;

  const lastIndex = filteredPredicates.length - 1;
  switch (e.key) {
    case "ArrowDown":
      e.preventDefault();
      setActivePredicateIndex((current) => Math.min(current + 1, lastIndex));
      break;
    case "ArrowUp":
      e.preventDefault();
      setActivePredicateIndex((current) => Math.max(current - 1, 0));
      break;
    case "Enter":
      // Without a highlighted entry, Enter falls through to form submission.
      if (activePredicateIndex >= 0) {
        e.preventDefault();
        selectPredicate(filteredPredicates[activePredicateIndex]);
      }
      break;
    case "Escape":
      setShowPredicateDropdown(false);
      break;
    default:
      break;
  }
};
const handleSubmit = (e: React.FormEvent) => { const handleSubmit = (e: React.FormEvent) => {
e.preventDefault(); e.preventDefault();
setShowSubjectDropdown(false);
setShowPredicateDropdown(false);
if (subject.trim() && predicate.trim()) { if (subject.trim() && predicate.trim()) {
onSubmit({ onSubmit({
subject: subject.trim(), subject: subject.trim(),
predicate: predicate.trim(), predicate: predicate.trim(),
includeSourceMetadata, includeSourceMetadata,
// Convert Date to Unix timestamp (seconds)
asOf: asOfDate ? Math.floor(asOfDate.getTime() / 1000) : undefined, asOf: asOfDate ? Math.floor(asOfDate.getTime() / 1000) : undefined,
}); });
} }
@ -41,32 +182,81 @@ export function QueryForm({ onSubmit, isLoading }: QueryFormProps) {
return ( return (
<form onSubmit={handleSubmit} className="space-y-4"> <form onSubmit={handleSubmit} className="space-y-4">
<div className="grid grid-cols-1 md:grid-cols-2 gap-4"> <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="space-y-2"> {/* Subject with autocomplete */}
<div className="space-y-2" ref={subjectRef}>
<label htmlFor="subject" className="text-sm font-medium text-foreground"> <label htmlFor="subject" className="text-sm font-medium text-foreground">
Subject Subject
</label> </label>
<div className="relative">
<Input <Input
id="subject" id="subject"
placeholder="e.g., semaglutide:gastroparesis_risk" placeholder="e.g., semaglutide:gastroparesis_risk"
value={subject} value={subject}
onChange={(e) => setSubject(e.target.value)} onChange={(e) => handleSubjectChange(e.target.value)}
onFocus={() => {
if (subjectSuggestions.length > 0) setShowSubjectDropdown(true);
}}
onKeyDown={handleSubjectKeyDown}
disabled={isLoading} disabled={isLoading}
autoComplete="off"
/> />
{showSubjectDropdown && subjectSuggestions.length > 0 && (
<div className="absolute z-50 w-full mt-1 max-h-60 overflow-auto rounded-md border border-border bg-popover shadow-md">
{subjectSuggestions.map((s, i) => (
<button
key={s}
type="button"
className={`w-full px-3 py-2 text-left text-sm font-mono truncate hover:bg-muted ${
i === activeSubjectIndex ? "bg-muted" : ""
}`}
onMouseDown={() => selectSubject(s)}
>
{s}
</button>
))}
</div>
)}
</div>
<p className="text-xs text-muted-foreground"> <p className="text-xs text-muted-foreground">
The entity you want to query The entity you want to query
</p> </p>
</div> </div>
<div className="space-y-2">
{/* Predicate with autocomplete */}
<div className="space-y-2" ref={predicateRef}>
<label htmlFor="predicate" className="text-sm font-medium text-foreground"> <label htmlFor="predicate" className="text-sm font-medium text-foreground">
Predicate Predicate
</label> </label>
<div className="relative">
<Input <Input
id="predicate" id="predicate"
placeholder="e.g., risk_level" placeholder="e.g., risk_level"
value={predicate} value={predicate}
onChange={(e) => setPredicate(e.target.value)} onChange={(e) => handlePredicateChange(e.target.value)}
onFocus={() => {
if (filteredPredicates.length > 0) setShowPredicateDropdown(true);
}}
onKeyDown={handlePredicateKeyDown}
disabled={isLoading} disabled={isLoading}
autoComplete="off"
/> />
{showPredicateDropdown && filteredPredicates.length > 0 && (
<div className="absolute z-50 w-full mt-1 max-h-60 overflow-auto rounded-md border border-border bg-popover shadow-md">
{filteredPredicates.map((p, i) => (
<button
key={p}
type="button"
className={`w-full px-3 py-2 text-left text-sm font-mono truncate hover:bg-muted ${
i === activePredicateIndex ? "bg-muted" : ""
}`}
onMouseDown={() => selectPredicate(p)}
>
{p}
</button>
))}
</div>
)}
</div>
<p className="text-xs text-muted-foreground"> <p className="text-xs text-muted-foreground">
The property or relationship to analyze The property or relationship to analyze
</p> </p>

View File

@ -1,6 +1,6 @@
"use client"; "use client";
import { useState, useCallback } from "react"; import { useState, useCallback, useEffect, useRef } from "react";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { StemeDBClient, type SkepticResponse, ApiError } from "@/lib/api"; import { StemeDBClient, type SkepticResponse, ApiError } from "@/lib/api";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
@ -20,9 +20,15 @@ type QueryState =
| { status: "success"; data: SkepticResponse; params: QueryParams } | { status: "success"; data: SkepticResponse; params: QueryParams }
| { status: "error"; error: string; params: QueryParams }; | { status: "error"; error: string; params: QueryParams };
export function QueryResults() { interface QueryResultsProps {
initialSubject?: string;
initialPredicate?: string;
}
export function QueryResults({ initialSubject, initialPredicate }: QueryResultsProps) {
const [state, setState] = useState<QueryState>({ status: "idle" }); const [state, setState] = useState<QueryState>({ status: "idle" });
const router = useRouter(); const router = useRouter();
const hasAutoQueried = useRef(false);
const handleViewAudit = useCallback( const handleViewAudit = useCallback(
(subject: string, predicate: string) => { (subject: string, predicate: string) => {
@ -56,6 +62,18 @@ export function QueryResults() {
} }
}, []); }, []);
// Auto-execute query when initial subject+predicate are provided (e.g., from URL params)
useEffect(() => {
if (initialSubject && initialPredicate && !hasAutoQueried.current) {
hasAutoQueried.current = true;
executeQuery({
subject: initialSubject,
predicate: initialPredicate,
includeSourceMetadata: true,
});
}
}, [initialSubject, initialPredicate, executeQuery]);
const handleRetry = useCallback(() => { const handleRetry = useCallback(() => {
if (state.status === "error") { if (state.status === "error") {
executeQuery(state.params); executeQuery(state.params);
@ -71,7 +89,12 @@ export function QueryResults() {
<h2 className="text-lg font-medium text-card-foreground mb-4"> <h2 className="text-lg font-medium text-card-foreground mb-4">
Conflict Analysis Query Conflict Analysis Query
</h2> </h2>
<QueryForm onSubmit={executeQuery} isLoading={isLoading} /> <QueryForm
onSubmit={executeQuery}
isLoading={isLoading}
initialSubject={initialSubject}
initialPredicate={initialPredicate}
/>
</div> </div>
{/* Results Section */} {/* Results Section */}

View File

@ -1,8 +1,8 @@
"use client"; "use client";
import { useCallback } from "react"; import { useCallback, useEffect, useState } from "react";
import { FileJson, FileText } from "lucide-react"; import { ChevronDown, ChevronUp, FileJson, FileText } from "lucide-react";
import type { SourceImpactResponse } from "@/lib/api/types"; import type { SourceImpactResponse, SourceRecordDto } from "@/lib/api/types";
import { StemeDBClient } from "@/lib/api"; import { StemeDBClient } from "@/lib/api";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { import {
@ -20,11 +20,90 @@ interface ImpactDetailPanelProps {
onClose: () => void; onClose: () => void;
} }
/**
 * Abbreviated hash (first 12 and last 4 characters) rendered as a button.
 * Clicking copies the full hash to the clipboard and, on success, shows a
 * "Copied!" marker for 1.5 seconds.
 */
function CopyableHash({ hash }: { hash: string }) {
  const [copied, setCopied] = useState(false);

  const handleCopy = async () => {
    try {
      // writeText is asynchronous and can reject (permission denied), and
      // `navigator.clipboard` may be missing in insecure contexts; the
      // try/catch covers both so no rejection goes unhandled.
      await navigator.clipboard.writeText(hash);
    } catch {
      // Best effort: do not claim success when nothing was copied.
      return;
    }
    setCopied(true);
    setTimeout(() => setCopied(false), 1500);
  };

  return (
    <button
      // Explicit type so the button never submits an enclosing form.
      type="button"
      onClick={handleCopy}
      className="font-mono text-xs cursor-pointer hover:text-foreground transition-colors"
      title="Click to copy full hash"
    >
      {hash.slice(0, 12)}...{hash.slice(-4)}
      <span className="ml-1 text-primary text-[10px]">
        {copied ? "Copied!" : ""}
      </span>
    </button>
  );
}
/**
 * Agent identifier rendered as a pill-shaped button. Clicking copies the
 * identifier to the clipboard and, on success, shows a "Copied!" marker for
 * 1.5 seconds.
 */
function CopyableAgent({ agent }: { agent: string }) {
  const [copied, setCopied] = useState(false);

  const handleCopy = async () => {
    try {
      // writeText is asynchronous and can reject (permission denied), and
      // `navigator.clipboard` may be missing in insecure contexts; the
      // try/catch covers both so no rejection goes unhandled.
      await navigator.clipboard.writeText(agent);
    } catch {
      // Best effort: do not claim success when nothing was copied.
      return;
    }
    setCopied(true);
    setTimeout(() => setCopied(false), 1500);
  };

  // No `key` here: list keys belong on the element created by the caller's
  // `.map()`, not on a component's own root element.
  return (
    <button
      // Explicit type so the button never submits an enclosing form.
      type="button"
      onClick={handleCopy}
      className="px-2 py-1 rounded bg-muted text-xs font-mono cursor-pointer hover:text-foreground transition-colors"
      title="Click to copy agent ID"
    >
      {agent}
      <span className="ml-1 text-primary text-[10px]">
        {copied ? "Copied!" : ""}
      </span>
    </button>
  );
}
/**
 * Small colored pill showing a status string as given.
 * The color lookup is case-insensitive; a status without a dedicated palette
 * entry falls back to the neutral muted style.
 */
function StatusBadge({ status }: { status: string }) {
  const neutralTone = "bg-muted text-muted-foreground";
  const palette: Record<string, string> = {
    active: "bg-green-500/15 text-green-700 dark:text-green-400",
    inactive: neutralTone,
    quarantined: "bg-red-500/15 text-red-700 dark:text-red-400",
    pending: "bg-yellow-500/15 text-yellow-700 dark:text-yellow-400",
  };
  const tone = palette[status.toLowerCase()] ?? neutralTone;
  const badgeClass = `inline-block px-2 py-0.5 rounded text-[11px] font-medium ${tone}`;

  return <span className={badgeClass}>{status}</span>;
}
export function ImpactDetailPanel({ export function ImpactDetailPanel({
isOpen, isOpen,
impact, impact,
onClose, onClose,
}: ImpactDetailPanelProps) { }: ImpactDetailPanelProps) {
// Full registry record for the source under inspection (the only place the
// source content is available), plus the expand/collapse state of the
// content viewer.
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(
  null
);
const [contentExpanded, setContentExpanded] = useState(false);

useEffect(() => {
  const hash = impact?.source_hash;
  if (!isOpen || !hash) {
    // Panel closed or nothing selected: drop any previously loaded record.
    setSourceRecord(null);
    setContentExpanded(false);
    return undefined;
  }

  // Set by the cleanup when the panel closes or a different source is
  // selected, so a late response cannot overwrite newer state.
  let cancelled = false;
  const client = new StemeDBClient();
  client
    .getSource(hash)
    .then((record) => {
      if (!cancelled) setSourceRecord(record);
    })
    .catch(() => {
      if (!cancelled) setSourceRecord(null);
    });

  return () => {
    cancelled = true;
  };
}, [isOpen, impact?.source_hash]);
const handleExport = useCallback( const handleExport = useCallback(
(format: "csv" | "json") => { (format: "csv" | "json") => {
if (!impact) return; if (!impact) return;
@ -54,6 +133,35 @@ export function ImpactDetailPanel({
{impact ? ( {impact ? (
<div className="mt-6 space-y-6"> <div className="mt-6 space-y-6">
{/* Source Info */}
<div className="rounded-lg border border-border p-4 space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs text-muted-foreground font-medium uppercase tracking-wide">
Source
</span>
<StatusBadge status={impact.status} />
</div>
<div className="text-muted-foreground">
<CopyableHash hash={impact.source_hash} />
</div>
<div className="flex gap-4 pt-1">
<div className="flex items-baseline gap-1.5">
<span className="text-xs text-muted-foreground">
Assertions
</span>
<span className="text-sm font-bold">
{impact.assertion_count}
</span>
</div>
<div className="flex items-baseline gap-1.5">
<span className="text-xs text-muted-foreground">Agents</span>
<span className="text-sm font-bold">
{impact.affected_agents.length}
</span>
</div>
</div>
</div>
{/* Export buttons - only when there's data to export */} {/* Export buttons - only when there's data to export */}
{impact.assertion_count > 0 && ( {impact.assertion_count > 0 && (
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
@ -84,31 +192,60 @@ export function ImpactDetailPanel({
<p className="text-sm text-muted-foreground">{impact.summary}</p> <p className="text-sm text-muted-foreground">{impact.summary}</p>
</div> </div>
{/* Source Content */}
{sourceRecord?.content && (
<div>
<div className="flex items-center justify-between mb-2">
<h4 className="text-sm font-medium text-foreground">
Source Content
<span className="ml-2 text-xs text-muted-foreground font-normal">
({sourceRecord.content.length.toLocaleString()} chars)
</span>
</h4>
<button
onClick={() => setContentExpanded(!contentExpanded)}
className="flex items-center gap-1 text-xs text-muted-foreground hover:text-foreground transition-colors"
>
{contentExpanded ? (
<>
Collapse <ChevronUp className="h-3 w-3" />
</>
) : (
<>
Expand <ChevronDown className="h-3 w-3" />
</>
)}
</button>
</div>
<div
className={`rounded border border-border bg-muted/30 overflow-y-auto ${
contentExpanded ? "max-h-[600px]" : "max-h-96"
}`}
>
<pre className="p-3 text-xs text-muted-foreground whitespace-pre-wrap font-mono leading-relaxed">
{sourceRecord.content}
</pre>
</div>
</div>
)}
{/* Affected Assertions */} {/* Affected Assertions */}
{impact.affected_assertions.length > 0 && ( {impact.affected_assertions.length > 0 && (
<div> <div>
<h4 className="text-sm font-medium text-foreground mb-3"> <h4 className="text-sm font-medium text-foreground mb-3">
Affected Assertions ({impact.affected_assertions.length}) Affected Assertions ({impact.affected_assertions.length})
</h4> </h4>
<div className="max-h-48 overflow-y-auto rounded border border-border"> <div className="max-h-48 overflow-y-auto rounded border border-border divide-y divide-border">
<table className="w-full text-sm"> {impact.affected_assertions.map((hash, idx) => (
<thead className="sticky top-0 bg-muted/50"> <div
<tr> key={hash}
<th className="text-left px-3 py-2 font-medium text-muted-foreground"> className={`flex items-center px-3 py-2 ${
Hash idx % 2 === 0 ? "bg-background" : "bg-muted/30"
</th> } hover:bg-accent/10 transition-colors`}
</tr> >
</thead> <CopyableHash hash={hash} />
<tbody className="divide-y divide-border"> </div>
{impact.affected_assertions.map((hash) => (
<tr key={hash} className="hover:bg-accent/5">
<td className="px-3 py-2 font-mono text-xs">
{hash}
</td>
</tr>
))} ))}
</tbody>
</table>
</div> </div>
</div> </div>
)} )}
@ -121,12 +258,7 @@ export function ImpactDetailPanel({
</h4> </h4>
<div className="flex flex-wrap gap-2"> <div className="flex flex-wrap gap-2">
{impact.affected_agents.map((agent) => ( {impact.affected_agents.map((agent) => (
<span <CopyableAgent key={agent} agent={agent} />
key={agent}
className="px-2 py-1 rounded bg-muted text-xs font-mono"
>
{agent}
</span>
))} ))}
</div> </div>
</div> </div>

View File

@ -1,5 +1,6 @@
"use client"; "use client";
import Link from "next/link";
import { ExternalLink, Eye, Ban, RotateCcw } from "lucide-react"; import { ExternalLink, Eye, Ban, RotateCcw } from "lucide-react";
import type { SourceRecordDto } from "@/lib/api/types"; import type { SourceRecordDto } from "@/lib/api/types";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
@ -8,6 +9,7 @@ import { TierBadge } from "./tier-badge";
interface SourceRowProps { interface SourceRowProps {
source: SourceRecordDto; source: SourceRecordDto;
assertionCount?: number;
onViewImpact: (source: SourceRecordDto) => void; onViewImpact: (source: SourceRecordDto) => void;
onBlock: (source: SourceRecordDto) => void; onBlock: (source: SourceRecordDto) => void;
onRestore: (source: SourceRecordDto) => void; onRestore: (source: SourceRecordDto) => void;
@ -15,6 +17,7 @@ interface SourceRowProps {
export function SourceRow({ export function SourceRow({
source, source,
assertionCount,
onViewImpact, onViewImpact,
onBlock, onBlock,
onRestore, onRestore,
@ -43,6 +46,17 @@ export function SourceRow({
{updatedDate !== createdDate && ( {updatedDate !== createdDate && (
<span>Updated: {updatedDate}</span> <span>Updated: {updatedDate}</span>
)} )}
{assertionCount !== undefined && (
<span className="text-xs text-muted-foreground">
{assertionCount.toLocaleString()} assertions
</span>
)}
<Link
href="/"
className="text-xs text-blue-600 dark:text-blue-400 hover:underline"
>
View Feed
</Link>
</div> </div>
{source.url && ( {source.url && (

View File

@ -35,6 +35,9 @@ export function SourcesPanel() {
const [impact, setImpact] = useState<SourceImpactResponse | null>(null); const [impact, setImpact] = useState<SourceImpactResponse | null>(null);
const [isLoadingImpact, setIsLoadingImpact] = useState(false); const [isLoadingImpact, setIsLoadingImpact] = useState(false);
const [isProcessing, setIsProcessing] = useState(false); const [isProcessing, setIsProcessing] = useState(false);
const [assertionCounts, setAssertionCounts] = useState<Map<string, number>>(
new Map()
);
const fetchData = useCallback(async () => { const fetchData = useCallback(async () => {
setState({ status: "loading" }); setState({ status: "loading" });
@ -66,6 +69,31 @@ export function SourcesPanel() {
fetchData(); fetchData();
}, [fetchData]); }, [fetchData]);
// Lazily fetch assertion counts for all sources after list loads.
// One impact request is issued per source; a failure for one source only
// leaves that source without a count.
useEffect(() => {
  if (state.status !== "success" || state.data.sources.length === 0) {
    return undefined;
  }

  // Set by the cleanup when `state` changes (e.g. a refetch), so counts
  // computed for a superseded list are not applied.
  let cancelled = false;
  const sources = state.data.sources;
  const client = new StemeDBClient();

  const fetches = sources.map((source) =>
    client
      .getSourceImpact(source.hash)
      .then((data) => ({ hash: source.hash, count: data.assertion_count }))
      .catch(() => null)
  );

  Promise.allSettled(fetches).then((results) => {
    if (cancelled) return;
    const counts = new Map<string, number>();
    for (const result of results) {
      if (result.status === "fulfilled" && result.value !== null) {
        counts.set(result.value.hash, result.value.count);
      }
    }
    setAssertionCounts(counts);
  });

  return () => {
    cancelled = true;
  };
}, [state]);
// Fetch impact when block dialog opens // Fetch impact when block dialog opens
useEffect(() => { useEffect(() => {
if (dialogState.type === "block") { if (dialogState.type === "block") {
@ -221,6 +249,7 @@ export function SourcesPanel() {
<SourceRow <SourceRow
key={source.hash} key={source.hash}
source={source} source={source}
assertionCount={assertionCounts.get(source.hash)}
onViewImpact={handleViewImpact} onViewImpact={handleViewImpact}
onBlock={handleBlock} onBlock={handleBlock}
onRestore={handleRestore} onRestore={handleRestore}

View File

@ -7,6 +7,7 @@ import {
type CircuitBreakerResponse, type CircuitBreakerResponse,
type AuditResponse, type AuditResponse,
type ListSourcesResponse, type ListSourcesResponse,
type SourceRecordDto,
type SourceImpactResponse, type SourceImpactResponse,
type QuarantineSourceResponse, type QuarantineSourceResponse,
type RestoreSourceResponse, type RestoreSourceResponse,
@ -15,6 +16,8 @@ import {
type ScanResponse, type ScanResponse,
type ListScansResponse, type ListScansResponse,
type FeedResponse, type FeedResponse,
type ListSubjectsResponse,
type ListPredicatesResponse,
type ListClaimsRequest, type ListClaimsRequest,
type ListClaimsResponse, type ListClaimsResponse,
type CreateClaimRequest, type CreateClaimRequest,
@ -76,6 +79,18 @@ export class StemeDBClient {
return this.fetch<FeedResponse>(`/v1/feed?${params}`); return this.fetch<FeedResponse>(`/v1/feed?${params}`);
} }
/**
 * Query `/v1/subjects` for subject identifiers.
 *
 * @param q Optional search text; sent as the `q` query parameter only when
 *          non-empty.
 * @param limit Value sent as the `limit` query parameter (default 100).
 */
async listSubjects(q?: string, limit = 100): Promise<ListSubjectsResponse> {
  const search = new URLSearchParams();
  search.set("limit", String(limit));
  if (q) {
    search.set("q", q);
  }
  return this.fetch<ListSubjectsResponse>(`/v1/subjects?${search}`);
}
/**
 * Query `/v1/subjects/{subject}/predicates` for one subject.
 *
 * @param subject Subject identifier; URI-encoded before being placed in the
 *                path.
 */
async listPredicates(subject: string): Promise<ListPredicatesResponse> {
  const encodedSubject = encodeURIComponent(subject);
  const path = `/v1/subjects/${encodedSubject}/predicates`;
  return this.fetch<ListPredicatesResponse>(path);
}
async health(): Promise<HealthResponse> { async health(): Promise<HealthResponse> {
return this.fetch<HealthResponse>("/health"); return this.fetch<HealthResponse>("/health");
} }
@ -160,6 +175,10 @@ export class StemeDBClient {
return this.fetch<ListSourcesResponse>(`/v1/sources?${params}`); return this.fetch<ListSourcesResponse>(`/v1/sources?${params}`);
} }
/**
 * Fetch a single source record from `/v1/sources/{hash}`.
 *
 * @param hash Source hash; URI-encoded before being placed in the path.
 */
async getSource(hash: string): Promise<SourceRecordDto> {
  const encodedHash = encodeURIComponent(hash);
  return this.fetch<SourceRecordDto>(`/v1/sources/${encodedHash}`);
}
async getSourceImpact(hash: string): Promise<SourceImpactResponse> { async getSourceImpact(hash: string): Promise<SourceImpactResponse> {
return this.fetch<SourceImpactResponse>(`/v1/sources/${hash}/impact`); return this.fetch<SourceImpactResponse>(`/v1/sources/${hash}/impact`);
} }

View File

@ -62,6 +62,7 @@ export interface AssertionObject {
timestamp: number; timestamp: number;
version: number; version: number;
}>; }>;
narrative?: string;
} }
export interface LayeredTier { export interface LayeredTier {
@ -209,6 +210,7 @@ export interface SourceRecordDto {
status: "active" | "deprecated" | "quarantined"; status: "active" | "deprecated" | "quarantined";
url?: string; url?: string;
notes?: string; notes?: string;
content?: string;
created_at: number; created_at: number;
updated_at: number; updated_at: number;
} }
@ -347,6 +349,17 @@ export interface FeedResponse {
has_more: boolean; has_more: boolean;
} }
// Discovery types (subject/predicate autocomplete)
/** Response body of `GET /v1/subjects` (see `StemeDBClient.listSubjects`). */
export interface ListSubjectsResponse {
/** Matching subject strings. */
subjects: string[];
/** Number of subjects matching the filter; per the server DTO docs this is counted before `limit` is applied. */
total_count: number;
}
/** Response body of `GET /v1/subjects/{subject}/predicates` (see `StemeDBClient.listPredicates`). */
export interface ListPredicatesResponse {
/** The subject the predicates belong to. */
subject: string;
/** Predicate strings for this subject. */
predicates: string[];
}
export class ApiError extends Error { export class ApiError extends Error {
public userMessage: string; public userMessage: string;

File diff suppressed because one or more lines are too long

View File

@ -95,10 +95,8 @@ impl AdminClient {
} }
// Gateway returns different format than /admin/ranges, so convert it // Gateway returns different format than /admin/ranges, so convert it
let shard_response: ShardInfoResponse = response let shard_response: ShardInfoResponse =
.json() response.json().await.context("Failed to parse shard info response")?;
.await
.context("Failed to parse shard info response")?;
Ok(shard_response.into()) Ok(shard_response.into())
} }
@ -125,10 +123,8 @@ impl AdminClient {
} }
// Gateway returns {"ranges": [...]} so we need to unwrap it // Gateway returns {"ranges": [...]} so we need to unwrap it
let wrapper: RangesWrapper = response let wrapper: RangesWrapper =
.json() response.json().await.context("Failed to parse ranges response")?;
.await
.context("Failed to parse ranges response")?;
Ok(wrapper.ranges) Ok(wrapper.ranges)
} }

View File

@ -132,6 +132,14 @@ pub struct CreateAssertionRequest {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>, pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
///
/// Makes the assertion self-contained: pick it up, read it, understand the
/// full claim without dereferencing anything. Max 64 KB.
#[serde(skip_serializing_if = "Option::is_none")]
#[schema(example = "Based on STEP 1 trial (n=1961). Limitation: 68-week duration only.")]
pub narrative: Option<String>,
/// Unix timestamp when the assertion was created. /// Unix timestamp when the assertion was created.
/// If not provided, defaults to the current time. /// If not provided, defaults to the current time.
/// **Important for v2 signatures:** Provide this field to preserve the /// **Important for v2 signatures:** Provide this field to preserve the

View File

@ -29,6 +29,7 @@ pub mod responses;
pub mod skeptic; pub mod skeptic;
pub mod source_registry; pub mod source_registry;
pub mod stemedb_claims; pub mod stemedb_claims;
pub mod subjects;
// Re-export all public types for backward compatibility // Re-export all public types for backward compatibility
// This allows existing code to use `use crate::dto::*;` without changes // This allows existing code to use `use crate::dto::*;` without changes
@ -51,7 +52,7 @@ pub use query_params::{FeedParams, QueryParams};
// From responses module // From responses module
pub use responses::{ pub use responses::{
AssertionResponse, ChangeEntryDto, ErrorResponse, HealthResponse, LayeredQueryResponse, AssertionResponse, ChangeEntryDto, ErrorResponse, HealthResponse, LayeredQueryResponse,
ProvenanceResponse, QueryResponse, SourceWarningDto, TierResolutionDto, ProvenanceResponse, QueryResponse, RebuildIndexesResponse, SourceWarningDto, TierResolutionDto,
}; };
// From audit module // From audit module
@ -131,4 +132,9 @@ pub use aphoria::{
}; };
// From stemedb_claims module // From stemedb_claims module
pub use stemedb_claims::{AuthoredClaimDto, AuthoredValueDto, CreateClaimRequest, CreateClaimResponse}; pub use stemedb_claims::{
AuthoredClaimDto, AuthoredValueDto, CreateClaimRequest, CreateClaimResponse,
};
// From subjects module
pub use subjects::{ListPredicatesResponse, ListSubjectsParams, ListSubjectsResponse};

View File

@ -88,6 +88,10 @@ pub struct AssertionResponse {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>, pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
#[serde(skip_serializing_if = "Option::is_none")]
pub narrative: Option<String>,
/// Warning if this assertion cites a quarantined or deprecated source. /// Warning if this assertion cites a quarantined or deprecated source.
/// ///
/// Present when the assertion's source has a non-Active status in the /// Present when the assertion's source has a non-Active status in the
@ -217,6 +221,30 @@ pub struct TierResolutionDto {
pub resolution_confidence: f32, pub resolution_confidence: f32,
} }
/// Response from the admin rebuild-indexes endpoint
/// (`POST /v1/admin/rebuild-indexes`).
///
/// Reports how many assertion indexes were rebuilt, how many keys were
/// skipped, and how long the operation took.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct RebuildIndexesResponse {
/// Number of assertions whose indexes were rebuilt.
pub rebuilt_count: u64,
/// Number of keys that were skipped.
///
/// The handler skips a key when it cannot parse the subject or the `H:`
/// hash tag out of the key, decode the hash, deserialize the assertion,
/// or write the primary subject indexes.
pub skipped_count: u64,
/// Wall-clock time for the operation in milliseconds.
pub elapsed_ms: u64,
/// Human-readable status message summarising the rebuilt count, the
/// skipped count and the elapsed time.
pub status: String,
/// First error encountered (for diagnostics). Absent when all succeed;
/// the field is omitted from the JSON when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub first_error: Option<String>,
}
/// Response from a LayeredConsensus query. /// Response from a LayeredConsensus query.
/// ///
/// Provides per-tier resolution results plus an overall winner. /// Provides per-tier resolution results plus an overall winner.

View File

@ -31,6 +31,10 @@ pub struct RegisterSourceRequest {
/// Optional curator notes about the source. /// Optional curator notes about the source.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub notes: Option<String>, pub notes: Option<String>,
/// Optional full-text content of the source document.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
} }
/// Response from registering a source. /// Response from registering a source.
@ -78,6 +82,10 @@ pub struct SourceRecordDto {
/// Optional curator notes. /// Optional curator notes.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub notes: Option<String>, pub notes: Option<String>,
/// Optional full-text content of the source document.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
} }
impl From<SourceRecord> for SourceRecordDto { impl From<SourceRecord> for SourceRecordDto {
@ -92,6 +100,7 @@ impl From<SourceRecord> for SourceRecordDto {
created_at: record.created_at, created_at: record.created_at,
updated_at: record.updated_at, updated_at: record.updated_at,
notes: record.notes.clone(), notes: record.notes.clone(),
content: record.content.clone(),
} }
} }
} }

View File

@ -0,0 +1,34 @@
//! DTOs for subject and predicate discovery endpoints.
use serde::{Deserialize, Serialize};
use utoipa::{IntoParams, ToSchema};
/// Query parameters for `GET /v1/subjects`.
///
/// Both fields are optional; the dashboard client always sends `limit` and
/// sends `q` only when the user has typed something.
#[derive(Debug, Deserialize, IntoParams)]
pub struct ListSubjectsParams {
/// Optional prefix filter for subject names.
#[param(example = "sema")]
pub q: Option<String>,
/// Maximum number of subjects to return (default 100, max 1000).
///
/// NOTE(review): the default and maximum are applied by the handler,
/// which is not visible here — confirm the numbers match.
#[param(example = 100)]
pub limit: Option<usize>,
}
/// Response for `GET /v1/subjects`.
///
/// Serialize-only: the server produces this type and never parses it.
#[derive(Debug, Serialize, ToSchema)]
pub struct ListSubjectsResponse {
/// List of matching subject strings.
pub subjects: Vec<String>,
/// Total number of subjects matching the filter (before limit), so it
/// can be larger than `subjects.len()`.
pub total_count: usize,
}
/// Response for `GET /v1/subjects/:subject/predicates`.
///
/// Serialize-only: the server produces this type and never parses it.
#[derive(Debug, Serialize, ToSchema)]
pub struct ListPredicatesResponse {
/// The subject these predicates belong to.
pub subject: String,
/// List of predicate strings for this subject.
pub predicates: Vec<String>,
}

View File

@ -1,14 +1,14 @@
//! Admin handlers for maintenance operations. //! Admin handlers for maintenance operations.
use axum::{extract::State, Json}; use axum::{extract::State, Json};
use tracing::instrument; use tracing::{info, instrument, warn};
use crate::{ use crate::{
dto::{DecayTrustRanksRequest, DecayTrustRanksResponse}, dto::{DecayTrustRanksRequest, DecayTrustRanksResponse, RebuildIndexesResponse},
error::Result, error::Result,
state::AppState, state::AppState,
}; };
use stemedb_storage::{GenericTrustRankStore, TrustRankStore}; use stemedb_storage::{GenericIndexStore, GenericTrustRankStore, IndexStore, KVStore, TrustRankStore, key_codec};
/// Default half-life for trust rank decay (30 days in seconds). /// Default half-life for trust rank decay (30 days in seconds).
const DEFAULT_HALF_LIFE_SECONDS: u64 = 30 * 24 * 60 * 60; const DEFAULT_HALF_LIFE_SECONDS: u64 = 30 * 24 * 60 * 60;
@ -68,3 +68,215 @@ pub async fn decay_trust_ranks(
status: "Decay operation completed".to_string(), status: "Decay operation completed".to_string(),
})) }))
} }
/// Rebuild secondary indexes (Redb) from assertion data (Fjall).
///
/// This is a repair operation for when Redb indexes are missing or stale
/// while Fjall assertion data is intact. It scans all assertion data from
/// Fjall and reconstructs the S:, SP:, SUBJECTS:, HASH_SUBJECT:, and SRC:
/// indexes in Redb, then corrects the META:assertion_count.
///
/// This endpoint is idempotent — running it multiple times is safe because
/// the index store uses append-with-dedup semantics.
#[utoipa::path(
post,
path = "/v1/admin/rebuild-indexes",
responses(
(status = 200, description = "Index rebuild completed", body = RebuildIndexesResponse),
(status = 500, description = "Internal server error", body = crate::dto::ErrorResponse),
),
tag = "admin"
)]
#[instrument(skip(state))]
pub async fn rebuild_indexes(
State(state): State<AppState>,
) -> Result<Json<RebuildIndexesResponse>> {
let start = std::time::Instant::now();
metrics::counter!("stemedb_http_requests_total", "method" => "POST", "path" => "/v1/admin/rebuild-indexes").increment(1);
info!("Starting index rebuild: scanning Fjall for all assertions");
// Capture current time once for FEED index fallback (timestamp:0 assertions)
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
// Scan all assertion key-value pairs from Fjall
let assertion_kvs = state.store.scan_fjall_assertions().await?;
let total_keys = assertion_kvs.len();
info!(total_keys, "Found assertion keys in Fjall");
// Create an IndexStore backed by the same HybridStore
let index_store = GenericIndexStore::new(state.store.clone());
let mut rebuilt_count: u64 = 0;
let mut skipped_count: u64 = 0;
let mut first_error: Option<String> = None;
for (key, value) in &assertion_kvs {
// Extract subject from key
let subject = match key_codec::extract_subject(key) {
Some(s) => s.to_string(),
None => {
let msg = format!(
"extract_subject failed: key_len={}, first_bytes={:?}",
key.len(),
&key[..key.len().min(40)]
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Extract hash_hex from tag (tag is "H:{hash_hex}")
let tag = key_codec::extract_tag(key);
let hash_hex = match tag.strip_prefix(b"H:") {
Some(hex_bytes) => match std::str::from_utf8(hex_bytes) {
Ok(s) => s.to_string(),
Err(e) => {
let msg = format!("hash_hex UTF-8 error for subject={subject}: {e}");
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
},
None => {
let msg = format!(
"tag strip_prefix H: failed for subject={subject}: tag={:?}",
String::from_utf8_lossy(tag)
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Deserialize the assertion to get predicate and source_hash.
// Uses compat deserialization to handle legacy data (pre-narrative schema).
let assertion: stemedb_core::types::Assertion =
match stemedb_core::serde::deserialize_assertion_compat(value) {
Ok(a) => a,
Err(e) => {
let msg = format!(
"deserialize failed for subject={subject} hash={hash_hex}: {e} (value_len={})",
value.len()
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Decode assertion hash from hex
let hash_bytes: [u8; 32] = match hex::decode(&hash_hex) {
Ok(bytes) if bytes.len() == 32 => {
let mut arr = [0u8; 32];
arr.copy_from_slice(&bytes);
arr
}
Ok(bytes) => {
let msg = format!(
"hash decode wrong length for subject={subject} hash={hash_hex}: got {} bytes",
bytes.len()
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
Err(e) => {
let msg = format!(
"hex decode failed for subject={subject} hash={hash_hex}: {e}"
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Rebuild S: and SP: indexes (includes SUBJECTS: discovery index)
if let Err(e) =
index_store.add_to_indexes(&subject, &assertion.predicate, &hash_bytes).await
{
warn!(%subject, %hash_hex, error = %e, "Failed to add to indexes");
skipped_count += 1;
continue;
}
// Rebuild HASH_SUBJECT: reverse index
let hs_key = key_codec::hash_subject_key(&hash_hex);
if let Err(e) = state.store.put(&hs_key, subject.as_bytes()).await {
warn!(%subject, %hash_hex, error = %e, "Failed to write hash_subject index");
}
// Rebuild SRC: source index
if let Err(e) =
index_store.add_to_source_index(&assertion.source_hash, &hash_bytes).await
{
warn!(%subject, %hash_hex, error = %e, "Failed to add to source index");
}
// Rebuild FEED index: use assertion.timestamp as best-available proxy
// for ingestion time. Fall back to current time for timestamp:0 assertions.
let feed_ts = if assertion.timestamp > 0 { assertion.timestamp } else { now };
let feed_idx_key = key_codec::feed_key(feed_ts, &hash_hex);
if let Err(e) = state.store.put(&feed_idx_key, subject.as_bytes()).await {
warn!(%subject, %hash_hex, error = %e, "Failed to write feed index");
}
rebuilt_count += 1;
}
// Correct the assertion count: total = rebuilt + skipped (both are real assertions).
// The count key stores a u64 in little-endian format.
let total_assertions = rebuilt_count + skipped_count;
let count_key = key_codec::assertion_count_key();
let count_bytes = total_assertions.to_le_bytes();
state.store.put(&count_key, &count_bytes).await?;
let elapsed_ms = start.elapsed().as_millis() as u64;
info!(
rebuilt_count,
skipped_count,
elapsed_ms,
"Index rebuild complete"
);
metrics::histogram!("stemedb_http_request_duration_seconds",
"method" => "POST",
"path" => "/v1/admin/rebuild-indexes",
"status" => "200"
)
.record(start.elapsed().as_secs_f64());
Ok(Json(RebuildIndexesResponse {
rebuilt_count,
skipped_count,
elapsed_ms,
status: format!(
"Rebuilt indexes for {} assertions ({} skipped) in {}ms",
rebuilt_count, skipped_count, elapsed_ms
),
first_error,
}))
}

View File

@ -143,6 +143,7 @@ pub fn observation_dto_to_assertion(
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata, source_metadata,
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures, signatures,
confidence: dto.confidence, confidence: dto.confidence,

View File

@ -10,6 +10,7 @@ use crate::{
state::AppState, state::AppState,
}; };
use stemedb_core::limits::MAX_NARRATIVE_LEN;
use stemedb_core::types::{ use stemedb_core::types::{
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass, Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
}; };
@ -44,6 +45,18 @@ pub async fn create_assertion(
// Convert DTO to internal Assertion type // Convert DTO to internal Assertion type
let assertion = dto_to_assertion(req)?; let assertion = dto_to_assertion(req)?;
// Verify Ed25519 signatures BEFORE writing to WAL.
// This prevents poison records that would permanently block the IngestWorker.
stemedb_core::signing::verify_assertion_signatures(&assertion).map_err(|e| {
metrics::counter!("stemedb_assertions_rejected_total", "reason" => "invalid_signature")
.increment(1);
ApiError::InvalidRequest(format!("Signature verification failed: {}", e))
})?;
// Validate subject does not contain null byte separator (mirrors IngestWorker check)
stemedb_storage::key_codec::validate_subject(&assertion.subject)
.map_err(|e| ApiError::InvalidRequest(format!("Invalid subject: {}", e)))?;
// Serialize to WAL format (includes record type header) // Serialize to WAL format (includes record type header)
let payload = serialize_assertion(&assertion) let payload = serialize_assertion(&assertion)
.map_err(|e| ApiError::Serialization(format!("Failed to serialize assertion: {}", e)))?; .map_err(|e| ApiError::Serialization(format!("Failed to serialize assertion: {}", e)))?;
@ -93,14 +106,33 @@ fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
return Err(ApiError::InvalidRequest("At least one signature is required".to_string())); return Err(ApiError::InvalidRequest("At least one signature is required".to_string()));
} }
// Validate narrative length
if let Some(ref narrative) = req.narrative {
if narrative.len() > MAX_NARRATIVE_LEN {
return Err(ApiError::InvalidRequest(format!(
"narrative exceeds {} bytes (got {})",
MAX_NARRATIVE_LEN,
narrative.len()
)));
}
}
// Use provided timestamp or generate a new one // Use provided timestamp or generate a new one
// IMPORTANT: For v2 signatures, the timestamp must match what was signed // IMPORTANT: For v2 signatures, the timestamp must match what was signed
let timestamp = req.timestamp.unwrap_or_else(|| { let timestamp = match req.timestamp {
std::time::SystemTime::now() Some(0) => {
return Err(ApiError::InvalidRequest(
"timestamp must be a valid Unix epoch (> 0). \
Omit the field to use server time."
.to_string(),
));
}
Some(t) => t,
None => std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH) .duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs()) .map(|d| d.as_secs())
.unwrap_or(0) .unwrap_or(0),
}); };
// Use provided HLC timestamp or default // Use provided HLC timestamp or default
// IMPORTANT: For v2 signatures, the HLC timestamp must match what was signed // IMPORTANT: For v2 signatures, the HLC timestamp must match what was signed
@ -122,6 +154,7 @@ fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
visual_hash, visual_hash,
epoch, epoch,
source_metadata: req.source_metadata.map(|s| s.into_bytes()), source_metadata: req.source_metadata.map(|s| s.into_bytes()),
narrative: req.narrative,
lifecycle: req.lifecycle.map(Into::into).unwrap_or(LifecycleStage::Proposed), lifecycle: req.lifecycle.map(Into::into).unwrap_or(LifecycleStage::Proposed),
signatures, signatures,
confidence: req.confidence, confidence: req.confidence,

View File

@ -1,4 +1,7 @@
//! Handler for the `/v1/feed` endpoint (newest-first assertion browsing). //! Handler for the `/v1/feed` endpoint (newest-first assertion browsing).
//!
//! Uses a dedicated FEED index (`\x00FEED:{inverted_ts}:{hash_hex}`) for
//! O(page_size) reads instead of loading all assertions into memory.
use axum::{extract::State, Json}; use axum::{extract::State, Json};
use tracing::{instrument, warn}; use tracing::{instrument, warn};
@ -10,15 +13,16 @@ use crate::{
state::AppState, state::AppState,
}; };
use stemedb_query::Query; use stemedb_core::serde::deserialize_assertion_compat;
use stemedb_storage::{KVStore, key_codec};
use super::query::assertion_to_dto_with_warning; use super::query::assertion_to_dto_with_warning;
/// Browse all assertions in newest-first order with pagination. /// Browse all assertions in newest-first order with pagination.
/// ///
/// Returns assertions sorted by timestamp descending, useful for /// Returns assertions ordered by ingestion time descending (when the system
/// "what was just written?" dashboards and dev workflows. No lens /// received each assertion), useful for "what was just written?" dashboards
/// resolution is applied — this is a raw chronological feed. /// and dev workflows. No lens resolution is applied — this is a raw feed.
/// ///
/// # Pagination /// # Pagination
/// ///
@ -45,36 +49,57 @@ pub async fn feed(
metrics::counter!("stemedb_queries_total", "endpoint" => "feed").increment(1); metrics::counter!("stemedb_queries_total", "endpoint" => "feed").increment(1);
let query_start = std::time::Instant::now(); let query_start = std::time::Instant::now();
// Fetch all assertions (no subject filter) // Scan the FEED index — keys are in newest-first order (inverted timestamp).
let query = Query::builder().limit(usize::MAX).build(); let feed_prefix = key_codec::feed_scan_prefix();
let query_engine = state.query_engine(); let entries = state.store.scan_prefix(&feed_prefix).await?;
let result = query_engine.execute(&query).await?;
let mut assertions = result.assertions; let total_count = entries.len();
if assertions.len() > 10_000 {
warn!(
count = assertions.len(),
"Feed scanning large assertion set; consider adding index-backed pagination"
);
}
// Sort by timestamp descending (newest first)
assertions.sort_unstable_by(|a, b| b.timestamp.cmp(&a.timestamp));
let total_count = assertions.len();
let limit = params.clamped_limit(); let limit = params.clamped_limit();
let offset = params.offset; let offset = params.offset;
let has_more = offset + limit < total_count; let has_more = offset + limit < total_count;
// Apply offset + limit pagination // Paginate the index entries (cheap — no assertion data loaded yet)
let page: Vec<_> = assertions.into_iter().skip(offset).take(limit).collect(); let page_entries: Vec<_> = entries.into_iter().skip(offset).take(limit).collect();
// Convert to DTOs (no source enrichment for speed) // Fetch actual assertion data only for the current page
let assertion_responses = page let mut assertion_responses = Vec::with_capacity(page_entries.len());
.into_iter() for (key, value) in &page_entries {
.map(|a| assertion_to_dto_with_warning(a, None)) let hash_hex = match extract_hash_hex_from_feed_key(key) {
.collect::<Result<Vec<_>>>()?; Some(h) => h,
None => {
warn!(key_len = key.len(), "Malformed FEED index key, skipping");
continue;
}
};
let subject = match std::str::from_utf8(value) {
Ok(s) => s,
Err(e) => {
warn!(error = %e, "Invalid UTF-8 in FEED index value, skipping");
continue;
}
};
let assertion_data_key = key_codec::assertion_key(subject, hash_hex);
let data = match state.store.get(&assertion_data_key).await? {
Some(d) => d,
None => {
warn!(%hash_hex, %subject, "FEED index references missing assertion data, skipping");
continue;
}
};
match deserialize_assertion_compat(&data) {
Ok(a) => match assertion_to_dto_with_warning(a, None) {
Ok(dto) => assertion_responses.push(dto),
Err(e) => {
warn!(%hash_hex, error = %e, "Failed to convert assertion to DTO, skipping");
}
},
Err(e) => {
warn!(%hash_hex, error = %e, "Failed to deserialize assertion, skipping");
}
}
}
metrics::histogram!("stemedb_query_latency_seconds", "endpoint" => "feed") metrics::histogram!("stemedb_query_latency_seconds", "endpoint" => "feed")
.record(query_start.elapsed().as_secs_f64()); .record(query_start.elapsed().as_secs_f64());
@ -88,3 +113,16 @@ pub async fn feed(
changes_since: None, changes_since: None,
})) }))
} }
/// Return the trailing hash portion of a FEED index key as a string slice.
///
/// Expected key layout: `\x00FEED:{16 hex chars (inverted ts)}:{hash hex}`.
/// The fixed-width head is 6 bytes of prefix (`\x00FEED:`), 16 bytes of
/// inverted timestamp and 1 separator byte, so the hash starts at byte 23.
///
/// Returns `None` when the key has nothing after the fixed-width head or
/// when the remainder is not valid UTF-8. The head itself is not inspected.
fn extract_hash_hex_from_feed_key(key: &[u8]) -> Option<&str> {
    const PREFIX_LEN: usize = 6; // `\x00FEED:`
    const INVERTED_TS_LEN: usize = 16; // hex-encoded inverted timestamp
    const SEPARATOR_LEN: usize = 1; // `:`
    const HASH_OFFSET: usize = PREFIX_LEN + INVERTED_TS_LEN + SEPARATOR_LEN;

    key.get(HASH_OFFSET..)
        .filter(|tail| !tail.is_empty())
        .and_then(|tail| std::str::from_utf8(tail).ok())
}

View File

@ -178,6 +178,7 @@ fn assertion_to_dto(assertion: stemedb_core::types::Assertion) -> Result<Asserti
timestamp: assertion.timestamp, timestamp: assertion.timestamp,
vector: assertion.vector, vector: assertion.vector,
source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()), source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()),
narrative: assertion.narrative,
source_warning: None, // LayeredConsensus doesn't do source status enrichment source_warning: None, // LayeredConsensus doesn't do source status enrichment
}) })
} }

View File

@ -28,8 +28,8 @@ pub mod circuit_breaker;
pub mod concepts; pub mod concepts;
pub mod constraints; pub mod constraints;
pub mod epoch; pub mod epoch;
pub mod feed;
pub mod escalation; pub mod escalation;
pub mod feed;
pub mod gold_standard; pub mod gold_standard;
pub mod health; pub mod health;
pub mod layered; pub mod layered;
@ -37,15 +37,17 @@ pub mod meter;
pub mod metrics; pub mod metrics;
pub mod quarantine; pub mod quarantine;
pub mod query; pub mod query;
pub mod rejected;
pub mod skeptic; pub mod skeptic;
pub mod source; pub mod source;
pub mod source_registry; pub mod source_registry;
pub mod stemedb_claims; pub mod stemedb_claims;
pub mod subjects;
pub mod supersede; pub mod supersede;
pub mod trace; pub mod trace;
pub mod vote; pub mod vote;
pub use admin::decay_trust_ranks; pub use admin::{decay_trust_ranks, rebuild_indexes};
pub use admission::get_admission_status; pub use admission::get_admission_status;
pub use api_keys::{create_api_key, list_api_keys, revoke_api_key, rotate_api_key, update_api_key}; pub use api_keys::{create_api_key, list_api_keys, revoke_api_key, rotate_api_key, update_api_key};
pub use assert::create_assertion; pub use assert::create_assertion;
@ -53,8 +55,8 @@ pub use audit::{get_audit, list_audits};
pub use circuit_breaker::{get_circuit_status, list_tripped_circuits, reset_circuit}; pub use circuit_breaker::{get_circuit_status, list_tripped_circuits, reset_circuit};
pub use constraints::constraints_query; pub use constraints::constraints_query;
pub use epoch::create_epoch; pub use epoch::create_epoch;
pub use feed::feed;
pub use escalation::{list_escalations, resolve_escalation}; pub use escalation::{list_escalations, resolve_escalation};
pub use feed::feed;
pub use gold_standard::{ pub use gold_standard::{
create_gold_standard, list_gold_standards, remove_gold_standard, verify_agent, create_gold_standard, list_gold_standards, remove_gold_standard, verify_agent,
}; };
@ -63,6 +65,7 @@ pub use layered::layered_query;
pub use meter::{get_quota_status, set_quota_limit}; pub use meter::{get_quota_status, set_quota_limit};
pub use quarantine::{approve_quarantine, get_quarantine, list_quarantine, reject_quarantine}; pub use quarantine::{approve_quarantine, get_quarantine, list_quarantine, reject_quarantine};
pub use query::query_assertions; pub use query::query_assertions;
pub use rejected::list_rejected;
pub use skeptic::skeptic_query; pub use skeptic::skeptic_query;
pub use source::{get_provenance, store_source}; pub use source::{get_provenance, store_source};
pub use source_registry::{ pub use source_registry::{
@ -89,3 +92,4 @@ pub use stemedb_claims::{
create_claim as create_stemedb_claim, delete_claim as delete_stemedb_claim, create_claim as create_stemedb_claim, delete_claim as delete_stemedb_claim,
get_claim as get_stemedb_claim, list_claims as list_stemedb_claims, get_claim as get_stemedb_claim, list_claims as list_stemedb_claims,
}; };
pub use subjects::{list_predicates, list_subjects};

View File

@ -490,6 +490,7 @@ pub(crate) fn assertion_to_dto_with_warning(
timestamp: assertion.timestamp, timestamp: assertion.timestamp,
vector: assertion.vector, vector: assertion.vector,
source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()), source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()),
narrative: assertion.narrative,
source_warning, source_warning,
}) })
} }

View File

@ -0,0 +1,89 @@
//! Admin endpoint for listing WAL records permanently rejected by the IngestWorker.
//!
//! These records passed API-level validation but were skipped during WAL replay
//! due to permanent failures (invalid signatures, corrupt serialization, etc.).
//! With the API-side signature verification fix, new rejected records should be rare.
use axum::{extract::State, Json};
use serde::{Deserialize, Serialize};
use stemedb_storage::{key_codec, KVStore};
use tracing::instrument;
use utoipa::ToSchema;
use crate::{dto::ErrorResponse, state::AppState};
/// Query parameters for listing rejected records
/// (`GET /v1/admin/rejected`).
#[derive(Debug, Deserialize)]
pub struct RejectedParams {
/// Maximum number of records to return (default: 100).
///
/// NOTE(review): the handler applies no upper bound to this value —
/// confirm whether a cap is wanted.
pub limit: Option<usize>,
}
/// A WAL record that was permanently skipped by the IngestWorker.
///
/// The list handler parses each stored rejected-record value as JSON into
/// this type, so the field names here must match the stored JSON.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct RejectedRecordDto {
/// WAL offset where the record was found.
pub offset: u64,
/// The record type (Assertion, Vote, Epoch).
pub record_type: String,
/// Why the record was rejected.
pub reason: String,
/// When the record was skipped (Unix timestamp).
pub timestamp: u64,
}
/// Response listing rejected WAL records.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct RejectedRecordsResponse {
/// List of rejected records.
pub rejected: Vec<RejectedRecordDto>,
/// Number of records in `rejected`.
///
/// The handler sets this to the length of the returned list, i.e. after
/// the `limit` has been applied and unparseable entries dropped; it is
/// not the total number of rejected records in the store.
pub count: usize,
}
/// GET /v1/admin/rejected
///
/// List WAL records that were permanently rejected by the IngestWorker.
///
/// Scans every key under the rejected-records prefix, parses each value as a
/// JSON [`RejectedRecordDto`], and returns up to `limit` parsed records
/// (default 100). Values that fail to parse are logged and skipped; they do
/// not count against `limit`, so a run of corrupt entries cannot hide valid
/// ones behind it.
///
/// `count` in the response is the number of records returned, not the total
/// number stored.
///
/// # Errors
///
/// Responds with `500` and code `REJECTED_SCAN_ERROR` when the prefix scan
/// fails.
#[utoipa::path(
    get,
    path = "/v1/admin/rejected",
    params(
        ("limit" = Option<usize>, Query, description = "Maximum records to return (default: 100)")
    ),
    responses(
        (status = 200, description = "Rejected records listed", body = RejectedRecordsResponse),
        (status = 500, description = "Internal server error", body = ErrorResponse)
    ),
    tag = "admin"
)]
#[instrument(skip(state))]
pub async fn list_rejected(
    State(state): State<AppState>,
    axum::extract::Query(params): axum::extract::Query<RejectedParams>,
) -> std::result::Result<Json<RejectedRecordsResponse>, (axum::http::StatusCode, Json<ErrorResponse>)>
{
    let limit = params.limit.unwrap_or(100);

    let prefix = key_codec::rejected_records_scan_prefix();
    let entries = state.store.scan_prefix(&prefix).await.map_err(|e| {
        tracing::error!(error = %e, "Failed to scan rejected records");
        (
            axum::http::StatusCode::INTERNAL_SERVER_ERROR,
            Json(ErrorResponse {
                error: "Failed to retrieve rejected records".to_string(),
                code: "REJECTED_SCAN_ERROR".to_string(),
            }),
        )
    })?;

    // Parse first, then apply the limit: the iterator is lazy, so parsing
    // stops as soon as `limit` valid records have been collected.
    let rejected: Vec<RejectedRecordDto> = entries
        .into_iter()
        .filter_map(|(_key, value)| {
            // Lossy decoding keeps a record readable even if it contains a
            // stray invalid byte.
            let json_str = String::from_utf8_lossy(&value);
            match serde_json::from_str::<RejectedRecordDto>(&json_str) {
                Ok(dto) => Some(dto),
                Err(e) => {
                    tracing::warn!(
                        error = %e,
                        value_len = value.len(),
                        "Skipping unparseable rejected record"
                    );
                    None
                }
            }
        })
        .take(limit)
        .collect();

    let count = rejected.len();
    Ok(Json(RejectedRecordsResponse { rejected, count }))
}

View File

@ -7,6 +7,7 @@ use axum::{
response::{IntoResponse, Response}, response::{IntoResponse, Response},
Json, Json,
}; };
use stemedb_core::limits::MAX_SOURCE_CONTENT_LEN;
use stemedb_core::types::{SourceRecord, SourceStatus}; use stemedb_core::types::{SourceRecord, SourceStatus};
use stemedb_storage::{GenericIndexStore, GenericSourceRegistry, IndexStore, SourceRegistry}; use stemedb_storage::{GenericIndexStore, GenericSourceRegistry, IndexStore, SourceRegistry};
use tracing::instrument; use tracing::instrument;
@ -56,12 +57,24 @@ pub async fn register_source(
return Err(ApiError::InvalidRequest("Label cannot be empty".to_string())); return Err(ApiError::InvalidRequest("Label cannot be empty".to_string()));
} }
// Validate content size
if let Some(ref content) = req.content {
if content.len() > MAX_SOURCE_CONTENT_LEN {
return Err(ApiError::InvalidRequest(format!(
"Content too large: {} bytes (max {})",
content.len(),
MAX_SOURCE_CONTENT_LEN
)));
}
}
// Get timestamp // Get timestamp
let timestamp = current_timestamp(); let timestamp = current_timestamp();
// Create the record // Create the record
let mut record = SourceRecord::new(hash, req.label.clone(), req.url, req.tier, timestamp); let mut record = SourceRecord::new(hash, req.label.clone(), req.url, req.tier, timestamp);
record.notes = req.notes; record.notes = req.notes;
record.content = req.content;
// Register in the store // Register in the store
let registry = GenericSourceRegistry::new(state.store.clone()); let registry = GenericSourceRegistry::new(state.store.clone());
@ -206,7 +219,7 @@ pub async fn list_sources(
let registry = GenericSourceRegistry::new(state.store.clone()); let registry = GenericSourceRegistry::new(state.store.clone());
let sources: Vec<SourceRecordDto> = if let Some(query) = &params.query { let mut sources: Vec<SourceRecordDto> = if let Some(query) = &params.query {
// Search by label // Search by label
registry.search(query, limit).await?.into_iter().map(Into::into).collect() registry.search(query, limit).await?.into_iter().map(Into::into).collect()
} else if let Some(tier) = params.tier { } else if let Some(tier) = params.tier {
@ -227,6 +240,11 @@ pub async fn list_sources(
all.into_iter().map(Into::into).collect() all.into_iter().map(Into::into).collect()
}; };
// Strip content from list responses to avoid returning megabytes
for dto in &mut sources {
dto.content = None;
}
let count = sources.len(); let count = sources.len();
Ok(Json(ListSourcesResponse { sources, count })) Ok(Json(ListSourcesResponse { sources, count }))
} }
@ -629,7 +647,7 @@ async fn build_impact_response(
if let Ok(Some(data)) = store_get_with_timeout(&*state.store, &assertion_key).await if let Ok(Some(data)) = store_get_with_timeout(&*state.store, &assertion_key).await
{ {
if let Ok(assertion) = if let Ok(assertion) =
stemedb_core::serde::deserialize::<stemedb_core::types::Assertion>(&data) stemedb_core::serde::deserialize_assertion_compat(&data)
{ {
for sig in &assertion.signatures { for sig in &assertion.signatures {
let agent_hex = hex::encode(sig.agent_id); let agent_hex = hex::encode(sig.agent_id);

View File

@ -3,12 +3,16 @@
//! These endpoints provide claim storage DIRECTLY in StemeDB (not `.aphoria/claims.toml`). //! These endpoints provide claim storage DIRECTLY in StemeDB (not `.aphoria/claims.toml`).
//! Used for remote/hosted mode where claims are stored in the knowledge graph. //! Used for remote/hosted mode where claims are stored in the knowledge graph.
use axum::{extract::{Path, State}, http::StatusCode, Json}; use axum::{
extract::{Path, State},
http::StatusCode,
Json,
};
use ed25519_dalek::{Signer, SigningKey, VerifyingKey};
use tracing::info; use tracing::info;
use ed25519_dalek::{SigningKey, Signer, VerifyingKey};
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
use stemedb_core::signing::compute_content_hash_v2; use stemedb_core::signing::compute_content_hash_v2;
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
use stemedb_ingest::worker::serialize_assertion; use stemedb_ingest::worker::serialize_assertion;
use stemedb_storage::{key_codec, KVStore}; use stemedb_storage::{key_codec, KVStore};
@ -86,10 +90,7 @@ pub async fn create_claim(
state.commit_buffer.append(payload).await?; state.commit_buffer.append(payload).await?;
Ok(( Ok((StatusCode::CREATED, Json(CreateClaimResponse { id: req.claim.id.clone(), stored: true })))
StatusCode::CREATED,
Json(CreateClaimResponse { id: req.claim.id.clone(), stored: true }),
))
} }
/// List all claims, optionally filtered. /// List all claims, optionally filtered.
@ -129,7 +130,7 @@ pub async fn list_claims(
let hash_hex = hex::encode(&hash_bytes); let hash_hex = hex::encode(&hash_bytes);
let assertion_key = key_codec::assertion_key(&subject, &hash_hex); let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
if let Some(data) = state.store.get(&assertion_key).await? { if let Some(data) = state.store.get(&assertion_key).await? {
if let Ok(assertion) = stemedb_core::serde::deserialize::<Assertion>(&data) { if let Ok(assertion) = stemedb_core::serde::deserialize_assertion_compat(&data) {
if let Ok(dto) = assertion_to_dto(&assertion) { if let Ok(dto) = assertion_to_dto(&assertion) {
claims.push(dto); claims.push(dto);
} }
@ -189,10 +190,11 @@ pub async fn get_claim(
let hash_hex = hex::encode(hash_bytes); let hash_hex = hex::encode(hash_bytes);
let assertion_key = key_codec::assertion_key(&subject, &hash_hex); let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
let data = state.store.get(&assertion_key).await? let data = state.store.get(&assertion_key).await?.ok_or_else(|| {
.ok_or_else(|| ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate)))?; ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate))
})?;
let assertion = stemedb_core::serde::deserialize::<Assertion>(&data) let assertion = stemedb_core::serde::deserialize_assertion_compat(&data)
.map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?; .map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?;
assertion_to_dto(&assertion) assertion_to_dto(&assertion)
@ -237,10 +239,11 @@ pub async fn delete_claim(
let hash_hex = hex::encode(hash_bytes); let hash_hex = hex::encode(hash_bytes);
let assertion_key = key_codec::assertion_key(&subject, &hash_hex); let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
let data = state.store.get(&assertion_key).await? let data = state.store.get(&assertion_key).await?.ok_or_else(|| {
.ok_or_else(|| ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate)))?; ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate))
})?;
let mut assertion = stemedb_core::serde::deserialize::<Assertion>(&data) let mut assertion = stemedb_core::serde::deserialize_assertion_compat(&data)
.map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?; .map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?;
// Mark as deprecated (append-only: create new version) // Mark as deprecated (append-only: create new version)
@ -328,12 +331,13 @@ fn dto_to_assertion(dto: &AuthoredClaimDto) -> Result<Assertion> {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: serde_json::to_vec(&metadata).ok(), source_metadata: serde_json::to_vec(&metadata).ok(),
narrative: None,
lifecycle, lifecycle,
signatures: vec![], // Signatures added by ingestion pipeline signatures: vec![], // Signatures added by ingestion pipeline
confidence: 1.0, // Authored claims have full confidence confidence: 1.0, // Authored claims have full confidence
timestamp: std::time::SystemTime::now() timestamp: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH) .duration_since(std::time::UNIX_EPOCH)
.unwrap() .unwrap_or_default()
.as_secs(), .as_secs(),
hlc_timestamp: Default::default(), hlc_timestamp: Default::default(),
vector: None, vector: None,
@ -360,10 +364,14 @@ fn assertion_to_dto(assertion: &Assertion) -> Result<AuthoredClaimDto> {
let concept_path = assertion let concept_path = assertion
.subject .subject
.strip_prefix("claim://") .strip_prefix("claim://")
.ok_or_else(|| ApiError::Internal("Invalid subject format: missing claim:// prefix".to_string()))? .ok_or_else(|| {
ApiError::Internal("Invalid subject format: missing claim:// prefix".to_string())
})?
.rsplit_once('/') .rsplit_once('/')
.map(|(cp, _)| cp) .map(|(cp, _)| cp)
.ok_or_else(|| ApiError::Internal("Invalid subject format: missing predicate separator".to_string()))? .ok_or_else(|| {
ApiError::Internal("Invalid subject format: missing predicate separator".to_string())
})?
.to_string(); .to_string();
// Convert object value // Convert object value
@ -393,11 +401,7 @@ fn assertion_to_dto(assertion: &Assertion) -> Result<AuthoredClaimDto> {
.and_then(|v| v.as_str()) .and_then(|v| v.as_str())
.unwrap_or("equals") .unwrap_or("equals")
.to_string(), .to_string(),
provenance: metadata provenance: metadata.get("provenance").and_then(|v| v.as_str()).unwrap_or("").to_string(),
.get("provenance")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
invariant: metadata.get("invariant").and_then(|v| v.as_str()).unwrap_or("").to_string(), invariant: metadata.get("invariant").and_then(|v| v.as_str()).unwrap_or("").to_string(),
consequence: metadata.get("consequence").and_then(|v| v.as_str()).unwrap_or("").to_string(), consequence: metadata.get("consequence").and_then(|v| v.as_str()).unwrap_or("").to_string(),
authority_tier: source_class_to_tier_string(assertion.source_class), authority_tier: source_class_to_tier_string(assertion.source_class),

View File

@ -0,0 +1,97 @@
//! Handlers for subject and predicate discovery endpoints.
//!
//! These endpoints scan existing Redb indexes to expose the subjects
//! and predicates known to the system, enabling autocomplete/typeahead
//! in the dashboard.
use axum::{
extract::{Path, State},
Json,
};
use tracing::instrument;
use crate::{
dto::subjects::{ListPredicatesResponse, ListSubjectsParams, ListSubjectsResponse},
error::Result,
extractors::QsQuery,
state::AppState,
};
use stemedb_storage::{key_codec, KVStore};
/// Enumerate the subjects known to the system, optionally narrowed by prefix.
///
/// Reads the `\x00SUBJECTS:` index in Redb. When `q` is supplied the scan is
/// keyed on that value (typeahead/autocomplete); otherwise the whole index is
/// scanned. `limit` defaults to 100 and is capped at 1000.
///
/// `total_count` in the response is the number of scanned index entries,
/// measured before `limit` is applied.
#[utoipa::path(
    get,
    path = "/v1/subjects",
    params(
        ("q" = Option<String>, Query, description = "Prefix filter for subject names"),
        ("limit" = Option<usize>, Query, description = "Max results (default 100, max 1000)")
    ),
    responses(
        (status = 200, description = "List of subjects", body = ListSubjectsResponse),
        (status = 500, description = "Internal server error", body = crate::dto::ErrorResponse)
    ),
    tag = "discovery"
)]
#[instrument(skip(state), fields(q = ?params.q, limit = ?params.limit))]
pub async fn list_subjects(
    State(state): State<AppState>,
    QsQuery(params): QsQuery<ListSubjectsParams>,
) -> Result<Json<ListSubjectsResponse>> {
    metrics::counter!("stemedb_queries_total", "endpoint" => "list_subjects").increment(1);

    let max_results = params.limit.unwrap_or(100).min(1000);

    // A query narrows the scan to matching keys; no query scans the whole index.
    let scan_key = match &params.q {
        Some(q) => key_codec::subjects_index_key(q),
        None => key_codec::subjects_scan_prefix(),
    };

    let scanned = state.store.scan_prefix(&scan_key).await?;
    let total_count = scanned.len();

    // Keys that do not yield a subject are skipped and do not count toward the cap.
    let mut subjects: Vec<String> = Vec::new();
    for (key, _) in scanned.iter() {
        if subjects.len() >= max_results {
            break;
        }
        if let Some(subject) = key_codec::extract_subject_from_subjects_key(key) {
            subjects.push(subject);
        }
    }

    Ok(Json(ListSubjectsResponse { subjects, total_count }))
}
/// Enumerate every predicate that has been asserted for one subject.
///
/// Reads the `{subject}\x00SP:` index in Redb and returns the predicate part
/// of each key that can be decoded; keys that cannot be decoded are skipped.
#[utoipa::path(
    get,
    path = "/v1/subjects/{subject}/predicates",
    params(
        ("subject" = String, Path, description = "The subject to list predicates for")
    ),
    responses(
        (status = 200, description = "List of predicates for the subject", body = ListPredicatesResponse),
        (status = 500, description = "Internal server error", body = crate::dto::ErrorResponse)
    ),
    tag = "discovery"
)]
#[instrument(skip(state), fields(%subject))]
pub async fn list_predicates(
    State(state): State<AppState>,
    Path(subject): Path<String>,
) -> Result<Json<ListPredicatesResponse>> {
    metrics::counter!("stemedb_queries_total", "endpoint" => "list_predicates").increment(1);

    let scan_key = key_codec::subject_predicate_scan_prefix(&subject);
    let rows = state.store.scan_prefix(&scan_key).await?;

    let mut predicates: Vec<String> = Vec::new();
    for (key, _) in rows.iter() {
        // The decoded key is a pair; only its second element (the predicate) is kept.
        if let Some((_, predicate)) = key_codec::extract_sp_key(key) {
            predicates.push(predicate);
        }
    }

    Ok(Json(ListPredicatesResponse { subject, predicates }))
}

View File

@ -66,7 +66,7 @@ pub use state::AppState;
// Re-export the path items for OpenAPI // Re-export the path items for OpenAPI
use handlers::{ use handlers::{
admin::__path_decay_trust_ranks, admin::{__path_decay_trust_ranks, __path_rebuild_indexes},
admission::__path_get_admission_status, admission::__path_get_admission_status,
api_keys::{ api_keys::{
__path_create_api_key, __path_list_api_keys, __path_revoke_api_key, __path_rotate_api_key, __path_create_api_key, __path_list_api_keys, __path_revoke_api_key, __path_rotate_api_key,
@ -83,8 +83,8 @@ use handlers::{
}, },
constraints::__path_constraints_query, constraints::__path_constraints_query,
epoch::__path_create_epoch, epoch::__path_create_epoch,
feed::__path_feed,
escalation::{__path_list_escalations, __path_resolve_escalation}, escalation::{__path_list_escalations, __path_resolve_escalation},
feed::__path_feed,
gold_standard::{ gold_standard::{
__path_create_gold_standard, __path_list_gold_standards, __path_remove_gold_standard, __path_create_gold_standard, __path_list_gold_standards, __path_remove_gold_standard,
__path_verify_agent, __path_verify_agent,
@ -104,6 +104,7 @@ use handlers::{
__path_list_sources, __path_quarantine_source, __path_register_source, __path_list_sources, __path_quarantine_source, __path_register_source,
__path_restore_source, __path_update_source_status, __path_restore_source, __path_update_source_status,
}, },
subjects::{__path_list_predicates, __path_list_subjects},
supersede::__path_supersede, supersede::__path_supersede,
trace::__path_trace, trace::__path_trace,
vote::__path_create_vote, vote::__path_create_vote,
@ -132,6 +133,7 @@ use handlers::{
store_source, store_source,
get_provenance, get_provenance,
decay_trust_ranks, decay_trust_ranks,
rebuild_indexes,
list_escalations, list_escalations,
resolve_escalation, resolve_escalation,
create_gold_standard, create_gold_standard,
@ -168,6 +170,9 @@ use handlers::{
revoke_api_key, revoke_api_key,
rotate_api_key, rotate_api_key,
update_api_key, update_api_key,
// Discovery (subject/predicate autocomplete)
list_subjects,
list_predicates,
), ),
components( components(
schemas( schemas(
@ -215,6 +220,7 @@ use handlers::{
dto::ProvenanceResponse, dto::ProvenanceResponse,
dto::DecayTrustRanksRequest, dto::DecayTrustRanksRequest,
dto::DecayTrustRanksResponse, dto::DecayTrustRanksResponse,
dto::RebuildIndexesResponse,
dto::EscalationEventDto, dto::EscalationEventDto,
dto::EscalationLevelDto, dto::EscalationLevelDto,
dto::EscalationListResponse, dto::EscalationListResponse,
@ -284,6 +290,9 @@ use handlers::{
dto::RotateApiKeyResponse, dto::RotateApiKeyResponse,
dto::UpdateApiKeyRequest, dto::UpdateApiKeyRequest,
dto::UpdateApiKeyResponse, dto::UpdateApiKeyResponse,
// Discovery (subject/predicate autocomplete)
dto::ListSubjectsResponse,
dto::ListPredicatesResponse,
) )
), ),
tags( tags(
@ -302,6 +311,7 @@ use handlers::{
(name = "quarantine", description = "Content defense quarantine management"), (name = "quarantine", description = "Content defense quarantine management"),
(name = "circuit_breaker", description = "Per-agent circuit breaker management"), (name = "circuit_breaker", description = "Per-agent circuit breaker management"),
(name = "source-registry", description = "Source metadata registry and impact analysis"), (name = "source-registry", description = "Source metadata registry and impact analysis"),
(name = "discovery", description = "Subject and predicate discovery for autocomplete"),
), ),
info( info(
title = "Episteme (StemeDB) API", title = "Episteme (StemeDB) API",

View File

@ -276,11 +276,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
info!("API server listening on {} (plaintext)", config.bind_addr); info!("API server listening on {} (plaintext)", config.bind_addr);
info!("Swagger UI available at http://{}/swagger-ui", config.bind_addr); info!("Swagger UI available at http://{}/swagger-ui", config.bind_addr);
axum::serve( axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>()).await?;
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.await?;
} }
Ok(()) Ok(())

View File

@ -410,6 +410,7 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
.route("/v1/claims", post(handlers::create_stemedb_claim)) .route("/v1/claims", post(handlers::create_stemedb_claim))
// Admin write endpoints // Admin write endpoints
.route("/v1/admin/decay-trust-ranks", post(handlers::decay_trust_ranks)) .route("/v1/admin/decay-trust-ranks", post(handlers::decay_trust_ranks))
.route("/v1/admin/rebuild-indexes", post(handlers::rebuild_indexes))
.route("/v1/admin/escalations/:id/resolve", post(handlers::resolve_escalation)) .route("/v1/admin/escalations/:id/resolve", post(handlers::resolve_escalation))
.route("/v1/admin/gold-standards", post(handlers::create_gold_standard)) .route("/v1/admin/gold-standards", post(handlers::create_gold_standard))
.route( .route(
@ -449,7 +450,10 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
// Claims endpoints (StemeDB-backed) // Claims endpoints (StemeDB-backed)
.route("/v1/claims", get(handlers::list_stemedb_claims)) .route("/v1/claims", get(handlers::list_stemedb_claims))
.route("/v1/claims/:concept_path/:predicate", get(handlers::get_stemedb_claim)) .route("/v1/claims/:concept_path/:predicate", get(handlers::get_stemedb_claim))
.route("/v1/claims/:concept_path/:predicate", axum::routing::delete(handlers::delete_stemedb_claim)) .route(
"/v1/claims/:concept_path/:predicate",
axum::routing::delete(handlers::delete_stemedb_claim),
)
.route("/v1/admin/escalations", get(handlers::list_escalations)) .route("/v1/admin/escalations", get(handlers::list_escalations))
.route("/v1/admin/gold-standards", get(handlers::list_gold_standards)) .route("/v1/admin/gold-standards", get(handlers::list_gold_standards))
.route("/v1/concepts/resolve", get(handlers::resolve_alias)) .route("/v1/concepts/resolve", get(handlers::resolve_alias))
@ -459,6 +463,7 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
.route("/v1/admission/status", get(handlers::get_admission_status)) .route("/v1/admission/status", get(handlers::get_admission_status))
.route("/v1/admin/quarantine", get(handlers::list_quarantine)) .route("/v1/admin/quarantine", get(handlers::list_quarantine))
.route("/v1/admin/quarantine/:hash", get(handlers::get_quarantine)) .route("/v1/admin/quarantine/:hash", get(handlers::get_quarantine))
.route("/v1/admin/rejected", get(handlers::list_rejected))
.route("/v1/admin/circuit-breaker/:agent_id", get(handlers::get_circuit_status)) .route("/v1/admin/circuit-breaker/:agent_id", get(handlers::get_circuit_status))
.route("/v1/admin/circuit-breakers/tripped", get(handlers::list_tripped_circuits)) .route("/v1/admin/circuit-breakers/tripped", get(handlers::list_tripped_circuits))
.route("/v1/admin/api-keys", get(handlers::list_api_keys)) .route("/v1/admin/api-keys", get(handlers::list_api_keys))
@ -466,6 +471,9 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
.route("/v1/sources/:hash", get(handlers::get_source)) .route("/v1/sources/:hash", get(handlers::get_source))
.route("/v1/sources/:hash/impact", get(handlers::get_source_impact)) .route("/v1/sources/:hash/impact", get(handlers::get_source_impact))
.route("/v1/sources/:hash/impact/export", get(handlers::export_source_impact)) .route("/v1/sources/:hash/impact/export", get(handlers::export_source_impact))
// Discovery endpoints (subject/predicate autocomplete)
.route("/v1/subjects", get(handlers::list_subjects))
.route("/v1/subjects/:subject/predicates", get(handlers::list_predicates))
.layer(RequestBodyLimitLayer::new(config.read_body_limit)); // P5.1: Configurable limit .layer(RequestBodyLimitLayer::new(config.read_body_limit)); // P5.1: Configurable limit
// Add Aphoria endpoints when feature is enabled // Add Aphoria endpoints when feature is enabled

View File

@ -61,10 +61,7 @@ async fn test_health_check_over_tcp() {
// Serve with ConnectInfo injection (the fix for the 500 bug) // Serve with ConnectInfo injection (the fix for the 500 bug)
tokio::spawn(async move { tokio::spawn(async move {
axum::serve( axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>())
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.await .await
.expect("server"); .expect("server");
}); });
@ -74,10 +71,7 @@ async fn test_health_check_over_tcp() {
// Make a raw HTTP/1.1 request over TCP // Make a raw HTTP/1.1 request over TCP
let mut stream = tokio::net::TcpStream::connect(addr).await.expect("connect"); let mut stream = tokio::net::TcpStream::connect(addr).await.expect("connect");
let request = format!( let request = format!("GET /v1/health HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n", addr);
"GET /v1/health HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n",
addr
);
stream.write_all(request.as_bytes()).await.expect("write"); stream.write_all(request.as_bytes()).await.expect("write");
let mut response = String::new(); let mut response = String::new();
@ -95,3 +89,111 @@ async fn test_health_check_over_tcp() {
let json: serde_json::Value = serde_json::from_str(body).expect("json parse"); let json: serde_json::Value = serde_json::from_str(body).expect("json parse");
assert_eq!(json["status"], "healthy"); assert_eq!(json["status"], "healthy");
} }
// ============================================================================
// Signature Verification Tests (pre-WAL validation)
// ============================================================================
/// Test: POST /v1/assert with invalid signatures returns 400 (not 201).
///
/// Regression guard for the "assert returns 201 but data is silently dropped"
/// bug: the API used to accept signatures that were structurally valid but
/// cryptographically invalid, write them to the WAL, and answer 201. The
/// IngestWorker then rejected them silently, permanently blocking the
/// ingestion pipeline.
#[tokio::test]
async fn test_assert_invalid_signature_returns_400() {
    use serde_json::json;

    let env = common::create_test_env().await;
    let router = create_router(env.state);

    // Structurally valid, cryptographically invalid signature entry:
    // agent_id is 64 hex chars that are not meant to be a real Ed25519 public
    // key, and the signature is 128 hex chars (64 bytes) of filler.
    let bogus_signature = json!({
        "agent_id": "a".repeat(64),
        "signature": "b".repeat(128),
        "timestamp": 1700000000
    });
    let payload = json!({
        "subject": "test/bug_regression",
        "predicate": "has_value",
        "object": {"type": "Text", "value": "hello"},
        "confidence": 0.9,
        "source_hash": "0".repeat(64),
        "signatures": [bogus_signature],
        "timestamp": 1700000000
    });

    let encoded = serde_json::to_vec(&payload).expect("json");
    let request = Request::builder()
        .method("POST")
        .uri("/v1/assert")
        .header("Content-Type", "application/json")
        .body(Body::from(encoded))
        .expect("Request");

    let response = router.oneshot(request).await.expect("Request");
    assert_eq!(
        response.status(),
        StatusCode::BAD_REQUEST,
        "Invalid signature should return 400, not 201"
    );

    let raw = axum::body::to_bytes(response.into_body(), usize::MAX).await.expect("Body");
    let parsed: serde_json::Value = serde_json::from_slice(&raw).expect("JSON");

    // The error text must mention the signature, in either capitalisation.
    let error_msg = parsed["error"].as_str().unwrap_or("");
    let mentions_signature =
        ["Signature", "signature"].iter().any(|needle| error_msg.contains(*needle));
    assert!(mentions_signature, "Error should mention signature failure, got: {}", error_msg);
}
/// Test: POST /v1/assert with valid Ed25519 signature returns 201.
///
/// Happy-path counterpart to the invalid-signature regression test: a payload
/// built by `common::create_signed_assertion_json` must be accepted and the
/// response body must report `"status": "created"`.
#[tokio::test]
async fn test_assert_valid_signature_returns_201() {
    let env = common::create_test_env().await;
    let router = create_router(env.state);

    let payload = common::create_signed_assertion_json("test/valid", "has_value", 42.0);
    let encoded = serde_json::to_vec(&payload).expect("json");
    let request = Request::builder()
        .method("POST")
        .uri("/v1/assert")
        .header("Content-Type", "application/json")
        .body(Body::from(encoded))
        .expect("Request");

    let response = router.oneshot(request).await.expect("Request");
    let status = response.status();
    assert_eq!(status, StatusCode::CREATED, "Valid signature should return 201");

    let raw = axum::body::to_bytes(response.into_body(), usize::MAX).await.expect("Body");
    let parsed: serde_json::Value = serde_json::from_slice(&raw).expect("JSON");
    assert_eq!(parsed["status"], "created");
}
/// Test: POST /v1/assert with null byte in subject returns 400.
///
/// The payload comes from the same signing helper the valid-signature test
/// uses, so the only irregularity in the request is the `\x00` embedded in
/// the subject.
#[tokio::test]
async fn test_assert_null_byte_subject_returns_400() {
    let env = common::create_test_env().await;
    let router = create_router(env.state);

    // Signed by the shared helper; the subject carries an embedded null byte.
    let payload = common::create_signed_assertion_json("test\x00injected", "has_value", 1.0);
    let encoded = serde_json::to_vec(&payload).expect("json");
    let request = Request::builder()
        .method("POST")
        .uri("/v1/assert")
        .header("Content-Type", "application/json")
        .body(Body::from(encoded))
        .expect("Request");

    let response = router.oneshot(request).await.expect("Request");

    // The null byte alone must be enough to get the request rejected.
    let status = response.status();
    assert_eq!(status, StatusCode::BAD_REQUEST, "Null byte in subject should return 400");
}

View File

@ -48,6 +48,7 @@ mod tests {
visual_hash: Some([1u8; 8]), visual_hash: Some([1u8; 8]),
epoch: Some([2u8; 32]), epoch: Some([2u8; 32]),
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
agent_id: [2u8; 32], agent_id: [2u8; 32],
@ -103,6 +104,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: stage, lifecycle: stage,
signatures: vec![], signatures: vec![],
confidence: 1.0, confidence: 1.0,

View File

@ -55,6 +55,25 @@ pub const MAX_OBJECT_LEN: usize = 4096;
/// in the source metadata instead of the raw bytes. /// in the source metadata instead of the raw bytes.
pub const MAX_SOURCE_SIZE: usize = 10 * 1024 * 1024; pub const MAX_SOURCE_SIZE: usize = 10 * 1024 * 1024;
/// Upper bound on an assertion's narrative, in bytes (64 KB).
///
/// A narrative is the free-text explanation (methodology, limitations, bias,
/// caveats) that makes an assertion self-contained. Capping it prevents
/// unbounded memory growth while still leaving room for rich context.
///
/// # Example
/// - Valid: A 2 KB explanation of trial methodology
/// - Valid: A 10 KB narrative covering bias, limitations, and caveats
/// - Invalid: A 100 KB embedded document in the narrative field
pub const MAX_NARRATIVE_LEN: usize = 64 * 1024;
/// Upper bound on stored source content, in bytes (1 MB).
///
/// Source content is the full text extracted from a PDF or other document.
/// Capping it prevents unbounded memory growth while still fitting typical
/// research papers and regulatory documents.
pub const MAX_SOURCE_CONTENT_LEN: usize = 1024 * 1024;
/// Default limit for paginated query results. /// Default limit for paginated query results.
/// ///
/// Applied when no explicit limit is provided in the query parameters. /// Applied when no explicit limit is provided in the query parameters.

View File

@ -44,6 +44,11 @@ use rkyv::validation::validators::DefaultValidator;
use rkyv::{Archive, CheckBytes, Deserialize, Serialize}; use rkyv::{Archive, CheckBytes, Deserialize, Serialize};
use thiserror::Error; use thiserror::Error;
use crate::types::{
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
SourceRecord, SourceStatus,
};
/// Default scratch buffer size for serialization. /// Default scratch buffer size for serialization.
/// ///
/// 4KB is sufficient for most assertions. Larger payloads will trigger /// 4KB is sufficient for most assertions. Larger payloads will trigger
@ -88,6 +93,7 @@ pub enum SerdeError {
/// visual_hash: None, /// visual_hash: None,
/// epoch: None, /// epoch: None,
/// source_metadata: None, /// source_metadata: None,
/// narrative: None,
/// lifecycle: LifecycleStage::Proposed, /// lifecycle: LifecycleStage::Proposed,
/// signatures: vec![], /// signatures: vec![],
/// confidence: 1.0, /// confidence: 1.0,
@ -156,6 +162,131 @@ where
.map_err(|e| SerdeError::Deserialization(e.to_string())) .map_err(|e| SerdeError::Deserialization(e.to_string()))
} }
// ============================================================================
// Legacy Assertion (pre-narrative schema)
// ============================================================================
/// Assertion struct matching the pre-narrative rkyv layout.
///
/// The `narrative: Option<String>` field was added between `source_metadata`
/// and `lifecycle`. rkyv doesn't support schema evolution, so data serialized
/// before that change needs this struct to deserialize correctly.
///
/// This type is private to the module and serves only as a decoding target:
/// decoded values are converted to [`Assertion`] through the `From` impl,
/// which sets `narrative` to `None`.
///
/// NOTE(review): because this struct exists to mirror bytes that are already
/// on disk, its field order and field types presumably must never change —
/// confirm against rkyv's layout rules before editing.
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
#[archive(check_bytes)]
struct LegacyAssertion {
pub subject: String,
pub predicate: String,
pub object: ObjectValue,
pub parent_hash: Option<[u8; 32]>,
pub source_hash: [u8; 32],
pub source_class: SourceClass,
pub visual_hash: Option<[u8; 8]>,
pub epoch: Option<[u8; 32]>,
pub source_metadata: Option<Vec<u8>>,
// narrative: Option<String> did NOT exist in this version
// (the current `Assertion` has it here, between `source_metadata` and `lifecycle`)
pub lifecycle: LifecycleStage,
pub signatures: Vec<SignatureEntry>,
pub confidence: f32,
pub timestamp: u64,
pub hlc_timestamp: HlcTimestamp,
pub vector: Option<Vec<f32>>,
}
impl From<LegacyAssertion> for Assertion {
fn from(legacy: LegacyAssertion) -> Self {
Self {
subject: legacy.subject,
predicate: legacy.predicate,
object: legacy.object,
parent_hash: legacy.parent_hash,
source_hash: legacy.source_hash,
source_class: legacy.source_class,
visual_hash: legacy.visual_hash,
epoch: legacy.epoch,
source_metadata: legacy.source_metadata,
narrative: None,
lifecycle: legacy.lifecycle,
signatures: legacy.signatures,
confidence: legacy.confidence,
timestamp: legacy.timestamp,
hlc_timestamp: legacy.hlc_timestamp,
vector: legacy.vector,
}
}
}
/// Deserialize an assertion with backward compatibility.
///
/// Tries the current `Assertion` layout first. If that fails, tries the
/// legacy layout (before `narrative` field was added) and converts.
///
/// This allows the system to read assertions written before schema changes
/// without requiring a data migration.
pub fn deserialize_assertion_compat(data: &[u8]) -> Result<Assertion, SerdeError> {
// Try current format first (fast path for new data)
if let Ok(assertion) = deserialize::<Assertion>(data) {
return Ok(assertion);
}
// Fallback: try legacy format (no narrative field)
let legacy: LegacyAssertion = deserialize(data)?;
Ok(legacy.into())
}
// ============================================================================
// Legacy SourceRecord (pre-content schema)
// ============================================================================
/// SourceRecord struct matching the pre-content rkyv layout.
///
/// The `content: Option<String>` field was added after `notes`.
/// rkyv doesn't support schema evolution, so data serialized
/// before that change needs this struct to deserialize correctly.
///
/// This type is private to the module and serves only as a decoding target:
/// decoded values are converted to [`SourceRecord`] through the `From` impl,
/// which sets `content` to `None`.
///
/// NOTE(review): because this struct exists to mirror bytes that are already
/// on disk, its field order and field types presumably must never change —
/// confirm against rkyv's layout rules before editing.
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
#[archive(check_bytes)]
struct LegacySourceRecord {
pub hash: [u8; 32],
pub label: String,
pub url: Option<String>,
pub tier: u8,
pub status: SourceStatus,
pub created_at: u64,
pub updated_at: u64,
pub notes: Option<String>,
// content: Option<String> did NOT exist in this version
// (the current `SourceRecord` appends it after `notes`)
}
impl From<LegacySourceRecord> for SourceRecord {
fn from(legacy: LegacySourceRecord) -> Self {
Self {
hash: legacy.hash,
label: legacy.label,
url: legacy.url,
tier: legacy.tier,
status: legacy.status,
created_at: legacy.created_at,
updated_at: legacy.updated_at,
notes: legacy.notes,
content: None,
}
}
}
/// Deserialize a source record with backward compatibility.
///
/// Tries the current `SourceRecord` layout first. If that fails, tries the
/// legacy layout (before `content` field was added) and converts.
pub fn deserialize_source_record_compat(data: &[u8]) -> Result<SourceRecord, SerdeError> {
// Try current format first (fast path for new data)
if let Ok(record) = deserialize::<SourceRecord>(data) {
return Ok(record);
}
// Fallback: try legacy format (no content field)
let legacy: LegacySourceRecord = deserialize(data)?;
Ok(legacy.into())
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -176,6 +307,7 @@ mod tests {
visual_hash: Some([1u8; 8]), visual_hash: Some([1u8; 8]),
epoch: Some([2u8; 32]), epoch: Some([2u8; 32]),
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
agent_id: [2u8; 32], agent_id: [2u8; 32],
@ -303,6 +435,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![], signatures: vec![],
confidence: 0.0, confidence: 0.0,
@ -330,6 +463,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: Some(metadata.as_bytes().to_vec()), source_metadata: Some(metadata.as_bytes().to_vec()),
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![], signatures: vec![],
confidence: 0.85, confidence: 0.85,
@ -357,6 +491,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![], signatures: vec![],
confidence: 1.0, confidence: 1.0,
@ -371,4 +506,127 @@ mod tests {
assert_eq!(assertion, recovered); assert_eq!(assertion, recovered);
assert!(recovered.source_metadata.is_none()); assert!(recovered.source_metadata.is_none());
} }
/// Bytes written with the pre-narrative layout must be rejected by the
/// current layout but recovered, field for field, by the compat deserializer.
#[test]
fn test_legacy_assertion_compat_deserialize() {
    // One signature entry, so the variable-length list is exercised too.
    let signer = SignatureEntry {
        agent_id: [2u8; 32],
        signature: [3u8; 64],
        timestamp: 1000,
        version: 1,
    };

    // Stand-in for a record serialized before `narrative` was added.
    let old_layout = LegacyAssertion {
        subject: "Semaglutide".to_string(),
        predicate: "reduces_weight".to_string(),
        object: ObjectValue::Text("significant".to_string()),
        parent_hash: None,
        source_hash: [1u8; 32],
        source_class: SourceClass::Clinical,
        visual_hash: None,
        epoch: None,
        source_metadata: Some(b"{}".to_vec()),
        lifecycle: LifecycleStage::Approved,
        signatures: vec![signer],
        confidence: 0.95,
        timestamp: 1700000000,
        hlc_timestamp: HlcTimestamp::default(),
        vector: Some(vec![0.1, 0.2]),
    };
    let encoded = serialize(&old_layout).expect("serialize legacy");

    // The current layout differs, so a direct decode has to fail...
    assert!(deserialize::<Assertion>(&encoded).is_err());

    // ...while the compat entry point falls back to the legacy layout.
    let upgraded =
        deserialize_assertion_compat(&encoded).expect("compat deserialize should succeed");

    assert_eq!(upgraded.subject, "Semaglutide");
    assert_eq!(upgraded.predicate, "reduces_weight");
    assert_eq!(upgraded.confidence, 0.95);
    assert_eq!(upgraded.signatures.len(), 1);
    // `narrative` has no legacy counterpart, so it must come back empty.
    assert!(upgraded.narrative.is_none());
    assert!(upgraded.source_metadata.is_some());
    assert_eq!(upgraded.timestamp, 1700000000);
}
/// Current-format assertions must round-trip through the compat entry point
/// unchanged, including a populated `narrative`.
#[test]
fn test_current_assertion_also_works_via_compat() {
    let original = Assertion {
        subject: "test".to_string(),
        predicate: "works".to_string(),
        object: ObjectValue::Boolean(true),
        parent_hash: None,
        source_hash: [0u8; 32],
        source_class: SourceClass::Expert,
        visual_hash: None,
        epoch: None,
        source_metadata: None,
        narrative: Some("This is a narrative.".to_string()),
        lifecycle: LifecycleStage::Proposed,
        signatures: vec![],
        confidence: 1.0,
        timestamp: 0,
        hlc_timestamp: HlcTimestamp::default(),
        vector: None,
    };

    let encoded = serialize(&original).expect("serialize");
    let decoded = deserialize_assertion_compat(&encoded)
        .expect("compat deserialize should succeed for current format");

    assert_eq!(decoded, original);
    assert_eq!(decoded.narrative, Some("This is a narrative.".to_string()));
}
/// Bytes written with the pre-content layout must be rejected by the current
/// layout but recovered by the compat deserializer with `content` empty.
#[test]
fn test_legacy_source_record_compat_deserialize() {
    // Stand-in for a record serialized before `content` was added.
    let old_layout = LegacySourceRecord {
        hash: [42u8; 32],
        label: "RFC 7519".to_string(),
        url: Some("https://tools.ietf.org/html/rfc7519".to_string()),
        tier: 0,
        status: SourceStatus::Active,
        created_at: 1000,
        updated_at: 2000,
        notes: Some("JWT spec".to_string()),
    };
    let encoded = serialize(&old_layout).expect("serialize legacy");

    // The current layout differs, so a direct decode has to fail...
    assert!(deserialize::<SourceRecord>(&encoded).is_err());

    // ...while the compat entry point falls back to the legacy layout.
    let upgraded =
        deserialize_source_record_compat(&encoded).expect("compat deserialize should succeed");

    assert_eq!(upgraded.hash, [42u8; 32]);
    assert_eq!(upgraded.label, "RFC 7519");
    assert_eq!(upgraded.tier, 0);
    assert_eq!(upgraded.notes, Some("JWT spec".to_string()));
    // `content` has no legacy counterpart, so it must come back empty.
    assert!(upgraded.content.is_none());
}
/// Current-format source records must round-trip through the compat entry
/// point unchanged, including a populated `content`.
#[test]
fn test_current_source_record_also_works_via_compat() {
    let full_text = Some("Full text content".to_string());
    let original = SourceRecord::new([1u8; 32], "Test".to_string(), None, 2, 1000)
        .with_content(full_text);

    let encoded = serialize(&original).expect("serialize");
    let decoded = deserialize_source_record_compat(&encoded)
        .expect("compat deserialize should succeed for current format");

    assert_eq!(decoded, original);
    assert_eq!(decoded.content, Some("Full text content".to_string()));
}
} }

View File

@ -21,6 +21,140 @@
//! ``` //! ```
use crate::types::{Assertion, ObjectValue}; use crate::types::{Assertion, ObjectValue};
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
/// Reasons an assertion's signature list can be rejected.
///
/// The `Display` text is aimed at API consumers who may be unfamiliar with
/// Ed25519: it spells out what each field must contain and lists the mistakes
/// that are commonly made.
#[derive(Debug)]
pub enum SignatureError {
    /// The assertion carries no signatures at all.
    Empty,
    /// A signature entry declares a version that is not recognized.
    UnknownVersion {
        /// The version number that was not recognized.
        version: u8,
        /// Position of the offending entry in the signature list.
        index: usize,
    },
    /// The `agent_id` bytes could not be used as an Ed25519 public key.
    InvalidPublicKey {
        /// Position of the offending entry in the signature list.
        index: usize,
        /// Text of the underlying key-parsing error.
        detail: String,
    },
    /// The signature did not verify against the expected message.
    VerificationFailed {
        /// Position of the offending entry in the signature list.
        index: usize,
        /// Signature version declared by the entry.
        version: u8,
        /// Text of the underlying verification error.
        detail: String,
    },
}

impl SignatureError {
    /// Describes, for error text only, the message a signature of `version`
    /// is expected to cover.
    fn signed_message_description(version: u8) -> &'static str {
        match version {
            1 => "'{subject}:{predicate}' (UTF-8 bytes)",
            2 => "the BLAKE3 content hash of the assertion",
            _ => "unknown",
        }
    }
}

impl std::fmt::Display for SignatureError {
    /// Renders one self-contained, explanatory message per variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::VerificationFailed { index, version, detail } => write!(
                f,
                "Signature {index}: Ed25519 verification failed ({detail}). \
                 For v{version} signatures, signature must be Ed25519_sign(private_key, {message_desc}). \
                 Common mistakes: (1) using SHA hashes instead of Ed25519 signatures, \
                 (2) signing the wrong message, (3) agent_id doesn't match the signing key",
                message_desc = Self::signed_message_description(*version),
            ),
            Self::InvalidPublicKey { index, detail } => write!(
                f,
                "Signature {index}: agent_id is not a valid Ed25519 public key ({detail}). \
                 agent_id must be a 32-byte Ed25519 public key (hex-encoded as 64 chars). \
                 Common mistake: using SHA-256 or other hashes as agent_id. \
                 Generate a keypair with Ed25519 (e.g., ed25519-dalek, crypto/ed25519, or openssl)"
            ),
            Self::UnknownVersion { version, index } => write!(
                f,
                "Signature {index}: unknown version {version}. \
                 Supported versions: 1 (signs '{{subject}}:{{predicate}}'), 2 (signs BLAKE3 content hash)"
            ),
            // No interpolation here, so the text is written verbatim.
            Self::Empty => f.write_str(
                "Assertion must have at least one signature. \
                 Each signature requires: agent_id (32-byte Ed25519 public key, hex-encoded as 64 chars), \
                 signature (64-byte Ed25519 signature, hex-encoded as 128 chars). \
                 SHA-256/SHA-512 hashes cannot be used as agent_id or signature",
            ),
        }
    }
}

impl std::error::Error for SignatureError {}
/// Verify all Ed25519 signatures on an assertion.
///
/// Supports two signature versions:
/// - **Version 1 (legacy):** signs `"{subject}:{predicate}"` — only protects those fields
/// - **Version 2 (enterprise):** signs the BLAKE3 content hash — protects ALL fields
///
/// All signatures must be valid for the assertion to be accepted; checking
/// stops at the first entry that fails.
///
/// This function is used at both the API boundary (fail fast with 400) and in the
/// IngestWorker (defense in depth). Keeping it in `stemedb-core` avoids duplication.
///
/// # Errors
///
/// - [`SignatureError::Empty`] when the assertion has no signatures.
/// - [`SignatureError::UnknownVersion`] when an entry's version is neither 1 nor 2.
/// - [`SignatureError::InvalidPublicKey`] when an entry's `agent_id` is rejected
///   by `VerifyingKey::from_bytes`.
/// - [`SignatureError::VerificationFailed`] when an entry's signature does not
///   verify against the message for its version.
pub fn verify_assertion_signatures(
    assertion: &Assertion,
) -> std::result::Result<(), SignatureError> {
    if assertion.signatures.is_empty() {
        return Err(SignatureError::Empty);
    }

    // v1 message (subject:predicate), shared by every v1 entry.
    let v1_message = format!("{}:{}", assertion.subject, assertion.predicate);

    // v2 content hash, computed the first time a v2 entry is seen and reused
    // afterwards. Filling it on demand means the v2 arm below can never find
    // the hash missing, so a supported version is never reported as unknown.
    let mut v2_content_hash: Option<[u8; 32]> = None;

    for (index, sig_entry) in assertion.signatures.iter().enumerate() {
        let message_bytes: &[u8] = match sig_entry.version {
            1 => v1_message.as_bytes(),
            2 => &v2_content_hash.get_or_insert_with(|| compute_content_hash_v2(assertion))[..],
            version => return Err(SignatureError::UnknownVersion { version, index }),
        };

        // `agent_id` doubles as the public key the signature is checked against.
        let verifying_key = VerifyingKey::from_bytes(&sig_entry.agent_id)
            .map_err(|e| SignatureError::InvalidPublicKey { index, detail: e.to_string() })?;
        let signature = Signature::from_bytes(&sig_entry.signature);

        verifying_key.verify(message_bytes, &signature).map_err(|e| {
            SignatureError::VerificationFailed {
                index,
                version: sig_entry.version,
                detail: e.to_string(),
            }
        })?;
    }

    Ok(())
}
/// Compute the canonical content hash for v2 (enterprise) signing. /// Compute the canonical content hash for v2 (enterprise) signing.
/// ///
@ -37,6 +171,10 @@ use crate::types::{Assertion, ObjectValue};
/// - `source_metadata`: Variable-length, domain-specific /// - `source_metadata`: Variable-length, domain-specific
/// - `lifecycle`: Can change over time /// - `lifecycle`: Can change over time
/// ///
/// **Narrative IS included** because it is content-bearing (methodology, limitations).
/// Changing the narrative changes the assertion's meaning. When `None`, no bytes
/// are added, preserving backward compatibility with pre-narrative hashes.
///
/// # Format /// # Format
/// ///
/// The hash is computed over: /// The hash is computed over:
@ -93,6 +231,12 @@ pub fn compute_content_hash_v2(assertion: &Assertion) -> [u8; 32] {
hasher.update(b":"); hasher.update(b":");
hasher.update(&assertion.timestamp.to_le_bytes()); hasher.update(&assertion.timestamp.to_le_bytes());
// Narrative (only when present, so None preserves backward-compat hash)
if let Some(ref narrative) = assertion.narrative {
hasher.update(b":narrative:");
hasher.update(narrative.as_bytes());
}
*hasher.finalize().as_bytes() *hasher.finalize().as_bytes()
} }
@ -123,6 +267,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![], signatures: vec![],
confidence: 0.95, confidence: 0.95,
@ -246,4 +391,46 @@ mod tests {
} }
} }
} }
/// Adding a narrative to an otherwise identical assertion must change its
/// v2 content hash.
#[test]
fn test_content_hash_changes_with_narrative() {
    let mut subject = test_assertion();
    let baseline = compute_content_hash_v2(&subject);

    subject.narrative =
        Some(String::from("This drug carries a boxed warning for thyroid C-cell tumors."));
    let narrated = compute_content_hash_v2(&subject);

    assert_ne!(baseline, narrated, "Narrative should change the content hash");
}
/// Two independently built assertions with `narrative: None` must hash to the
/// same value.
///
/// NOTE(review): both digests come from the current implementation, so this
/// checks that `narrative: None` hashes deterministically rather than that the
/// digest equals one produced before the field existed. Pinning a known
/// pre-narrative digest would make the backward-compat claim stronger.
#[test]
fn test_content_hash_backward_compat_narrative_none() {
    // Hand-built copy of the fixture (presumably the same values as
    // `test_assertion()` — the assertion below depends on that).
    let rebuilt = Assertion {
        subject: "Semaglutide".to_string(),
        predicate: "has_boxed_warning".to_string(),
        object: ObjectValue::Boolean(true),
        parent_hash: None,
        source_hash: [1u8; 32],
        source_class: SourceClass::Regulatory,
        visual_hash: None,
        epoch: None,
        source_metadata: None,
        narrative: None,
        lifecycle: LifecycleStage::Approved,
        signatures: vec![],
        confidence: 0.95,
        timestamp: 1704067200,
        hlc_timestamp: HlcTimestamp::default(),
        vector: None,
    };

    let fixture_hash = compute_content_hash_v2(&test_assertion());
    let rebuilt_hash = compute_content_hash_v2(&rebuilt);

    assert_eq!(
        fixture_hash, rebuilt_hash,
        "narrative: None must produce identical hash for backward compat"
    );
}
} }

View File

@ -49,6 +49,7 @@ pub struct AssertionBuilder {
visual_hash: Option<[u8; 8]>, visual_hash: Option<[u8; 8]>,
epoch: Option<[u8; 32]>, epoch: Option<[u8; 32]>,
source_metadata: Option<Vec<u8>>, source_metadata: Option<Vec<u8>>,
narrative: Option<String>,
lifecycle: LifecycleStage, lifecycle: LifecycleStage,
signatures: Option<Vec<SignatureEntry>>, signatures: Option<Vec<SignatureEntry>>,
agent_id: [u8; 32], agent_id: [u8; 32],
@ -77,6 +78,7 @@ impl AssertionBuilder {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: None, // Will use agent_id to build default signatures: None, // Will use agent_id to build default
agent_id: [1u8; 32], agent_id: [1u8; 32],
@ -199,6 +201,12 @@ impl AssertionBuilder {
self self
} }
/// Set the narrative (free-text methodology, limitations, caveats).
pub fn narrative(mut self, narrative: &str) -> Self {
self.narrative = Some(narrative.to_string());
self
}
/// Provide explicit signatures (overrides the default single-signature behavior). /// Provide explicit signatures (overrides the default single-signature behavior).
pub fn signatures(mut self, signatures: Vec<SignatureEntry>) -> Self { pub fn signatures(mut self, signatures: Vec<SignatureEntry>) -> Self {
self.signatures = Some(signatures); self.signatures = Some(signatures);
@ -226,6 +234,7 @@ impl AssertionBuilder {
visual_hash: self.visual_hash, visual_hash: self.visual_hash,
epoch: self.epoch, epoch: self.epoch,
source_metadata: self.source_metadata, source_metadata: self.source_metadata,
narrative: self.narrative,
lifecycle: self.lifecycle, lifecycle: self.lifecycle,
signatures, signatures,
confidence: self.confidence, confidence: self.confidence,

View File

@ -33,6 +33,15 @@ pub struct Assertion {
/// Schema is domain-specific (journal info, social metrics, etc.). /// Schema is domain-specific (journal info, social metrics, etc.).
/// Use `Vec<u8>` for rkyv zero-copy compatibility. /// Use `Vec<u8>` for rkyv zero-copy compatibility.
pub source_metadata: Option<Vec<u8>>, pub source_metadata: Option<Vec<u8>>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
///
/// Makes the assertion self-contained: pick it up, read it, understand the
/// full claim without dereferencing anything. Not structured into categories
/// because there are too many kinds of information to pre-categorize.
///
/// Included in v2 content hash (narrative is content-bearing).
/// Max length: `limits::MAX_NARRATIVE_LEN` (64 KB).
pub narrative: Option<String>,
/// The lifecycle stage (Proposed, UnderReview, Approved, Deprecated, Rejected). /// The lifecycle stage (Proposed, UnderReview, Approved, Deprecated, Rejected).
pub lifecycle: LifecycleStage, pub lifecycle: LifecycleStage,

View File

@ -102,6 +102,10 @@ pub struct SourceRecord {
/// Optional curator notes about the source. /// Optional curator notes about the source.
/// Examples: "Deprecated in favor of RFC 9068", "Under review for accuracy" /// Examples: "Deprecated in favor of RFC 9068", "Under review for accuracy"
pub notes: Option<String>, pub notes: Option<String>,
/// Optional full-text content of the source document.
/// Populated by pipelines that extract text from PDFs or other formats.
pub content: Option<String>,
} }
impl SourceRecord { impl SourceRecord {
@ -122,6 +126,7 @@ impl SourceRecord {
created_at: timestamp, created_at: timestamp,
updated_at: timestamp, updated_at: timestamp,
notes: None, notes: None,
content: None,
} }
} }
@ -137,7 +142,13 @@ impl SourceRecord {
updated_at: u64, updated_at: u64,
notes: Option<String>, notes: Option<String>,
) -> Self { ) -> Self {
Self { hash, label, url, tier: tier.min(5), status, created_at, updated_at, notes } Self { hash, label, url, tier: tier.min(5), status, created_at, updated_at, notes, content: None }
}
/// Set the full-text content of the source document.
pub fn with_content(mut self, content: Option<String>) -> Self {
self.content = content;
self
} }
/// Returns the tier label based on the tier number. /// Returns the tier label based on the tier number.
@ -186,6 +197,7 @@ mod tests {
assert_eq!(record.created_at, 1000); assert_eq!(record.created_at, 1000);
assert_eq!(record.updated_at, 1000); assert_eq!(record.updated_at, 1000);
assert!(record.notes.is_none()); assert!(record.notes.is_none());
assert!(record.content.is_none());
} }
#[test] #[test]
@ -263,5 +275,38 @@ mod tests {
crate::serde::deserialize(&bytes).expect("Failed to deserialize SourceRecord"); crate::serde::deserialize(&bytes).expect("Failed to deserialize SourceRecord");
assert_eq!(record, recovered); assert_eq!(record, recovered);
assert!(recovered.content.is_none());
}
/// A record carrying `content` must survive a serialize/deserialize round
/// trip with the content intact.
#[test]
fn test_rkyv_roundtrip_with_content() {
    let letter_text = "Full text of the FDA approval letter...";
    let original =
        SourceRecord::new([42u8; 32], "FDA Approval Letter".to_string(), None, 0, 1000)
            .with_content(Some(letter_text.to_string()));

    let encoded = crate::serde::serialize(&original).expect("Failed to serialize SourceRecord");
    let decoded: SourceRecord =
        crate::serde::deserialize(&encoded).expect("Failed to deserialize SourceRecord");

    assert_eq!(original, decoded);
    assert_eq!(decoded.content, Some(letter_text.to_string()));
}
#[test]
fn test_with_content_builder() {
let hash = [1u8; 32];
let record = SourceRecord::new(hash, "Test".to_string(), None, 0, 1000)
.with_content(Some("content".to_string()));
assert_eq!(record.content, Some("content".to_string()));
let record_none = SourceRecord::new(hash, "Test".to_string(), None, 0, 1000)
.with_content(None);
assert!(record_none.content.is_none());
} }
} }

View File

@ -23,6 +23,8 @@ ed25519-dalek = { version = "2.1", features = ["rand_core"] }
uhlc = "0.7" uhlc = "0.7"
# Async traits # Async traits
async-trait = "0.1" async-trait = "0.1"
# Metrics
metrics = "0.23"
[dev-dependencies] [dev-dependencies]
tempfile = "3.10" tempfile = "3.10"

View File

@ -32,3 +32,23 @@ pub enum IngestError {
#[error("Input validation failed: {0}")] #[error("Input validation failed: {0}")]
InputValidation(String), InputValidation(String),
} }
impl IngestError {
/// Returns true if retrying this exact WAL record could succeed.
///
/// Transient errors (I/O, storage engine) may resolve on retry.
/// Permanent errors (invalid signature, bad input, corrupt serialization)
/// will never succeed — the bytes in the WAL are immutable.
pub fn is_retryable(&self) -> bool {
match self {
// I/O and storage errors: disk might recover, RocksDB might unblock
IngestError::Wal(_) | IngestError::Storage(_) => true,
// The WAL record bytes are immutable — these will never pass
IngestError::InvalidSignature(_)
| IngestError::InputValidation(_)
| IngestError::Serialization(_) => false,
// Worker errors are ambiguous; treat as retryable to be safe
IngestError::Worker(_) => true,
}
}
}

View File

@ -4,9 +4,8 @@
use super::record_types::RECORD_HEADER_SIZE; use super::record_types::RECORD_HEADER_SIZE;
use super::{IngestWorker, RecordType}; use super::{IngestWorker, RecordType};
use crate::error::{IngestError, Result}; use crate::error::{IngestError, Result};
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
use stemedb_core::serde::deserialize; use stemedb_core::serde::deserialize;
use stemedb_core::signing::compute_content_hash_v2; use stemedb_core::signing;
use stemedb_core::types::{Assertion, Epoch, Hash, Vote}; use stemedb_core::types::{Assertion, Epoch, Hash, Vote};
use stemedb_storage::key_codec; use stemedb_storage::key_codec;
use stemedb_storage::{IndexStore, KVStore, VoteStore}; use stemedb_storage::{IndexStore, KVStore, VoteStore};
@ -82,10 +81,77 @@ impl<S: KVStore + 'static> IngestWorker<S> {
let record_type = RecordType::try_from(record.payload[0])?; let record_type = RecordType::try_from(record.payload[0])?;
let data = &record.payload[RECORD_HEADER_SIZE..]; let data = &record.payload[RECORD_HEADER_SIZE..];
match record_type { let ingest_result = match record_type {
RecordType::Assertion => self.ingest_assertion(data).await?, RecordType::Assertion => self.ingest_assertion(data).await,
RecordType::Vote => self.ingest_vote(data).await?, RecordType::Vote => self.ingest_vote(data).await,
RecordType::Epoch => self.ingest_epoch(data).await?, RecordType::Epoch => self.ingest_epoch(data).await,
};
if let Err(e) = ingest_result {
if !e.is_retryable() {
// Permanent failure: the WAL record bytes are immutable and will
// never pass validation. Advance the cursor past this poison record
// so it doesn't block all subsequent ingestion.
let skip_offset = self.current_offset;
self.current_offset += bytes_read;
let cursor_key = key_codec::cursor_key();
// Best-effort cursor persist. If this fails, on restart we will
// re-encounter this record, classify it as permanent again, and
// skip it. No data is lost.
if let Err(persist_err) =
self.store.put(&cursor_key, &self.current_offset.to_le_bytes()).await
{
warn!(
offset = skip_offset,
error = %persist_err,
"Failed to persist cursor after skipping poison record"
);
}
warn!(
record_type = ?record_type,
offset = skip_offset,
new_offset = self.current_offset,
error = %e,
"Skipped permanently invalid WAL record"
);
// Store rejection metadata for admin observability.
// Best-effort: failure to persist metadata should not block ingestion.
let rejection_key = key_codec::rejected_record_key(skip_offset);
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let rejection_json = format!(
r#"{{"offset":{},"record_type":"{:?}","reason":"{}","timestamp":{}}}"#,
skip_offset,
record_type,
e.to_string().replace('"', "'"),
now
);
if let Err(store_err) =
self.store.put(&rejection_key, rejection_json.as_bytes()).await
{
warn!(
offset = skip_offset,
error = %store_err,
"Failed to store rejection metadata"
);
}
metrics::counter!(
"stemedb_ingest_records_skipped_total",
"reason" => e.to_string()
)
.increment(1);
}
// Return the error so the run loop can log it.
// For permanent errors the cursor has already advanced;
// for transient errors the cursor is unchanged (will retry).
return Err(e);
} }
let prev_offset = self.current_offset; let prev_offset = self.current_offset;
@ -170,6 +236,16 @@ impl<S: KVStore + 'static> IngestWorker<S> {
// This enables O(1) lookup of "which assertions cite this source?" // This enables O(1) lookup of "which assertions cite this source?"
self.index_store.add_to_source_index(&assertion.source_hash, &assertion_hash).await?; self.index_store.add_to_source_index(&assertion.source_hash, &assertion_hash).await?;
// Write feed index: \x00FEED:{inverted_ts}:{hash_hex} -> subject
// Uses server clock (not assertion.timestamp) for ingestion ordering.
// This separates "when the claim was made" from "when the system learned about it".
let ingested_at = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let feed_idx_key = key_codec::feed_key(ingested_at, &hash_hex);
self.store.put(&feed_idx_key, assertion.subject.as_bytes()).await?;
// Insert into vector index if present and assertion has a vector // Insert into vector index if present and assertion has a vector
if let (Some(ref vector_index), Some(ref vector)) = (&self.vector_index, &assertion.vector) if let (Some(ref vector_index), Some(ref vector)) = (&self.vector_index, &assertion.vector)
{ {
@ -282,6 +358,17 @@ impl<S: KVStore + 'static> IngestWorker<S> {
))); )));
} }
// Validate narrative length
if let Some(ref narrative) = assertion.narrative {
if narrative.len() > stemedb_core::limits::MAX_NARRATIVE_LEN {
return Err(IngestError::InputValidation(format!(
"narrative exceeds {} bytes (got {})",
stemedb_core::limits::MAX_NARRATIVE_LEN,
narrative.len()
)));
}
}
// Validate timestamp: reject if more than 1 hour in future (clock skew protection) // Validate timestamp: reject if more than 1 hour in future (clock skew protection)
let now = std::time::SystemTime::now() let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH) .duration_since(std::time::UNIX_EPOCH)
@ -300,109 +387,19 @@ impl<S: KVStore + 'static> IngestWorker<S> {
/// Verify all Ed25519 signatures on an assertion. /// Verify all Ed25519 signatures on an assertion.
/// ///
/// Supports two signature versions: /// Delegates to `stemedb_core::signing::verify_assertion_signatures` which
/// - Version 1 (legacy): signs `"{subject}:{predicate}"` - only protects those fields /// is the single source of truth for signature verification logic, shared
/// - Version 2 (enterprise): signs the BLAKE3 content hash - protects ALL fields /// between the API handler (fail fast) and this worker (defense in depth).
///
/// For v2 signatures, the content hash is computed from the assertion with
/// empty signatures (canonical form), so tampering with any field except
/// signatures will invalidate the signature.
///
/// All signatures must be valid for the assertion to be accepted.
fn verify_assertion_signatures(&self, assertion: &Assertion) -> Result<()> { fn verify_assertion_signatures(&self, assertion: &Assertion) -> Result<()> {
if assertion.signatures.is_empty() { signing::verify_assertion_signatures(assertion).map_err(|e| {
warn!( warn!(
subject = %assertion.subject, subject = %assertion.subject,
predicate = %assertion.predicate, predicate = %assertion.predicate,
"Assertion has no signatures" error = %e,
"Signature verification failed"
); );
return Err(IngestError::InvalidSignature( IngestError::InvalidSignature(e.to_string())
"Assertion must have at least one signature".to_string(), })
));
}
// Pre-compute v1 message (subject:predicate) - only used if v1 signatures exist
let v1_message = format!("{}:{}", assertion.subject, assertion.predicate);
// Pre-compute v2 content hash using the shared utility from stemedb-core.
// This must match exactly what the signing code uses in compute_content_hash_v2().
// The hash covers: subject, predicate, object, source_hash, source_class, confidence, timestamp.
let v2_content_hash: Option<[u8; 32]> =
if assertion.signatures.iter().any(|s| s.version == 2) {
// Debug: show exact number format for comparison with signing
let object_str = match &assertion.object {
stemedb_core::types::ObjectValue::Number(n) => format!("Number({:.17})", n),
other => format!("{:?}", other),
};
let confidence_str = format!("{:.17}", assertion.confidence);
let hash = compute_content_hash_v2(assertion);
debug!(
subject = %assertion.subject,
predicate = %assertion.predicate,
object = %object_str,
source_hash = %hex::encode(assertion.source_hash),
source_class = ?assertion.source_class,
confidence = %confidence_str,
timestamp = %assertion.timestamp,
content_hash = %hex::encode(hash),
"Computed v2 content hash for verification"
);
Some(hash)
} else {
None
};
for (idx, sig_entry) in assertion.signatures.iter().enumerate() {
// Determine which message was signed based on version
let message_bytes: &[u8] = match sig_entry.version {
1 => {
// v1 (legacy): signs "{subject}:{predicate}"
v1_message.as_bytes()
}
2 => {
// v2 (enterprise): signs the content hash computed by compute_content_hash_v2
v2_content_hash.as_ref().ok_or_else(|| {
IngestError::InvalidSignature(
"v2 signature present but v2_content_hash was not computed".to_string(),
)
})?
}
v => {
return Err(IngestError::InvalidSignature(format!(
"Unknown signature version {} for signature {}",
v, idx
)));
}
};
// Reconstruct the verifying key from the stored agent_id
let verifying_key = VerifyingKey::from_bytes(&sig_entry.agent_id).map_err(|e| {
IngestError::InvalidSignature(format!(
"Invalid public key for signature {}: {}",
idx, e
))
})?;
// Reconstruct the signature
let signature = Signature::from_bytes(&sig_entry.signature);
// Verify the signature
verifying_key.verify(message_bytes, &signature).map_err(|e| {
IngestError::InvalidSignature(format!(
"Signature {} (v{}) failed verification: {}",
idx, sig_entry.version, e
))
})?;
debug!(
agent_id = %hex::encode(&sig_entry.agent_id[..8]),
signature_idx = idx,
version = sig_entry.version,
"Signature verified"
);
}
Ok(())
} }
/// Ingest a vote into the KV store via VoteStore. /// Ingest a vote into the KV store via VoteStore.

View File

@ -3,7 +3,6 @@
//! Contains the continuous ingestion loop that tails the WAL. //! Contains the continuous ingestion loop that tails the WAL.
use super::IngestWorker; use super::IngestWorker;
use crate::error::IngestError;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::time::Duration; use std::time::Duration;
use stemedb_storage::KVStore; use stemedb_storage::KVStore;
@ -72,18 +71,17 @@ impl<S: KVStore + 'static> IngestWorker<S> {
debug!("Error during shutdown (expected): {:?}", e); debug!("Error during shutdown (expected): {:?}", e);
break; break;
} }
match &e {
IngestError::InputValidation(msg) => { if e.is_retryable() {
warn!("Rejected invalid input: {}", msg); // Transient error: back off and retry the same record
} error!("Transient ingestion error (will retry): {:?}", e);
IngestError::InvalidSignature(msg) => {
warn!("Rejected invalid signature: {}", msg);
}
_ => {
error!("Ingestion error: {:?}", e);
}
}
tokio::time::sleep(Duration::from_secs(1)).await; tokio::time::sleep(Duration::from_secs(1)).await;
} else {
// Permanent error: step() already advanced the cursor past
// the poison record, so continue immediately to process the
// next record without sleeping.
warn!("Permanent ingestion error (record skipped): {}", e);
}
} }
} }
} }

View File

@ -25,6 +25,7 @@ async fn test_rejects_invalid_signature() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -83,6 +84,7 @@ async fn test_rejects_unsigned_assertion() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![], // No signatures! signatures: vec![], // No signatures!
confidence: 0.95, confidence: 0.95,
@ -112,6 +114,88 @@ async fn test_rejects_unsigned_assertion() {
); );
} }
/// Test: Invalid signature advances cursor past poison record so subsequent records process.
///
/// This is the core regression test for the "assert returns 201 but data not queryable" bug.
/// Previously, an invalid-signature record would cause the IngestWorker to retry the same
/// offset forever, blocking all subsequent records.
///
/// Scenario: two records are appended to the WAL, first one carrying an invalid signature,
/// then one built by `create_signed_assertion`. The first `step()` must return
/// `IngestError::InvalidSignature`; the second must succeed and report a non-zero byte
/// count. Afterwards the assertion counter must equal 1 and exactly one entry must exist
/// under the rejected-records prefix.
#[tokio::test]
async fn test_invalid_signature_skips_and_continues() {
// Journal and store live in separate sub-directories of one temp dir.
let dir = tempdir().expect("Failed to create temp dir");
let wal_dir = dir.path().join("wal");
let db_dir = dir.path().join("db");
// Record 1: Invalid signature (poison record)
let bad_assertion = Assertion {
subject: "Bad".to_string(),
predicate: "poison".to_string(),
object: ObjectValue::Text("should be skipped".to_string()),
parent_hash: None,
source_hash: [0u8; 32],
source_class: SourceClass::Expert,
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
agent_id: [1u8; 32], // Invalid Ed25519 public key
signature: [2u8; 64], // Invalid signature
timestamp: 1000,
}],
confidence: 0.95,
timestamp: 1000,
hlc_timestamp: HlcTimestamp::default(),
vector: None,
};
// Record 2: Valid signature (should be processed after skipping record 1)
let good_assertion = create_signed_assertion("Good", "valid");
let mut journal = Journal::open(&wal_dir).expect("Failed to open journal");
let store = HybridStore::open(&db_dir).expect("Failed to open store");
// Write both records to WAL (poison record first, so it is the first one the worker reads)
journal.append(serialize_assertion(&bad_assertion).expect("ser")).expect("append bad");
journal.append(serialize_assertion(&good_assertion).expect("ser")).expect("append good");
// Hand the journal to the worker; the store is cloned so the test can inspect it afterwards.
let journal = Arc::new(Mutex::new(journal));
let store = Arc::new(store);
let mut worker =
IngestWorker::new(journal, store.clone()).await.expect("Failed to create worker");
// Step 1: Should fail with InvalidSignature but advance cursor past the poison record
let result1 = worker.step().await;
assert!(result1.is_err(), "Should reject invalid signature");
assert!(
matches!(result1.unwrap_err(), IngestError::InvalidSignature(_)),
"Should be InvalidSignature"
);
// Step 2: Should succeed — the cursor moved past the poison record
let result2 = worker.step().await;
assert!(
result2.is_ok(),
"Should process valid record after skipping poison, got: {:?}",
result2
);
let bytes = result2.expect("step 2");
assert!(bytes > 0, "Should have read bytes from the valid record");
// Verify the good assertion was stored
// The counter value is decoded as an 8-byte little-endian u64.
let count_key = key_codec::assertion_count_key();
let count_entry = store.get(&count_key).await.expect("get").expect("should have count");
let count = u64::from_le_bytes(count_entry.try_into().expect("8 bytes"));
assert_eq!(count, 1, "Exactly one assertion should be stored (the good one)");
// Verify rejection metadata was recorded
// NOTE(review): presumably imported so the `scan_prefix` trait method is callable on the
// store — confirm.
use stemedb_storage::KVStore;
let rejected_prefix = key_codec::rejected_records_scan_prefix();
let rejected = store.scan_prefix(&rejected_prefix).await.expect("scan rejected");
assert_eq!(rejected.len(), 1, "Should have exactly one rejected record entry");
}
/// Test: Multi-signature assertions require all signatures to be valid. /// Test: Multi-signature assertions require all signatures to be valid.
#[tokio::test] #[tokio::test]
async fn test_multisig_all_must_be_valid() { async fn test_multisig_all_must_be_valid() {
@ -136,6 +220,7 @@ async fn test_multisig_all_must_be_valid() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![ signatures: vec![
// Valid signature // Valid signature

View File

@ -29,6 +29,7 @@ async fn test_rejects_high_confidence() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -86,6 +87,7 @@ async fn test_rejects_negative_confidence() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -213,6 +215,7 @@ async fn test_rejects_oversized_subject() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -273,6 +276,7 @@ async fn test_rejects_oversized_predicate() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -335,6 +339,7 @@ async fn test_accepts_exact_max_subject_length() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -393,6 +398,7 @@ async fn test_accepts_exact_max_predicate_length() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -446,6 +452,7 @@ async fn test_rejects_nan_confidence() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,

View File

@ -29,6 +29,7 @@ async fn test_rejects_infinite_confidence() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -172,6 +173,7 @@ async fn test_rejects_future_timestamp() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -237,6 +239,7 @@ async fn test_accepts_near_future_timestamp() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -287,6 +290,7 @@ async fn test_accepts_zero_confidence() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,
@ -337,6 +341,7 @@ async fn test_accepts_one_confidence() {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed, lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
version: 1, version: 1,

View File

@ -24,6 +24,7 @@ pub fn assertion_to_request(assertion: &Assertion) -> CreateAssertionRequest {
.source_metadata .source_metadata
.as_ref() .as_ref()
.map(|b| String::from_utf8_lossy(b).into_owned()), .map(|b| String::from_utf8_lossy(b).into_owned()),
narrative: assertion.narrative.clone(),
// Include timestamps for v2 signature verification // Include timestamps for v2 signature verification
timestamp: Some(assertion.timestamp), timestamp: Some(assertion.timestamp),
hlc_timestamp: Some(HlcTimestampDto { hlc_timestamp: Some(HlcTimestampDto {
@ -94,6 +95,7 @@ mod tests {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved, lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
agent_id: [1u8; 32], agent_id: [1u8; 32],

View File

@ -41,6 +41,10 @@ pub struct CreateAssertionRequest {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>, pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
#[serde(skip_serializing_if = "Option::is_none")]
pub narrative: Option<String>,
/// Unix timestamp when the assertion was created. /// Unix timestamp when the assertion was created.
/// Required for v2 signatures to verify correctly. /// Required for v2 signatures to verify correctly.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]

View File

@ -66,6 +66,10 @@ pub struct AssertionDto {
/// Structured source metadata as a JSON string (optional). /// Structured source metadata as a JSON string (optional).
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>, pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
#[serde(skip_serializing_if = "Option::is_none")]
pub narrative: Option<String>,
} }
/// Response from a query operation. /// Response from a query operation.

View File

@ -233,6 +233,7 @@ impl MedicalClaim {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata, source_metadata,
narrative: None,
lifecycle, lifecycle,
signatures: Vec::new(), signatures: Vec::new(),
confidence: self.confidence, confidence: self.confidence,

View File

@ -236,9 +236,12 @@ impl<S: KVStore + 'static> QueryEngine<S> {
Ok(changes) Ok(changes)
} }
/// Deserialize an assertion using the canonical serde module. /// Deserialize an assertion with backward compatibility.
///
/// Tries current format first, then falls back to legacy (pre-narrative)
/// format for assertions serialized before the schema change.
pub(super) fn deserialize_assertion(&self, data: &[u8]) -> Result<Assertion> { pub(super) fn deserialize_assertion(&self, data: &[u8]) -> Result<Assertion> {
stemedb_core::serde::deserialize(data) stemedb_core::serde::deserialize_assertion_compat(data)
.map_err(|e| QueryError::Deserialization(e.to_string())) .map_err(|e| QueryError::Deserialization(e.to_string()))
} }

View File

@ -362,7 +362,7 @@ impl<S: KVStore + 'static> Materializer<S> {
for hash in hash_list { for hash in hash_list {
let key = key_codec::assertion_key(subject, &hex::encode(hash)); let key = key_codec::assertion_key(subject, &hex::encode(hash));
if let Some(data) = self.store.get(&key).await? { if let Some(data) = self.store.get(&key).await? {
match stemedb_core::serde::deserialize::<Assertion>(&data) { match stemedb_core::serde::deserialize_assertion_compat(&data) {
Ok(assertion) => candidates.push(assertion), Ok(assertion) => candidates.push(assertion),
Err(e) => { Err(e) => {
debug!( debug!(

View File

@ -57,6 +57,7 @@ impl Agent {
visual_hash: None, visual_hash: None,
epoch: None, epoch: None,
source_metadata: None, source_metadata: None,
narrative: None,
lifecycle, lifecycle,
signatures: vec![SignatureEntry { signatures: vec![SignatureEntry {
agent_id: self.verifying_key.to_bytes(), agent_id: self.verifying_key.to_bytes(),

View File

@ -106,6 +106,26 @@ impl HybridStore {
Ok(Self { fjall, redb, _temp_dir: Some(temp_dir) }) Ok(Self { fjall, redb, _temp_dir: Some(temp_dir) })
} }
/// Collect every assertion entry held by the Fjall backend.
///
/// Performs an unfiltered scan of Fjall (empty prefix) and keeps only the
/// pairs whose key tag begins with `H:` (assertion data). The admin
/// rebuild-indexes endpoint uses this to reconstruct missing Redb secondary
/// indexes.
///
/// Returns `(key, value)` pairs whose keys have the form
/// `{subject}\x00H:{hash_hex}`.
#[instrument(skip_all)]
pub async fn scan_fjall_assertions(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
    let mut assertions: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
    for (key, value) in self.fjall.scan_prefix(b"").await? {
        // Entries carrying any other tag are not assertion data and are dropped.
        if key_codec::extract_tag(&key).starts_with(b"H:") {
            assertions.push((key, value));
        }
    }
    Ok(assertions)
}
} }
#[async_trait] #[async_trait]

View File

@ -113,3 +113,33 @@ pub fn assertion_count_key() -> Vec<u8> {
pub fn trust_rank_scan_prefix() -> Vec<u8> { pub fn trust_rank_scan_prefix() -> Vec<u8> {
global_key(b"TRUST:", b"") global_key(b"TRUST:", b"")
} }
/// Key under which a rejected WAL record is stored: `\x00REJECTED:{offset}`
///
/// The entry holds metadata about a WAL record that was permanently skipped
/// by the IngestWorker (invalid signatures, validation failures, etc.).
/// `offset` is rendered as unpadded decimal text, so byte-wise key order is
/// not numeric offset order.
pub fn rejected_record_key(offset: u64) -> Vec<u8> {
    let offset_text = offset.to_string();
    global_key(b"REJECTED:", offset_text.as_bytes())
}
/// Rejected records scan prefix: `\x00REJECTED:`
///
/// Built from the same `REJECTED:` tag as [`rejected_record_key`], with an
/// empty suffix in place of the offset.
pub fn rejected_records_scan_prefix() -> Vec<u8> {
global_key(b"REJECTED:", b"")
}
/// Feed index key: `\x00FEED:{inverted_ts_hex}:{hash_hex}`
///
/// The timestamp component is `u64::MAX - ingested_at`, hex-encoded from its
/// big-endian bytes, so that lexicographic scan order is reverse
/// chronological (newest first).
/// The value stored under this key holds the subject, which lets the feed
/// handler construct the assertion key without a reverse lookup.
pub fn feed_key(ingested_at: u64, hash_hex: &str) -> Vec<u8> {
    let inverted_bytes = (u64::MAX - ingested_at).to_be_bytes();
    let mut suffix = hex::encode(inverted_bytes);
    suffix.push(':');
    suffix.push_str(hash_hex);
    global_key(b"FEED:", suffix.as_bytes())
}
/// Feed index scan prefix: `\x00FEED:`
///
/// Built from the same `FEED:` tag as [`feed_key`], with an empty suffix.
pub fn feed_scan_prefix() -> Vec<u8> {
global_key(b"FEED:", b"")
}

View File

@ -57,10 +57,10 @@ pub use subject_keys::{
// Global keys // Global keys
pub use global_keys::{ pub use global_keys::{
assertion_count_key, audit_agent_index_key, audit_agent_prefix, audit_key, audit_scan_prefix, assertion_count_key, audit_agent_index_key, audit_agent_prefix, audit_key, audit_scan_prefix,
cursor_key, epoch_key, escalation_key, escalation_scan_prefix, gs_verified_key, quota_key, cursor_key, epoch_key, escalation_key, escalation_scan_prefix, feed_key, feed_scan_prefix,
quota_limit_key, superseded_key, supersession_index_key, supersession_index_prefix, gs_verified_key, quota_key, quota_limit_key, rejected_record_key, rejected_records_scan_prefix,
supersession_key, trust_pack_key, trust_pack_scan_prefix, trust_rank_key, superseded_key, supersession_index_key, supersession_index_prefix, supersession_key,
trust_rank_scan_prefix, trust_pack_key, trust_pack_scan_prefix, trust_rank_key, trust_rank_scan_prefix,
}; };
// Index keys // Index keys

View File

@ -229,3 +229,45 @@ fn test_global_keys_sort_first() {
let subject = assertion_key("Apple", "abc"); let subject = assertion_key("Apple", "abc");
assert!(global < subject, "Global keys should sort before subject keys"); assert!(global < subject, "Global keys should sort before subject keys");
} }
#[test]
fn test_feed_key_newest_first_ordering() {
    let older = feed_key(1000, "aaaa");
    let newer = feed_key(2000, "bbbb");
    // The timestamp is inverted inside the key, so the later ingest must compare smaller.
    assert!(older > newer, "Newer feed keys should sort before older ones");
}
#[test]
fn test_feed_key_same_timestamp_tiebreak() {
    let low_hash = feed_key(1000, "aaaa");
    let high_hash = feed_key(1000, "zzzz");
    // With identical timestamps the hash_hex suffix decides the order lexicographically.
    assert!(high_hash > low_hash);
}
#[test]
fn test_feed_key_starts_with_scan_prefix() {
    let key = feed_key(1000, "abc123def456");
    let expected_prefix = feed_scan_prefix();
    // A key that did not begin with the scan prefix would be missed by a prefix scan.
    assert!(key.starts_with(&expected_prefix), "Feed key should start with feed scan prefix");
}
#[test]
fn test_feed_key_format() {
    // Layout under test: `\x00FEED:` (6 bytes) + 16 hex chars + `:` + hash_hex.
    const TAG_END: usize = 6;
    const TS_END: usize = TAG_END + 16;

    let key = feed_key(0, "deadbeef");

    assert_eq!(&key[..TAG_END], b"\x00FEED:");
    // ingested_at = 0 inverts to u64::MAX, whose hex form is sixteen `f`s.
    assert_eq!(&key[TAG_END..TS_END], b"ffffffffffffffff");
    assert_eq!(key[TS_END], b':');
    assert_eq!(&key[TS_END + 1..], b"deadbeef");
}
#[test]
fn test_feed_scan_prefix() {
    // The scan prefix is the global-key marker byte followed by the `FEED:` tag.
    assert_eq!(feed_scan_prefix(), b"\x00FEED:");
}

View File

@ -44,6 +44,16 @@ where
stemedb_core::serde::deserialize(data).map_err(|e| StorageError::Serialization(e.to_string())) stemedb_core::serde::deserialize(data).map_err(|e| StorageError::Serialization(e.to_string()))
} }
/// Decode a `SourceRecord`, also accepting data written in the pre-content layout.
///
/// Thin wrapper over `stemedb_core::serde::deserialize_source_record_compat`.
///
/// # Errors
///
/// Any deserialization failure is returned as [`StorageError::Serialization`]
/// carrying the underlying error's message.
pub fn deserialize_source_record_compat(
    data: &[u8],
) -> Result<stemedb_core::types::SourceRecord> {
    match stemedb_core::serde::deserialize_source_record_compat(data) {
        Ok(record) => Ok(record),
        Err(err) => Err(StorageError::Serialization(err.to_string())),
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@ -7,7 +7,7 @@ use tracing::{debug, instrument};
use super::SourceRegistry; use super::SourceRegistry;
use crate::error::{Result, StorageError}; use crate::error::{Result, StorageError};
use crate::key_codec; use crate::key_codec;
use crate::serde_helpers::{deserialize, serialize}; use crate::serde_helpers::{deserialize_source_record_compat, serialize};
use crate::traits::KVStore; use crate::traits::KVStore;
/// Generic SourceRegistry implementation backed by any KVStore. /// Generic SourceRegistry implementation backed by any KVStore.
@ -80,7 +80,7 @@ impl<S: KVStore + 'static> SourceRegistry for GenericSourceRegistry<S> {
match self.store.get(&key).await? { match self.store.get(&key).await? {
Some(data) => { Some(data) => {
let record: SourceRecord = deserialize(&data)?; let record: SourceRecord = deserialize_source_record_compat(&data)?;
Ok(Some(record)) Ok(Some(record))
} }
None => Ok(None), None => Ok(None),

View File

@ -9,8 +9,8 @@ use crate::error::Result;
use metrics::{counter, gauge}; use metrics::{counter, gauge};
use std::collections::HashSet; use std::collections::HashSet;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use stemedb_core::serde::deserialize; use stemedb_core::serde::deserialize_assertion_compat;
use stemedb_core::types::{detect_clock_skew, Assertion, HlcTimestamp}; use stemedb_core::types::{detect_clock_skew, HlcTimestamp};
use stemedb_rpc::proto::{FetchRequest, GetLeavesRequest, RootExchangeRequest}; use stemedb_rpc::proto::{FetchRequest, GetLeavesRequest, RootExchangeRequest};
use stemedb_storage::crdt::AssertionTransfer; use stemedb_storage::crdt::AssertionTransfer;
use stemedb_storage::KVStore; use stemedb_storage::KVStore;
@ -201,7 +201,7 @@ impl<S: KVStore + 'static> AntiEntropyWorker<S> {
} }
// Extract subject and HLC timestamp from the assertion data // Extract subject and HLC timestamp from the assertion data
let (subject, remote_hlc) = match deserialize::<Assertion>(&transfer.data) { let (subject, remote_hlc) = match deserialize_assertion_compat(&transfer.data) {
Ok(assertion) => (assertion.subject.clone(), assertion.hlc_timestamp), Ok(assertion) => (assertion.subject.clone(), assertion.hlc_timestamp),
Err(e) => { Err(e) => {
warn!( warn!(

View File

@ -1,6 +1,6 @@
# StemeDB Data Structures # StemeDB Data Structures
> **Last Updated:** 2026-01-31 > **Last Updated:** 2026-02-19
> **Source:** `crates/stemedb-core/src/types.rs` > **Source:** `crates/stemedb-core/src/types.rs`
This document describes the core data structures in StemeDB (Episteme). These types form the foundation of the "Git for Truth" knowledge graph. This document describes the core data structures in StemeDB (Episteme). These types form the foundation of the "Git for Truth" knowledge graph.
@ -417,6 +417,50 @@ pub struct TrustPack {
--- ---
## The SourceRecord (Source Registry)
The Source Registry maps content-addressed source hashes to human-readable metadata. This enables the dashboard to show "FDA Approval Letter for Wegovy" instead of a raw BLAKE3 hash.
```rust
/// Source Registry entry: maps a content-addressed source hash to
/// human-readable metadata and, optionally, the extracted document text.
pub struct SourceRecord {
/// Content-addressed hash of the source (BLAKE3, 32 bytes).
pub hash: [u8; 32],
/// Human-readable label.
pub label: String,
/// Optional URL where the source can be accessed.
pub url: Option<String>,
/// Authority tier (0-5), matching SourceClass.
pub tier: u8,
/// Current status (Active, Deprecated, Quarantined).
pub status: SourceStatus,
/// HLC timestamp when the record was created.
pub created_at: u64,
/// HLC timestamp of the last update.
pub updated_at: u64,
/// Optional curator notes about the source.
pub notes: Option<String>,
/// Optional full-text content of the source document.
/// Populated by pipelines that extract text from PDFs.
/// Max size: 1 MB (MAX_SOURCE_CONTENT_LEN).
pub content: Option<String>,
}
```
**Key Points:**
- **Status lifecycle:** Active → Deprecated or Quarantined (curator-driven)
- **Content field:** Stores extracted document text (e.g., from `pdftotext`). Stripped from list responses (`GET /v1/sources`) to avoid returning megabytes; included in single-source responses (`GET /v1/sources/{hash}`)
- **rkyv compat:** Uses `deserialize_source_record_compat()` for backward compatibility with data written before the `content` field was added
---
## Serialization ## Serialization
All types use `rkyv` for zero-copy deserialization: All types use `rkyv` for zero-copy deserialization:
@ -433,6 +477,17 @@ let assertion: Assertion = deserialize(&bytes)?;
**Critical Rule**: Never use raw `AllocSerializer` in production code. Always use `stemedb_core::serde::{serialize, deserialize}`. **Critical Rule**: Never use raw `AllocSerializer` in production code. Always use `stemedb_core::serde::{serialize, deserialize}`.
### Schema Evolution (rkyv Compat)
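rkyv does **not** support schema evolution: once a field is added to a struct, bytes serialized with the old layout can no longer be deserialized as the new struct. The solution is a legacy compat pattern: a `Legacy*` struct mirrors the old layout, and a compat deserializer accepts data written in either layout: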
| Type | Compat Function | Legacy Struct |
|------|----------------|---------------|
| `Assertion` | `deserialize_assertion_compat()` | `LegacyAssertion` (pre-`narrative`) |
| `SourceRecord` | `deserialize_source_record_compat()` | `LegacySourceRecord` (pre-`content`) |
All assertion deserialization should use `deserialize_assertion_compat()`. All source record deserialization should use `deserialize_source_record_compat()`. When adding fields to rkyv structs in the future, always add a legacy compat deserializer following this pattern.
--- ---
## Relationship Diagram ## Relationship Diagram

View File

@ -45,6 +45,9 @@ type Assertion struct {
// Semantic embedding vector (optional) // Semantic embedding vector (optional)
Vector []float32 `json:"vector,omitempty"` Vector []float32 `json:"vector,omitempty"`
// Free-text narrative explaining methodology, limitations, bias, and caveats (optional)
Narrative *string `json:"narrative,omitempty"`
} }
// AssertionBuilder provides a fluent API for building assertions. // AssertionBuilder provides a fluent API for building assertions.
@ -150,6 +153,12 @@ func (b *AssertionBuilder) WithEpoch(epochHex string) *AssertionBuilder {
return b return b
} }
// WithNarrative sets the free-text narrative (methodology, limitations, caveats).
// The assertion stores a pointer to the builder's own copy of the string.
// It returns the builder so calls can be chained.
func (b *AssertionBuilder) WithNarrative(narrative string) *AssertionBuilder {
b.assertion.Narrative = &narrative
return b
}
// WithVector sets the semantic embedding vector. // WithVector sets the semantic embedding vector.
func (b *AssertionBuilder) WithVector(vector []float32) *AssertionBuilder { func (b *AssertionBuilder) WithVector(vector []float32) *AssertionBuilder {
b.assertion.Vector = vector b.assertion.Vector = vector

View File

@ -174,6 +174,9 @@ type AssertionResponse struct {
// Semantic embedding vector (optional) // Semantic embedding vector (optional)
Vector []float32 `json:"vector,omitempty"` Vector []float32 `json:"vector,omitempty"`
// Free-text narrative explaining methodology, limitations, bias, and caveats (optional)
Narrative *string `json:"narrative,omitempty"`
} }
// CreateResponse represents the response from a create operation. // CreateResponse represents the response from a create operation.