feat: add source content to source registry, signed assertions, feed endpoint, dashboard enhancements

- Add `content: Option<String>` to SourceRecord with rkyv schema evolution
  (LegacySourceRecord compat deserializer for backward compatibility)
- Add MAX_SOURCE_CONTENT_LEN (1MB) limit with API validation
- Strip content from list responses, include in single-source GET
- Update Go SDK RegisterSourceRequest with Content field
- FCM pipeline extracts PDF text via pdftotext and passes to registration
- Dashboard impact panel fetches and displays source content with expand/collapse
- Add feed endpoint, dashboard feed panel, and signed assertion support
- Update data-structures.md, API docs, and storage docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jordan 2026-02-19 21:54:27 -07:00
parent 58594bc7b9
commit ad07a75d0a
85 changed files with 2651 additions and 348 deletions

View File

@ -1,6 +1,6 @@
# API Surface
**Last Updated:** 2026-02-03
**Last Updated:** 2026-02-19
**Confidence:** High
## Summary
@ -41,10 +41,10 @@ Episteme exposes an HTTP API via `axum` with auto-generated OpenAPI 3.1 document
| `GET` | `/metrics` | Prometheus metrics (Phase 8B) | ✅ Implemented |
| `GET` | `/api-docs/openapi.json` | OpenAPI 3.1 spec | ✅ Implemented |
| `GET` | `/swagger-ui` | Interactive API docs | ✅ Implemented |
| `POST` | `/v1/sources` | Register source with human-readable metadata | ✅ Implemented |
| `GET` | `/v1/sources/{hash}` | Get source record by hash | ✅ Implemented |
| `POST` | `/v1/sources` | Register source with metadata and optional content | ✅ Implemented |
| `GET` | `/v1/sources/{hash}` | Get source record by hash (includes content) | ✅ Implemented |
| `PATCH` | `/v1/sources/{hash}/status` | Update source status (deprecate/quarantine) | ✅ Implemented |
| `GET` | `/v1/sources` | List/search sources (filter by tier or query) | ✅ Implemented |
| `GET` | `/v1/sources` | List/search sources (content stripped for performance) | ✅ Implemented |
### Cluster Gateway Endpoints (stemedb-cluster)

View File

@ -1,6 +1,6 @@
# SDK - Go Client Libraries
**Last Updated:** 2026-02-01
**Last Updated:** 2026-02-19
**Confidence:** High
## Summary

View File

@ -1,6 +1,6 @@
# Storage
**Last Updated:** 2026-01-31
**Last Updated:** 2026-02-19
**Confidence:** High
## Summary
@ -91,6 +91,16 @@ let value: MyType = deserialize(&bytes)?;
This provides unified error handling across all store implementations (VoteStore, IndexStore, TrustRankStore, AuditStore, TrustPackStore, QuotaStore).
For types with schema evolution (rkyv compat), use the dedicated compat functions:
```rust
use crate::serde_helpers::deserialize_source_record_compat;
let record: SourceRecord = deserialize_source_record_compat(&bytes)?;
```
Available compat deserializers: `deserialize_source_record_compat` (SourceRecord). For assertions, use `stemedb_core::serde::deserialize_assertion_compat` directly.
## Write Path
```

View File

@ -146,6 +146,7 @@ fn claim_to_assertion_with_tier(
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![signature_entry],
confidence: claim.confidence,
@ -235,6 +236,7 @@ pub fn authored_claim_to_assertion(
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle,
signatures: vec![signature_entry],
confidence: 1.0, // Authored claims have full confidence

View File

@ -79,7 +79,7 @@ impl StemeDBPatternStore {
return Ok(None);
};
let assertion = stemedb_core::serde::deserialize::<Assertion>(&bytes).map_err(|e| {
let assertion = stemedb_core::serde::deserialize_assertion_compat(&bytes).map_err(|e| {
AphoriaError::Storage(format!(
"Failed to deserialize assertion {}: {}",
hex::encode(hash),
@ -389,6 +389,7 @@ impl PatternAggregator {
visual_hash: None,
epoch: None,
source_metadata: Some(metadata_bytes),
narrative: None,
lifecycle: stemedb_core::types::LifecycleStage::Approved,
signatures: vec![], // Bootstrap patterns are unsigned (no signing key available)
confidence: 1.0, // Pattern aggregates are high confidence

View File

@ -114,6 +114,7 @@ pub fn create_authoritative_assertion_with_metadata(
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![signature_entry],
confidence: 1.0,
@ -170,6 +171,7 @@ pub fn create_authoritative_assertion(
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![signature_entry],
confidence: 1.0,

View File

@ -342,7 +342,7 @@ impl LocalEpisteme {
let assertion_key = stemedb_storage::key_codec::assertion_key(&subject, &hash_hex);
self.store.get(&assertion_key).await.ok().flatten().and_then(|bytes| {
stemedb_core::serde::deserialize::<Assertion>(&bytes)
stemedb_core::serde::deserialize_assertion_compat(&bytes)
.map_err(|e| warn!(hash = %hash_hex, error = %e, "Failed to deserialize"))
.ok()
})

View File

@ -854,6 +854,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: Some(b"{\"file\":\"test.rs\"}".to_vec()),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry {
agent_id: [2u8; 32],

View File

@ -438,6 +438,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![],
confidence: 1.0,

View File

@ -255,6 +255,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![],
confidence: 1.0,

View File

@ -109,6 +109,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&source_metadata).ok(),
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![],
confidence: 1.0,

View File

@ -47,7 +47,7 @@ impl ClaimCache {
pub fn save(&self, claims: &[AuthoredClaim], remote_url: &str) -> Result<(), AphoriaError> {
let now = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.map_err(|e| AphoriaError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?
.map_err(|e| AphoriaError::Io(std::io::Error::other(e)))?
.as_secs();
let cache = ClaimCacheFile {

View File

@ -179,9 +179,7 @@ impl RemoteClaimStore {
}
}
Err(last_error.unwrap_or_else(|| {
AphoriaError::Hosted("Max retries exceeded".to_string())
}))
Err(last_error.unwrap_or_else(|| AphoriaError::Hosted("Max retries exceeded".to_string())))
}
/// Perform the actual HTTP request.
@ -211,8 +209,8 @@ impl RemoteClaimStore {
http_request.call()
};
let response = response
.map_err(|e| AphoriaError::Hosted(format!("HTTP request failed: {e}")))?;
let response =
response.map_err(|e| AphoriaError::Hosted(format!("HTTP request failed: {e}")))?;
if response.status() >= 200 && response.status() < 300 {
let body = response
@ -230,8 +228,7 @@ impl ClaimStore for RemoteClaimStore {
fn save_claim(&self, claim: &AuthoredClaim) -> Result<(), AphoriaError> {
let request = CreateClaimRequest { claim: claim_to_dto(claim) };
let response: CreateClaimResponse =
self.request("POST", "/v1/claims", Some(&request))?;
let response: CreateClaimResponse = self.request("POST", "/v1/claims", Some(&request))?;
if response.stored {
info!(claim_id = %claim.id, "Claim stored remotely");
@ -324,10 +321,9 @@ impl RemoteClaimStore {
warn!(operation, "Remote unreachable, using cached claims");
fallback()
}
OfflineFallback::Fail => Err(AphoriaError::Hosted(format!(
"{}: remote unreachable",
operation
))),
OfflineFallback::Fail => {
Err(AphoriaError::Hosted(format!("{}: remote unreachable", operation)))
}
OfflineFallback::Queue => {
warn!(operation, "Remote unreachable, queue not implemented (using cache)");
fallback()
@ -425,14 +421,11 @@ fn is_network_error(err: &AphoriaError) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use crate::config::types::hosted::SyncMode;
use crate::SyncMode;
#[test]
fn test_remote_store_requires_url() {
let config = HostedConfig {
url: None,
..Default::default()
};
let config = HostedConfig { url: None, ..Default::default() };
let result = RemoteClaimStore::new(&config);
assert!(result.is_err());
@ -474,6 +467,7 @@ mod tests {
let config = HostedConfig {
url: Some("https://example.com".to_string()),
project_id: Some("test-project".to_string()),
team_id: None,
api_key_env: "TEST_API_KEY".to_string(),
sync_mode: SyncMode::RemoteOnly,
offline_fallback: OfflineFallback::Skip,

View File

@ -98,8 +98,8 @@ impl ReportFormatter for JsonReport {
// Add tier-aware verdict if available
if let Some(ref tier_verdict) = conflict.tier_verdict {
conflict_json["tier_verdict"] = serde_json::to_value(tier_verdict)
.unwrap_or(serde_json::Value::Null);
conflict_json["tier_verdict"] =
serde_json::to_value(tier_verdict).unwrap_or(serde_json::Value::Null);
}
// Add primary tier if available

View File

@ -87,7 +87,9 @@ impl TierAwareVerdict {
/// Returns a human-readable string describing the tier-aware verdict.
pub fn display(&self) -> String {
match self {
TierAwareVerdict::SingleTier { tier_name, verdict, sources, max_confidence, .. } => {
TierAwareVerdict::SingleTier {
tier_name, verdict, sources, max_confidence, ..
} => {
format!(
"{} {} - {} source{}, max confidence {:.2}",
verdict.symbol(),
@ -159,12 +161,7 @@ impl TierAwareVerdict {
})
.collect();
Self::MultiTier {
primary_tier,
primary_verdict,
tier_verdicts,
conflict_score,
}
Self::MultiTier { primary_tier, primary_verdict, tier_verdicts, conflict_score }
}
}
@ -250,8 +247,7 @@ mod tests {
},
);
let verdict =
TierAwareVerdict::from_multi_tier(&tier_breakdown, 1, Verdict::Block, 0.92);
let verdict = TierAwareVerdict::from_multi_tier(&tier_breakdown, 1, Verdict::Block, 0.92);
assert_eq!(verdict.effective_verdict(), Verdict::Block);
assert_eq!(verdict.primary_tier(), 1);

View File

@ -235,11 +235,7 @@ impl fmt::Display for ConflictResult {
writeln!(f, " {} {}", verdict_str, self.claim.concept_path)?;
}
writeln!(
f,
" Concept: {}",
self.claim.concept_path
)?;
writeln!(f, " Concept: {}", self.claim.concept_path)?;
writeln!(
f,
" Your code: {} ({}: L{})",

View File

@ -1,12 +1,23 @@
import { Header } from "@/components/layout/header";
import { LayeredQueryResults } from "@/components/layered";
export default function LayeredPage() {
interface LayeredPageProps {
searchParams: Promise<{ subject?: string; predicate?: string }>;
}
export default async function LayeredPage({ searchParams }: LayeredPageProps) {
const params = await searchParams;
const initialSubject = params.subject;
const initialPredicate = params.predicate;
return (
<>
<Header title="Layered Consensus" />
<div className="p-6">
<LayeredQueryResults />
<LayeredQueryResults
initialSubject={initialSubject}
initialPredicate={initialPredicate}
/>
</div>
</>
);

View File

@ -1,12 +1,26 @@
"use client";
import { useSearchParams } from "next/navigation";
import { Suspense } from "react";
import { Header } from "@/components/layout/header";
import { QueryResults } from "@/components/skeptic";
/**
 * Reads `subject` and `predicate` from the URL query string and forwards them
 * to <QueryResults /> as its initial values. A parameter that is absent from
 * the URL is forwarded as `undefined` instead of `null`.
 */
function SkepticContent() {
  const query = useSearchParams();
  const readParam = (name: string): string | undefined => query.get(name) ?? undefined;

  return (
    <QueryResults
      initialSubject={readParam("subject")}
      initialPredicate={readParam("predicate")}
    />
  );
}
export default function SkepticPage() {
return (
<>
<Header title="Skeptic Query" />
<div className="p-6">
<QueryResults />
<Suspense fallback={<div className="text-sm text-muted-foreground">Loading...</div>}>
<SkepticContent />
</Suspense>
</div>
</>
);

View File

@ -29,14 +29,14 @@ export function AuditPanel({ initialFilters }: AuditPanelProps) {
try {
const client = new StemeDBClient();
// Convert time range to from/to timestamps
// Convert time range to from/to timestamps (Unix seconds — backend uses seconds, not ms)
let fromTs: number | undefined;
let toTs: number | undefined;
if (currentFilters.timeRange !== "all") {
const now = Date.now();
const rangeMs = TIME_RANGES_MS[currentFilters.timeRange as TimeRangeKey] ?? TIME_RANGES_MS["24h"];
fromTs = now - rangeMs;
toTs = now;
const nowSecs = Math.floor(Date.now() / 1000);
const rangeSecs = Math.floor((TIME_RANGES_MS[currentFilters.timeRange as TimeRangeKey] ?? TIME_RANGES_MS["24h"]) / 1000);
fromTs = nowSecs - rangeSecs;
toTs = nowSecs;
}
const data = await client.auditQueries({

View File

@ -1,6 +1,7 @@
"use client";
import { useState } from "react";
import { useState, useCallback } from "react";
import Link from "next/link";
import type { AuditEntry } from "@/lib/api/types";
import { formatTime, formatDate } from "@/lib/format";
import { ResultBadge } from "./result-badge";
@ -10,6 +11,36 @@ interface AuditRowProps {
entry: AuditEntry;
}
/**
 * Button that shows the first 12 characters of `hash` and copies the full
 * value to the clipboard when clicked.
 *
 * After a successful copy the label switches to "Copied!" for 1.5 seconds.
 * The click event is stopped from propagating to ancestor elements.
 *
 * @param hash  Full hash string to display (truncated) and copy (in full).
 * @param label Optional prefix for the tooltip, rendered as "label: hash".
 */
function CopyableHash({ hash, label }: { hash: string; label?: string }) {
  const [copied, setCopied] = useState(false);

  const handleCopy = useCallback(
    (e: React.MouseEvent) => {
      e.stopPropagation();

      // The async Clipboard API is only exposed in secure contexts; without
      // this guard the handler would throw when the page is served over
      // plain HTTP.
      const clipboard = navigator.clipboard;
      if (!clipboard) return;

      clipboard.writeText(hash).then(
        () => {
          setCopied(true);
          setTimeout(() => setCopied(false), 1500);
        },
        () => {
          // The browser refused the write (e.g. permission denied or the
          // document is not focused). Leave the label unchanged instead of
          // surfacing an unhandled promise rejection.
        }
      );
    },
    [hash]
  );

  return (
    <button
      type="button"
      onClick={handleCopy}
      title={label ? `${label}: ${hash}` : hash}
      className="font-mono text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
    >
      {copied ? (
        <span className="text-green-600 dark:text-green-400">Copied!</span>
      ) : (
        hash.slice(0, 12)
      )}
    </button>
  );
}
export function AuditRow({ entry }: AuditRowProps) {
const [expanded, setExpanded] = useState(false);
@ -30,6 +61,15 @@ export function AuditRow({ entry }: AuditRowProps) {
? `${entry.agent_id.slice(0, 8)}...`
: "-";
// Build cross-navigation URLs when subject is present
const hasSubject = Boolean(entry.params.subject);
const crossNavParams = hasSubject
? new URLSearchParams({
subject: entry.params.subject!,
...(entry.params.predicate ? { predicate: entry.params.predicate } : {}),
}).toString()
: null;
return (
<div
className={`rounded-lg border border-border transition-colors hover:bg-muted/50 ${
@ -84,7 +124,8 @@ export function AuditRow({ entry }: AuditRowProps) {
{/* Expanded details */}
{expanded && (
<div className="px-4 pb-3 pt-0 border-t border-border mt-0">
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2">
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-3">
{/* Metadata grid */}
<div className="grid grid-cols-2 gap-2 text-xs">
<div>
<span className="text-muted-foreground">Query ID:</span>
@ -107,16 +148,51 @@ export function AuditRow({ entry }: AuditRowProps) {
<span className="ml-2">{entry.contributing_assertions.length}</span>
</div>
</div>
{/* Contributing assertions */}
{entry.contributing_assertions.length > 0 && (
<div className="text-xs">
<span className="text-muted-foreground">Top contributors:</span>
<div className="mt-1 space-y-1">
{entry.contributing_assertions.slice(0, 3).map((ca) => (
<div key={ca.assertion_hash} className="font-mono text-muted-foreground">
{ca.assertion_hash.slice(0, 12)}... (weight: {(ca.weight * 100).toFixed(0)}%)
</div>
))}
<div className="text-xs space-y-1">
<div className="grid grid-cols-3 gap-2 text-muted-foreground font-medium pb-1 border-b border-border/50">
<span>Assertion Hash</span>
<span>Source Hash</span>
<span>Lifecycle / Weight</span>
</div>
{entry.contributing_assertions.slice(0, 3).map((ca) => (
<div
key={ca.assertion_hash}
className="grid grid-cols-3 gap-2 items-center py-0.5"
>
<CopyableHash hash={ca.assertion_hash} label="Assertion hash" />
<CopyableHash hash={ca.source_hash} label="Source hash" />
<span className="text-muted-foreground">
<span className="px-1.5 py-0.5 rounded bg-muted text-foreground mr-1">
{ca.lifecycle}
</span>
{(ca.weight * 100).toFixed(0)}%
</span>
</div>
))}
</div>
)}
{/* Cross-navigation links */}
{hasSubject && crossNavParams && (
<div
className="flex items-center gap-3 pt-2 border-t border-border"
onClick={(e) => e.stopPropagation()}
>
<Link
href={`/skeptic?${crossNavParams}`}
className="text-xs text-blue-600 dark:text-blue-400 hover:underline px-2 py-1 rounded bg-muted"
>
View in Skeptic
</Link>
<Link
href={`/layered?${crossNavParams}`}
className="text-xs text-blue-600 dark:text-blue-400 hover:underline px-2 py-1 rounded bg-muted"
>
View in Layered
</Link>
</div>
)}
</div>

View File

@ -1,6 +1,7 @@
"use client";
import { useState } from "react";
import Link from "next/link";
import type { AssertionObject } from "@/lib/api/types";
import { formatRelativeTime, formatUnixDateTime } from "@/lib/format";
import { Badge } from "@/components/ui/badge";
@ -26,6 +27,10 @@ function formatValue(obj: { type: string; value: string | number | boolean }): s
return str.length > 60 ? `${str.slice(0, 57)}...` : str;
}
/**
 * Builds the Skeptic page URL for a feed entry, with the entry's subject and
 * predicate URI-encoded into the query string.
 */
function investigateHref(entry: AssertionObject): string {
  const subject = encodeURIComponent(entry.subject);
  const predicate = encodeURIComponent(entry.predicate);
  return "/skeptic?subject=" + subject + "&predicate=" + predicate;
}
export function FeedRow({ entry }: FeedRowProps) {
const [expanded, setExpanded] = useState(false);
@ -34,11 +39,13 @@ export function FeedRow({ entry }: FeedRowProps) {
return (
<div
className="rounded-lg border border-border transition-colors hover:bg-muted/50 cursor-pointer"
onClick={() => setExpanded(!expanded)}
className="rounded-lg border border-border transition-colors"
>
{/* Main row */}
<div className="grid grid-cols-2 sm:grid-cols-5 gap-2 sm:gap-4 px-4 py-3 items-center">
<div
className="grid grid-cols-2 sm:grid-cols-5 gap-2 sm:gap-4 px-4 py-3 items-center cursor-pointer hover:bg-muted/50 rounded-t-lg"
onClick={() => setExpanded(!expanded)}
>
{/* Time */}
<div className="text-sm" title={formatUnixDateTime(entry.timestamp)}>
<span className="font-medium">{formatRelativeTime(entry.timestamp)}</span>
@ -66,20 +73,36 @@ export function FeedRow({ entry }: FeedRowProps) {
<span className="text-foreground">{formatValue(entry.object)}</span>
</div>
{/* Source Class */}
{/* Source Class + Investigate icon */}
<div className="flex items-center justify-between gap-2">
<Badge variant="outline" className={cn("text-xs", badgeColor)}>
{entry.source_class}
</Badge>
<span className="text-xs text-muted-foreground">
{expanded ? "\u25B2" : "\u25BC"}
</span>
<div className="flex items-center gap-1">
<Link
href={investigateHref(entry)}
className="text-muted-foreground hover:text-primary transition-colors p-1"
title="Investigate in Skeptic"
onClick={(e) => e.stopPropagation()}
>
<svg className="h-3.5 w-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<circle cx="11" cy="11" r="8" />
<path d="m21 21-4.3-4.3" />
</svg>
</Link>
<span className="text-xs text-muted-foreground">
{expanded ? "\u25B2" : "\u25BC"}
</span>
</div>
</div>
</div>
{/* Expanded details */}
{expanded && (
<div className="px-4 pb-3 pt-0 border-t border-border mt-0">
<div
className="px-4 pb-3 pt-0 border-t border-border mt-0"
onClick={(e) => e.stopPropagation()}
>
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2">
<div className="grid grid-cols-2 gap-2 text-xs">
<div>
@ -116,6 +139,29 @@ export function FeedRow({ entry }: FeedRowProps) {
</div>
</div>
)}
{/* Narrative */}
{entry.narrative && (
<div className="text-xs border-t border-border pt-2">
<span className="text-muted-foreground">Narrative:</span>
<p className="mt-1 text-foreground whitespace-pre-wrap leading-relaxed">
{entry.narrative}
</p>
</div>
)}
{/* Investigate link */}
<div className="border-t border-border pt-2 flex justify-end">
<Link
href={investigateHref(entry)}
className="text-xs text-primary hover:underline inline-flex items-center gap-1"
onClick={(e) => e.stopPropagation()}
>
<svg className="h-3 w-3" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<circle cx="11" cy="11" r="8" />
<path d="m21 21-4.3-4.3" />
</svg>
Investigate in Skeptic
</Link>
</div>
</div>
</div>
)}

View File

@ -1,6 +1,6 @@
"use client";
import { useState, useCallback } from "react";
import { useState, useCallback, useEffect, useRef } from "react";
import { StemeDBClient, type LayeredResponse, ApiError } from "@/lib/api";
import { QueryForm, type QueryParams, EmptyState, ErrorState } from "@/components/skeptic";
import { LayeredLoadingSkeleton } from "./layered-loading-skeleton";
@ -12,8 +12,14 @@ type QueryState =
| { status: "success"; data: LayeredResponse; params: QueryParams }
| { status: "error"; error: string; params: QueryParams };
export function LayeredQueryResults() {
interface LayeredQueryResultsProps {
initialSubject?: string;
initialPredicate?: string;
}
export function LayeredQueryResults({ initialSubject, initialPredicate }: LayeredQueryResultsProps) {
const [state, setState] = useState<QueryState>({ status: "idle" });
const hasAutoQueried = useRef(false);
const executeQuery = useCallback(async (params: QueryParams) => {
setState({ status: "loading", params });
@ -33,6 +39,18 @@ export function LayeredQueryResults() {
}
}, []);
// Auto-execute query when initial subject+predicate are provided (e.g., from audit trail links)
useEffect(() => {
if (initialSubject && initialPredicate && !hasAutoQueried.current) {
hasAutoQueried.current = true;
executeQuery({
subject: initialSubject,
predicate: initialPredicate,
includeSourceMetadata: true,
});
}
}, [initialSubject, initialPredicate, executeQuery]);
const handleRetry = useCallback(() => {
if (state.status === "error") {
executeQuery(state.params);
@ -48,7 +66,12 @@ export function LayeredQueryResults() {
<h2 className="text-lg font-medium text-card-foreground mb-4">
Layered Consensus Query
</h2>
<QueryForm onSubmit={executeQuery} isLoading={isLoading} />
<QueryForm
onSubmit={executeQuery}
isLoading={isLoading}
initialSubject={initialSubject}
initialPredicate={initialPredicate}
/>
</div>
{/* Results Section */}

View File

@ -112,6 +112,11 @@ export function LayeredResultsView({ data }: LayeredResultsViewProps) {
<p className="text-xs text-muted-foreground mt-1">
Confidence: {(data.overall_winner.confidence * 100).toFixed(0)}%
</p>
{data.overall_winner.narrative && (
<p className="text-sm text-muted-foreground mt-2 whitespace-pre-wrap leading-relaxed border-t border-primary/20 pt-2">
{data.overall_winner.narrative}
</p>
)}
</div>
)}

View File

@ -1,7 +1,10 @@
"use client";
import { useState, useEffect } from "react";
import Link from "next/link";
import { cn } from "@/lib/utils";
import type { LayeredTier } from "@/lib/api/types";
import type { LayeredTier, SourceRecordDto } from "@/lib/api/types";
import { StemeDBClient } from "@/lib/api";
import { SourceTierBadge, ConflictGauge, tierLabels, type SourceTier } from "@/components/skeptic";
function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested" {
@ -10,6 +13,17 @@ function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested"
return "Contested";
}
/**
 * Formats a Unix timestamp given in seconds as a date-time string in the
 * runtime's default locale (numeric year, short month, numeric day,
 * two-digit hour and minute).
 */
function formatTimestamp(unixSeconds: number): string {
  const MS_PER_SECOND = 1000;
  const displayOptions: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  };
  return new Date(unixSeconds * MS_PER_SECOND).toLocaleString(undefined, displayOptions);
}
interface TierAccordionProps {
tier: LayeredTier;
isExpanded: boolean;
@ -21,6 +35,20 @@ export function TierAccordion({ tier, isExpanded, onToggle }: TierAccordionProps
const tierLabel = tierLabels[safeTier] || tier.source_class;
const conflictStatus = getConflictStatus(tier.conflict_score);
// Source registry record for the winning assertion, fetched lazily the first
// time the tier is expanded.
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(null);
const [sourceLoading, setSourceLoading] = useState(false);
// Set as soon as a lookup is started so it is attempted at most once.
// Guarding on `sourceRecord`/`sourceLoading` alone is not enough: after a
// failed lookup both are falsy again, so the effect would re-run and re-issue
// the same request in an endless loop while the tier stays expanded.
const [sourceRequested, setSourceRequested] = useState(false);

useEffect(() => {
  if (!isExpanded || !tier.winner || sourceRequested) return;
  setSourceRequested(true);
  setSourceLoading(true);
  const client = new StemeDBClient();
  client
    .getSource(tier.winner.source_hash)
    .then(setSourceRecord)
    .catch(() => {
      // Lookup failed; the UI falls back to showing the truncated source hash.
    })
    .finally(() => setSourceLoading(false));
}, [isExpanded, tier.winner, sourceRequested]);
return (
<div className="border border-border rounded-lg overflow-hidden">
<button
@ -99,12 +127,105 @@ export function TierAccordion({ tier, isExpanded, onToggle }: TierAccordionProps
</div>
<div>
<span className="text-muted-foreground">Source</span>
<p className="font-mono text-xs text-foreground truncate" title={tier.winner.source_hash}>
{tier.winner.source_hash.slice(0, 12)}...
{sourceLoading ? (
<p className="font-mono text-xs text-muted-foreground animate-pulse">
Loading...
</p>
) : sourceRecord ? (
<p className="font-medium text-foreground truncate" title={sourceRecord.label}>
{sourceRecord.label}
</p>
) : (
<p className="font-mono text-xs text-foreground truncate" title={tier.winner.source_hash}>
{tier.winner.source_hash.slice(0, 12)}...
</p>
)}
</div>
</div>
{/* Assertion timestamp */}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
<div className="col-span-2">
<span className="text-muted-foreground">Asserted at</span>
<p className="font-medium text-foreground">
{formatTimestamp(tier.winner.timestamp)}
</p>
</div>
</div>
{/* Narrative */}
{tier.winner.narrative && (
<div className="text-sm">
<span className="text-muted-foreground">Narrative</span>
<p className="mt-1 text-foreground whitespace-pre-wrap leading-relaxed">
{tier.winner.narrative}
</p>
</div>
)}
{/* Source registry details */}
{sourceLoading && (
<div className="rounded border border-border bg-muted/30 p-2">
<p className="text-xs text-muted-foreground animate-pulse">
Loading source details...
</p>
</div>
)}
{!sourceLoading && sourceRecord && (
<div className="rounded border border-border bg-muted/30 p-2 space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
Source Registry
</span>
<Link
href="/sources"
className="text-xs text-blue-600 dark:text-blue-400 hover:underline"
>
View in Source Registry
</Link>
</div>
<div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
<div>
<span className="text-muted-foreground">Label</span>
<p className="font-medium text-foreground">{sourceRecord.label}</p>
</div>
<div>
<span className="text-muted-foreground">Status</span>
<p className="font-medium text-foreground capitalize">{sourceRecord.status}</p>
</div>
{sourceRecord.url && (
<div className="col-span-2">
<span className="text-muted-foreground">URL</span>
<p className="font-mono text-foreground truncate" title={sourceRecord.url}>
<a
href={sourceRecord.url}
target="_blank"
rel="noopener noreferrer"
className="text-blue-600 dark:text-blue-400 hover:underline"
>
{sourceRecord.url}
</a>
</p>
</div>
)}
{sourceRecord.notes && (
<div className="col-span-2">
<span className="text-muted-foreground">Notes</span>
<p className="text-foreground leading-relaxed">{sourceRecord.notes}</p>
</div>
)}
<div>
<span className="text-muted-foreground">Created</span>
<p className="text-foreground">{formatTimestamp(sourceRecord.created_at)}</p>
</div>
<div>
<span className="text-muted-foreground">Updated</span>
<p className="text-foreground">{formatTimestamp(sourceRecord.updated_at)}</p>
</div>
</div>
</div>
)}
{/* Assertion hash */}
<div className="pt-2 border-t border-border">
<span className="text-xs text-muted-foreground">Assertion: </span>

View File

@ -1,7 +1,10 @@
"use client";
import { useState, useEffect } from "react";
import Link from "next/link";
import { cn } from "@/lib/utils";
import type { ClaimSummary } from "@/lib/api/types";
import type { ClaimSummary, SourceRecordDto } from "@/lib/api/types";
import { StemeDBClient } from "@/lib/api";
import { SourceTierBadge } from "./source-tier-badge";
import { WeightBar } from "./weight-bar";
import { HashDisplay } from "./hash-display";
@ -33,6 +36,23 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
: "active") as SourceStatus;
const valueStr = formatValue(claim.value);
// Fetch the full source record the first time the row is expanded.
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(null);
const [sourceLoading, setSourceLoading] = useState(false);
// Set as soon as a lookup is started so it is attempted at most once.
// Guarding on `sourceRecord`/`sourceLoading` alone is not enough: when the
// source is not in the registry the lookup fails, both values are falsy
// again, and the effect would re-issue the same request in an endless loop
// while the row stays expanded.
const [sourceRequested, setSourceRequested] = useState(false);

useEffect(() => {
  if (!isExpanded || sourceRequested) return;
  setSourceRequested(true);
  setSourceLoading(true);
  const client = new StemeDBClient();
  client
    .getSource(claim.source.source_hash)
    .then(setSourceRecord)
    .catch(() => {
      // Source may not be in registry — that's fine
    })
    .finally(() => setSourceLoading(false));
}, [isExpanded, claim.source.source_hash, sourceRequested]);
return (
<div
className={cn(
@ -88,6 +108,19 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
{/* Expanded details */}
{isExpanded && (
<div className="px-3 pb-3 space-y-4 border-t border-border pt-3">
{/* Full value */}
<div className="space-y-1">
<div className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
Value
</div>
<p className="text-sm text-foreground whitespace-pre-wrap break-words leading-relaxed">
{valueStr}
</p>
<div className="text-xs text-muted-foreground">
Type: <code className="bg-muted px-1 py-0.5 rounded">{claim.value.type}</code>
</div>
</div>
{/* Source info */}
<div className="space-y-1">
<div className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
@ -98,7 +131,7 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
<span className={statusColors[status]}>
{statusIcons[status]} {status}
</span>
<span className="text-muted-foreground"></span>
<span className="text-muted-foreground">&middot;</span>
<span className="text-muted-foreground">
{tierLabel} (T{tier})
</span>
@ -113,6 +146,33 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
{sourceUrl}
</a>
)}
{/* Source registry details (fetched) */}
{sourceLoading && (
<div className="text-xs text-muted-foreground animate-pulse mt-1">
Loading source details...
</div>
)}
{sourceRecord && (
  <div className="mt-2 rounded border border-border bg-muted/30 p-2 space-y-1">
    {sourceRecord.notes && (
      <p className="text-xs text-muted-foreground whitespace-pre-wrap">
        {sourceRecord.notes}
      </p>
    )}
    <div className="flex items-center gap-3 text-[10px] text-muted-foreground">
      {/* NOTE(review): created_at / updated_at are assumed to be Unix seconds,
          matching how the layered tier accordion formats the same
          SourceRecordDto fields; Date expects milliseconds, hence the * 1000.
          Confirm against the API contract. */}
      <span>Created: {new Date(sourceRecord.created_at * 1000).toLocaleDateString()}</span>
      {sourceRecord.updated_at !== sourceRecord.created_at && (
        <span>Updated: {new Date(sourceRecord.updated_at * 1000).toLocaleDateString()}</span>
      )}
    </div>
    <Link
      href="/sources"
      className="text-[10px] text-blue-600 dark:text-blue-400 hover:underline"
    >
      View in Source Registry &rarr;
    </Link>
  </div>
)}
</div>
{/* Supporting agents */}

View File

@ -1,9 +1,10 @@
"use client";
import { useState } from "react";
import { useState, useEffect, useRef, useCallback } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { DatePicker } from "@/components/ui/date-picker";
import { StemeDBClient } from "@/lib/api";
export interface QueryParams {
subject: string;
@ -15,22 +16,162 @@ export interface QueryParams {
interface QueryFormProps {
onSubmit: (params: QueryParams) => void;
isLoading: boolean;
initialSubject?: string;
initialPredicate?: string;
}
export function QueryForm({ onSubmit, isLoading }: QueryFormProps) {
const [subject, setSubject] = useState("");
const [predicate, setPredicate] = useState("");
export function QueryForm({ onSubmit, isLoading, initialSubject, initialPredicate }: QueryFormProps) {
const [subject, setSubject] = useState(initialSubject ?? "");
const [predicate, setPredicate] = useState(initialPredicate ?? "");
const [includeSourceMetadata, setIncludeSourceMetadata] = useState(true);
const [asOfDate, setAsOfDate] = useState<Date | undefined>(undefined);
// Autocomplete state
const [subjectSuggestions, setSubjectSuggestions] = useState<string[]>([]);
const [predicateSuggestions, setPredicateSuggestions] = useState<string[]>([]);
const [showSubjectDropdown, setShowSubjectDropdown] = useState(false);
const [showPredicateDropdown, setShowPredicateDropdown] = useState(false);
const [activeSubjectIndex, setActiveSubjectIndex] = useState(-1);
const [activePredicateIndex, setActivePredicateIndex] = useState(-1);
const subjectRef = useRef<HTMLDivElement>(null);
const predicateRef = useRef<HTMLDivElement>(null);
const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
// Sync initial values when they change (e.g., from URL params).
// Each effect copies the prop into local state whenever the prop changes,
// replacing whatever is currently in the input. An `undefined` prop is
// ignored so that omitting the prop never clears the field.
useEffect(() => {
if (initialSubject !== undefined) setSubject(initialSubject);
}, [initialSubject]);
useEffect(() => {
if (initialPredicate !== undefined) setPredicate(initialPredicate);
}, [initialPredicate]);
// Fetch subject suggestions, debounced by 200ms so the API is not hit on
// every keystroke. Each call supersedes the previous one: the pending timer
// is cancelled, and a response that arrives after a newer call was made is
// discarded so stale results can never overwrite fresher ones.
const fetchSubjects = useCallback((query: string) => {
  if (debounceRef.current) clearTimeout(debounceRef.current);
  const timer: ReturnType<typeof setTimeout> = setTimeout(async () => {
    if (!query.trim()) {
      setSubjectSuggestions([]);
      setShowSubjectDropdown(false);
      return;
    }
    // While debounceRef still holds this timer, no newer lookup has replaced it.
    const isStale = () => debounceRef.current !== timer;
    try {
      const client = new StemeDBClient();
      const resp = await client.listSubjects(query, 20);
      if (isStale()) return;
      setSubjectSuggestions(resp.subjects);
      setShowSubjectDropdown(resp.subjects.length > 0);
      setActiveSubjectIndex(-1);
    } catch {
      if (isStale()) return;
      // Autocomplete is best-effort: on failure just show no suggestions.
      setSubjectSuggestions([]);
      setShowSubjectDropdown(false);
    }
  }, 200);
  debounceRef.current = timer;
}, []);
// Load the predicates known for a chosen subject. A blank subject or a
// failed request both result in an empty suggestion list.
const fetchPredicates = useCallback(async (subj: string) => {
  let predicates: string[] = [];
  if (subj.trim()) {
    try {
      const api = new StemeDBClient();
      predicates = (await api.listPredicates(subj)).predicates;
    } catch {
      predicates = [];
    }
  }
  setPredicateSuggestions(predicates);
}, []);
// Dismiss a dropdown when the user presses the mouse anywhere outside the
// wrapper element of its field. The listener lives for the component's
// lifetime and is removed on unmount.
useEffect(() => {
  const closeIfOutside = (event: MouseEvent) => {
    const target = event.target as Node;
    const subjectBox = subjectRef.current;
    const predicateBox = predicateRef.current;
    if (subjectBox !== null && !subjectBox.contains(target)) {
      setShowSubjectDropdown(false);
    }
    if (predicateBox !== null && !predicateBox.contains(target)) {
      setShowPredicateDropdown(false);
    }
  };
  document.addEventListener("mousedown", closeIfOutside);
  return () => {
    document.removeEventListener("mousedown", closeIfOutside);
  };
}, []);
// Typing in the subject input: store the text and schedule a (debounced)
// suggestion lookup for it.
const handleSubjectChange = (value: string) => {
setSubject(value);
fetchSubjects(value);
// Clear predicate suggestions when subject changes: they were fetched for
// the previously selected subject and no longer apply.
setPredicateSuggestions([]);
};
// Commit a suggestion chosen from the subject dropdown (mouse or Enter),
// close the dropdown, and load the predicates available for that subject.
const selectSubject = (value: string) => {
  // Cancel any debounced lookup still pending from typing; otherwise it would
  // fire after the selection and re-open the dropdown that was just closed.
  if (debounceRef.current) {
    clearTimeout(debounceRef.current);
    debounceRef.current = null;
  }
  setSubject(value);
  setShowSubjectDropdown(false);
  setActiveSubjectIndex(-1);
  fetchPredicates(value);
};
// Typing in the predicate input: store the text and, when predicates have
// already been fetched for the selected subject, (re)open the dropdown.
const handlePredicateChange = (value: string) => {
setPredicate(value);
// No request is made here. The dropdown renders `filteredPredicates`, which
// narrows the already-fetched suggestions by the typed text.
if (predicateSuggestions.length > 0) {
setShowPredicateDropdown(true);
setActivePredicateIndex(-1);
}
};
// Commit a suggestion chosen from the predicate dropdown and close it,
// resetting the keyboard highlight.
const selectPredicate = (value: string) => {
setPredicate(value);
setShowPredicateDropdown(false);
setActivePredicateIndex(-1);
};
// Suggestions that contain the typed predicate text, compared
// case-insensitively. An empty input matches every suggestion.
const filteredPredicates = predicateSuggestions.filter((p) =>
p.toLowerCase().includes(predicate.toLowerCase())
);
// Keyboard navigation for the subject dropdown. Keys are only intercepted
// while the dropdown is open and has entries:
//   ArrowDown / ArrowUp - move the highlight, clamped to the list bounds
//   Enter               - pick the highlighted entry (if any is highlighted)
//   Escape              - close the dropdown
const handleSubjectKeyDown = (e: React.KeyboardEvent) => {
  const dropdownUsable = showSubjectDropdown && subjectSuggestions.length > 0;
  if (!dropdownUsable) return;
  const lastIndex = subjectSuggestions.length - 1;
  switch (e.key) {
    case "ArrowDown":
      e.preventDefault();
      setActiveSubjectIndex((current) => Math.min(current + 1, lastIndex));
      break;
    case "ArrowUp":
      e.preventDefault();
      setActiveSubjectIndex((current) => Math.max(current - 1, 0));
      break;
    case "Enter":
      // Without a highlighted entry, Enter falls through to the form submit.
      if (activeSubjectIndex >= 0) {
        e.preventDefault();
        selectSubject(subjectSuggestions[activeSubjectIndex]);
      }
      break;
    case "Escape":
      setShowSubjectDropdown(false);
      break;
  }
};
// Keyboard navigation for the predicate dropdown, operating on the filtered
// list that is actually rendered. Keys are only intercepted while the
// dropdown is open and has entries:
//   ArrowDown / ArrowUp - move the highlight, clamped to the list bounds
//   Enter               - pick the highlighted entry (if any is highlighted)
//   Escape              - close the dropdown
const handlePredicateKeyDown = (e: React.KeyboardEvent) => {
  const dropdownUsable = showPredicateDropdown && filteredPredicates.length > 0;
  if (!dropdownUsable) return;
  const lastIndex = filteredPredicates.length - 1;
  switch (e.key) {
    case "ArrowDown":
      e.preventDefault();
      setActivePredicateIndex((current) => Math.min(current + 1, lastIndex));
      break;
    case "ArrowUp":
      e.preventDefault();
      setActivePredicateIndex((current) => Math.max(current - 1, 0));
      break;
    case "Enter":
      // Without a highlighted entry, Enter falls through to the form submit.
      if (activePredicateIndex >= 0) {
        e.preventDefault();
        selectPredicate(filteredPredicates[activePredicateIndex]);
      }
      break;
    case "Escape":
      setShowPredicateDropdown(false);
      break;
  }
};
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
setShowSubjectDropdown(false);
setShowPredicateDropdown(false);
if (subject.trim() && predicate.trim()) {
onSubmit({
subject: subject.trim(),
predicate: predicate.trim(),
includeSourceMetadata,
// Convert Date to Unix timestamp (seconds)
asOf: asOfDate ? Math.floor(asOfDate.getTime() / 1000) : undefined,
});
}
@ -41,32 +182,81 @@ export function QueryForm({ onSubmit, isLoading }: QueryFormProps) {
return (
<form onSubmit={handleSubmit} className="space-y-4">
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="space-y-2">
{/* Subject with autocomplete */}
<div className="space-y-2" ref={subjectRef}>
<label htmlFor="subject" className="text-sm font-medium text-foreground">
Subject
</label>
<Input
id="subject"
placeholder="e.g., semaglutide:gastroparesis_risk"
value={subject}
onChange={(e) => setSubject(e.target.value)}
disabled={isLoading}
/>
<div className="relative">
<Input
id="subject"
placeholder="e.g., semaglutide:gastroparesis_risk"
value={subject}
onChange={(e) => handleSubjectChange(e.target.value)}
onFocus={() => {
if (subjectSuggestions.length > 0) setShowSubjectDropdown(true);
}}
onKeyDown={handleSubjectKeyDown}
disabled={isLoading}
autoComplete="off"
/>
{showSubjectDropdown && subjectSuggestions.length > 0 && (
<div className="absolute z-50 w-full mt-1 max-h-60 overflow-auto rounded-md border border-border bg-popover shadow-md">
{subjectSuggestions.map((s, i) => (
<button
key={s}
type="button"
className={`w-full px-3 py-2 text-left text-sm font-mono truncate hover:bg-muted ${
i === activeSubjectIndex ? "bg-muted" : ""
}`}
onMouseDown={() => selectSubject(s)}
>
{s}
</button>
))}
</div>
)}
</div>
<p className="text-xs text-muted-foreground">
The entity you want to query
</p>
</div>
<div className="space-y-2">
{/* Predicate with autocomplete */}
<div className="space-y-2" ref={predicateRef}>
<label htmlFor="predicate" className="text-sm font-medium text-foreground">
Predicate
</label>
<Input
id="predicate"
placeholder="e.g., risk_level"
value={predicate}
onChange={(e) => setPredicate(e.target.value)}
disabled={isLoading}
/>
<div className="relative">
<Input
id="predicate"
placeholder="e.g., risk_level"
value={predicate}
onChange={(e) => handlePredicateChange(e.target.value)}
onFocus={() => {
if (filteredPredicates.length > 0) setShowPredicateDropdown(true);
}}
onKeyDown={handlePredicateKeyDown}
disabled={isLoading}
autoComplete="off"
/>
{showPredicateDropdown && filteredPredicates.length > 0 && (
<div className="absolute z-50 w-full mt-1 max-h-60 overflow-auto rounded-md border border-border bg-popover shadow-md">
{filteredPredicates.map((p, i) => (
<button
key={p}
type="button"
className={`w-full px-3 py-2 text-left text-sm font-mono truncate hover:bg-muted ${
i === activePredicateIndex ? "bg-muted" : ""
}`}
onMouseDown={() => selectPredicate(p)}
>
{p}
</button>
))}
</div>
)}
</div>
<p className="text-xs text-muted-foreground">
The property or relationship to analyze
</p>

View File

@ -1,6 +1,6 @@
"use client";
import { useState, useCallback } from "react";
import { useState, useCallback, useEffect, useRef } from "react";
import { useRouter } from "next/navigation";
import { StemeDBClient, type SkepticResponse, ApiError } from "@/lib/api";
import { Button } from "@/components/ui/button";
@ -20,9 +20,15 @@ type QueryState =
| { status: "success"; data: SkepticResponse; params: QueryParams }
| { status: "error"; error: string; params: QueryParams };
export function QueryResults() {
/**
 * Props for `QueryResults`. When both values are provided (e.g., from URL
 * params) the component runs that query once automatically and pre-fills the
 * query form with them.
 */
interface QueryResultsProps {
/** Subject to pre-fill and auto-query. */
initialSubject?: string;
/** Predicate to pre-fill and auto-query. */
initialPredicate?: string;
}
export function QueryResults({ initialSubject, initialPredicate }: QueryResultsProps) {
const [state, setState] = useState<QueryState>({ status: "idle" });
const router = useRouter();
const hasAutoQueried = useRef(false);
const handleViewAudit = useCallback(
(subject: string, predicate: string) => {
@ -56,6 +62,18 @@ export function QueryResults() {
}
}, []);
// When the component is given both an initial subject and predicate (e.g.,
// from URL params), run that query automatically. The ref acts as a one-shot
// latch so re-renders and later prop changes never trigger a second run.
useEffect(() => {
  if (hasAutoQueried.current) return;
  if (!initialSubject || !initialPredicate) return;
  hasAutoQueried.current = true;
  executeQuery({
    subject: initialSubject,
    predicate: initialPredicate,
    includeSourceMetadata: true,
  });
}, [initialSubject, initialPredicate, executeQuery]);
const handleRetry = useCallback(() => {
if (state.status === "error") {
executeQuery(state.params);
@ -71,7 +89,12 @@ export function QueryResults() {
<h2 className="text-lg font-medium text-card-foreground mb-4">
Conflict Analysis Query
</h2>
<QueryForm onSubmit={executeQuery} isLoading={isLoading} />
<QueryForm
onSubmit={executeQuery}
isLoading={isLoading}
initialSubject={initialSubject}
initialPredicate={initialPredicate}
/>
</div>
{/* Results Section */}

View File

@ -1,8 +1,8 @@
"use client";
import { useCallback } from "react";
import { FileJson, FileText } from "lucide-react";
import type { SourceImpactResponse } from "@/lib/api/types";
import { useCallback, useEffect, useState } from "react";
import { ChevronDown, ChevronUp, FileJson, FileText } from "lucide-react";
import type { SourceImpactResponse, SourceRecordDto } from "@/lib/api/types";
import { StemeDBClient } from "@/lib/api";
import { Button } from "@/components/ui/button";
import {
@ -20,11 +20,90 @@ interface ImpactDetailPanelProps {
onClose: () => void;
}
/**
 * Shows a hash in shortened form (first 12 + last 4 characters) as a button
 * that copies the full hash to the clipboard when clicked, flashing
 * "Copied!" for 1.5 seconds on success.
 */
function CopyableHash({ hash }: { hash: string }) {
  const [copied, setCopied] = useState(false);
  const handleCopy = async () => {
    try {
      // writeText rejects when permission is denied, and `navigator.clipboard`
      // is undefined in insecure contexts. Only report success once the write
      // has actually resolved.
      await navigator.clipboard.writeText(hash);
    } catch {
      return;
    }
    setCopied(true);
    setTimeout(() => setCopied(false), 1500);
  };
  // Hashes of 16 characters or fewer are shown whole: truncating them would
  // repeat characters across the head and tail slices.
  const label =
    hash.length > 16 ? `${hash.slice(0, 12)}...${hash.slice(-4)}` : hash;
  return (
    <button
      type="button"
      onClick={handleCopy}
      className="font-mono text-xs cursor-pointer hover:text-foreground transition-colors"
      title="Click to copy full hash"
    >
      {label}
      <span className="ml-1 text-primary text-[10px]">
        {copied ? "Copied!" : ""}
      </span>
    </button>
  );
}
/**
 * Shows an agent ID as a pill-shaped button that copies the ID to the
 * clipboard when clicked, flashing "Copied!" for 1.5 seconds on success.
 */
function CopyableAgent({ agent }: { agent: string }) {
  const [copied, setCopied] = useState(false);
  const handleCopy = async () => {
    try {
      // writeText rejects when permission is denied, and `navigator.clipboard`
      // is undefined in insecure contexts. Only report success once the write
      // has actually resolved.
      await navigator.clipboard.writeText(agent);
    } catch {
      return;
    }
    setCopied(true);
    setTimeout(() => setCopied(false), 1500);
  };
  // No `key` here: keys only matter on the element a list renders, and the
  // caller already supplies one on <CopyableAgent />.
  return (
    <button
      type="button"
      onClick={handleCopy}
      className="px-2 py-1 rounded bg-muted text-xs font-mono cursor-pointer hover:text-foreground transition-colors"
      title="Click to copy agent ID"
    >
      {agent}
      <span className="ml-1 text-primary text-[10px]">
        {copied ? "Copied!" : ""}
      </span>
    </button>
  );
}
/**
 * Small coloured pill showing a status string. The colour is chosen from the
 * lower-cased status; the text is rendered exactly as received.
 */
function StatusBadge({ status }: { status: string }) {
  // A switch (rather than indexing an object literal) guarantees that only
  // the listed statuses match: an object lookup would also resolve inherited
  // keys such as "constructor" or "toString" to functions.
  let classes: string;
  switch (status.toLowerCase()) {
    case "active":
      classes = "bg-green-500/15 text-green-700 dark:text-green-400";
      break;
    case "quarantined":
      classes = "bg-red-500/15 text-red-700 dark:text-red-400";
      break;
    case "pending":
      classes = "bg-yellow-500/15 text-yellow-700 dark:text-yellow-400";
      break;
    default:
      // "inactive" and every unrecognised status share the neutral style.
      // NOTE(review): "deprecated" (a SourceRecordDto status) also lands
      // here - confirm that neutral styling is intended for it.
      classes = "bg-muted text-muted-foreground";
  }
  return (
    <span
      className={`inline-block px-2 py-0.5 rounded text-[11px] font-medium ${classes}`}
    >
      {status}
    </span>
  );
}
export function ImpactDetailPanel({
isOpen,
impact,
onClose,
}: ImpactDetailPanelProps) {
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(
null
);
const [contentExpanded, setContentExpanded] = useState(false);
// Load the full source record (the single-source endpoint includes the
// content) whenever the panel is open for a source, and clear it otherwise.
useEffect(() => {
  // Always start from a clean slate, so the previous source's content is
  // never shown under a different source while the new request is in flight.
  setSourceRecord(null);
  setContentExpanded(false);
  const sourceHash = impact?.source_hash;
  if (!isOpen || !sourceHash) return;

  // Set by the cleanup when the panel closes, the source changes, or the
  // component unmounts; a late response is then ignored.
  let cancelled = false;
  const client = new StemeDBClient();
  client
    .getSource(sourceHash)
    .then((record) => {
      if (!cancelled) setSourceRecord(record);
    })
    .catch(() => {
      // Content is optional decoration: on failure simply show none.
      if (!cancelled) setSourceRecord(null);
    });
  return () => {
    cancelled = true;
  };
}, [isOpen, impact?.source_hash]);
const handleExport = useCallback(
(format: "csv" | "json") => {
if (!impact) return;
@ -54,6 +133,35 @@ export function ImpactDetailPanel({
{impact ? (
<div className="mt-6 space-y-6">
{/* Source Info */}
<div className="rounded-lg border border-border p-4 space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs text-muted-foreground font-medium uppercase tracking-wide">
Source
</span>
<StatusBadge status={impact.status} />
</div>
<div className="text-muted-foreground">
<CopyableHash hash={impact.source_hash} />
</div>
<div className="flex gap-4 pt-1">
<div className="flex items-baseline gap-1.5">
<span className="text-xs text-muted-foreground">
Assertions
</span>
<span className="text-sm font-bold">
{impact.assertion_count}
</span>
</div>
<div className="flex items-baseline gap-1.5">
<span className="text-xs text-muted-foreground">Agents</span>
<span className="text-sm font-bold">
{impact.affected_agents.length}
</span>
</div>
</div>
</div>
{/* Export buttons - only when there's data to export */}
{impact.assertion_count > 0 && (
<div className="flex items-center gap-2">
@ -84,31 +192,60 @@ export function ImpactDetailPanel({
<p className="text-sm text-muted-foreground">{impact.summary}</p>
</div>
{/* Source Content */}
{sourceRecord?.content && (
<div>
<div className="flex items-center justify-between mb-2">
<h4 className="text-sm font-medium text-foreground">
Source Content
<span className="ml-2 text-xs text-muted-foreground font-normal">
({sourceRecord.content.length.toLocaleString()} chars)
</span>
</h4>
<button
onClick={() => setContentExpanded(!contentExpanded)}
className="flex items-center gap-1 text-xs text-muted-foreground hover:text-foreground transition-colors"
>
{contentExpanded ? (
<>
Collapse <ChevronUp className="h-3 w-3" />
</>
) : (
<>
Expand <ChevronDown className="h-3 w-3" />
</>
)}
</button>
</div>
<div
className={`rounded border border-border bg-muted/30 overflow-y-auto ${
contentExpanded ? "max-h-[600px]" : "max-h-96"
}`}
>
<pre className="p-3 text-xs text-muted-foreground whitespace-pre-wrap font-mono leading-relaxed">
{sourceRecord.content}
</pre>
</div>
</div>
)}
{/* Affected Assertions */}
{impact.affected_assertions.length > 0 && (
<div>
<h4 className="text-sm font-medium text-foreground mb-3">
Affected Assertions ({impact.affected_assertions.length})
</h4>
<div className="max-h-48 overflow-y-auto rounded border border-border">
<table className="w-full text-sm">
<thead className="sticky top-0 bg-muted/50">
<tr>
<th className="text-left px-3 py-2 font-medium text-muted-foreground">
Hash
</th>
</tr>
</thead>
<tbody className="divide-y divide-border">
{impact.affected_assertions.map((hash) => (
<tr key={hash} className="hover:bg-accent/5">
<td className="px-3 py-2 font-mono text-xs">
{hash}
</td>
</tr>
))}
</tbody>
</table>
<div className="max-h-48 overflow-y-auto rounded border border-border divide-y divide-border">
{impact.affected_assertions.map((hash, idx) => (
<div
key={hash}
className={`flex items-center px-3 py-2 ${
idx % 2 === 0 ? "bg-background" : "bg-muted/30"
} hover:bg-accent/10 transition-colors`}
>
<CopyableHash hash={hash} />
</div>
))}
</div>
</div>
)}
@ -121,12 +258,7 @@ export function ImpactDetailPanel({
</h4>
<div className="flex flex-wrap gap-2">
{impact.affected_agents.map((agent) => (
<span
key={agent}
className="px-2 py-1 rounded bg-muted text-xs font-mono"
>
{agent}
</span>
<CopyableAgent key={agent} agent={agent} />
))}
</div>
</div>

View File

@ -1,5 +1,6 @@
"use client";
import Link from "next/link";
import { ExternalLink, Eye, Ban, RotateCcw } from "lucide-react";
import type { SourceRecordDto } from "@/lib/api/types";
import { Button } from "@/components/ui/button";
@ -8,6 +9,7 @@ import { TierBadge } from "./tier-badge";
interface SourceRowProps {
source: SourceRecordDto;
assertionCount?: number;
onViewImpact: (source: SourceRecordDto) => void;
onBlock: (source: SourceRecordDto) => void;
onRestore: (source: SourceRecordDto) => void;
@ -15,6 +17,7 @@ interface SourceRowProps {
export function SourceRow({
source,
assertionCount,
onViewImpact,
onBlock,
onRestore,
@ -43,6 +46,17 @@ export function SourceRow({
{updatedDate !== createdDate && (
<span>Updated: {updatedDate}</span>
)}
{assertionCount !== undefined && (
<span className="text-xs text-muted-foreground">
{assertionCount.toLocaleString()} assertions
</span>
)}
<Link
href="/"
className="text-xs text-blue-600 dark:text-blue-400 hover:underline"
>
View Feed
</Link>
</div>
{source.url && (

View File

@ -35,6 +35,9 @@ export function SourcesPanel() {
const [impact, setImpact] = useState<SourceImpactResponse | null>(null);
const [isLoadingImpact, setIsLoadingImpact] = useState(false);
const [isProcessing, setIsProcessing] = useState(false);
const [assertionCounts, setAssertionCounts] = useState<Map<string, number>>(
new Map()
);
const fetchData = useCallback(async () => {
setState({ status: "loading" });
@ -66,6 +69,31 @@ export function SourcesPanel() {
fetchData();
}, [fetchData]);
// Lazily fetch assertion counts for all sources after the list loads.
// One impact request is issued per source; sources whose request fails are
// simply left without a count.
useEffect(() => {
  if (state.status !== "success" || state.data.sources.length === 0) return;

  // Set by the cleanup when the list changes or the panel unmounts, so the
  // results of an outdated batch never overwrite the current counts.
  let cancelled = false;
  const client = new StemeDBClient();
  const lookups = state.data.sources.map(async (source) => {
    const data = await client.getSourceImpact(source.hash);
    return { hash: source.hash, count: data.assertion_count };
  });

  // allSettled never rejects: failed lookups show up as "rejected" entries.
  Promise.allSettled(lookups).then((results) => {
    if (cancelled) return;
    const counts = new Map<string, number>();
    for (const result of results) {
      if (result.status === "fulfilled") {
        counts.set(result.value.hash, result.value.count);
      }
    }
    setAssertionCounts(counts);
  });

  return () => {
    cancelled = true;
  };
}, [state]);
// Fetch impact when block dialog opens
useEffect(() => {
if (dialogState.type === "block") {
@ -221,6 +249,7 @@ export function SourcesPanel() {
<SourceRow
key={source.hash}
source={source}
assertionCount={assertionCounts.get(source.hash)}
onViewImpact={handleViewImpact}
onBlock={handleBlock}
onRestore={handleRestore}

View File

@ -7,6 +7,7 @@ import {
type CircuitBreakerResponse,
type AuditResponse,
type ListSourcesResponse,
type SourceRecordDto,
type SourceImpactResponse,
type QuarantineSourceResponse,
type RestoreSourceResponse,
@ -15,6 +16,8 @@ import {
type ScanResponse,
type ListScansResponse,
type FeedResponse,
type ListSubjectsResponse,
type ListPredicatesResponse,
type ListClaimsRequest,
type ListClaimsResponse,
type CreateClaimRequest,
@ -76,6 +79,18 @@ export class StemeDBClient {
return this.fetch<FeedResponse>(`/v1/feed?${params}`);
}
/**
 * Lists known subjects via `GET /v1/subjects`.
 *
 * @param q     Optional filter text; omitted from the request when empty.
 * @param limit Maximum number of subjects to request (defaults to 100).
 */
async listSubjects(q?: string, limit = 100): Promise<ListSubjectsResponse> {
  const search = new URLSearchParams();
  search.set("limit", String(limit));
  if (q) {
    search.set("q", q);
  }
  return this.fetch<ListSubjectsResponse>(`/v1/subjects?${search.toString()}`);
}
/**
 * Lists the predicates of one subject via
 * `GET /v1/subjects/{subject}/predicates`. The subject is URL-encoded so it
 * stays a single path segment.
 */
async listPredicates(subject: string): Promise<ListPredicatesResponse> {
  const encodedSubject = encodeURIComponent(subject);
  const path = `/v1/subjects/${encodedSubject}/predicates`;
  return this.fetch<ListPredicatesResponse>(path);
}
async health(): Promise<HealthResponse> {
return this.fetch<HealthResponse>("/health");
}
@ -160,6 +175,10 @@ export class StemeDBClient {
return this.fetch<ListSourcesResponse>(`/v1/sources?${params}`);
}
/**
 * Fetches a single source record via `GET /v1/sources/{hash}`. The hash is
 * URL-encoded before being placed in the path.
 */
async getSource(hash: string): Promise<SourceRecordDto> {
  const encodedHash = encodeURIComponent(hash);
  const path = `/v1/sources/${encodedHash}`;
  return this.fetch<SourceRecordDto>(path);
}
/**
 * Fetches the impact report for a source via
 * `GET /v1/sources/{hash}/impact`.
 */
async getSourceImpact(hash: string): Promise<SourceImpactResponse> {
  // Encode the hash like the other path-parameter endpoints do, so an
  // unexpected character can never alter the request path. For ordinary hex
  // hashes the encoded value is identical to the input.
  return this.fetch<SourceImpactResponse>(
    `/v1/sources/${encodeURIComponent(hash)}/impact`
  );
}

View File

@ -62,6 +62,7 @@ export interface AssertionObject {
timestamp: number;
version: number;
}>;
narrative?: string;
}
export interface LayeredTier {
@ -209,6 +210,7 @@ export interface SourceRecordDto {
status: "active" | "deprecated" | "quarantined";
url?: string;
notes?: string;
content?: string;
created_at: number;
updated_at: number;
}
@ -347,6 +349,17 @@ export interface FeedResponse {
has_more: boolean;
}
// Discovery types (subject/predicate autocomplete)
/** Response of `GET /v1/subjects`. */
export interface ListSubjectsResponse {
/** Subject strings matching the request. */
subjects: string[];
/** Total number of subjects matching the filter, before the limit is applied. */
total_count: number;
}
/** Response of `GET /v1/subjects/{subject}/predicates`. */
export interface ListPredicatesResponse {
/** The subject these predicates belong to. */
subject: string;
/** Predicate strings recorded for that subject. */
predicates: string[];
}
export class ApiError extends Error {
public userMessage: string;

File diff suppressed because one or more lines are too long

View File

@ -95,10 +95,8 @@ impl AdminClient {
}
// Gateway returns different format than /admin/ranges, so convert it
let shard_response: ShardInfoResponse = response
.json()
.await
.context("Failed to parse shard info response")?;
let shard_response: ShardInfoResponse =
response.json().await.context("Failed to parse shard info response")?;
Ok(shard_response.into())
}
@ -125,10 +123,8 @@ impl AdminClient {
}
// Gateway returns {"ranges": [...]} so we need to unwrap it
let wrapper: RangesWrapper = response
.json()
.await
.context("Failed to parse ranges response")?;
let wrapper: RangesWrapper =
response.json().await.context("Failed to parse ranges response")?;
Ok(wrapper.ranges)
}

View File

@ -132,6 +132,14 @@ pub struct CreateAssertionRequest {
#[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
///
/// Makes the assertion self-contained: pick it up, read it, understand the
/// full claim without dereferencing anything. Max 64 KB.
#[serde(skip_serializing_if = "Option::is_none")]
#[schema(example = "Based on STEP 1 trial (n=1961). Limitation: 68-week duration only.")]
pub narrative: Option<String>,
/// Unix timestamp when the assertion was created.
/// If not provided, defaults to the current time.
/// **Important for v2 signatures:** Provide this field to preserve the

View File

@ -29,6 +29,7 @@ pub mod responses;
pub mod skeptic;
pub mod source_registry;
pub mod stemedb_claims;
pub mod subjects;
// Re-export all public types for backward compatibility
// This allows existing code to use `use crate::dto::*;` without changes
@ -51,7 +52,7 @@ pub use query_params::{FeedParams, QueryParams};
// From responses module
pub use responses::{
AssertionResponse, ChangeEntryDto, ErrorResponse, HealthResponse, LayeredQueryResponse,
ProvenanceResponse, QueryResponse, SourceWarningDto, TierResolutionDto,
ProvenanceResponse, QueryResponse, RebuildIndexesResponse, SourceWarningDto, TierResolutionDto,
};
// From audit module
@ -131,4 +132,9 @@ pub use aphoria::{
};
// From stemedb_claims module
pub use stemedb_claims::{AuthoredClaimDto, AuthoredValueDto, CreateClaimRequest, CreateClaimResponse};
pub use stemedb_claims::{
AuthoredClaimDto, AuthoredValueDto, CreateClaimRequest, CreateClaimResponse,
};
// From subjects module
pub use subjects::{ListPredicatesResponse, ListSubjectsParams, ListSubjectsResponse};

View File

@ -88,6 +88,10 @@ pub struct AssertionResponse {
#[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
#[serde(skip_serializing_if = "Option::is_none")]
pub narrative: Option<String>,
/// Warning if this assertion cites a quarantined or deprecated source.
///
/// Present when the assertion's source has a non-Active status in the
@ -217,6 +221,30 @@ pub struct TierResolutionDto {
pub resolution_confidence: f32,
}
/// Response from the admin rebuild-indexes endpoint.
///
/// Reports how many assertion indexes were rebuilt, how many keys were
/// skipped, and how long the operation took.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct RebuildIndexesResponse {
/// Number of assertions whose indexes were rebuilt.
pub rebuilt_count: u64,
/// Number of keys that were skipped: the subject or hash could not be
/// extracted from the key, the assertion failed to deserialize, or the
/// primary index write failed.
pub skipped_count: u64,
/// Wall-clock time for the operation in milliseconds.
pub elapsed_ms: u64,
/// Human-readable status message.
pub status: String,
/// First recorded error message (for diagnostics). Omitted from the JSON
/// when no error was recorded.
#[serde(skip_serializing_if = "Option::is_none")]
pub first_error: Option<String>,
}
/// Response from a LayeredConsensus query.
///
/// Provides per-tier resolution results plus an overall winner.

View File

@ -31,6 +31,10 @@ pub struct RegisterSourceRequest {
/// Optional curator notes about the source.
#[serde(skip_serializing_if = "Option::is_none")]
pub notes: Option<String>,
/// Optional full-text content of the source document.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
}
/// Response from registering a source.
@ -78,6 +82,10 @@ pub struct SourceRecordDto {
/// Optional curator notes.
#[serde(skip_serializing_if = "Option::is_none")]
pub notes: Option<String>,
/// Optional full-text content of the source document.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<String>,
}
impl From<SourceRecord> for SourceRecordDto {
@ -92,6 +100,7 @@ impl From<SourceRecord> for SourceRecordDto {
created_at: record.created_at,
updated_at: record.updated_at,
notes: record.notes.clone(),
content: record.content.clone(),
}
}
}

View File

@ -0,0 +1,34 @@
//! DTOs for subject and predicate discovery endpoints.
use serde::{Deserialize, Serialize};
use utoipa::{IntoParams, ToSchema};
/// Query parameters for `GET /v1/subjects`.
///
/// Both parameters are optional; an absent parameter deserializes to `None`.
// NOTE(review): the default and maximum documented on `limit` are not
// enforced by this type (it is a bare `Option<usize>`); presumably the
// handler applies them - confirm against the handler.
#[derive(Debug, Deserialize, IntoParams)]
pub struct ListSubjectsParams {
/// Optional prefix filter for subject names.
#[param(example = "sema")]
pub q: Option<String>,
/// Maximum number of subjects to return (default 100, max 1000).
#[param(example = 100)]
pub limit: Option<usize>,
}
/// Response for `GET /v1/subjects`.
///
/// Serialize-only: this type is produced by the server and never parsed by it.
#[derive(Debug, Serialize, ToSchema)]
pub struct ListSubjectsResponse {
/// List of matching subject strings.
pub subjects: Vec<String>,
/// Total number of subjects matching the filter (before limit).
pub total_count: usize,
}
/// Response for `GET /v1/subjects/:subject/predicates`.
///
/// Serialize-only: this type is produced by the server and never parsed by it.
#[derive(Debug, Serialize, ToSchema)]
pub struct ListPredicatesResponse {
/// The subject these predicates belong to.
pub subject: String,
/// List of predicate strings for this subject.
pub predicates: Vec<String>,
}

View File

@ -1,14 +1,14 @@
//! Admin handlers for maintenance operations.
use axum::{extract::State, Json};
use tracing::instrument;
use tracing::{info, instrument, warn};
use crate::{
dto::{DecayTrustRanksRequest, DecayTrustRanksResponse},
dto::{DecayTrustRanksRequest, DecayTrustRanksResponse, RebuildIndexesResponse},
error::Result,
state::AppState,
};
use stemedb_storage::{GenericTrustRankStore, TrustRankStore};
use stemedb_storage::{GenericIndexStore, GenericTrustRankStore, IndexStore, KVStore, TrustRankStore, key_codec};
/// Default half-life for trust rank decay (30 days in seconds).
const DEFAULT_HALF_LIFE_SECONDS: u64 = 30 * 24 * 60 * 60;
@ -68,3 +68,215 @@ pub async fn decay_trust_ranks(
status: "Decay operation completed".to_string(),
}))
}
/// Rebuild secondary indexes (Redb) from assertion data (Fjall).
///
/// This is a repair operation for when Redb indexes are missing or stale
/// while Fjall assertion data is intact. It scans all assertion data from
/// Fjall and reconstructs the S:, SP:, SUBJECTS:, HASH_SUBJECT:, and SRC:
/// indexes in Redb, then corrects the META:assertion_count.
///
/// This endpoint is idempotent — running it multiple times is safe because
/// the index store uses append-with-dedup semantics.
#[utoipa::path(
post,
path = "/v1/admin/rebuild-indexes",
responses(
(status = 200, description = "Index rebuild completed", body = RebuildIndexesResponse),
(status = 500, description = "Internal server error", body = crate::dto::ErrorResponse),
),
tag = "admin"
)]
#[instrument(skip(state))]
pub async fn rebuild_indexes(
State(state): State<AppState>,
) -> Result<Json<RebuildIndexesResponse>> {
let start = std::time::Instant::now();
metrics::counter!("stemedb_http_requests_total", "method" => "POST", "path" => "/v1/admin/rebuild-indexes").increment(1);
info!("Starting index rebuild: scanning Fjall for all assertions");
// Capture current time once for FEED index fallback (timestamp:0 assertions)
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
// Scan all assertion key-value pairs from Fjall
let assertion_kvs = state.store.scan_fjall_assertions().await?;
let total_keys = assertion_kvs.len();
info!(total_keys, "Found assertion keys in Fjall");
// Create an IndexStore backed by the same HybridStore
let index_store = GenericIndexStore::new(state.store.clone());
let mut rebuilt_count: u64 = 0;
let mut skipped_count: u64 = 0;
let mut first_error: Option<String> = None;
for (key, value) in &assertion_kvs {
// Extract subject from key
let subject = match key_codec::extract_subject(key) {
Some(s) => s.to_string(),
None => {
let msg = format!(
"extract_subject failed: key_len={}, first_bytes={:?}",
key.len(),
&key[..key.len().min(40)]
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Extract hash_hex from tag (tag is "H:{hash_hex}")
let tag = key_codec::extract_tag(key);
let hash_hex = match tag.strip_prefix(b"H:") {
Some(hex_bytes) => match std::str::from_utf8(hex_bytes) {
Ok(s) => s.to_string(),
Err(e) => {
let msg = format!("hash_hex UTF-8 error for subject={subject}: {e}");
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
},
None => {
let msg = format!(
"tag strip_prefix H: failed for subject={subject}: tag={:?}",
String::from_utf8_lossy(tag)
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Deserialize the assertion to get predicate and source_hash.
// Uses compat deserialization to handle legacy data (pre-narrative schema).
let assertion: stemedb_core::types::Assertion =
match stemedb_core::serde::deserialize_assertion_compat(value) {
Ok(a) => a,
Err(e) => {
let msg = format!(
"deserialize failed for subject={subject} hash={hash_hex}: {e} (value_len={})",
value.len()
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Decode assertion hash from hex
let hash_bytes: [u8; 32] = match hex::decode(&hash_hex) {
Ok(bytes) if bytes.len() == 32 => {
let mut arr = [0u8; 32];
arr.copy_from_slice(&bytes);
arr
}
Ok(bytes) => {
let msg = format!(
"hash decode wrong length for subject={subject} hash={hash_hex}: got {} bytes",
bytes.len()
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
Err(e) => {
let msg = format!(
"hex decode failed for subject={subject} hash={hash_hex}: {e}"
);
warn!("{}", msg);
if first_error.is_none() {
first_error = Some(msg);
}
skipped_count += 1;
continue;
}
};
// Rebuild S: and SP: indexes (includes SUBJECTS: discovery index)
if let Err(e) =
index_store.add_to_indexes(&subject, &assertion.predicate, &hash_bytes).await
{
warn!(%subject, %hash_hex, error = %e, "Failed to add to indexes");
skipped_count += 1;
continue;
}
// Rebuild HASH_SUBJECT: reverse index
let hs_key = key_codec::hash_subject_key(&hash_hex);
if let Err(e) = state.store.put(&hs_key, subject.as_bytes()).await {
warn!(%subject, %hash_hex, error = %e, "Failed to write hash_subject index");
}
// Rebuild SRC: source index
if let Err(e) =
index_store.add_to_source_index(&assertion.source_hash, &hash_bytes).await
{
warn!(%subject, %hash_hex, error = %e, "Failed to add to source index");
}
// Rebuild FEED index: use assertion.timestamp as best-available proxy
// for ingestion time. Fall back to current time for timestamp:0 assertions.
let feed_ts = if assertion.timestamp > 0 { assertion.timestamp } else { now };
let feed_idx_key = key_codec::feed_key(feed_ts, &hash_hex);
if let Err(e) = state.store.put(&feed_idx_key, subject.as_bytes()).await {
warn!(%subject, %hash_hex, error = %e, "Failed to write feed index");
}
rebuilt_count += 1;
}
// Correct the assertion count: total = rebuilt + skipped (both are real assertions).
// The count key stores a u64 in little-endian format.
let total_assertions = rebuilt_count + skipped_count;
let count_key = key_codec::assertion_count_key();
let count_bytes = total_assertions.to_le_bytes();
state.store.put(&count_key, &count_bytes).await?;
let elapsed_ms = start.elapsed().as_millis() as u64;
info!(
rebuilt_count,
skipped_count,
elapsed_ms,
"Index rebuild complete"
);
metrics::histogram!("stemedb_http_request_duration_seconds",
"method" => "POST",
"path" => "/v1/admin/rebuild-indexes",
"status" => "200"
)
.record(start.elapsed().as_secs_f64());
Ok(Json(RebuildIndexesResponse {
rebuilt_count,
skipped_count,
elapsed_ms,
status: format!(
"Rebuilt indexes for {} assertions ({} skipped) in {}ms",
rebuilt_count, skipped_count, elapsed_ms
),
first_error,
}))
}

View File

@ -143,6 +143,7 @@ pub fn observation_dto_to_assertion(
visual_hash: None,
epoch: None,
source_metadata,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures,
confidence: dto.confidence,

View File

@ -10,6 +10,7 @@ use crate::{
state::AppState,
};
use stemedb_core::limits::MAX_NARRATIVE_LEN;
use stemedb_core::types::{
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
};
@ -44,6 +45,18 @@ pub async fn create_assertion(
// Convert DTO to internal Assertion type
let assertion = dto_to_assertion(req)?;
// Verify Ed25519 signatures BEFORE writing to WAL.
// This prevents poison records that would permanently block the IngestWorker.
stemedb_core::signing::verify_assertion_signatures(&assertion).map_err(|e| {
metrics::counter!("stemedb_assertions_rejected_total", "reason" => "invalid_signature")
.increment(1);
ApiError::InvalidRequest(format!("Signature verification failed: {}", e))
})?;
// Validate subject does not contain null byte separator (mirrors IngestWorker check)
stemedb_storage::key_codec::validate_subject(&assertion.subject)
.map_err(|e| ApiError::InvalidRequest(format!("Invalid subject: {}", e)))?;
// Serialize to WAL format (includes record type header)
let payload = serialize_assertion(&assertion)
.map_err(|e| ApiError::Serialization(format!("Failed to serialize assertion: {}", e)))?;
@ -93,14 +106,33 @@ fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
return Err(ApiError::InvalidRequest("At least one signature is required".to_string()));
}
// Validate narrative length
if let Some(ref narrative) = req.narrative {
if narrative.len() > MAX_NARRATIVE_LEN {
return Err(ApiError::InvalidRequest(format!(
"narrative exceeds {} bytes (got {})",
MAX_NARRATIVE_LEN,
narrative.len()
)));
}
}
// Use provided timestamp or generate a new one
// IMPORTANT: For v2 signatures, the timestamp must match what was signed
let timestamp = req.timestamp.unwrap_or_else(|| {
std::time::SystemTime::now()
let timestamp = match req.timestamp {
Some(0) => {
return Err(ApiError::InvalidRequest(
"timestamp must be a valid Unix epoch (> 0). \
Omit the field to use server time."
.to_string(),
));
}
Some(t) => t,
None => std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0)
});
.unwrap_or(0),
};
// Use provided HLC timestamp or default
// IMPORTANT: For v2 signatures, the HLC timestamp must match what was signed
@ -122,6 +154,7 @@ fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
visual_hash,
epoch,
source_metadata: req.source_metadata.map(|s| s.into_bytes()),
narrative: req.narrative,
lifecycle: req.lifecycle.map(Into::into).unwrap_or(LifecycleStage::Proposed),
signatures,
confidence: req.confidence,

View File

@ -1,4 +1,7 @@
//! Handler for the `/v1/feed` endpoint (newest-first assertion browsing).
//!
//! Uses a dedicated FEED index (`\x00FEED:{inverted_ts}:{hash_hex}`) for
//! O(page_size) reads instead of loading all assertions into memory.
use axum::{extract::State, Json};
use tracing::{instrument, warn};
@ -10,15 +13,16 @@ use crate::{
state::AppState,
};
use stemedb_query::Query;
use stemedb_core::serde::deserialize_assertion_compat;
use stemedb_storage::{KVStore, key_codec};
use super::query::assertion_to_dto_with_warning;
/// Browse all assertions in newest-first order with pagination.
///
/// Returns assertions sorted by timestamp descending, useful for
/// "what was just written?" dashboards and dev workflows. No lens
/// resolution is applied — this is a raw chronological feed.
/// Returns assertions ordered by ingestion time descending (when the system
/// received each assertion), useful for "what was just written?" dashboards
/// and dev workflows. No lens resolution is applied — this is a raw feed.
///
/// # Pagination
///
@ -45,36 +49,57 @@ pub async fn feed(
metrics::counter!("stemedb_queries_total", "endpoint" => "feed").increment(1);
let query_start = std::time::Instant::now();
// Fetch all assertions (no subject filter)
let query = Query::builder().limit(usize::MAX).build();
let query_engine = state.query_engine();
let result = query_engine.execute(&query).await?;
// Scan the FEED index — keys are in newest-first order (inverted timestamp).
let feed_prefix = key_codec::feed_scan_prefix();
let entries = state.store.scan_prefix(&feed_prefix).await?;
let mut assertions = result.assertions;
if assertions.len() > 10_000 {
warn!(
count = assertions.len(),
"Feed scanning large assertion set; consider adding index-backed pagination"
);
}
// Sort by timestamp descending (newest first)
assertions.sort_unstable_by(|a, b| b.timestamp.cmp(&a.timestamp));
let total_count = assertions.len();
let total_count = entries.len();
let limit = params.clamped_limit();
let offset = params.offset;
let has_more = offset + limit < total_count;
// Apply offset + limit pagination
let page: Vec<_> = assertions.into_iter().skip(offset).take(limit).collect();
// Paginate the index entries (cheap — no assertion data loaded yet)
let page_entries: Vec<_> = entries.into_iter().skip(offset).take(limit).collect();
// Convert to DTOs (no source enrichment for speed)
let assertion_responses = page
.into_iter()
.map(|a| assertion_to_dto_with_warning(a, None))
.collect::<Result<Vec<_>>>()?;
// Fetch actual assertion data only for the current page
let mut assertion_responses = Vec::with_capacity(page_entries.len());
for (key, value) in &page_entries {
let hash_hex = match extract_hash_hex_from_feed_key(key) {
Some(h) => h,
None => {
warn!(key_len = key.len(), "Malformed FEED index key, skipping");
continue;
}
};
let subject = match std::str::from_utf8(value) {
Ok(s) => s,
Err(e) => {
warn!(error = %e, "Invalid UTF-8 in FEED index value, skipping");
continue;
}
};
let assertion_data_key = key_codec::assertion_key(subject, hash_hex);
let data = match state.store.get(&assertion_data_key).await? {
Some(d) => d,
None => {
warn!(%hash_hex, %subject, "FEED index references missing assertion data, skipping");
continue;
}
};
match deserialize_assertion_compat(&data) {
Ok(a) => match assertion_to_dto_with_warning(a, None) {
Ok(dto) => assertion_responses.push(dto),
Err(e) => {
warn!(%hash_hex, error = %e, "Failed to convert assertion to DTO, skipping");
}
},
Err(e) => {
warn!(%hash_hex, error = %e, "Failed to deserialize assertion, skipping");
}
}
}
metrics::histogram!("stemedb_query_latency_seconds", "endpoint" => "feed")
.record(query_start.elapsed().as_secs_f64());
@ -88,3 +113,16 @@ pub async fn feed(
changes_since: None,
}))
}
/// Pull the trailing hash (as hex text) out of a FEED index key.
///
/// Expected key layout: `\x00FEED:{16 hex chars (inverted ts)}:{64 hex chars (hash)}`.
/// The fixed-width head is 6 bytes of prefix (`\x00FEED:`), 16 bytes of inverted
/// timestamp and a 1-byte `:` separator, so the hash starts at byte offset 23.
///
/// Returns `None` when nothing follows that head or when the tail is not valid
/// UTF-8. Only the *length* of the head is relied upon; neither the head's
/// contents nor the length of the tail is validated here.
fn extract_hash_hex_from_feed_key(key: &[u8]) -> Option<&str> {
    const PREFIX_LEN: usize = 6; // `\x00FEED:`
    const INVERTED_TS_LEN: usize = 16; // hex-encoded inverted timestamp
    const SEPARATOR_LEN: usize = 1; // `:`
    const HASH_OFFSET: usize = PREFIX_LEN + INVERTED_TS_LEN + SEPARATOR_LEN; // 23

    match key.get(HASH_OFFSET..) {
        // A key that ends exactly at the offset carries no hash at all.
        Some(tail) if !tail.is_empty() => std::str::from_utf8(tail).ok(),
        _ => None,
    }
}

View File

@ -178,6 +178,7 @@ fn assertion_to_dto(assertion: stemedb_core::types::Assertion) -> Result<Asserti
timestamp: assertion.timestamp,
vector: assertion.vector,
source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()),
narrative: assertion.narrative,
source_warning: None, // LayeredConsensus doesn't do source status enrichment
})
}

View File

@ -28,8 +28,8 @@ pub mod circuit_breaker;
pub mod concepts;
pub mod constraints;
pub mod epoch;
pub mod feed;
pub mod escalation;
pub mod feed;
pub mod gold_standard;
pub mod health;
pub mod layered;
@ -37,15 +37,17 @@ pub mod meter;
pub mod metrics;
pub mod quarantine;
pub mod query;
pub mod rejected;
pub mod skeptic;
pub mod source;
pub mod source_registry;
pub mod stemedb_claims;
pub mod subjects;
pub mod supersede;
pub mod trace;
pub mod vote;
pub use admin::decay_trust_ranks;
pub use admin::{decay_trust_ranks, rebuild_indexes};
pub use admission::get_admission_status;
pub use api_keys::{create_api_key, list_api_keys, revoke_api_key, rotate_api_key, update_api_key};
pub use assert::create_assertion;
@ -53,8 +55,8 @@ pub use audit::{get_audit, list_audits};
pub use circuit_breaker::{get_circuit_status, list_tripped_circuits, reset_circuit};
pub use constraints::constraints_query;
pub use epoch::create_epoch;
pub use feed::feed;
pub use escalation::{list_escalations, resolve_escalation};
pub use feed::feed;
pub use gold_standard::{
create_gold_standard, list_gold_standards, remove_gold_standard, verify_agent,
};
@ -63,6 +65,7 @@ pub use layered::layered_query;
pub use meter::{get_quota_status, set_quota_limit};
pub use quarantine::{approve_quarantine, get_quarantine, list_quarantine, reject_quarantine};
pub use query::query_assertions;
pub use rejected::list_rejected;
pub use skeptic::skeptic_query;
pub use source::{get_provenance, store_source};
pub use source_registry::{
@ -89,3 +92,4 @@ pub use stemedb_claims::{
create_claim as create_stemedb_claim, delete_claim as delete_stemedb_claim,
get_claim as get_stemedb_claim, list_claims as list_stemedb_claims,
};
pub use subjects::{list_predicates, list_subjects};

View File

@ -490,6 +490,7 @@ pub(crate) fn assertion_to_dto_with_warning(
timestamp: assertion.timestamp,
vector: assertion.vector,
source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()),
narrative: assertion.narrative,
source_warning,
})
}

View File

@ -0,0 +1,89 @@
//! Admin endpoint for listing WAL records permanently rejected by the IngestWorker.
//!
//! These records passed API-level validation but were skipped during WAL replay
//! due to permanent failures (invalid signatures, corrupt serialization, etc.).
//! With the API-side signature verification fix, new rejected records should be rare.
use axum::{extract::State, Json};
use serde::{Deserialize, Serialize};
use stemedb_storage::{key_codec, KVStore};
use tracing::instrument;
use utoipa::ToSchema;
use crate::{dto::ErrorResponse, state::AppState};
/// Query parameters for listing rejected records.
///
/// Deserialized from the query string of `GET /v1/admin/rejected`.
#[derive(Debug, Deserialize)]
pub struct RejectedParams {
/// Maximum number of records to return (default: 100).
///
/// The handler substitutes 100 when this is omitted; it does not clamp
/// caller-supplied values to an upper bound.
pub limit: Option<usize>,
}
/// A WAL record that was permanently skipped by the IngestWorker.
// Implementation note: `list_rejected` decodes each stored value as JSON into
// this type, so the field names below are also the JSON keys of the persisted
// entry. Renaming a field changes what stored entries can be decoded.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct RejectedRecordDto {
/// WAL offset where the record was found.
pub offset: u64,
/// The record type (Assertion, Vote, Epoch).
pub record_type: String,
/// Why the record was rejected.
pub reason: String,
/// When the record was skipped (Unix timestamp).
// NOTE(review): unit (seconds vs. milliseconds) is not visible from this file — confirm with the writer side.
pub timestamp: u64,
}
/// Response listing rejected WAL records.
#[derive(Debug, Serialize, Deserialize, ToSchema)]
pub struct RejectedRecordsResponse {
/// List of rejected records.
pub rejected: Vec<RejectedRecordDto>,
/// Number of records in `rejected`.
///
/// This is the length of the returned list (after `limit` is applied and
/// undecodable entries are dropped), not the total number of rejected
/// records held in the store.
pub count: usize,
}
/// GET /v1/admin/rejected
///
/// List WAL records that were permanently rejected by the IngestWorker.
///
/// Scans the rejected-records key prefix, decodes each stored value as a JSON
/// `RejectedRecordDto`, and returns at most `limit` (default 100) decoded
/// records. Entries that cannot be decoded are logged and skipped; they do not
/// count against `limit`, so corrupt entries cannot hide valid ones.
///
/// Responds with 500 / `REJECTED_SCAN_ERROR` when the store scan itself fails.
#[utoipa::path(
    get,
    path = "/v1/admin/rejected",
    params(
        ("limit" = Option<usize>, Query, description = "Maximum records to return (default: 100)")
    ),
    responses(
        (status = 200, description = "Rejected records listed", body = RejectedRecordsResponse),
        (status = 500, description = "Internal server error", body = ErrorResponse)
    ),
    tag = "admin"
)]
#[instrument(skip(state))]
pub async fn list_rejected(
    State(state): State<AppState>,
    axum::extract::Query(params): axum::extract::Query<RejectedParams>,
) -> std::result::Result<Json<RejectedRecordsResponse>, (axum::http::StatusCode, Json<ErrorResponse>)>
{
    let limit = params.limit.unwrap_or(100);
    let prefix = key_codec::rejected_records_scan_prefix();

    let entries = state.store.scan_prefix(&prefix).await.map_err(|e| {
        tracing::error!(error = %e, "Failed to scan rejected records");
        (
            axum::http::StatusCode::INTERNAL_SERVER_ERROR,
            Json(ErrorResponse {
                error: "Failed to retrieve rejected records".to_string(),
                code: "REJECTED_SCAN_ERROR".to_string(),
            }),
        )
    })?;

    // Decode first, then apply the limit: `take` sits after `filter_map`, so only
    // successfully decoded records consume the caller's budget. The chain is lazy,
    // so decoding stops as soon as `limit` records have been collected.
    let rejected: Vec<RejectedRecordDto> = entries
        .into_iter()
        .filter_map(|(_key, value)| {
            let json_str = String::from_utf8_lossy(&value);
            match serde_json::from_str::<RejectedRecordDto>(&json_str) {
                Ok(dto) => Some(dto),
                Err(e) => {
                    // Surface corrupt entries instead of dropping them silently.
                    tracing::warn!(
                        error = %e,
                        value_len = value.len(),
                        "Undecodable rejected-record entry, skipping"
                    );
                    None
                }
            }
        })
        .take(limit)
        .collect();

    let count = rejected.len();
    Ok(Json(RejectedRecordsResponse { rejected, count }))
}

View File

@ -7,6 +7,7 @@ use axum::{
response::{IntoResponse, Response},
Json,
};
use stemedb_core::limits::MAX_SOURCE_CONTENT_LEN;
use stemedb_core::types::{SourceRecord, SourceStatus};
use stemedb_storage::{GenericIndexStore, GenericSourceRegistry, IndexStore, SourceRegistry};
use tracing::instrument;
@ -56,12 +57,24 @@ pub async fn register_source(
return Err(ApiError::InvalidRequest("Label cannot be empty".to_string()));
}
// Validate content size
if let Some(ref content) = req.content {
if content.len() > MAX_SOURCE_CONTENT_LEN {
return Err(ApiError::InvalidRequest(format!(
"Content too large: {} bytes (max {})",
content.len(),
MAX_SOURCE_CONTENT_LEN
)));
}
}
// Get timestamp
let timestamp = current_timestamp();
// Create the record
let mut record = SourceRecord::new(hash, req.label.clone(), req.url, req.tier, timestamp);
record.notes = req.notes;
record.content = req.content;
// Register in the store
let registry = GenericSourceRegistry::new(state.store.clone());
@ -206,7 +219,7 @@ pub async fn list_sources(
let registry = GenericSourceRegistry::new(state.store.clone());
let sources: Vec<SourceRecordDto> = if let Some(query) = &params.query {
let mut sources: Vec<SourceRecordDto> = if let Some(query) = &params.query {
// Search by label
registry.search(query, limit).await?.into_iter().map(Into::into).collect()
} else if let Some(tier) = params.tier {
@ -227,6 +240,11 @@ pub async fn list_sources(
all.into_iter().map(Into::into).collect()
};
// Strip content from list responses to avoid returning megabytes
for dto in &mut sources {
dto.content = None;
}
let count = sources.len();
Ok(Json(ListSourcesResponse { sources, count }))
}
@ -629,7 +647,7 @@ async fn build_impact_response(
if let Ok(Some(data)) = store_get_with_timeout(&*state.store, &assertion_key).await
{
if let Ok(assertion) =
stemedb_core::serde::deserialize::<stemedb_core::types::Assertion>(&data)
stemedb_core::serde::deserialize_assertion_compat(&data)
{
for sig in &assertion.signatures {
let agent_hex = hex::encode(sig.agent_id);

View File

@ -3,12 +3,16 @@
//! These endpoints provide claim storage DIRECTLY in StemeDB (not `.aphoria/claims.toml`).
//! Used for remote/hosted mode where claims are stored in the knowledge graph.
use axum::{extract::{Path, State}, http::StatusCode, Json};
use axum::{
extract::{Path, State},
http::StatusCode,
Json,
};
use ed25519_dalek::{Signer, SigningKey, VerifyingKey};
use tracing::info;
use ed25519_dalek::{SigningKey, Signer, VerifyingKey};
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
use stemedb_core::signing::compute_content_hash_v2;
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
use stemedb_ingest::worker::serialize_assertion;
use stemedb_storage::{key_codec, KVStore};
@ -86,10 +90,7 @@ pub async fn create_claim(
state.commit_buffer.append(payload).await?;
Ok((
StatusCode::CREATED,
Json(CreateClaimResponse { id: req.claim.id.clone(), stored: true }),
))
Ok((StatusCode::CREATED, Json(CreateClaimResponse { id: req.claim.id.clone(), stored: true })))
}
/// List all claims, optionally filtered.
@ -129,7 +130,7 @@ pub async fn list_claims(
let hash_hex = hex::encode(&hash_bytes);
let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
if let Some(data) = state.store.get(&assertion_key).await? {
if let Ok(assertion) = stemedb_core::serde::deserialize::<Assertion>(&data) {
if let Ok(assertion) = stemedb_core::serde::deserialize_assertion_compat(&data) {
if let Ok(dto) = assertion_to_dto(&assertion) {
claims.push(dto);
}
@ -189,10 +190,11 @@ pub async fn get_claim(
let hash_hex = hex::encode(hash_bytes);
let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
let data = state.store.get(&assertion_key).await?
.ok_or_else(|| ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate)))?;
let data = state.store.get(&assertion_key).await?.ok_or_else(|| {
ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate))
})?;
let assertion = stemedb_core::serde::deserialize::<Assertion>(&data)
let assertion = stemedb_core::serde::deserialize_assertion_compat(&data)
.map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?;
assertion_to_dto(&assertion)
@ -237,10 +239,11 @@ pub async fn delete_claim(
let hash_hex = hex::encode(hash_bytes);
let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
let data = state.store.get(&assertion_key).await?
.ok_or_else(|| ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate)))?;
let data = state.store.get(&assertion_key).await?.ok_or_else(|| {
ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate))
})?;
let mut assertion = stemedb_core::serde::deserialize::<Assertion>(&data)
let mut assertion = stemedb_core::serde::deserialize_assertion_compat(&data)
.map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?;
// Mark as deprecated (append-only: create new version)
@ -328,12 +331,13 @@ fn dto_to_assertion(dto: &AuthoredClaimDto) -> Result<Assertion> {
visual_hash: None,
epoch: None,
source_metadata: serde_json::to_vec(&metadata).ok(),
narrative: None,
lifecycle,
signatures: vec![], // Signatures added by ingestion pipeline
confidence: 1.0, // Authored claims have full confidence
timestamp: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.unwrap_or_default()
.as_secs(),
hlc_timestamp: Default::default(),
vector: None,
@ -360,10 +364,14 @@ fn assertion_to_dto(assertion: &Assertion) -> Result<AuthoredClaimDto> {
let concept_path = assertion
.subject
.strip_prefix("claim://")
.ok_or_else(|| ApiError::Internal("Invalid subject format: missing claim:// prefix".to_string()))?
.ok_or_else(|| {
ApiError::Internal("Invalid subject format: missing claim:// prefix".to_string())
})?
.rsplit_once('/')
.map(|(cp, _)| cp)
.ok_or_else(|| ApiError::Internal("Invalid subject format: missing predicate separator".to_string()))?
.ok_or_else(|| {
ApiError::Internal("Invalid subject format: missing predicate separator".to_string())
})?
.to_string();
// Convert object value
@ -393,11 +401,7 @@ fn assertion_to_dto(assertion: &Assertion) -> Result<AuthoredClaimDto> {
.and_then(|v| v.as_str())
.unwrap_or("equals")
.to_string(),
provenance: metadata
.get("provenance")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
provenance: metadata.get("provenance").and_then(|v| v.as_str()).unwrap_or("").to_string(),
invariant: metadata.get("invariant").and_then(|v| v.as_str()).unwrap_or("").to_string(),
consequence: metadata.get("consequence").and_then(|v| v.as_str()).unwrap_or("").to_string(),
authority_tier: source_class_to_tier_string(assertion.source_class),

View File

@ -0,0 +1,97 @@
//! Handlers for subject and predicate discovery endpoints.
//!
//! These endpoints scan existing Redb indexes to expose the subjects
//! and predicates known to the system, enabling autocomplete/typeahead
//! in the dashboard.
use axum::{
extract::{Path, State},
Json,
};
use tracing::instrument;
use crate::{
dto::subjects::{ListPredicatesResponse, ListSubjectsParams, ListSubjectsResponse},
error::Result,
extractors::QsQuery,
state::AppState,
};
use stemedb_storage::{key_codec, KVStore};
/// Enumerate subjects known to the system, optionally narrowed by a prefix.
///
/// Reads the `\x00SUBJECTS:` index in Redb. When `q` is supplied the scan is
/// restricted to keys under that subject prefix, which is what typeahead /
/// autocomplete callers want. `total_count` reports how many index entries the
/// scan matched, before `limit` (default 100, capped at 1000) is applied.
#[utoipa::path(
    get,
    path = "/v1/subjects",
    params(
        ("q" = Option<String>, Query, description = "Prefix filter for subject names"),
        ("limit" = Option<usize>, Query, description = "Max results (default 100, max 1000)")
    ),
    responses(
        (status = 200, description = "List of subjects", body = ListSubjectsResponse),
        (status = 500, description = "Internal server error", body = crate::dto::ErrorResponse)
    ),
    tag = "discovery"
)]
#[instrument(skip(state), fields(q = ?params.q, limit = ?params.limit))]
pub async fn list_subjects(
    State(state): State<AppState>,
    QsQuery(params): QsQuery<ListSubjectsParams>,
) -> Result<Json<ListSubjectsResponse>> {
    metrics::counter!("stemedb_queries_total", "endpoint" => "list_subjects").increment(1);

    // A filter narrows the scan to that subject prefix; otherwise walk the whole index.
    let scan_prefix = match &params.q {
        Some(q) => key_codec::subjects_index_key(q),
        None => key_codec::subjects_scan_prefix(),
    };
    let entries = state.store.scan_prefix(&scan_prefix).await?;

    let max_results = params.limit.unwrap_or(100).min(1000);
    let total_count = entries.len();

    let mut subjects: Vec<String> = Vec::new();
    for (key, _) in &entries {
        if subjects.len() >= max_results {
            break;
        }
        // Keys that do not parse as SUBJECTS entries are ignored.
        if let Some(subject) = key_codec::extract_subject_from_subjects_key(key) {
            subjects.push(subject);
        }
    }

    Ok(Json(ListSubjectsResponse { subjects, total_count }))
}
/// Enumerate the predicates that have been asserted for one subject.
///
/// Walks the `{subject}\x00SP:` index in Redb and returns the predicate half of
/// every key that parses as a subject/predicate entry, in scan order.
#[utoipa::path(
    get,
    path = "/v1/subjects/{subject}/predicates",
    params(
        ("subject" = String, Path, description = "The subject to list predicates for")
    ),
    responses(
        (status = 200, description = "List of predicates for the subject", body = ListPredicatesResponse),
        (status = 500, description = "Internal server error", body = crate::dto::ErrorResponse)
    ),
    tag = "discovery"
)]
#[instrument(skip(state), fields(%subject))]
pub async fn list_predicates(
    State(state): State<AppState>,
    Path(subject): Path<String>,
) -> Result<Json<ListPredicatesResponse>> {
    metrics::counter!("stemedb_queries_total", "endpoint" => "list_predicates").increment(1);

    let sp_prefix = key_codec::subject_predicate_scan_prefix(&subject);
    let entries = state.store.scan_prefix(&sp_prefix).await?;

    let mut predicates: Vec<String> = Vec::new();
    for (key, _) in &entries {
        // Keep only the predicate component; unparseable keys are ignored.
        if let Some((_, predicate)) = key_codec::extract_sp_key(key) {
            predicates.push(predicate);
        }
    }

    Ok(Json(ListPredicatesResponse { subject, predicates }))
}

View File

@ -66,7 +66,7 @@ pub use state::AppState;
// Re-export the path items for OpenAPI
use handlers::{
admin::__path_decay_trust_ranks,
admin::{__path_decay_trust_ranks, __path_rebuild_indexes},
admission::__path_get_admission_status,
api_keys::{
__path_create_api_key, __path_list_api_keys, __path_revoke_api_key, __path_rotate_api_key,
@ -83,8 +83,8 @@ use handlers::{
},
constraints::__path_constraints_query,
epoch::__path_create_epoch,
feed::__path_feed,
escalation::{__path_list_escalations, __path_resolve_escalation},
feed::__path_feed,
gold_standard::{
__path_create_gold_standard, __path_list_gold_standards, __path_remove_gold_standard,
__path_verify_agent,
@ -104,6 +104,7 @@ use handlers::{
__path_list_sources, __path_quarantine_source, __path_register_source,
__path_restore_source, __path_update_source_status,
},
subjects::{__path_list_predicates, __path_list_subjects},
supersede::__path_supersede,
trace::__path_trace,
vote::__path_create_vote,
@ -132,6 +133,7 @@ use handlers::{
store_source,
get_provenance,
decay_trust_ranks,
rebuild_indexes,
list_escalations,
resolve_escalation,
create_gold_standard,
@ -168,6 +170,9 @@ use handlers::{
revoke_api_key,
rotate_api_key,
update_api_key,
// Discovery (subject/predicate autocomplete)
list_subjects,
list_predicates,
),
components(
schemas(
@ -215,6 +220,7 @@ use handlers::{
dto::ProvenanceResponse,
dto::DecayTrustRanksRequest,
dto::DecayTrustRanksResponse,
dto::RebuildIndexesResponse,
dto::EscalationEventDto,
dto::EscalationLevelDto,
dto::EscalationListResponse,
@ -284,6 +290,9 @@ use handlers::{
dto::RotateApiKeyResponse,
dto::UpdateApiKeyRequest,
dto::UpdateApiKeyResponse,
// Discovery (subject/predicate autocomplete)
dto::ListSubjectsResponse,
dto::ListPredicatesResponse,
)
),
tags(
@ -302,6 +311,7 @@ use handlers::{
(name = "quarantine", description = "Content defense quarantine management"),
(name = "circuit_breaker", description = "Per-agent circuit breaker management"),
(name = "source-registry", description = "Source metadata registry and impact analysis"),
(name = "discovery", description = "Subject and predicate discovery for autocomplete"),
),
info(
title = "Episteme (StemeDB) API",

View File

@ -276,11 +276,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
info!("API server listening on {} (plaintext)", config.bind_addr);
info!("Swagger UI available at http://{}/swagger-ui", config.bind_addr);
axum::serve(
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.await?;
axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>()).await?;
}
Ok(())

View File

@ -410,6 +410,7 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
.route("/v1/claims", post(handlers::create_stemedb_claim))
// Admin write endpoints
.route("/v1/admin/decay-trust-ranks", post(handlers::decay_trust_ranks))
.route("/v1/admin/rebuild-indexes", post(handlers::rebuild_indexes))
.route("/v1/admin/escalations/:id/resolve", post(handlers::resolve_escalation))
.route("/v1/admin/gold-standards", post(handlers::create_gold_standard))
.route(
@ -449,7 +450,10 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
// Claims endpoints (StemeDB-backed)
.route("/v1/claims", get(handlers::list_stemedb_claims))
.route("/v1/claims/:concept_path/:predicate", get(handlers::get_stemedb_claim))
.route("/v1/claims/:concept_path/:predicate", axum::routing::delete(handlers::delete_stemedb_claim))
.route(
"/v1/claims/:concept_path/:predicate",
axum::routing::delete(handlers::delete_stemedb_claim),
)
.route("/v1/admin/escalations", get(handlers::list_escalations))
.route("/v1/admin/gold-standards", get(handlers::list_gold_standards))
.route("/v1/concepts/resolve", get(handlers::resolve_alias))
@ -459,6 +463,7 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
.route("/v1/admission/status", get(handlers::get_admission_status))
.route("/v1/admin/quarantine", get(handlers::list_quarantine))
.route("/v1/admin/quarantine/:hash", get(handlers::get_quarantine))
.route("/v1/admin/rejected", get(handlers::list_rejected))
.route("/v1/admin/circuit-breaker/:agent_id", get(handlers::get_circuit_status))
.route("/v1/admin/circuit-breakers/tripped", get(handlers::list_tripped_circuits))
.route("/v1/admin/api-keys", get(handlers::list_api_keys))
@ -466,6 +471,9 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
.route("/v1/sources/:hash", get(handlers::get_source))
.route("/v1/sources/:hash/impact", get(handlers::get_source_impact))
.route("/v1/sources/:hash/impact/export", get(handlers::export_source_impact))
// Discovery endpoints (subject/predicate autocomplete)
.route("/v1/subjects", get(handlers::list_subjects))
.route("/v1/subjects/:subject/predicates", get(handlers::list_predicates))
.layer(RequestBodyLimitLayer::new(config.read_body_limit)); // P5.1: Configurable limit
// Add Aphoria endpoints when feature is enabled

View File

@ -61,12 +61,9 @@ async fn test_health_check_over_tcp() {
// Serve with ConnectInfo injection (the fix for the 500 bug)
tokio::spawn(async move {
axum::serve(
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.await
.expect("server");
axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>())
.await
.expect("server");
});
// Give the server a moment to start
@ -74,10 +71,7 @@ async fn test_health_check_over_tcp() {
// Make a raw HTTP/1.1 request over TCP
let mut stream = tokio::net::TcpStream::connect(addr).await.expect("connect");
let request = format!(
"GET /v1/health HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n",
addr
);
let request = format!("GET /v1/health HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n", addr);
stream.write_all(request.as_bytes()).await.expect("write");
let mut response = String::new();
@ -95,3 +89,111 @@ async fn test_health_check_over_tcp() {
let json: serde_json::Value = serde_json::from_str(body).expect("json parse");
assert_eq!(json["status"], "healthy");
}
// ============================================================================
// Signature Verification Tests (pre-WAL validation)
// ============================================================================
/// Test: POST /v1/assert with invalid signatures returns 400 (not 201).
///
/// Guards against the "assert returns 201 but data is silently dropped" bug:
/// the API used to accept signatures that were well-formed but did not verify,
/// append them to the WAL and answer 201, after which the IngestWorker rejected
/// the record and the ingestion pipeline stalled behind it.
#[tokio::test]
async fn test_assert_invalid_signature_returns_400() {
    use serde_json::json;

    let env = common::create_test_env().await;
    let app = create_router(env.state);

    // Well-formed hex of the right lengths, but not a real key/signature pair:
    // 32 arbitrary bytes as the agent id and 64 arbitrary bytes as the signature.
    let bogus_agent_id = "a".repeat(64);
    let bogus_signature = "b".repeat(128);
    let payload = json!({
        "subject": "test/bug_regression",
        "predicate": "has_value",
        "object": {"type": "Text", "value": "hello"},
        "confidence": 0.9,
        "source_hash": "0".repeat(64),
        "signatures": [{
            "agent_id": bogus_agent_id,
            "signature": bogus_signature,
            "timestamp": 1700000000
        }],
        "timestamp": 1700000000
    });
    let payload_bytes = serde_json::to_vec(&payload).expect("json");

    let request = Request::builder()
        .method("POST")
        .uri("/v1/assert")
        .header("Content-Type", "application/json")
        .body(Body::from(payload_bytes))
        .expect("Request");

    let response = app.oneshot(request).await.expect("Request");
    let status = response.status();
    assert_eq!(status, StatusCode::BAD_REQUEST, "Invalid signature should return 400, not 201");

    let raw_body = axum::body::to_bytes(response.into_body(), usize::MAX).await.expect("Body");
    let parsed: serde_json::Value = serde_json::from_slice(&raw_body).expect("JSON");

    // The error text must point the caller at the signature problem.
    let error_msg = parsed["error"].as_str().unwrap_or("");
    let mentions_signature =
        ["Signature", "signature"].iter().any(|needle| error_msg.contains(*needle));
    assert!(mentions_signature, "Error should mention signature failure, got: {}", error_msg);
}
/// Test: POST /v1/assert with valid Ed25519 signature returns 201.
///
/// Happy-path counterpart to the invalid-signature test: a payload produced by
/// the shared signing helper must be accepted and reported as `created`.
#[tokio::test]
async fn test_assert_valid_signature_returns_201() {
    let env = common::create_test_env().await;
    let app = create_router(env.state);

    let payload = common::create_signed_assertion_json("test/valid", "has_value", 42.0);
    let payload_bytes = serde_json::to_vec(&payload).expect("json");

    let request = Request::builder()
        .method("POST")
        .uri("/v1/assert")
        .header("Content-Type", "application/json")
        .body(Body::from(payload_bytes))
        .expect("Request");

    let response = app.oneshot(request).await.expect("Request");
    let status = response.status();
    assert_eq!(status, StatusCode::CREATED, "Valid signature should return 201");

    let raw_body = axum::body::to_bytes(response.into_body(), usize::MAX).await.expect("Body");
    let parsed: serde_json::Value = serde_json::from_slice(&raw_body).expect("JSON");
    assert_eq!(parsed["status"], "created");
}
/// Test: POST /v1/assert with null byte in subject returns 400.
///
/// The payload comes from the shared signing helper, so the only thing wrong
/// with the request is the `\x00` embedded in the subject.
#[tokio::test]
async fn test_assert_null_byte_subject_returns_400() {
    let env = common::create_test_env().await;
    let app = create_router(env.state);

    let payload = common::create_signed_assertion_json("test\x00injected", "has_value", 1.0);
    let payload_bytes = serde_json::to_vec(&payload).expect("json");

    let request = Request::builder()
        .method("POST")
        .uri("/v1/assert")
        .header("Content-Type", "application/json")
        .body(Body::from(payload_bytes))
        .expect("Request");

    let response = app.oneshot(request).await.expect("Request");

    // The request must be refused with 400 because of the null byte.
    let status = response.status();
    assert_eq!(status, StatusCode::BAD_REQUEST, "Null byte in subject should return 400");
}

View File

@ -48,6 +48,7 @@ mod tests {
visual_hash: Some([1u8; 8]),
epoch: Some([2u8; 32]),
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry {
agent_id: [2u8; 32],
@ -103,6 +104,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: stage,
signatures: vec![],
confidence: 1.0,

View File

@ -55,6 +55,25 @@ pub const MAX_OBJECT_LEN: usize = 4096;
/// in the source metadata instead of the raw bytes.
pub const MAX_SOURCE_SIZE: usize = 10 * 1024 * 1024;
/// Upper bound on an assertion narrative, in bytes (64 KB).
///
/// A narrative is free text covering methodology, limitations, bias, and
/// caveats so that an assertion can be understood on its own. Capping it keeps
/// memory use bounded while still leaving room for rich context.
///
/// # Example
/// - Valid: A 2 KB explanation of trial methodology
/// - Valid: A 10 KB narrative covering bias, limitations, and caveats
/// - Invalid: A 100 KB embedded document in the narrative field
pub const MAX_NARRATIVE_LEN: usize = 64 * 1024;
/// Upper bound on stored source content, in bytes (1 MB).
///
/// Source content is the full text extracted from a PDF or other document.
/// Capping it keeps memory use bounded while still fitting typical research
/// papers and regulatory documents.
pub const MAX_SOURCE_CONTENT_LEN: usize = 1024 * 1024;
/// Default limit for paginated query results.
///
/// Applied when no explicit limit is provided in the query parameters.

View File

@ -44,6 +44,11 @@ use rkyv::validation::validators::DefaultValidator;
use rkyv::{Archive, CheckBytes, Deserialize, Serialize};
use thiserror::Error;
use crate::types::{
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
SourceRecord, SourceStatus,
};
/// Default scratch buffer size for serialization.
///
/// 4KB is sufficient for most assertions. Larger payloads will trigger
@ -88,6 +93,7 @@ pub enum SerdeError {
/// visual_hash: None,
/// epoch: None,
/// source_metadata: None,
/// narrative: None,
/// lifecycle: LifecycleStage::Proposed,
/// signatures: vec![],
/// confidence: 1.0,
@ -156,6 +162,131 @@ where
.map_err(|e| SerdeError::Deserialization(e.to_string()))
}
// ============================================================================
// Legacy Assertion (pre-narrative schema)
// ============================================================================
/// Frozen copy of the `Assertion` layout from before `narrative` was introduced.
///
/// The `narrative: Option<String>` field was added between `source_metadata`
/// and `lifecycle`. rkyv doesn't support schema evolution, so data serialized
/// before that change needs this struct to deserialize correctly.
///
/// `deserialize_assertion_compat` falls back to this type when the current
/// layout fails, then converts through `From<LegacyAssertion> for Assertion`,
/// which sets `narrative` to `None`.
///
/// NOTE(review): presumably the fields and their order must stay unchanged so
/// the derived archive layout keeps matching already-persisted bytes — confirm
/// before editing this struct.
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
#[archive(check_bytes)]
struct LegacyAssertion {
pub subject: String,
pub predicate: String,
pub object: ObjectValue,
pub parent_hash: Option<[u8; 32]>,
pub source_hash: [u8; 32],
pub source_class: SourceClass,
pub visual_hash: Option<[u8; 8]>,
pub epoch: Option<[u8; 32]>,
pub source_metadata: Option<Vec<u8>>,
// narrative: Option<String> did NOT exist in this version
// (in the current `Assertion` it sits here, right before `lifecycle`)
pub lifecycle: LifecycleStage,
pub signatures: Vec<SignatureEntry>,
pub confidence: f32,
pub timestamp: u64,
pub hlc_timestamp: HlcTimestamp,
pub vector: Option<Vec<f32>>,
}
impl From<LegacyAssertion> for Assertion {
fn from(legacy: LegacyAssertion) -> Self {
Self {
subject: legacy.subject,
predicate: legacy.predicate,
object: legacy.object,
parent_hash: legacy.parent_hash,
source_hash: legacy.source_hash,
source_class: legacy.source_class,
visual_hash: legacy.visual_hash,
epoch: legacy.epoch,
source_metadata: legacy.source_metadata,
narrative: None,
lifecycle: legacy.lifecycle,
signatures: legacy.signatures,
confidence: legacy.confidence,
timestamp: legacy.timestamp,
hlc_timestamp: legacy.hlc_timestamp,
vector: legacy.vector,
}
}
}
/// Deserialize an assertion with backward compatibility.
///
/// Tries the current `Assertion` layout first. If that fails, tries the
/// legacy layout (before `narrative` field was added) and converts.
///
/// This allows the system to read assertions written before schema changes
/// without requiring a data migration.
pub fn deserialize_assertion_compat(data: &[u8]) -> Result<Assertion, SerdeError> {
// Try current format first (fast path for new data)
if let Ok(assertion) = deserialize::<Assertion>(data) {
return Ok(assertion);
}
// Fallback: try legacy format (no narrative field)
let legacy: LegacyAssertion = deserialize(data)?;
Ok(legacy.into())
}
// ============================================================================
// Legacy SourceRecord (pre-content schema)
// ============================================================================
/// Frozen copy of the `SourceRecord` layout from before `content` was introduced.
///
/// The `content: Option<String>` field was added after `notes`.
/// rkyv doesn't support schema evolution, so data serialized
/// before that change needs this struct to deserialize correctly.
///
/// `deserialize_source_record_compat` falls back to this type when the current
/// layout fails, then converts through `From<LegacySourceRecord> for SourceRecord`,
/// which sets `content` to `None`.
///
/// NOTE(review): presumably the fields and their order must stay unchanged so
/// the derived archive layout keeps matching already-persisted bytes — confirm
/// before editing this struct.
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
#[archive(check_bytes)]
struct LegacySourceRecord {
pub hash: [u8; 32],
pub label: String,
pub url: Option<String>,
pub tier: u8,
pub status: SourceStatus,
pub created_at: u64,
pub updated_at: u64,
pub notes: Option<String>,
// content: Option<String> did NOT exist in this version
// (in the current `SourceRecord` it follows `notes`)
}
impl From<LegacySourceRecord> for SourceRecord {
fn from(legacy: LegacySourceRecord) -> Self {
Self {
hash: legacy.hash,
label: legacy.label,
url: legacy.url,
tier: legacy.tier,
status: legacy.status,
created_at: legacy.created_at,
updated_at: legacy.updated_at,
notes: legacy.notes,
content: None,
}
}
}
/// Deserialize a source record with backward compatibility.
///
/// Tries the current `SourceRecord` layout first. If that fails, tries the
/// legacy layout (before `content` field was added) and converts.
pub fn deserialize_source_record_compat(data: &[u8]) -> Result<SourceRecord, SerdeError> {
// Try current format first (fast path for new data)
if let Ok(record) = deserialize::<SourceRecord>(data) {
return Ok(record);
}
// Fallback: try legacy format (no content field)
let legacy: LegacySourceRecord = deserialize(data)?;
Ok(legacy.into())
}
#[cfg(test)]
mod tests {
use super::*;
@ -176,6 +307,7 @@ mod tests {
visual_hash: Some([1u8; 8]),
epoch: Some([2u8; 32]),
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry {
agent_id: [2u8; 32],
@ -303,6 +435,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![],
confidence: 0.0,
@ -330,6 +463,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: Some(metadata.as_bytes().to_vec()),
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![],
confidence: 0.85,
@ -357,6 +491,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![],
confidence: 1.0,
@ -371,4 +506,127 @@ mod tests {
assert_eq!(assertion, recovered);
assert!(recovered.source_metadata.is_none());
}
/// Bytes written with the pre-narrative layout are rejected by the current
/// layout but recovered through `deserialize_assertion_compat`.
#[test]
fn test_legacy_assertion_compat_deserialize() {
    // Produce bytes the way a pre-narrative writer would have.
    let sole_signature = SignatureEntry {
        agent_id: [2u8; 32],
        signature: [3u8; 64],
        timestamp: 1000,
        version: 1,
    };
    let legacy = LegacyAssertion {
        subject: "Semaglutide".to_string(),
        predicate: "reduces_weight".to_string(),
        object: ObjectValue::Text("significant".to_string()),
        parent_hash: None,
        source_hash: [1u8; 32],
        source_class: SourceClass::Clinical,
        visual_hash: None,
        epoch: None,
        source_metadata: Some(b"{}".to_vec()),
        lifecycle: LifecycleStage::Approved,
        signatures: vec![sole_signature],
        confidence: 0.95,
        timestamp: 1700000000,
        hlc_timestamp: HlcTimestamp::default(),
        vector: Some(vec![0.1, 0.2]),
    };
    let legacy_bytes = serialize(&legacy).expect("serialize legacy");

    // The current layout differs, so a direct read must fail.
    let as_current = deserialize::<Assertion>(&legacy_bytes);
    assert!(as_current.is_err());

    // The compat path must succeed and carry every legacy field across.
    let migrated =
        deserialize_assertion_compat(&legacy_bytes).expect("compat deserialize should succeed");
    assert_eq!(migrated.subject, "Semaglutide");
    assert_eq!(migrated.predicate, "reduces_weight");
    assert_eq!(migrated.confidence, 0.95);
    assert_eq!(migrated.signatures.len(), 1);
    assert!(migrated.narrative.is_none()); // Field did not exist in the legacy layout
    assert!(migrated.source_metadata.is_some());
    assert_eq!(migrated.timestamp, 1700000000);
}
/// Current-format assertions round-trip unchanged through the compat reader.
#[test]
fn test_current_assertion_also_works_via_compat() {
    let narrative_text = "This is a narrative.";
    let assertion = Assertion {
        subject: "test".to_string(),
        predicate: "works".to_string(),
        object: ObjectValue::Boolean(true),
        parent_hash: None,
        source_hash: [0u8; 32],
        source_class: SourceClass::Expert,
        visual_hash: None,
        epoch: None,
        source_metadata: None,
        narrative: Some(narrative_text.to_string()),
        lifecycle: LifecycleStage::Proposed,
        signatures: vec![],
        confidence: 1.0,
        timestamp: 0,
        hlc_timestamp: HlcTimestamp::default(),
        vector: None,
    };

    let encoded = serialize(&assertion).expect("serialize");
    let decoded = deserialize_assertion_compat(&encoded)
        .expect("compat deserialize should succeed for current format");

    assert_eq!(decoded, assertion);
    assert_eq!(decoded.narrative.as_deref(), Some(narrative_text));
}
/// Bytes written with the pre-content layout are rejected by the current
/// layout but recovered through `deserialize_source_record_compat`.
#[test]
fn test_legacy_source_record_compat_deserialize() {
    // Produce bytes the way a pre-content writer would have.
    let legacy_bytes = serialize(&LegacySourceRecord {
        hash: [42u8; 32],
        label: "RFC 7519".to_string(),
        url: Some("https://tools.ietf.org/html/rfc7519".to_string()),
        tier: 0,
        status: SourceStatus::Active,
        created_at: 1000,
        updated_at: 2000,
        notes: Some("JWT spec".to_string()),
    })
    .expect("serialize legacy");

    // The current layout differs, so a direct read must fail.
    let as_current = deserialize::<SourceRecord>(&legacy_bytes);
    assert!(as_current.is_err());

    // The compat path must succeed and carry every legacy field across.
    let migrated = deserialize_source_record_compat(&legacy_bytes)
        .expect("compat deserialize should succeed");
    assert_eq!(migrated.hash, [42u8; 32]);
    assert_eq!(migrated.label, "RFC 7519");
    assert_eq!(migrated.tier, 0);
    assert_eq!(migrated.notes, Some("JWT spec".to_string()));
    assert!(migrated.content.is_none()); // Field did not exist in the legacy layout
}
/// Current-format source records round-trip unchanged through the compat reader.
#[test]
fn test_current_source_record_also_works_via_compat() {
    let full_text = "Full text content";
    let record = SourceRecord::new([1u8; 32], "Test".to_string(), None, 2, 1000)
        .with_content(Some(full_text.to_string()));

    let encoded = serialize(&record).expect("serialize");
    let decoded = deserialize_source_record_compat(&encoded)
        .expect("compat deserialize should succeed for current format");

    assert_eq!(decoded, record);
    assert_eq!(decoded.content.as_deref(), Some(full_text));
}
}

View File

@ -21,6 +21,140 @@
//! ```
use crate::types::{Assertion, ObjectValue};
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
/// Reasons an assertion's signature set can be rejected.
///
/// The `Display` output is aimed at API consumers who may not be familiar with
/// Ed25519 cryptography: each message states what the offending field must
/// contain and lists common mistakes.
#[derive(Debug)]
pub enum SignatureError {
    /// The assertion carries no signatures at all.
    Empty,
    /// A signature entry declares a version that is not recognised.
    UnknownVersion {
        /// The unrecognised version number.
        version: u8,
        /// Index of the entry within the assertion's signature list.
        index: usize,
    },
    /// The `agent_id` bytes do not form a valid Ed25519 public key.
    InvalidPublicKey {
        /// Index of the entry within the assertion's signature list.
        index: usize,
        /// Text of the underlying error.
        detail: String,
    },
    /// The signature does not verify against the signed message.
    VerificationFailed {
        /// Index of the entry within the assertion's signature list.
        index: usize,
        /// Version declared by the failing signature entry.
        version: u8,
        /// Text of the underlying error.
        detail: String,
    },
}

impl std::fmt::Display for SignatureError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Describes, for the error text, which payload a signature version signs.
        fn signed_payload(version: u8) -> &'static str {
            match version {
                1 => "'{subject}:{predicate}' (UTF-8 bytes)",
                2 => "the BLAKE3 content hash of the assertion",
                _ => "unknown",
            }
        }

        match self {
            // No placeholders in this message, so it is written verbatim.
            Self::Empty => f.write_str(
                "Assertion must have at least one signature. \
                 Each signature requires: agent_id (32-byte Ed25519 public key, hex-encoded as 64 chars), \
                 signature (64-byte Ed25519 signature, hex-encoded as 128 chars). \
                 SHA-256/SHA-512 hashes cannot be used as agent_id or signature",
            ),
            Self::UnknownVersion { version, index } => write!(
                f,
                "Signature {index}: unknown version {version}. \
                 Supported versions: 1 (signs '{{subject}}:{{predicate}}'), 2 (signs BLAKE3 content hash)"
            ),
            Self::InvalidPublicKey { index, detail } => write!(
                f,
                "Signature {index}: agent_id is not a valid Ed25519 public key ({detail}). \
                 agent_id must be a 32-byte Ed25519 public key (hex-encoded as 64 chars). \
                 Common mistake: using SHA-256 or other hashes as agent_id. \
                 Generate a keypair with Ed25519 (e.g., ed25519-dalek, crypto/ed25519, or openssl)"
            ),
            Self::VerificationFailed { index, version, detail } => {
                let message_desc = signed_payload(*version);
                write!(
                    f,
                    "Signature {index}: Ed25519 verification failed ({detail}). \
                     For v{version} signatures, signature must be Ed25519_sign(private_key, {message_desc}). \
                     Common mistakes: (1) using SHA hashes instead of Ed25519 signatures, \
                     (2) signing the wrong message, (3) agent_id doesn't match the signing key"
                )
            }
        }
    }
}

impl std::error::Error for SignatureError {}
/// Verify all Ed25519 signatures on an assertion.
///
/// Supports two signature versions:
/// - **Version 1 (legacy):** signs `"{subject}:{predicate}"` — only protects those fields
/// - **Version 2 (enterprise):** signs the BLAKE3 content hash — protects ALL fields
///
/// All signatures must be valid for the assertion to be accepted; checking
/// stops at the first entry that fails.
///
/// This function is used at both the API boundary (fail fast with 400) and in the
/// IngestWorker (defense in depth). Keeping it in `stemedb-core` avoids duplication.
///
/// # Errors
///
/// - [`SignatureError::Empty`] if the assertion has no signatures.
/// - [`SignatureError::UnknownVersion`] if an entry's version is neither 1 nor 2.
/// - [`SignatureError::InvalidPublicKey`] if an entry's `agent_id` is not a valid
///   Ed25519 public key.
/// - [`SignatureError::VerificationFailed`] if an entry's signature does not match
///   the message for its version.
pub fn verify_assertion_signatures(
    assertion: &Assertion,
) -> std::result::Result<(), SignatureError> {
    if assertion.signatures.is_empty() {
        return Err(SignatureError::Empty);
    }

    // Each signed payload is built on first use and then reused. This avoids
    // allocating the v1 message (or hashing for v2) when no signature of that
    // version is present, and removes any "payload missing" error path.
    let mut v1_message: Option<String> = None;
    let mut v2_content_hash: Option<[u8; 32]> = None;

    for (index, entry) in assertion.signatures.iter().enumerate() {
        let message_bytes: &[u8] = match entry.version {
            1 => v1_message
                .get_or_insert_with(|| format!("{}:{}", assertion.subject, assertion.predicate))
                .as_bytes(),
            2 => v2_content_hash
                .get_or_insert_with(|| compute_content_hash_v2(assertion))
                .as_slice(),
            version => return Err(SignatureError::UnknownVersion { version, index }),
        };

        // The agent_id doubles as the Ed25519 public key for this entry.
        let verifying_key = VerifyingKey::from_bytes(&entry.agent_id)
            .map_err(|e| SignatureError::InvalidPublicKey { index, detail: e.to_string() })?;
        let signature = Signature::from_bytes(&entry.signature);

        verifying_key.verify(message_bytes, &signature).map_err(|e| {
            SignatureError::VerificationFailed {
                index,
                version: entry.version,
                detail: e.to_string(),
            }
        })?;
    }

    Ok(())
}
/// Compute the canonical content hash for v2 (enterprise) signing.
///
@ -37,6 +171,10 @@ use crate::types::{Assertion, ObjectValue};
/// - `source_metadata`: Variable-length, domain-specific
/// - `lifecycle`: Can change over time
///
/// **Narrative IS included** because it is content-bearing (methodology, limitations).
/// Changing the narrative changes the assertion's meaning. When `None`, no bytes
/// are added, preserving backward compatibility with pre-narrative hashes.
///
/// # Format
///
/// The hash is computed over:
@ -93,6 +231,12 @@ pub fn compute_content_hash_v2(assertion: &Assertion) -> [u8; 32] {
hasher.update(b":");
hasher.update(&assertion.timestamp.to_le_bytes());
// Narrative (only when present, so None preserves backward-compat hash)
if let Some(ref narrative) = assertion.narrative {
hasher.update(b":narrative:");
hasher.update(narrative.as_bytes());
}
*hasher.finalize().as_bytes()
}
@ -123,6 +267,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![],
confidence: 0.95,
@ -246,4 +391,46 @@ mod tests {
}
}
}
/// Setting a narrative on an otherwise identical assertion must yield a
/// different v2 content hash.
#[test]
fn test_content_hash_changes_with_narrative() {
    let mut subject_under_test = test_assertion();
    let digest_without = compute_content_hash_v2(&subject_under_test);

    subject_under_test.narrative =
        Some(String::from("This drug carries a boxed warning for thyroid C-cell tumors."));
    let digest_with = compute_content_hash_v2(&subject_under_test);

    assert_ne!(digest_without, digest_with, "Narrative should change the content hash");
}
/// Checks that an assertion written out field by field with `narrative: None`
/// hashes identically to the shared `test_assertion()` fixture.
///
/// NOTE(review): both digests come from the current `compute_content_hash_v2`,
/// so this does not compare against a digest captured before `narrative`
/// existed. If true backward compatibility must be guarded, consider pinning a
/// known pre-narrative digest as a literal.
#[test]
fn test_content_hash_backward_compat_narrative_none() {
// Capture a hash with narrative: None
let assertion = test_assertion();
let hash1 = compute_content_hash_v2(&assertion);
// Build the same assertion again independently
// (field values here must mirror the `test_assertion()` fixture)
let assertion2 = Assertion {
subject: "Semaglutide".to_string(),
predicate: "has_boxed_warning".to_string(),
object: ObjectValue::Boolean(true),
parent_hash: None,
source_hash: [1u8; 32],
source_class: SourceClass::Regulatory,
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![],
confidence: 0.95,
timestamp: 1704067200,
hlc_timestamp: HlcTimestamp::default(),
vector: None,
};
let hash2 = compute_content_hash_v2(&assertion2);
assert_eq!(hash1, hash2, "narrative: None must produce identical hash for backward compat");
}
}

View File

@ -49,6 +49,7 @@ pub struct AssertionBuilder {
visual_hash: Option<[u8; 8]>,
epoch: Option<[u8; 32]>,
source_metadata: Option<Vec<u8>>,
narrative: Option<String>,
lifecycle: LifecycleStage,
signatures: Option<Vec<SignatureEntry>>,
agent_id: [u8; 32],
@ -77,6 +78,7 @@ impl AssertionBuilder {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: None, // Will use agent_id to build default
agent_id: [1u8; 32],
@ -199,6 +201,12 @@ impl AssertionBuilder {
self
}
/// Set the narrative (free-text methodology, limitations, caveats).
pub fn narrative(mut self, narrative: &str) -> Self {
self.narrative = Some(narrative.to_string());
self
}
/// Provide explicit signatures (overrides the default single-signature behavior).
pub fn signatures(mut self, signatures: Vec<SignatureEntry>) -> Self {
self.signatures = Some(signatures);
@ -226,6 +234,7 @@ impl AssertionBuilder {
visual_hash: self.visual_hash,
epoch: self.epoch,
source_metadata: self.source_metadata,
narrative: self.narrative,
lifecycle: self.lifecycle,
signatures,
confidence: self.confidence,

View File

@ -33,6 +33,15 @@ pub struct Assertion {
/// Schema is domain-specific (journal info, social metrics, etc.).
/// Use `Vec<u8>` for rkyv zero-copy compatibility.
pub source_metadata: Option<Vec<u8>>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
///
/// Makes the assertion self-contained: pick it up, read it, understand the
/// full claim without dereferencing anything. Not structured into categories
/// because there are too many kinds of information to pre-categorize.
///
/// Included in v2 content hash (narrative is content-bearing).
/// Max length: `limits::MAX_NARRATIVE_LEN` (64 KB).
pub narrative: Option<String>,
/// The lifecycle stage (Proposed, UnderReview, Approved, Deprecated, Rejected).
pub lifecycle: LifecycleStage,

View File

@ -102,6 +102,10 @@ pub struct SourceRecord {
/// Optional curator notes about the source.
/// Examples: "Deprecated in favor of RFC 9068", "Under review for accuracy"
pub notes: Option<String>,
/// Optional full-text content of the source document.
/// Populated by pipelines that extract text from PDFs or other formats.
pub content: Option<String>,
}
impl SourceRecord {
@ -122,6 +126,7 @@ impl SourceRecord {
created_at: timestamp,
updated_at: timestamp,
notes: None,
content: None,
}
}
@ -137,7 +142,13 @@ impl SourceRecord {
updated_at: u64,
notes: Option<String>,
) -> Self {
Self { hash, label, url, tier: tier.min(5), status, created_at, updated_at, notes }
Self { hash, label, url, tier: tier.min(5), status, created_at, updated_at, notes, content: None }
}
/// Set the full-text content of the source document.
pub fn with_content(mut self, content: Option<String>) -> Self {
self.content = content;
self
}
/// Returns the tier label based on the tier number.
@ -186,6 +197,7 @@ mod tests {
assert_eq!(record.created_at, 1000);
assert_eq!(record.updated_at, 1000);
assert!(record.notes.is_none());
assert!(record.content.is_none());
}
#[test]
@ -263,5 +275,38 @@ mod tests {
crate::serde::deserialize(&bytes).expect("Failed to deserialize SourceRecord");
assert_eq!(record, recovered);
assert!(recovered.content.is_none());
}
/// A record carrying content survives a serialize/deserialize round trip intact.
#[test]
fn test_rkyv_roundtrip_with_content() {
    let letter_text = "Full text of the FDA approval letter...";
    let original =
        SourceRecord::new([42u8; 32], "FDA Approval Letter".to_string(), None, 0, 1000)
            .with_content(Some(letter_text.to_string()));

    let encoded = crate::serde::serialize(&original).expect("Failed to serialize SourceRecord");
    let decoded: SourceRecord =
        crate::serde::deserialize(&encoded).expect("Failed to deserialize SourceRecord");

    assert_eq!(original, decoded);
    assert_eq!(decoded.content.as_deref(), Some(letter_text));
}
/// `with_content` stores `Some` text as given and leaves `None` as `None`.
#[test]
fn test_with_content_builder() {
    // Fresh base record for each case.
    let base = || SourceRecord::new([1u8; 32], "Test".to_string(), None, 0, 1000);

    let populated = base().with_content(Some("content".to_string()));
    assert_eq!(populated.content, Some("content".to_string()));

    let cleared = base().with_content(None);
    assert!(cleared.content.is_none());
}
}

View File

@ -23,6 +23,8 @@ ed25519-dalek = { version = "2.1", features = ["rand_core"] }
uhlc = "0.7"
# Async traits
async-trait = "0.1"
# Metrics
metrics = "0.23"
[dev-dependencies]
tempfile = "3.10"

View File

@ -32,3 +32,23 @@ pub enum IngestError {
#[error("Input validation failed: {0}")]
InputValidation(String),
}
impl IngestError {
/// Returns true if retrying this exact WAL record could succeed.
///
/// Transient errors (I/O, storage engine) may resolve on retry.
/// Permanent errors (invalid signature, bad input, corrupt serialization)
/// will never succeed — the bytes in the WAL are immutable.
pub fn is_retryable(&self) -> bool {
match self {
// I/O and storage errors: disk might recover, RocksDB might unblock
IngestError::Wal(_) | IngestError::Storage(_) => true,
// The WAL record bytes are immutable — these will never pass
IngestError::InvalidSignature(_)
| IngestError::InputValidation(_)
| IngestError::Serialization(_) => false,
// Worker errors are ambiguous; treat as retryable to be safe
IngestError::Worker(_) => true,
}
}
}

View File

@ -4,9 +4,8 @@
use super::record_types::RECORD_HEADER_SIZE;
use super::{IngestWorker, RecordType};
use crate::error::{IngestError, Result};
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
use stemedb_core::serde::deserialize;
use stemedb_core::signing::compute_content_hash_v2;
use stemedb_core::signing;
use stemedb_core::types::{Assertion, Epoch, Hash, Vote};
use stemedb_storage::key_codec;
use stemedb_storage::{IndexStore, KVStore, VoteStore};
@ -82,10 +81,77 @@ impl<S: KVStore + 'static> IngestWorker<S> {
let record_type = RecordType::try_from(record.payload[0])?;
let data = &record.payload[RECORD_HEADER_SIZE..];
match record_type {
RecordType::Assertion => self.ingest_assertion(data).await?,
RecordType::Vote => self.ingest_vote(data).await?,
RecordType::Epoch => self.ingest_epoch(data).await?,
let ingest_result = match record_type {
RecordType::Assertion => self.ingest_assertion(data).await,
RecordType::Vote => self.ingest_vote(data).await,
RecordType::Epoch => self.ingest_epoch(data).await,
};
if let Err(e) = ingest_result {
if !e.is_retryable() {
// Permanent failure: the WAL record bytes are immutable and will
// never pass validation. Advance the cursor past this poison record
// so it doesn't block all subsequent ingestion.
let skip_offset = self.current_offset;
self.current_offset += bytes_read;
let cursor_key = key_codec::cursor_key();
// Best-effort cursor persist. If this fails, on restart we will
// re-encounter this record, classify it as permanent again, and
// skip it. No data is lost.
if let Err(persist_err) =
self.store.put(&cursor_key, &self.current_offset.to_le_bytes()).await
{
warn!(
offset = skip_offset,
error = %persist_err,
"Failed to persist cursor after skipping poison record"
);
}
warn!(
record_type = ?record_type,
offset = skip_offset,
new_offset = self.current_offset,
error = %e,
"Skipped permanently invalid WAL record"
);
// Store rejection metadata for admin observability.
// Best-effort: failure to persist metadata should not block ingestion.
let rejection_key = key_codec::rejected_record_key(skip_offset);
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let rejection_json = format!(
r#"{{"offset":{},"record_type":"{:?}","reason":"{}","timestamp":{}}}"#,
skip_offset,
record_type,
e.to_string().replace('"', "'"),
now
);
if let Err(store_err) =
self.store.put(&rejection_key, rejection_json.as_bytes()).await
{
warn!(
offset = skip_offset,
error = %store_err,
"Failed to store rejection metadata"
);
}
metrics::counter!(
"stemedb_ingest_records_skipped_total",
"reason" => e.to_string()
)
.increment(1);
}
// Return the error so the run loop can log it.
// For permanent errors the cursor has already advanced;
// for transient errors the cursor is unchanged (will retry).
return Err(e);
}
let prev_offset = self.current_offset;
@ -170,6 +236,16 @@ impl<S: KVStore + 'static> IngestWorker<S> {
// This enables O(1) lookup of "which assertions cite this source?"
self.index_store.add_to_source_index(&assertion.source_hash, &assertion_hash).await?;
// Write feed index: \x00FEED:{inverted_ts}:{hash_hex} -> subject
// Uses server clock (not assertion.timestamp) for ingestion ordering.
// This separates "when the claim was made" from "when the system learned about it".
let ingested_at = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let feed_idx_key = key_codec::feed_key(ingested_at, &hash_hex);
self.store.put(&feed_idx_key, assertion.subject.as_bytes()).await?;
// Insert into vector index if present and assertion has a vector
if let (Some(ref vector_index), Some(ref vector)) = (&self.vector_index, &assertion.vector)
{
@ -282,6 +358,17 @@ impl<S: KVStore + 'static> IngestWorker<S> {
)));
}
// Validate narrative length
if let Some(ref narrative) = assertion.narrative {
if narrative.len() > stemedb_core::limits::MAX_NARRATIVE_LEN {
return Err(IngestError::InputValidation(format!(
"narrative exceeds {} bytes (got {})",
stemedb_core::limits::MAX_NARRATIVE_LEN,
narrative.len()
)));
}
}
// Validate timestamp: reject if more than 1 hour in future (clock skew protection)
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
@ -300,109 +387,19 @@ impl<S: KVStore + 'static> IngestWorker<S> {
/// Verify all Ed25519 signatures on an assertion.
///
/// Supports two signature versions:
/// - Version 1 (legacy): signs `"{subject}:{predicate}"` - only protects those fields
/// - Version 2 (enterprise): signs the BLAKE3 content hash - protects ALL fields
///
/// For v2 signatures, the content hash is computed from the assertion with
/// empty signatures (canonical form), so tampering with any field except
/// signatures will invalidate the signature.
///
/// All signatures must be valid for the assertion to be accepted.
/// Delegates to `stemedb_core::signing::verify_assertion_signatures` which
/// is the single source of truth for signature verification logic, shared
/// between the API handler (fail fast) and this worker (defense in depth).
fn verify_assertion_signatures(&self, assertion: &Assertion) -> Result<()> {
if assertion.signatures.is_empty() {
signing::verify_assertion_signatures(assertion).map_err(|e| {
warn!(
subject = %assertion.subject,
predicate = %assertion.predicate,
"Assertion has no signatures"
error = %e,
"Signature verification failed"
);
return Err(IngestError::InvalidSignature(
"Assertion must have at least one signature".to_string(),
));
}
// Pre-compute v1 message (subject:predicate) - only used if v1 signatures exist
let v1_message = format!("{}:{}", assertion.subject, assertion.predicate);
// Pre-compute v2 content hash using the shared utility from stemedb-core.
// This must match exactly what the signing code uses in compute_content_hash_v2().
// The hash covers: subject, predicate, object, source_hash, source_class, confidence, timestamp.
let v2_content_hash: Option<[u8; 32]> =
if assertion.signatures.iter().any(|s| s.version == 2) {
// Debug: show exact number format for comparison with signing
let object_str = match &assertion.object {
stemedb_core::types::ObjectValue::Number(n) => format!("Number({:.17})", n),
other => format!("{:?}", other),
};
let confidence_str = format!("{:.17}", assertion.confidence);
let hash = compute_content_hash_v2(assertion);
debug!(
subject = %assertion.subject,
predicate = %assertion.predicate,
object = %object_str,
source_hash = %hex::encode(assertion.source_hash),
source_class = ?assertion.source_class,
confidence = %confidence_str,
timestamp = %assertion.timestamp,
content_hash = %hex::encode(hash),
"Computed v2 content hash for verification"
);
Some(hash)
} else {
None
};
for (idx, sig_entry) in assertion.signatures.iter().enumerate() {
// Determine which message was signed based on version
let message_bytes: &[u8] = match sig_entry.version {
1 => {
// v1 (legacy): signs "{subject}:{predicate}"
v1_message.as_bytes()
}
2 => {
// v2 (enterprise): signs the content hash computed by compute_content_hash_v2
v2_content_hash.as_ref().ok_or_else(|| {
IngestError::InvalidSignature(
"v2 signature present but v2_content_hash was not computed".to_string(),
)
})?
}
v => {
return Err(IngestError::InvalidSignature(format!(
"Unknown signature version {} for signature {}",
v, idx
)));
}
};
// Reconstruct the verifying key from the stored agent_id
let verifying_key = VerifyingKey::from_bytes(&sig_entry.agent_id).map_err(|e| {
IngestError::InvalidSignature(format!(
"Invalid public key for signature {}: {}",
idx, e
))
})?;
// Reconstruct the signature
let signature = Signature::from_bytes(&sig_entry.signature);
// Verify the signature
verifying_key.verify(message_bytes, &signature).map_err(|e| {
IngestError::InvalidSignature(format!(
"Signature {} (v{}) failed verification: {}",
idx, sig_entry.version, e
))
})?;
debug!(
agent_id = %hex::encode(&sig_entry.agent_id[..8]),
signature_idx = idx,
version = sig_entry.version,
"Signature verified"
);
}
Ok(())
IngestError::InvalidSignature(e.to_string())
})
}
/// Ingest a vote into the KV store via VoteStore.

View File

@ -3,7 +3,6 @@
//! Contains the continuous ingestion loop that tails the WAL.
use super::IngestWorker;
use crate::error::IngestError;
use std::sync::atomic::Ordering;
use std::time::Duration;
use stemedb_storage::KVStore;
@ -72,18 +71,17 @@ impl<S: KVStore + 'static> IngestWorker<S> {
debug!("Error during shutdown (expected): {:?}", e);
break;
}
match &e {
IngestError::InputValidation(msg) => {
warn!("Rejected invalid input: {}", msg);
}
IngestError::InvalidSignature(msg) => {
warn!("Rejected invalid signature: {}", msg);
}
_ => {
error!("Ingestion error: {:?}", e);
}
if e.is_retryable() {
// Transient error: back off and retry the same record
error!("Transient ingestion error (will retry): {:?}", e);
tokio::time::sleep(Duration::from_secs(1)).await;
} else {
// Permanent error: step() already advanced the cursor past
// the poison record, so continue immediately to process the
// next record without sleeping.
warn!("Permanent ingestion error (record skipped): {}", e);
}
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
}

View File

@ -25,6 +25,7 @@ async fn test_rejects_invalid_signature() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -83,6 +84,7 @@ async fn test_rejects_unsigned_assertion() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![], // No signatures!
confidence: 0.95,
@ -112,6 +114,88 @@ async fn test_rejects_unsigned_assertion() {
);
}
/// Test: a record with an invalid signature is skipped instead of stalling ingestion.
///
/// Core regression test for the "assert returns 201 but data not queryable" bug:
/// previously an invalid-signature record made the IngestWorker retry the same
/// offset forever, so nothing behind it was ever processed.
///
/// The WAL is seeded with a badly-signed record followed by a valid one. The first
/// `step()` must fail with `InvalidSignature`; the second must succeed, leaving
/// exactly one stored assertion and exactly one rejected-record entry.
#[tokio::test]
async fn test_invalid_signature_skips_and_continues() {
    use stemedb_storage::KVStore;

    let tmp = tempdir().expect("Failed to create temp dir");
    let wal_path = tmp.path().join("wal");
    let db_path = tmp.path().join("db");

    // Record 1 (poison): carries a signature entry that cannot verify.
    let forged_signature = SignatureEntry {
        version: 1,
        agent_id: [1u8; 32],  // Invalid Ed25519 public key
        signature: [2u8; 64], // Invalid signature
        timestamp: 1000,
    };
    let poison = Assertion {
        subject: "Bad".to_string(),
        predicate: "poison".to_string(),
        object: ObjectValue::Text("should be skipped".to_string()),
        parent_hash: None,
        source_hash: [0u8; 32],
        source_class: SourceClass::Expert,
        visual_hash: None,
        epoch: None,
        source_metadata: None,
        narrative: None,
        lifecycle: LifecycleStage::Proposed,
        signatures: vec![forged_signature],
        confidence: 0.95,
        timestamp: 1000,
        hlc_timestamp: HlcTimestamp::default(),
        vector: None,
    };

    // Record 2: properly signed; must be ingested once record 1 is skipped.
    let valid = create_signed_assertion("Good", "valid");

    let mut wal = Journal::open(&wal_path).expect("Failed to open journal");
    let store = Arc::new(HybridStore::open(&db_path).expect("Failed to open store"));

    // Append in WAL order: poison first, valid second.
    let poison_bytes = serialize_assertion(&poison).expect("ser");
    wal.append(poison_bytes).expect("append bad");
    let valid_bytes = serialize_assertion(&valid).expect("ser");
    wal.append(valid_bytes).expect("append good");

    let mut worker = IngestWorker::new(Arc::new(Mutex::new(wal)), Arc::clone(&store))
        .await
        .expect("Failed to create worker");

    // Step 1: rejected with InvalidSignature, but the cursor moves past the poison record.
    let first = worker.step().await;
    assert!(first.is_err(), "Should reject invalid signature");
    let first_err = first.unwrap_err();
    assert!(matches!(first_err, IngestError::InvalidSignature(_)), "Should be InvalidSignature");

    // Step 2: the valid record is now reachable and is ingested.
    let second = worker.step().await;
    assert!(
        second.is_ok(),
        "Should process valid record after skipping poison, got: {:?}",
        second
    );
    let bytes_read = second.expect("step 2");
    assert!(bytes_read > 0, "Should have read bytes from the valid record");

    // Only the valid assertion may have been counted.
    let counter_key = key_codec::assertion_count_key();
    let counter_raw = store.get(&counter_key).await.expect("get").expect("should have count");
    let counter_bytes: [u8; 8] = counter_raw.try_into().expect("8 bytes");
    let stored = u64::from_le_bytes(counter_bytes);
    assert_eq!(stored, 1, "Exactly one assertion should be stored (the good one)");

    // The skipped record must have left rejection metadata behind.
    let rejected_scan = key_codec::rejected_records_scan_prefix();
    let rejected_entries = store.scan_prefix(&rejected_scan).await.expect("scan rejected");
    assert_eq!(rejected_entries.len(), 1, "Should have exactly one rejected record entry");
}
/// Test: Multi-signature assertions require all signatures to be valid.
#[tokio::test]
async fn test_multisig_all_must_be_valid() {
@ -136,6 +220,7 @@ async fn test_multisig_all_must_be_valid() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![
// Valid signature

View File

@ -29,6 +29,7 @@ async fn test_rejects_high_confidence() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -86,6 +87,7 @@ async fn test_rejects_negative_confidence() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -213,6 +215,7 @@ async fn test_rejects_oversized_subject() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -273,6 +276,7 @@ async fn test_rejects_oversized_predicate() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -335,6 +339,7 @@ async fn test_accepts_exact_max_subject_length() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -393,6 +398,7 @@ async fn test_accepts_exact_max_predicate_length() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -446,6 +452,7 @@ async fn test_rejects_nan_confidence() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,

View File

@ -29,6 +29,7 @@ async fn test_rejects_infinite_confidence() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -172,6 +173,7 @@ async fn test_rejects_future_timestamp() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -237,6 +239,7 @@ async fn test_accepts_near_future_timestamp() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -287,6 +290,7 @@ async fn test_accepts_zero_confidence() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,
@ -337,6 +341,7 @@ async fn test_accepts_one_confidence() {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Proposed,
signatures: vec![SignatureEntry {
version: 1,

View File

@ -24,6 +24,7 @@ pub fn assertion_to_request(assertion: &Assertion) -> CreateAssertionRequest {
.source_metadata
.as_ref()
.map(|b| String::from_utf8_lossy(b).into_owned()),
narrative: assertion.narrative.clone(),
// Include timestamps for v2 signature verification
timestamp: Some(assertion.timestamp),
hlc_timestamp: Some(HlcTimestampDto {
@ -94,6 +95,7 @@ mod tests {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle: LifecycleStage::Approved,
signatures: vec![SignatureEntry {
agent_id: [1u8; 32],

View File

@ -41,6 +41,10 @@ pub struct CreateAssertionRequest {
#[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
#[serde(skip_serializing_if = "Option::is_none")]
pub narrative: Option<String>,
/// Unix timestamp when the assertion was created.
/// Required for v2 signatures to verify correctly.
#[serde(skip_serializing_if = "Option::is_none")]

View File

@ -66,6 +66,10 @@ pub struct AssertionDto {
/// Structured source metadata as a JSON string (optional).
#[serde(skip_serializing_if = "Option::is_none")]
pub source_metadata: Option<String>,
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
#[serde(skip_serializing_if = "Option::is_none")]
pub narrative: Option<String>,
}
/// Response from a query operation.

View File

@ -233,6 +233,7 @@ impl MedicalClaim {
visual_hash: None,
epoch: None,
source_metadata,
narrative: None,
lifecycle,
signatures: Vec::new(),
confidence: self.confidence,

View File

@ -236,9 +236,12 @@ impl<S: KVStore + 'static> QueryEngine<S> {
Ok(changes)
}
/// Deserialize an assertion using the canonical serde module.
/// Deserialize an assertion with backward compatibility.
///
/// Tries current format first, then falls back to legacy (pre-narrative)
/// format for assertions serialized before the schema change.
pub(super) fn deserialize_assertion(&self, data: &[u8]) -> Result<Assertion> {
stemedb_core::serde::deserialize(data)
stemedb_core::serde::deserialize_assertion_compat(data)
.map_err(|e| QueryError::Deserialization(e.to_string()))
}

View File

@ -362,7 +362,7 @@ impl<S: KVStore + 'static> Materializer<S> {
for hash in hash_list {
let key = key_codec::assertion_key(subject, &hex::encode(hash));
if let Some(data) = self.store.get(&key).await? {
match stemedb_core::serde::deserialize::<Assertion>(&data) {
match stemedb_core::serde::deserialize_assertion_compat(&data) {
Ok(assertion) => candidates.push(assertion),
Err(e) => {
debug!(

View File

@ -57,6 +57,7 @@ impl Agent {
visual_hash: None,
epoch: None,
source_metadata: None,
narrative: None,
lifecycle,
signatures: vec![SignatureEntry {
agent_id: self.verifying_key.to_bytes(),

View File

@ -106,6 +106,26 @@ impl HybridStore {
Ok(Self { fjall, redb, _temp_dir: Some(temp_dir) })
}
/// Collect every assertion key-value pair held in the Fjall backend.
///
/// Runs a full Fjall scan (empty prefix) and keeps only the entries whose key
/// tag, as returned by `key_codec::extract_tag`, starts with `H:` (assertion
/// data). Used by the admin rebuild-indexes endpoint to reconstruct missing
/// Redb secondary indexes.
///
/// Returns `Vec<(key, value)>` where keys are `{subject}\x00H:{hash_hex}`.
///
/// # Errors
///
/// Propagates any error returned by the underlying Fjall scan.
#[instrument(skip_all)]
pub async fn scan_fjall_assertions(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
    Ok(self
        .fjall
        .scan_prefix(b"")
        .await?
        .into_iter()
        .filter(|(key, _)| key_codec::extract_tag(key).starts_with(b"H:"))
        .collect())
}
}
#[async_trait]

View File

@ -113,3 +113,33 @@ pub fn assertion_count_key() -> Vec<u8> {
pub fn trust_rank_scan_prefix() -> Vec<u8> {
global_key(b"TRUST:", b"")
}
/// Rejected WAL record key: `\x00REJECTED:{offset}`
///
/// Key under which metadata is stored for a WAL record that the IngestWorker
/// permanently skipped (invalid signatures, validation failures, etc.).
///
/// `offset` is rendered as plain, unpadded decimal text, so the byte order of
/// these keys is lexicographic on the digits rather than numeric on the offset.
pub fn rejected_record_key(offset: u64) -> Vec<u8> {
    let offset_text = offset.to_string();
    global_key(b"REJECTED:", offset_text.as_bytes())
}
/// Rejected records scan prefix: `\x00REJECTED:`
///
/// Uses the same `REJECTED:` tag as `rejected_record_key` with an empty suffix,
/// for prefix scans over all rejected-record entries.
pub fn rejected_records_scan_prefix() -> Vec<u8> {
global_key(b"REJECTED:", b"")
}
/// Feed index key: `\x00FEED:{inverted_ts_hex}:{hash_hex}`
///
/// Uses inverted timestamp (`u64::MAX - ingested_at`) so lexicographic
/// scan order = reverse chronological (newest first).
/// Value stores the subject so the feed handler can construct the
/// assertion key without a reverse lookup.
pub fn feed_key(ingested_at: u64, hash_hex: &str) -> Vec<u8> {
let inverted = u64::MAX - ingested_at;
let suffix = format!("{}:{}", hex::encode(inverted.to_be_bytes()), hash_hex);
global_key(b"FEED:", suffix.as_bytes())
}
/// Feed index scan prefix: `\x00FEED:`
///
/// Uses the same `FEED:` tag as `feed_key` with an empty suffix, for prefix
/// scans over all feed index entries.
pub fn feed_scan_prefix() -> Vec<u8> {
global_key(b"FEED:", b"")
}

View File

@ -57,10 +57,10 @@ pub use subject_keys::{
// Global keys
pub use global_keys::{
assertion_count_key, audit_agent_index_key, audit_agent_prefix, audit_key, audit_scan_prefix,
cursor_key, epoch_key, escalation_key, escalation_scan_prefix, gs_verified_key, quota_key,
quota_limit_key, superseded_key, supersession_index_key, supersession_index_prefix,
supersession_key, trust_pack_key, trust_pack_scan_prefix, trust_rank_key,
trust_rank_scan_prefix,
cursor_key, epoch_key, escalation_key, escalation_scan_prefix, feed_key, feed_scan_prefix,
gs_verified_key, quota_key, quota_limit_key, rejected_record_key, rejected_records_scan_prefix,
superseded_key, supersession_index_key, supersession_index_prefix, supersession_key,
trust_pack_key, trust_pack_scan_prefix, trust_rank_key, trust_rank_scan_prefix,
};
// Index keys

View File

@ -229,3 +229,45 @@ fn test_global_keys_sort_first() {
let subject = assertion_key("Apple", "abc");
assert!(global < subject, "Global keys should sort before subject keys");
}
/// A later ingestion timestamp must produce a lexicographically smaller feed key.
#[test]
fn test_feed_key_newest_first_ordering() {
    let (older, newer) = (feed_key(1000, "aaaa"), feed_key(2000, "bbbb"));
    // The timestamp is inverted, so the newer entry comes first in a forward scan.
    assert!(newer < older, "Newer feed keys should sort before older ones");
}
/// With equal timestamps, feed keys are ordered by their `hash_hex` component.
#[test]
fn test_feed_key_same_timestamp_tiebreak() {
    let low_hash = feed_key(1000, "aaaa");
    let high_hash = feed_key(1000, "zzzz");
    // Identical timestamp prefix: the hash decides the order.
    assert!(low_hash < high_hash);
}
/// Every feed key must be reachable through the feed scan prefix.
#[test]
fn test_feed_key_starts_with_scan_prefix() {
    let scan_prefix = feed_scan_prefix();
    let key = feed_key(1000, "abc123def456");
    let reachable = key.starts_with(&scan_prefix);
    assert!(reachable, "Feed key should start with feed scan prefix");
}
/// Pins the exact byte layout of a feed key: tag, inverted timestamp, `:`, hash.
#[test]
fn test_feed_key_format() {
    let key = feed_key(0, "deadbeef");

    // \x00FEED: prefix (6 bytes)
    let (tag, rest) = key.split_at(6);
    assert_eq!(tag, b"\x00FEED:");

    // With ingested_at=0, inverted = u64::MAX, hex = "ffffffffffffffff"
    let (inverted_ts, rest) = rest.split_at(16);
    assert_eq!(inverted_ts, b"ffffffffffffffff");

    // Separator, then everything after it is the hash_hex
    let (separator, hash_hex) = rest.split_at(1);
    assert_eq!(separator, b":");
    assert_eq!(hash_hex, b"deadbeef");
}
/// The feed scan prefix is exactly the global-key marker followed by `FEED:`.
#[test]
fn test_feed_scan_prefix() {
    assert_eq!(feed_scan_prefix(), b"\x00FEED:");
}

View File

@ -44,6 +44,16 @@ where
stemedb_core::serde::deserialize(data).map_err(|e| StorageError::Serialization(e.to_string()))
}
/// Deserialize a `SourceRecord`, accepting data written in the pre-content layout.
///
/// Thin wrapper over `stemedb_core::serde::deserialize_source_record_compat`.
///
/// # Errors
///
/// Any deserialization failure is converted to [`StorageError::Serialization`]
/// carrying the underlying error's message.
pub fn deserialize_source_record_compat(
    data: &[u8],
) -> Result<stemedb_core::types::SourceRecord> {
    match stemedb_core::serde::deserialize_source_record_compat(data) {
        Ok(record) => Ok(record),
        Err(err) => Err(StorageError::Serialization(err.to_string())),
    }
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -7,7 +7,7 @@ use tracing::{debug, instrument};
use super::SourceRegistry;
use crate::error::{Result, StorageError};
use crate::key_codec;
use crate::serde_helpers::{deserialize, serialize};
use crate::serde_helpers::{deserialize_source_record_compat, serialize};
use crate::traits::KVStore;
/// Generic SourceRegistry implementation backed by any KVStore.
@ -80,7 +80,7 @@ impl<S: KVStore + 'static> SourceRegistry for GenericSourceRegistry<S> {
match self.store.get(&key).await? {
Some(data) => {
let record: SourceRecord = deserialize(&data)?;
let record: SourceRecord = deserialize_source_record_compat(&data)?;
Ok(Some(record))
}
None => Ok(None),

View File

@ -9,8 +9,8 @@ use crate::error::Result;
use metrics::{counter, gauge};
use std::collections::HashSet;
use std::sync::atomic::Ordering;
use stemedb_core::serde::deserialize;
use stemedb_core::types::{detect_clock_skew, Assertion, HlcTimestamp};
use stemedb_core::serde::deserialize_assertion_compat;
use stemedb_core::types::{detect_clock_skew, HlcTimestamp};
use stemedb_rpc::proto::{FetchRequest, GetLeavesRequest, RootExchangeRequest};
use stemedb_storage::crdt::AssertionTransfer;
use stemedb_storage::KVStore;
@ -201,7 +201,7 @@ impl<S: KVStore + 'static> AntiEntropyWorker<S> {
}
// Extract subject and HLC timestamp from the assertion data
let (subject, remote_hlc) = match deserialize::<Assertion>(&transfer.data) {
let (subject, remote_hlc) = match deserialize_assertion_compat(&transfer.data) {
Ok(assertion) => (assertion.subject.clone(), assertion.hlc_timestamp),
Err(e) => {
warn!(

View File

@ -1,6 +1,6 @@
# StemeDB Data Structures
> **Last Updated:** 2026-01-31
> **Last Updated:** 2026-02-19
> **Source:** `crates/stemedb-core/src/types.rs`
This document describes the core data structures in StemeDB (Episteme). These types form the foundation of the "Git for Truth" knowledge graph.
@ -417,6 +417,50 @@ pub struct TrustPack {
---
## The SourceRecord (Source Registry)
The Source Registry maps content-addressed source hashes to human-readable metadata. This enables the dashboard to show "FDA Approval Letter for Wegovy" instead of a raw BLAKE3 hash.
```rust
pub struct SourceRecord {
/// Content-addressed hash of the source (BLAKE3, 32 bytes).
pub hash: [u8; 32],
/// Human-readable label.
pub label: String,
/// Optional URL where the source can be accessed.
pub url: Option<String>,
/// Authority tier (0-5), matching SourceClass.
pub tier: u8,
/// Current status (Active, Deprecated, Quarantined).
pub status: SourceStatus,
/// HLC timestamp when the record was created.
pub created_at: u64,
/// HLC timestamp of the last update.
pub updated_at: u64,
/// Optional curator notes about the source.
pub notes: Option<String>,
/// Optional full-text content of the source document.
/// Populated by pipelines that extract text from PDFs.
/// Max size: 1 MB (MAX_SOURCE_CONTENT_LEN).
pub content: Option<String>,
}
```
**Key Points:**
- **Status lifecycle:** Active → Deprecated or Quarantined (curator-driven)
- **Content field:** Stores extracted document text (e.g., from `pdftotext`). Stripped from list responses (`GET /v1/sources`) to avoid returning megabytes; included in single-source responses (`GET /v1/sources/{hash}`)
- **rkyv compat:** Uses `deserialize_source_record_compat()` for backward compatibility with data written before the `content` field was added
---
## Serialization
All types use `rkyv` for zero-copy deserialization:
@ -433,6 +477,17 @@ let assertion: Assertion = deserialize(&bytes)?;
**Critical Rule**: Never use raw `AllocSerializer` in production code. Always use `stemedb_core::serde::{serialize, deserialize}`.
### Schema Evolution (rkyv Compat)
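rkyv does **not** support schema evolution: once a field is added to a struct, data serialized before the change can no longer be deserialized with the new struct. The solution is a legacy compat pattern: keep a `Legacy*` struct that mirrors the old layout, and route reads through a compat function that tries the current format first and falls back to the legacy format: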
| Type | Compat Function | Legacy Struct |
|------|----------------|---------------|
| `Assertion` | `deserialize_assertion_compat()` | `LegacyAssertion` (pre-`narrative`) |
| `SourceRecord` | `deserialize_source_record_compat()` | `LegacySourceRecord` (pre-`content`) |
All assertion deserialization should use `deserialize_assertion_compat()`. All source record deserialization should use `deserialize_source_record_compat()`. When adding fields to rkyv structs in the future, always add a legacy compat deserializer following this pattern.
---
## Relationship Diagram

View File

@ -45,6 +45,9 @@ type Assertion struct {
// Semantic embedding vector (optional)
Vector []float32 `json:"vector,omitempty"`
// Free-text narrative explaining methodology, limitations, bias, and caveats (optional)
Narrative *string `json:"narrative,omitempty"`
}
// AssertionBuilder provides a fluent API for building assertions.
@ -150,6 +153,12 @@ func (b *AssertionBuilder) WithEpoch(epochHex string) *AssertionBuilder {
return b
}
// WithNarrative attaches a free-text narrative (methodology, limitations, caveats)
// to the assertion being built and returns the builder so calls can be chained.
func (b *AssertionBuilder) WithNarrative(narrative string) *AssertionBuilder {
	text := narrative
	b.assertion.Narrative = &text
	return b
}
// WithVector sets the semantic embedding vector.
func (b *AssertionBuilder) WithVector(vector []float32) *AssertionBuilder {
b.assertion.Vector = vector

View File

@ -174,6 +174,9 @@ type AssertionResponse struct {
// Semantic embedding vector (optional)
Vector []float32 `json:"vector,omitempty"`
// Free-text narrative explaining methodology, limitations, bias, and caveats (optional)
Narrative *string `json:"narrative,omitempty"`
}
// CreateResponse represents the response from a create operation.