feat: add source content to source registry, signed assertions, feed endpoint, dashboard enhancements
- Add `content: Option<String>` to SourceRecord with rkyv schema evolution (LegacySourceRecord compat deserializer for backward compatibility)
- Add MAX_SOURCE_CONTENT_LEN (1MB) limit with API validation
- Strip content from list responses, include in single-source GET
- Update Go SDK RegisterSourceRequest with Content field
- FCM pipeline extracts PDF text via pdftotext and passes to registration
- Dashboard impact panel fetches and displays source content with expand/collapse
- Add feed endpoint, dashboard feed panel, and signed assertion support
- Update data-structures.md, API docs, and storage docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
58594bc7b9
commit
ad07a75d0a
@ -1,6 +1,6 @@
|
||||
# API Surface
|
||||
|
||||
**Last Updated:** 2026-02-03
|
||||
**Last Updated:** 2026-02-19
|
||||
**Confidence:** High
|
||||
|
||||
## Summary
|
||||
@ -41,10 +41,10 @@ Episteme exposes an HTTP API via `axum` with auto-generated OpenAPI 3.1 document
|
||||
| `GET` | `/metrics` | Prometheus metrics (Phase 8B) | ✅ Implemented |
|
||||
| `GET` | `/api-docs/openapi.json` | OpenAPI 3.1 spec | ✅ Implemented |
|
||||
| `GET` | `/swagger-ui` | Interactive API docs | ✅ Implemented |
|
||||
| `POST` | `/v1/sources` | Register source with human-readable metadata | ✅ Implemented |
|
||||
| `GET` | `/v1/sources/{hash}` | Get source record by hash | ✅ Implemented |
|
||||
| `POST` | `/v1/sources` | Register source with metadata and optional content | ✅ Implemented |
|
||||
| `GET` | `/v1/sources/{hash}` | Get source record by hash (includes content) | ✅ Implemented |
|
||||
| `PATCH` | `/v1/sources/{hash}/status` | Update source status (deprecate/quarantine) | ✅ Implemented |
|
||||
| `GET` | `/v1/sources` | List/search sources (filter by tier or query) | ✅ Implemented |
|
||||
| `GET` | `/v1/sources` | List/search sources (content stripped for performance) | ✅ Implemented |
|
||||
|
||||
### Cluster Gateway Endpoints (stemedb-cluster)
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# SDK - Go Client Libraries
|
||||
|
||||
**Last Updated:** 2026-02-01
|
||||
**Last Updated:** 2026-02-19
|
||||
**Confidence:** High
|
||||
|
||||
## Summary
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Storage
|
||||
|
||||
**Last Updated:** 2026-01-31
|
||||
**Last Updated:** 2026-02-19
|
||||
**Confidence:** High
|
||||
|
||||
## Summary
|
||||
@ -91,6 +91,16 @@ let value: MyType = deserialize(&bytes)?;
|
||||
|
||||
This provides unified error handling across all store implementations (VoteStore, IndexStore, TrustRankStore, AuditStore, TrustPackStore, QuotaStore).
|
||||
|
||||
For types with schema evolution (rkyv compat), use the dedicated compat functions:
|
||||
|
||||
```rust
|
||||
use crate::serde_helpers::deserialize_source_record_compat;
|
||||
|
||||
let record: SourceRecord = deserialize_source_record_compat(&bytes)?;
|
||||
```
|
||||
|
||||
Available compat deserializers: `deserialize_source_record_compat` (SourceRecord). For assertions, use `stemedb_core::serde::deserialize_assertion_compat` directly.
|
||||
|
||||
## Write Path
|
||||
|
||||
```
|
||||
|
||||
@ -146,6 +146,7 @@ fn claim_to_assertion_with_tier(
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&source_metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![signature_entry],
|
||||
confidence: claim.confidence,
|
||||
@ -235,6 +236,7 @@ pub fn authored_claim_to_assertion(
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&source_metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle,
|
||||
signatures: vec![signature_entry],
|
||||
confidence: 1.0, // Authored claims have full confidence
|
||||
|
||||
@ -79,7 +79,7 @@ impl StemeDBPatternStore {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let assertion = stemedb_core::serde::deserialize::<Assertion>(&bytes).map_err(|e| {
|
||||
let assertion = stemedb_core::serde::deserialize_assertion_compat(&bytes).map_err(|e| {
|
||||
AphoriaError::Storage(format!(
|
||||
"Failed to deserialize assertion {}: {}",
|
||||
hex::encode(hash),
|
||||
@ -389,6 +389,7 @@ impl PatternAggregator {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: Some(metadata_bytes),
|
||||
narrative: None,
|
||||
lifecycle: stemedb_core::types::LifecycleStage::Approved,
|
||||
signatures: vec![], // Bootstrap patterns are unsigned (no signing key available)
|
||||
confidence: 1.0, // Pattern aggregates are high confidence
|
||||
|
||||
@ -114,6 +114,7 @@ pub fn create_authoritative_assertion_with_metadata(
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![signature_entry],
|
||||
confidence: 1.0,
|
||||
@ -170,6 +171,7 @@ pub fn create_authoritative_assertion(
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&source_metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![signature_entry],
|
||||
confidence: 1.0,
|
||||
|
||||
@ -342,7 +342,7 @@ impl LocalEpisteme {
|
||||
|
||||
let assertion_key = stemedb_storage::key_codec::assertion_key(&subject, &hash_hex);
|
||||
self.store.get(&assertion_key).await.ok().flatten().and_then(|bytes| {
|
||||
stemedb_core::serde::deserialize::<Assertion>(&bytes)
|
||||
stemedb_core::serde::deserialize_assertion_compat(&bytes)
|
||||
.map_err(|e| warn!(hash = %hash_hex, error = %e, "Failed to deserialize"))
|
||||
.ok()
|
||||
})
|
||||
|
||||
@ -854,6 +854,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: Some(b"{\"file\":\"test.rs\"}".to_vec()),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![SignatureEntry {
|
||||
agent_id: [2u8; 32],
|
||||
|
||||
@ -438,6 +438,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&source_metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![],
|
||||
confidence: 1.0,
|
||||
|
||||
@ -255,6 +255,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&source_metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![],
|
||||
confidence: 1.0,
|
||||
|
||||
@ -109,6 +109,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&source_metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![],
|
||||
confidence: 1.0,
|
||||
|
||||
@ -47,7 +47,7 @@ impl ClaimCache {
|
||||
pub fn save(&self, claims: &[AuthoredClaim], remote_url: &str) -> Result<(), AphoriaError> {
|
||||
let now = SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.map_err(|e| AphoriaError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?
|
||||
.map_err(|e| AphoriaError::Io(std::io::Error::other(e)))?
|
||||
.as_secs();
|
||||
|
||||
let cache = ClaimCacheFile {
|
||||
|
||||
@ -179,9 +179,7 @@ impl RemoteClaimStore {
|
||||
}
|
||||
}
|
||||
|
||||
Err(last_error.unwrap_or_else(|| {
|
||||
AphoriaError::Hosted("Max retries exceeded".to_string())
|
||||
}))
|
||||
Err(last_error.unwrap_or_else(|| AphoriaError::Hosted("Max retries exceeded".to_string())))
|
||||
}
|
||||
|
||||
/// Perform the actual HTTP request.
|
||||
@ -211,8 +209,8 @@ impl RemoteClaimStore {
|
||||
http_request.call()
|
||||
};
|
||||
|
||||
let response = response
|
||||
.map_err(|e| AphoriaError::Hosted(format!("HTTP request failed: {e}")))?;
|
||||
let response =
|
||||
response.map_err(|e| AphoriaError::Hosted(format!("HTTP request failed: {e}")))?;
|
||||
|
||||
if response.status() >= 200 && response.status() < 300 {
|
||||
let body = response
|
||||
@ -230,8 +228,7 @@ impl ClaimStore for RemoteClaimStore {
|
||||
fn save_claim(&self, claim: &AuthoredClaim) -> Result<(), AphoriaError> {
|
||||
let request = CreateClaimRequest { claim: claim_to_dto(claim) };
|
||||
|
||||
let response: CreateClaimResponse =
|
||||
self.request("POST", "/v1/claims", Some(&request))?;
|
||||
let response: CreateClaimResponse = self.request("POST", "/v1/claims", Some(&request))?;
|
||||
|
||||
if response.stored {
|
||||
info!(claim_id = %claim.id, "Claim stored remotely");
|
||||
@ -324,10 +321,9 @@ impl RemoteClaimStore {
|
||||
warn!(operation, "Remote unreachable, using cached claims");
|
||||
fallback()
|
||||
}
|
||||
OfflineFallback::Fail => Err(AphoriaError::Hosted(format!(
|
||||
"{}: remote unreachable",
|
||||
operation
|
||||
))),
|
||||
OfflineFallback::Fail => {
|
||||
Err(AphoriaError::Hosted(format!("{}: remote unreachable", operation)))
|
||||
}
|
||||
OfflineFallback::Queue => {
|
||||
warn!(operation, "Remote unreachable, queue not implemented (using cache)");
|
||||
fallback()
|
||||
@ -425,14 +421,11 @@ fn is_network_error(err: &AphoriaError) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::config::types::hosted::SyncMode;
|
||||
use crate::SyncMode;
|
||||
|
||||
#[test]
|
||||
fn test_remote_store_requires_url() {
|
||||
let config = HostedConfig {
|
||||
url: None,
|
||||
..Default::default()
|
||||
};
|
||||
let config = HostedConfig { url: None, ..Default::default() };
|
||||
|
||||
let result = RemoteClaimStore::new(&config);
|
||||
assert!(result.is_err());
|
||||
@ -474,6 +467,7 @@ mod tests {
|
||||
let config = HostedConfig {
|
||||
url: Some("https://example.com".to_string()),
|
||||
project_id: Some("test-project".to_string()),
|
||||
team_id: None,
|
||||
api_key_env: "TEST_API_KEY".to_string(),
|
||||
sync_mode: SyncMode::RemoteOnly,
|
||||
offline_fallback: OfflineFallback::Skip,
|
||||
|
||||
@ -98,8 +98,8 @@ impl ReportFormatter for JsonReport {
|
||||
|
||||
// Add tier-aware verdict if available
|
||||
if let Some(ref tier_verdict) = conflict.tier_verdict {
|
||||
conflict_json["tier_verdict"] = serde_json::to_value(tier_verdict)
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
conflict_json["tier_verdict"] =
|
||||
serde_json::to_value(tier_verdict).unwrap_or(serde_json::Value::Null);
|
||||
}
|
||||
|
||||
// Add primary tier if available
|
||||
|
||||
@ -87,7 +87,9 @@ impl TierAwareVerdict {
|
||||
/// Returns a human-readable string describing the tier-aware verdict.
|
||||
pub fn display(&self) -> String {
|
||||
match self {
|
||||
TierAwareVerdict::SingleTier { tier_name, verdict, sources, max_confidence, .. } => {
|
||||
TierAwareVerdict::SingleTier {
|
||||
tier_name, verdict, sources, max_confidence, ..
|
||||
} => {
|
||||
format!(
|
||||
"{} {} - {} source{}, max confidence {:.2}",
|
||||
verdict.symbol(),
|
||||
@ -159,12 +161,7 @@ impl TierAwareVerdict {
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self::MultiTier {
|
||||
primary_tier,
|
||||
primary_verdict,
|
||||
tier_verdicts,
|
||||
conflict_score,
|
||||
}
|
||||
Self::MultiTier { primary_tier, primary_verdict, tier_verdicts, conflict_score }
|
||||
}
|
||||
}
|
||||
|
||||
@ -250,8 +247,7 @@ mod tests {
|
||||
},
|
||||
);
|
||||
|
||||
let verdict =
|
||||
TierAwareVerdict::from_multi_tier(&tier_breakdown, 1, Verdict::Block, 0.92);
|
||||
let verdict = TierAwareVerdict::from_multi_tier(&tier_breakdown, 1, Verdict::Block, 0.92);
|
||||
|
||||
assert_eq!(verdict.effective_verdict(), Verdict::Block);
|
||||
assert_eq!(verdict.primary_tier(), 1);
|
||||
|
||||
@ -235,11 +235,7 @@ impl fmt::Display for ConflictResult {
|
||||
writeln!(f, " {} {}", verdict_str, self.claim.concept_path)?;
|
||||
}
|
||||
|
||||
writeln!(
|
||||
f,
|
||||
" Concept: {}",
|
||||
self.claim.concept_path
|
||||
)?;
|
||||
writeln!(f, " Concept: {}", self.claim.concept_path)?;
|
||||
writeln!(
|
||||
f,
|
||||
" Your code: {} ({}: L{})",
|
||||
|
||||
@ -1,12 +1,23 @@
|
||||
import { Header } from "@/components/layout/header";
|
||||
import { LayeredQueryResults } from "@/components/layered";
|
||||
|
||||
export default function LayeredPage() {
|
||||
interface LayeredPageProps {
|
||||
searchParams: Promise<{ subject?: string; predicate?: string }>;
|
||||
}
|
||||
|
||||
export default async function LayeredPage({ searchParams }: LayeredPageProps) {
|
||||
const params = await searchParams;
|
||||
const initialSubject = params.subject;
|
||||
const initialPredicate = params.predicate;
|
||||
|
||||
return (
|
||||
<>
|
||||
<Header title="Layered Consensus" />
|
||||
<div className="p-6">
|
||||
<LayeredQueryResults />
|
||||
<LayeredQueryResults
|
||||
initialSubject={initialSubject}
|
||||
initialPredicate={initialPredicate}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
|
||||
@ -1,12 +1,26 @@
|
||||
"use client";
|
||||
|
||||
import { useSearchParams } from "next/navigation";
|
||||
import { Suspense } from "react";
|
||||
import { Header } from "@/components/layout/header";
|
||||
import { QueryResults } from "@/components/skeptic";
|
||||
|
||||
/**
 * Reads the optional `subject` and `predicate` query-string parameters and
 * forwards them to `QueryResults` as its initial values.
 *
 * A parameter that is absent from the URL is passed as `undefined` rather
 * than `null`.
 */
function SkepticContent() {
  const query = useSearchParams();
  // `get` returns null for a missing key; the props expect undefined instead.
  const readParam = (name: string) => query.get(name) ?? undefined;

  return (
    <QueryResults
      initialSubject={readParam("subject")}
      initialPredicate={readParam("predicate")}
    />
  );
}
|
||||
|
||||
export default function SkepticPage() {
|
||||
return (
|
||||
<>
|
||||
<Header title="Skeptic Query" />
|
||||
<div className="p-6">
|
||||
<QueryResults />
|
||||
<Suspense fallback={<div className="text-sm text-muted-foreground">Loading...</div>}>
|
||||
<SkepticContent />
|
||||
</Suspense>
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
|
||||
@ -29,14 +29,14 @@ export function AuditPanel({ initialFilters }: AuditPanelProps) {
|
||||
try {
|
||||
const client = new StemeDBClient();
|
||||
|
||||
// Convert time range to from/to timestamps
|
||||
// Convert time range to from/to timestamps (Unix seconds — backend uses seconds, not ms)
|
||||
let fromTs: number | undefined;
|
||||
let toTs: number | undefined;
|
||||
if (currentFilters.timeRange !== "all") {
|
||||
const now = Date.now();
|
||||
const rangeMs = TIME_RANGES_MS[currentFilters.timeRange as TimeRangeKey] ?? TIME_RANGES_MS["24h"];
|
||||
fromTs = now - rangeMs;
|
||||
toTs = now;
|
||||
const nowSecs = Math.floor(Date.now() / 1000);
|
||||
const rangeSecs = Math.floor((TIME_RANGES_MS[currentFilters.timeRange as TimeRangeKey] ?? TIME_RANGES_MS["24h"]) / 1000);
|
||||
fromTs = nowSecs - rangeSecs;
|
||||
toTs = nowSecs;
|
||||
}
|
||||
|
||||
const data = await client.auditQueries({
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { useState, useCallback } from "react";
|
||||
import Link from "next/link";
|
||||
import type { AuditEntry } from "@/lib/api/types";
|
||||
import { formatTime, formatDate } from "@/lib/format";
|
||||
import { ResultBadge } from "./result-badge";
|
||||
@ -10,6 +11,36 @@ interface AuditRowProps {
|
||||
entry: AuditEntry;
|
||||
}
|
||||
|
||||
/**
 * Button that shows the first 12 characters of a hash and copies the full
 * hash to the clipboard when clicked.
 *
 * After a successful copy the button text switches to "Copied!" for 1.5 s.
 * If the Clipboard API is unavailable or the write is rejected, the button
 * is left unchanged instead of throwing.
 *
 * @param hash  Full hash string to display (truncated) and copy (in full).
 * @param label Optional prefix for the tooltip, rendered as `label: hash`.
 */
function CopyableHash({ hash, label }: { hash: string; label?: string }) {
  const [copied, setCopied] = useState(false);

  const handleCopy = useCallback(
    (e: React.MouseEvent) => {
      // Do not let the click reach click handlers on ancestor elements.
      e.stopPropagation();

      // navigator.clipboard is undefined in insecure (non-HTTPS) contexts;
      // calling writeText on it would throw a TypeError from the handler.
      const clipboard =
        typeof navigator !== "undefined" ? navigator.clipboard : undefined;
      if (!clipboard) return;

      clipboard
        .writeText(hash)
        .then(() => {
          setCopied(true);
          setTimeout(() => setCopied(false), 1500);
        })
        .catch(() => {
          // Write rejected (e.g. permission denied): keep showing the hash
          // rather than surfacing an unhandled promise rejection.
        });
    },
    [hash]
  );

  return (
    <button
      type="button"
      onClick={handleCopy}
      title={label ? `${label}: ${hash}` : hash}
      className="font-mono text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
    >
      {copied ? (
        <span className="text-green-600 dark:text-green-400">Copied!</span>
      ) : (
        `${hash.slice(0, 12)}…`
      )}
    </button>
  );
}
|
||||
|
||||
export function AuditRow({ entry }: AuditRowProps) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
|
||||
@ -30,6 +61,15 @@ export function AuditRow({ entry }: AuditRowProps) {
|
||||
? `${entry.agent_id.slice(0, 8)}...`
|
||||
: "-";
|
||||
|
||||
// Build cross-navigation URLs when subject is present
|
||||
const hasSubject = Boolean(entry.params.subject);
|
||||
const crossNavParams = hasSubject
|
||||
? new URLSearchParams({
|
||||
subject: entry.params.subject!,
|
||||
...(entry.params.predicate ? { predicate: entry.params.predicate } : {}),
|
||||
}).toString()
|
||||
: null;
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`rounded-lg border border-border transition-colors hover:bg-muted/50 ${
|
||||
@ -84,7 +124,8 @@ export function AuditRow({ entry }: AuditRowProps) {
|
||||
{/* Expanded details */}
|
||||
{expanded && (
|
||||
<div className="px-4 pb-3 pt-0 border-t border-border mt-0">
|
||||
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2">
|
||||
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-3">
|
||||
{/* Metadata grid */}
|
||||
<div className="grid grid-cols-2 gap-2 text-xs">
|
||||
<div>
|
||||
<span className="text-muted-foreground">Query ID:</span>
|
||||
@ -107,16 +148,51 @@ export function AuditRow({ entry }: AuditRowProps) {
|
||||
<span className="ml-2">{entry.contributing_assertions.length}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Contributing assertions */}
|
||||
{entry.contributing_assertions.length > 0 && (
|
||||
<div className="text-xs">
|
||||
<span className="text-muted-foreground">Top contributors:</span>
|
||||
<div className="mt-1 space-y-1">
|
||||
{entry.contributing_assertions.slice(0, 3).map((ca) => (
|
||||
<div key={ca.assertion_hash} className="font-mono text-muted-foreground">
|
||||
{ca.assertion_hash.slice(0, 12)}... (weight: {(ca.weight * 100).toFixed(0)}%)
|
||||
</div>
|
||||
))}
|
||||
<div className="text-xs space-y-1">
|
||||
<div className="grid grid-cols-3 gap-2 text-muted-foreground font-medium pb-1 border-b border-border/50">
|
||||
<span>Assertion Hash</span>
|
||||
<span>Source Hash</span>
|
||||
<span>Lifecycle / Weight</span>
|
||||
</div>
|
||||
{entry.contributing_assertions.slice(0, 3).map((ca) => (
|
||||
<div
|
||||
key={ca.assertion_hash}
|
||||
className="grid grid-cols-3 gap-2 items-center py-0.5"
|
||||
>
|
||||
<CopyableHash hash={ca.assertion_hash} label="Assertion hash" />
|
||||
<CopyableHash hash={ca.source_hash} label="Source hash" />
|
||||
<span className="text-muted-foreground">
|
||||
<span className="px-1.5 py-0.5 rounded bg-muted text-foreground mr-1">
|
||||
{ca.lifecycle}
|
||||
</span>
|
||||
{(ca.weight * 100).toFixed(0)}%
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Cross-navigation links */}
|
||||
{hasSubject && crossNavParams && (
|
||||
<div
|
||||
className="flex items-center gap-3 pt-2 border-t border-border"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<Link
|
||||
href={`/skeptic?${crossNavParams}`}
|
||||
className="text-xs text-blue-600 dark:text-blue-400 hover:underline px-2 py-1 rounded bg-muted"
|
||||
>
|
||||
View in Skeptic →
|
||||
</Link>
|
||||
<Link
|
||||
href={`/layered?${crossNavParams}`}
|
||||
className="text-xs text-blue-600 dark:text-blue-400 hover:underline px-2 py-1 rounded bg-muted"
|
||||
>
|
||||
View in Layered →
|
||||
</Link>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import Link from "next/link";
|
||||
import type { AssertionObject } from "@/lib/api/types";
|
||||
import { formatRelativeTime, formatUnixDateTime } from "@/lib/format";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
@ -26,6 +27,10 @@ function formatValue(obj: { type: string; value: string | number | boolean }): s
|
||||
return str.length > 60 ? `${str.slice(0, 57)}...` : str;
|
||||
}
|
||||
|
||||
/**
 * Builds the `/skeptic` URL whose query string carries the entry's subject
 * and predicate, each percent-encoded with `encodeURIComponent`.
 */
function investigateHref(entry: AssertionObject): string {
  const subject = encodeURIComponent(entry.subject);
  const predicate = encodeURIComponent(entry.predicate);
  return "/skeptic?subject=" + subject + "&predicate=" + predicate;
}
|
||||
|
||||
export function FeedRow({ entry }: FeedRowProps) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
|
||||
@ -34,11 +39,13 @@ export function FeedRow({ entry }: FeedRowProps) {
|
||||
|
||||
return (
|
||||
<div
|
||||
className="rounded-lg border border-border transition-colors hover:bg-muted/50 cursor-pointer"
|
||||
onClick={() => setExpanded(!expanded)}
|
||||
className="rounded-lg border border-border transition-colors"
|
||||
>
|
||||
{/* Main row */}
|
||||
<div className="grid grid-cols-2 sm:grid-cols-5 gap-2 sm:gap-4 px-4 py-3 items-center">
|
||||
<div
|
||||
className="grid grid-cols-2 sm:grid-cols-5 gap-2 sm:gap-4 px-4 py-3 items-center cursor-pointer hover:bg-muted/50 rounded-t-lg"
|
||||
onClick={() => setExpanded(!expanded)}
|
||||
>
|
||||
{/* Time */}
|
||||
<div className="text-sm" title={formatUnixDateTime(entry.timestamp)}>
|
||||
<span className="font-medium">{formatRelativeTime(entry.timestamp)}</span>
|
||||
@ -66,20 +73,36 @@ export function FeedRow({ entry }: FeedRowProps) {
|
||||
<span className="text-foreground">{formatValue(entry.object)}</span>
|
||||
</div>
|
||||
|
||||
{/* Source Class */}
|
||||
{/* Source Class + Investigate icon */}
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<Badge variant="outline" className={cn("text-xs", badgeColor)}>
|
||||
{entry.source_class}
|
||||
</Badge>
|
||||
<span className="text-xs text-muted-foreground">
|
||||
{expanded ? "\u25B2" : "\u25BC"}
|
||||
</span>
|
||||
<div className="flex items-center gap-1">
|
||||
<Link
|
||||
href={investigateHref(entry)}
|
||||
className="text-muted-foreground hover:text-primary transition-colors p-1"
|
||||
title="Investigate in Skeptic"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<svg className="h-3.5 w-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
||||
<circle cx="11" cy="11" r="8" />
|
||||
<path d="m21 21-4.3-4.3" />
|
||||
</svg>
|
||||
</Link>
|
||||
<span className="text-xs text-muted-foreground">
|
||||
{expanded ? "\u25B2" : "\u25BC"}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Expanded details */}
|
||||
{expanded && (
|
||||
<div className="px-4 pb-3 pt-0 border-t border-border mt-0">
|
||||
<div
|
||||
className="px-4 pb-3 pt-0 border-t border-border mt-0"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<div className="bg-muted/50 rounded-md p-3 mt-3 space-y-2">
|
||||
<div className="grid grid-cols-2 gap-2 text-xs">
|
||||
<div>
|
||||
@ -116,6 +139,29 @@ export function FeedRow({ entry }: FeedRowProps) {
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Narrative */}
|
||||
{entry.narrative && (
|
||||
<div className="text-xs border-t border-border pt-2">
|
||||
<span className="text-muted-foreground">Narrative:</span>
|
||||
<p className="mt-1 text-foreground whitespace-pre-wrap leading-relaxed">
|
||||
{entry.narrative}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
{/* Investigate link */}
|
||||
<div className="border-t border-border pt-2 flex justify-end">
|
||||
<Link
|
||||
href={investigateHref(entry)}
|
||||
className="text-xs text-primary hover:underline inline-flex items-center gap-1"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
<svg className="h-3 w-3" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
||||
<circle cx="11" cy="11" r="8" />
|
||||
<path d="m21 21-4.3-4.3" />
|
||||
</svg>
|
||||
Investigate in Skeptic
|
||||
</Link>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useCallback } from "react";
|
||||
import { useState, useCallback, useEffect, useRef } from "react";
|
||||
import { StemeDBClient, type LayeredResponse, ApiError } from "@/lib/api";
|
||||
import { QueryForm, type QueryParams, EmptyState, ErrorState } from "@/components/skeptic";
|
||||
import { LayeredLoadingSkeleton } from "./layered-loading-skeleton";
|
||||
@ -12,8 +12,14 @@ type QueryState =
|
||||
| { status: "success"; data: LayeredResponse; params: QueryParams }
|
||||
| { status: "error"; error: string; params: QueryParams };
|
||||
|
||||
export function LayeredQueryResults() {
|
||||
interface LayeredQueryResultsProps {
|
||||
initialSubject?: string;
|
||||
initialPredicate?: string;
|
||||
}
|
||||
|
||||
export function LayeredQueryResults({ initialSubject, initialPredicate }: LayeredQueryResultsProps) {
|
||||
const [state, setState] = useState<QueryState>({ status: "idle" });
|
||||
const hasAutoQueried = useRef(false);
|
||||
|
||||
const executeQuery = useCallback(async (params: QueryParams) => {
|
||||
setState({ status: "loading", params });
|
||||
@ -33,6 +39,18 @@ export function LayeredQueryResults() {
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Auto-execute query when initial subject+predicate are provided (e.g., from audit trail links)
|
||||
useEffect(() => {
|
||||
if (initialSubject && initialPredicate && !hasAutoQueried.current) {
|
||||
hasAutoQueried.current = true;
|
||||
executeQuery({
|
||||
subject: initialSubject,
|
||||
predicate: initialPredicate,
|
||||
includeSourceMetadata: true,
|
||||
});
|
||||
}
|
||||
}, [initialSubject, initialPredicate, executeQuery]);
|
||||
|
||||
const handleRetry = useCallback(() => {
|
||||
if (state.status === "error") {
|
||||
executeQuery(state.params);
|
||||
@ -48,7 +66,12 @@ export function LayeredQueryResults() {
|
||||
<h2 className="text-lg font-medium text-card-foreground mb-4">
|
||||
Layered Consensus Query
|
||||
</h2>
|
||||
<QueryForm onSubmit={executeQuery} isLoading={isLoading} />
|
||||
<QueryForm
|
||||
onSubmit={executeQuery}
|
||||
isLoading={isLoading}
|
||||
initialSubject={initialSubject}
|
||||
initialPredicate={initialPredicate}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Results Section */}
|
||||
|
||||
@ -112,6 +112,11 @@ export function LayeredResultsView({ data }: LayeredResultsViewProps) {
|
||||
<p className="text-xs text-muted-foreground mt-1">
|
||||
Confidence: {(data.overall_winner.confidence * 100).toFixed(0)}%
|
||||
</p>
|
||||
{data.overall_winner.narrative && (
|
||||
<p className="text-sm text-muted-foreground mt-2 whitespace-pre-wrap leading-relaxed border-t border-primary/20 pt-2">
|
||||
{data.overall_winner.narrative}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import Link from "next/link";
|
||||
import { cn } from "@/lib/utils";
|
||||
import type { LayeredTier } from "@/lib/api/types";
|
||||
import type { LayeredTier, SourceRecordDto } from "@/lib/api/types";
|
||||
import { StemeDBClient } from "@/lib/api";
|
||||
import { SourceTierBadge, ConflictGauge, tierLabels, type SourceTier } from "@/components/skeptic";
|
||||
|
||||
function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested" {
|
||||
@ -10,6 +13,17 @@ function getConflictStatus(score: number): "Unanimous" | "Agreed" | "Contested"
|
||||
return "Contested";
|
||||
}
|
||||
|
||||
/**
 * Formats a Unix timestamp given in seconds as a date-time string in the
 * runtime's default locale: numeric year, short month, numeric day, and
 * two-digit hour and minute.
 */
function formatTimestamp(unixSeconds: number): string {
  const displayOptions: Intl.DateTimeFormatOptions = {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  };
  // Date expects milliseconds; passing `undefined` selects the default locale.
  const millis = unixSeconds * 1000;
  return new Date(millis).toLocaleString(undefined, displayOptions);
}
|
||||
|
||||
interface TierAccordionProps {
|
||||
tier: LayeredTier;
|
||||
isExpanded: boolean;
|
||||
@ -21,6 +35,20 @@ export function TierAccordion({ tier, isExpanded, onToggle }: TierAccordionProps
|
||||
const tierLabel = tierLabels[safeTier] || tier.source_class;
|
||||
const conflictStatus = getConflictStatus(tier.conflict_score);
|
||||
|
||||
// Source-registry record for the winning assertion's source, fetched lazily
// the first time the tier is expanded.
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(null);
const [sourceLoading, setSourceLoading] = useState(false);
// Hash of the last source whose lookup failed. Without it a failed lookup
// would leave `sourceRecord` null and flip `sourceLoading` back to false,
// re-triggering this effect and refetching endlessly while expanded.
const [failedSourceHash, setFailedSourceHash] = useState<string | null>(null);

useEffect(() => {
  const winner = tier.winner;
  if (!isExpanded || !winner || sourceRecord || sourceLoading) return;

  const sourceHash = winner.source_hash;
  // Do not retry a source that has already failed to load.
  if (failedSourceHash === sourceHash) return;

  setSourceLoading(true);
  const client = new StemeDBClient();
  client
    .getSource(sourceHash)
    .then(setSourceRecord)
    .catch(() => {
      // Lookup failed; the UI falls back to showing the truncated hash.
      setFailedSourceHash(sourceHash);
    })
    .finally(() => setSourceLoading(false));
}, [isExpanded, tier.winner, sourceRecord, sourceLoading, failedSourceHash]);
|
||||
|
||||
return (
|
||||
<div className="border border-border rounded-lg overflow-hidden">
|
||||
<button
|
||||
@ -99,12 +127,105 @@ export function TierAccordion({ tier, isExpanded, onToggle }: TierAccordionProps
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-muted-foreground">Source</span>
|
||||
<p className="font-mono text-xs text-foreground truncate" title={tier.winner.source_hash}>
|
||||
{tier.winner.source_hash.slice(0, 12)}...
|
||||
{sourceLoading ? (
|
||||
<p className="font-mono text-xs text-muted-foreground animate-pulse">
|
||||
Loading...
|
||||
</p>
|
||||
) : sourceRecord ? (
|
||||
<p className="font-medium text-foreground truncate" title={sourceRecord.label}>
|
||||
{sourceRecord.label}
|
||||
</p>
|
||||
) : (
|
||||
<p className="font-mono text-xs text-foreground truncate" title={tier.winner.source_hash}>
|
||||
{tier.winner.source_hash.slice(0, 12)}...
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Assertion timestamp */}
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
|
||||
<div className="col-span-2">
|
||||
<span className="text-muted-foreground">Asserted at</span>
|
||||
<p className="font-medium text-foreground">
|
||||
{formatTimestamp(tier.winner.timestamp)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Narrative */}
|
||||
{tier.winner.narrative && (
|
||||
<div className="text-sm">
|
||||
<span className="text-muted-foreground">Narrative</span>
|
||||
<p className="mt-1 text-foreground whitespace-pre-wrap leading-relaxed">
|
||||
{tier.winner.narrative}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Source registry details */}
|
||||
{sourceLoading && (
|
||||
<div className="rounded border border-border bg-muted/30 p-2">
|
||||
<p className="text-xs text-muted-foreground animate-pulse">
|
||||
Loading source details...
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
{!sourceLoading && sourceRecord && (
|
||||
<div className="rounded border border-border bg-muted/30 p-2 space-y-2">
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
|
||||
Source Registry
|
||||
</span>
|
||||
<Link
|
||||
href="/sources"
|
||||
className="text-xs text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
View in Source Registry →
|
||||
</Link>
|
||||
</div>
|
||||
<div className="grid grid-cols-2 gap-x-4 gap-y-1 text-xs">
|
||||
<div>
|
||||
<span className="text-muted-foreground">Label</span>
|
||||
<p className="font-medium text-foreground">{sourceRecord.label}</p>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-muted-foreground">Status</span>
|
||||
<p className="font-medium text-foreground capitalize">{sourceRecord.status}</p>
|
||||
</div>
|
||||
{sourceRecord.url && (
|
||||
<div className="col-span-2">
|
||||
<span className="text-muted-foreground">URL</span>
|
||||
<p className="font-mono text-foreground truncate" title={sourceRecord.url}>
|
||||
<a
|
||||
href={sourceRecord.url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
{sourceRecord.url}
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
{sourceRecord.notes && (
|
||||
<div className="col-span-2">
|
||||
<span className="text-muted-foreground">Notes</span>
|
||||
<p className="text-foreground leading-relaxed">{sourceRecord.notes}</p>
|
||||
</div>
|
||||
)}
|
||||
<div>
|
||||
<span className="text-muted-foreground">Created</span>
|
||||
<p className="text-foreground">{formatTimestamp(sourceRecord.created_at)}</p>
|
||||
</div>
|
||||
<div>
|
||||
<span className="text-muted-foreground">Updated</span>
|
||||
<p className="text-foreground">{formatTimestamp(sourceRecord.updated_at)}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Assertion hash */}
|
||||
<div className="pt-2 border-t border-border">
|
||||
<span className="text-xs text-muted-foreground">Assertion: </span>
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import Link from "next/link";
|
||||
import { cn } from "@/lib/utils";
|
||||
import type { ClaimSummary } from "@/lib/api/types";
|
||||
import type { ClaimSummary, SourceRecordDto } from "@/lib/api/types";
|
||||
import { StemeDBClient } from "@/lib/api";
|
||||
import { SourceTierBadge } from "./source-tier-badge";
|
||||
import { WeightBar } from "./weight-bar";
|
||||
import { HashDisplay } from "./hash-display";
|
||||
@ -33,6 +36,23 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
|
||||
: "active") as SourceStatus;
|
||||
const valueStr = formatValue(claim.value);
|
||||
|
||||
// Fetch the full source registry record the first time the row is expanded.
//
// `sourceFetchedFor` remembers which source hash has already been attempted.
// Without it, a failed lookup (the source is simply not registered) would leave
// `sourceRecord` null, flip `sourceLoading` back to false, and immediately
// trigger another fetch — an endless request loop while the row stays open.
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(null);
const [sourceLoading, setSourceLoading] = useState(false);
const [sourceFetchedFor, setSourceFetchedFor] = useState<string | null>(null);

useEffect(() => {
  const hash = claim.source.source_hash;
  // Nothing to do while collapsed, or when this hash was already attempted.
  if (!isExpanded || sourceFetchedFor === hash) return;

  // Set by the cleanup so a late response cannot update state after the row
  // was collapsed, unmounted, or pointed at a different source.
  let cancelled = false;
  setSourceLoading(true);
  // Drop any record left over from a previously displayed source hash.
  setSourceRecord(null);

  new StemeDBClient()
    .getSource(hash)
    .then((record) => {
      if (!cancelled) setSourceRecord(record);
    })
    .catch(() => {
      // Source may not be in registry — that's fine
    })
    .finally(() => {
      if (cancelled) return;
      // Mark the attempt as done (success or failure) so it is not repeated.
      setSourceFetchedFor(hash);
      setSourceLoading(false);
    });

  return () => {
    cancelled = true;
    setSourceLoading(false);
  };
}, [isExpanded, claim.source.source_hash, sourceFetchedFor]);
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
@ -88,6 +108,19 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
|
||||
{/* Expanded details */}
|
||||
{isExpanded && (
|
||||
<div className="px-3 pb-3 space-y-4 border-t border-border pt-3">
|
||||
{/* Full value */}
|
||||
<div className="space-y-1">
|
||||
<div className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
|
||||
Value
|
||||
</div>
|
||||
<p className="text-sm text-foreground whitespace-pre-wrap break-words leading-relaxed">
|
||||
{valueStr}
|
||||
</p>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Type: <code className="bg-muted px-1 py-0.5 rounded">{claim.value.type}</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Source info */}
|
||||
<div className="space-y-1">
|
||||
<div className="text-xs font-medium text-muted-foreground uppercase tracking-wide">
|
||||
@ -98,7 +131,7 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
|
||||
<span className={statusColors[status]}>
|
||||
{statusIcons[status]} {status}
|
||||
</span>
|
||||
<span className="text-muted-foreground">•</span>
|
||||
<span className="text-muted-foreground">·</span>
|
||||
<span className="text-muted-foreground">
|
||||
{tierLabel} (T{tier})
|
||||
</span>
|
||||
@ -113,6 +146,33 @@ export function ClaimRow({ claim, isLeading, isExpanded, onToggle }: ClaimRowPro
|
||||
{sourceUrl}
|
||||
</a>
|
||||
)}
|
||||
{/* Source registry details (fetched) */}
|
||||
{sourceLoading && (
|
||||
<div className="text-xs text-muted-foreground animate-pulse mt-1">
|
||||
Loading source details...
|
||||
</div>
|
||||
)}
|
||||
{sourceRecord && (
|
||||
<div className="mt-2 rounded border border-border bg-muted/30 p-2 space-y-1">
|
||||
{sourceRecord.notes && (
|
||||
<p className="text-xs text-muted-foreground whitespace-pre-wrap">
|
||||
{sourceRecord.notes}
|
||||
</p>
|
||||
)}
|
||||
<div className="flex items-center gap-3 text-[10px] text-muted-foreground">
|
||||
<span>Created: {new Date(sourceRecord.created_at).toLocaleDateString()}</span>
|
||||
{sourceRecord.updated_at !== sourceRecord.created_at && (
|
||||
<span>Updated: {new Date(sourceRecord.updated_at).toLocaleDateString()}</span>
|
||||
)}
|
||||
</div>
|
||||
<Link
|
||||
href={`/sources`}
|
||||
className="text-[10px] text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
View in Source Registry →
|
||||
</Link>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Supporting agents */}
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { useState, useEffect, useRef, useCallback } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { DatePicker } from "@/components/ui/date-picker";
|
||||
import { StemeDBClient } from "@/lib/api";
|
||||
|
||||
export interface QueryParams {
|
||||
subject: string;
|
||||
@ -15,22 +16,162 @@ export interface QueryParams {
|
||||
interface QueryFormProps {
|
||||
onSubmit: (params: QueryParams) => void;
|
||||
isLoading: boolean;
|
||||
initialSubject?: string;
|
||||
initialPredicate?: string;
|
||||
}
|
||||
|
||||
export function QueryForm({ onSubmit, isLoading }: QueryFormProps) {
|
||||
const [subject, setSubject] = useState("");
|
||||
const [predicate, setPredicate] = useState("");
|
||||
export function QueryForm({ onSubmit, isLoading, initialSubject, initialPredicate }: QueryFormProps) {
|
||||
const [subject, setSubject] = useState(initialSubject ?? "");
|
||||
const [predicate, setPredicate] = useState(initialPredicate ?? "");
|
||||
const [includeSourceMetadata, setIncludeSourceMetadata] = useState(true);
|
||||
const [asOfDate, setAsOfDate] = useState<Date | undefined>(undefined);
|
||||
|
||||
// Autocomplete state
|
||||
const [subjectSuggestions, setSubjectSuggestions] = useState<string[]>([]);
|
||||
const [predicateSuggestions, setPredicateSuggestions] = useState<string[]>([]);
|
||||
const [showSubjectDropdown, setShowSubjectDropdown] = useState(false);
|
||||
const [showPredicateDropdown, setShowPredicateDropdown] = useState(false);
|
||||
const [activeSubjectIndex, setActiveSubjectIndex] = useState(-1);
|
||||
const [activePredicateIndex, setActivePredicateIndex] = useState(-1);
|
||||
|
||||
const subjectRef = useRef<HTMLDivElement>(null);
|
||||
const predicateRef = useRef<HTMLDivElement>(null);
|
||||
const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
|
||||
// Sync initial values when they change (e.g., from URL params)
|
||||
useEffect(() => {
|
||||
if (initialSubject !== undefined) setSubject(initialSubject);
|
||||
}, [initialSubject]);
|
||||
|
||||
useEffect(() => {
|
||||
if (initialPredicate !== undefined) setPredicate(initialPredicate);
|
||||
}, [initialPredicate]);
|
||||
|
||||
// Fetch subject suggestions with debounce
|
||||
// Debounced subject autocomplete: waits 200ms after the last call, then asks
// the API for up to 20 matching subjects. A blank query clears the dropdown.
//
// The timer handle doubles as a request token: if `debounceRef.current` no
// longer points at this timer once the request settles, a newer keystroke has
// superseded it and the (now stale) result is dropped instead of overwriting
// fresher suggestions.
const fetchSubjects = useCallback((query: string) => {
  if (debounceRef.current) clearTimeout(debounceRef.current);

  const timer = setTimeout(async () => {
    const isStale = () => debounceRef.current !== timer;

    if (!query.trim()) {
      setSubjectSuggestions([]);
      setShowSubjectDropdown(false);
      return;
    }

    try {
      const client = new StemeDBClient();
      const resp = await client.listSubjects(query, 20);
      if (isStale()) return;
      setSubjectSuggestions(resp.subjects);
      setShowSubjectDropdown(resp.subjects.length > 0);
      setActiveSubjectIndex(-1);
    } catch {
      if (isStale()) return;
      // Autocomplete is best-effort: on failure just hide the dropdown.
      setSubjectSuggestions([]);
      setShowSubjectDropdown(false);
    }
  }, 200);

  debounceRef.current = timer;
}, []);
|
||||
|
||||
// Fetch predicates when subject is selected
|
||||
const fetchPredicates = useCallback(async (subj: string) => {
|
||||
if (!subj.trim()) {
|
||||
setPredicateSuggestions([]);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const client = new StemeDBClient();
|
||||
const resp = await client.listPredicates(subj);
|
||||
setPredicateSuggestions(resp.predicates);
|
||||
} catch {
|
||||
setPredicateSuggestions([]);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Close dropdowns on click outside
|
||||
useEffect(() => {
|
||||
function handleClickOutside(e: MouseEvent) {
|
||||
if (subjectRef.current && !subjectRef.current.contains(e.target as Node)) {
|
||||
setShowSubjectDropdown(false);
|
||||
}
|
||||
if (predicateRef.current && !predicateRef.current.contains(e.target as Node)) {
|
||||
setShowPredicateDropdown(false);
|
||||
}
|
||||
}
|
||||
document.addEventListener("mousedown", handleClickOutside);
|
||||
return () => document.removeEventListener("mousedown", handleClickOutside);
|
||||
}, []);
|
||||
|
||||
const handleSubjectChange = (value: string) => {
|
||||
setSubject(value);
|
||||
fetchSubjects(value);
|
||||
// Clear predicate suggestions when subject changes
|
||||
setPredicateSuggestions([]);
|
||||
};
|
||||
|
||||
const selectSubject = (value: string) => {
|
||||
setSubject(value);
|
||||
setShowSubjectDropdown(false);
|
||||
setActiveSubjectIndex(-1);
|
||||
fetchPredicates(value);
|
||||
};
|
||||
|
||||
const handlePredicateChange = (value: string) => {
|
||||
setPredicate(value);
|
||||
// Filter existing predicate suggestions locally
|
||||
if (predicateSuggestions.length > 0) {
|
||||
setShowPredicateDropdown(true);
|
||||
setActivePredicateIndex(-1);
|
||||
}
|
||||
};
|
||||
|
||||
const selectPredicate = (value: string) => {
|
||||
setPredicate(value);
|
||||
setShowPredicateDropdown(false);
|
||||
setActivePredicateIndex(-1);
|
||||
};
|
||||
|
||||
const filteredPredicates = predicateSuggestions.filter((p) =>
|
||||
p.toLowerCase().includes(predicate.toLowerCase())
|
||||
);
|
||||
|
||||
// Keyboard navigation for the subject dropdown: arrows move the highlighted
// row (clamped to the list bounds), Enter picks the highlighted row, and
// Escape closes the dropdown. Keys are ignored while the dropdown is not shown.
const handleSubjectKeyDown = (e: React.KeyboardEvent) => {
  const dropdownActive = showSubjectDropdown && subjectSuggestions.length > 0;
  if (!dropdownActive) return;

  switch (e.key) {
    case "ArrowDown": {
      e.preventDefault();
      const lastIndex = subjectSuggestions.length - 1;
      setActiveSubjectIndex((current) => Math.min(current + 1, lastIndex));
      break;
    }
    case "ArrowUp":
      e.preventDefault();
      setActiveSubjectIndex((current) => Math.max(current - 1, 0));
      break;
    case "Enter":
      // With no highlighted row, Enter falls through to normal form submission.
      if (activeSubjectIndex >= 0) {
        e.preventDefault();
        selectSubject(subjectSuggestions[activeSubjectIndex]);
      }
      break;
    case "Escape":
      setShowSubjectDropdown(false);
      break;
    default:
      break;
  }
};
|
||||
|
||||
// Keyboard navigation for the predicate dropdown, operating on the locally
// filtered predicate list: arrows move the highlighted row (clamped to the
// list bounds), Enter picks it, Escape closes the dropdown.
const handlePredicateKeyDown = (e: React.KeyboardEvent) => {
  const dropdownActive = showPredicateDropdown && filteredPredicates.length > 0;
  if (!dropdownActive) return;

  switch (e.key) {
    case "ArrowDown": {
      e.preventDefault();
      const lastIndex = filteredPredicates.length - 1;
      setActivePredicateIndex((current) => Math.min(current + 1, lastIndex));
      break;
    }
    case "ArrowUp":
      e.preventDefault();
      setActivePredicateIndex((current) => Math.max(current - 1, 0));
      break;
    case "Enter":
      // With no highlighted row, Enter falls through to normal form submission.
      if (activePredicateIndex >= 0) {
        e.preventDefault();
        selectPredicate(filteredPredicates[activePredicateIndex]);
      }
      break;
    case "Escape":
      setShowPredicateDropdown(false);
      break;
    default:
      break;
  }
};
|
||||
|
||||
const handleSubmit = (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
setShowSubjectDropdown(false);
|
||||
setShowPredicateDropdown(false);
|
||||
if (subject.trim() && predicate.trim()) {
|
||||
onSubmit({
|
||||
subject: subject.trim(),
|
||||
predicate: predicate.trim(),
|
||||
includeSourceMetadata,
|
||||
// Convert Date to Unix timestamp (seconds)
|
||||
asOf: asOfDate ? Math.floor(asOfDate.getTime() / 1000) : undefined,
|
||||
});
|
||||
}
|
||||
@ -41,32 +182,81 @@ export function QueryForm({ onSubmit, isLoading }: QueryFormProps) {
|
||||
return (
|
||||
<form onSubmit={handleSubmit} className="space-y-4">
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="space-y-2">
|
||||
{/* Subject with autocomplete */}
|
||||
<div className="space-y-2" ref={subjectRef}>
|
||||
<label htmlFor="subject" className="text-sm font-medium text-foreground">
|
||||
Subject
|
||||
</label>
|
||||
<Input
|
||||
id="subject"
|
||||
placeholder="e.g., semaglutide:gastroparesis_risk"
|
||||
value={subject}
|
||||
onChange={(e) => setSubject(e.target.value)}
|
||||
disabled={isLoading}
|
||||
/>
|
||||
<div className="relative">
|
||||
<Input
|
||||
id="subject"
|
||||
placeholder="e.g., semaglutide:gastroparesis_risk"
|
||||
value={subject}
|
||||
onChange={(e) => handleSubjectChange(e.target.value)}
|
||||
onFocus={() => {
|
||||
if (subjectSuggestions.length > 0) setShowSubjectDropdown(true);
|
||||
}}
|
||||
onKeyDown={handleSubjectKeyDown}
|
||||
disabled={isLoading}
|
||||
autoComplete="off"
|
||||
/>
|
||||
{showSubjectDropdown && subjectSuggestions.length > 0 && (
|
||||
<div className="absolute z-50 w-full mt-1 max-h-60 overflow-auto rounded-md border border-border bg-popover shadow-md">
|
||||
{subjectSuggestions.map((s, i) => (
|
||||
<button
|
||||
key={s}
|
||||
type="button"
|
||||
className={`w-full px-3 py-2 text-left text-sm font-mono truncate hover:bg-muted ${
|
||||
i === activeSubjectIndex ? "bg-muted" : ""
|
||||
}`}
|
||||
onMouseDown={() => selectSubject(s)}
|
||||
>
|
||||
{s}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
The entity you want to query
|
||||
</p>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
|
||||
{/* Predicate with autocomplete */}
|
||||
<div className="space-y-2" ref={predicateRef}>
|
||||
<label htmlFor="predicate" className="text-sm font-medium text-foreground">
|
||||
Predicate
|
||||
</label>
|
||||
<Input
|
||||
id="predicate"
|
||||
placeholder="e.g., risk_level"
|
||||
value={predicate}
|
||||
onChange={(e) => setPredicate(e.target.value)}
|
||||
disabled={isLoading}
|
||||
/>
|
||||
<div className="relative">
|
||||
<Input
|
||||
id="predicate"
|
||||
placeholder="e.g., risk_level"
|
||||
value={predicate}
|
||||
onChange={(e) => handlePredicateChange(e.target.value)}
|
||||
onFocus={() => {
|
||||
if (filteredPredicates.length > 0) setShowPredicateDropdown(true);
|
||||
}}
|
||||
onKeyDown={handlePredicateKeyDown}
|
||||
disabled={isLoading}
|
||||
autoComplete="off"
|
||||
/>
|
||||
{showPredicateDropdown && filteredPredicates.length > 0 && (
|
||||
<div className="absolute z-50 w-full mt-1 max-h-60 overflow-auto rounded-md border border-border bg-popover shadow-md">
|
||||
{filteredPredicates.map((p, i) => (
|
||||
<button
|
||||
key={p}
|
||||
type="button"
|
||||
className={`w-full px-3 py-2 text-left text-sm font-mono truncate hover:bg-muted ${
|
||||
i === activePredicateIndex ? "bg-muted" : ""
|
||||
}`}
|
||||
onMouseDown={() => selectPredicate(p)}
|
||||
>
|
||||
{p}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
The property or relationship to analyze
|
||||
</p>
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useCallback } from "react";
|
||||
import { useState, useCallback, useEffect, useRef } from "react";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { StemeDBClient, type SkepticResponse, ApiError } from "@/lib/api";
|
||||
import { Button } from "@/components/ui/button";
|
||||
@ -20,9 +20,15 @@ type QueryState =
|
||||
| { status: "success"; data: SkepticResponse; params: QueryParams }
|
||||
| { status: "error"; error: string; params: QueryParams };
|
||||
|
||||
export function QueryResults() {
|
||||
interface QueryResultsProps {
|
||||
initialSubject?: string;
|
||||
initialPredicate?: string;
|
||||
}
|
||||
|
||||
export function QueryResults({ initialSubject, initialPredicate }: QueryResultsProps) {
|
||||
const [state, setState] = useState<QueryState>({ status: "idle" });
|
||||
const router = useRouter();
|
||||
const hasAutoQueried = useRef(false);
|
||||
|
||||
const handleViewAudit = useCallback(
|
||||
(subject: string, predicate: string) => {
|
||||
@ -56,6 +62,18 @@ export function QueryResults() {
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Auto-execute query when initial subject+predicate are provided (e.g., from URL params)
|
||||
useEffect(() => {
|
||||
if (initialSubject && initialPredicate && !hasAutoQueried.current) {
|
||||
hasAutoQueried.current = true;
|
||||
executeQuery({
|
||||
subject: initialSubject,
|
||||
predicate: initialPredicate,
|
||||
includeSourceMetadata: true,
|
||||
});
|
||||
}
|
||||
}, [initialSubject, initialPredicate, executeQuery]);
|
||||
|
||||
const handleRetry = useCallback(() => {
|
||||
if (state.status === "error") {
|
||||
executeQuery(state.params);
|
||||
@ -71,7 +89,12 @@ export function QueryResults() {
|
||||
<h2 className="text-lg font-medium text-card-foreground mb-4">
|
||||
Conflict Analysis Query
|
||||
</h2>
|
||||
<QueryForm onSubmit={executeQuery} isLoading={isLoading} />
|
||||
<QueryForm
|
||||
onSubmit={executeQuery}
|
||||
isLoading={isLoading}
|
||||
initialSubject={initialSubject}
|
||||
initialPredicate={initialPredicate}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Results Section */}
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback } from "react";
|
||||
import { FileJson, FileText } from "lucide-react";
|
||||
import type { SourceImpactResponse } from "@/lib/api/types";
|
||||
import { useCallback, useEffect, useState } from "react";
|
||||
import { ChevronDown, ChevronUp, FileJson, FileText } from "lucide-react";
|
||||
import type { SourceImpactResponse, SourceRecordDto } from "@/lib/api/types";
|
||||
import { StemeDBClient } from "@/lib/api";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
@ -20,11 +20,90 @@ interface ImpactDetailPanelProps {
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
/**
 * Shows a shortened hash (first 12 and last 4 characters) as a button that
 * copies the full hash to the clipboard and briefly displays "Copied!".
 */
function CopyableHash({ hash }: { hash: string }) {
  const [copied, setCopied] = useState(false);

  // Hide the "Copied!" hint after 1.5s. Driving the timer from an effect lets
  // the cleanup cancel it if the component unmounts before it fires.
  useEffect(() => {
    if (!copied) return;
    const timer = setTimeout(() => setCopied(false), 1500);
    return () => clearTimeout(timer);
  }, [copied]);

  const handleCopy = async () => {
    try {
      await navigator.clipboard.writeText(hash);
      setCopied(true);
    } catch {
      // Clipboard access can be denied or unavailable (e.g. insecure context).
      // Leave the hint hidden rather than claiming a copy that did not happen.
    }
  };

  return (
    <button
      type="button"
      onClick={handleCopy}
      className="font-mono text-xs cursor-pointer hover:text-foreground transition-colors"
      title="Click to copy full hash"
    >
      {hash.slice(0, 12)}...{hash.slice(-4)}
      <span className="ml-1 text-primary text-[10px]">
        {copied ? "Copied!" : ""}
      </span>
    </button>
  );
}
|
||||
|
||||
/**
 * Shows an agent ID as a pill-shaped button that copies the ID to the
 * clipboard and briefly displays "Copied!".
 */
function CopyableAgent({ agent }: { agent: string }) {
  const [copied, setCopied] = useState(false);

  // Hide the "Copied!" hint after 1.5s. Driving the timer from an effect lets
  // the cleanup cancel it if the component unmounts before it fires.
  useEffect(() => {
    if (!copied) return;
    const timer = setTimeout(() => setCopied(false), 1500);
    return () => clearTimeout(timer);
  }, [copied]);

  const handleCopy = async () => {
    try {
      await navigator.clipboard.writeText(agent);
      setCopied(true);
    } catch {
      // Clipboard access can be denied or unavailable (e.g. insecure context).
      // Leave the hint hidden rather than claiming a copy that did not happen.
    }
  };

  // No `key` here: React keys belong on the element created by the parent's
  // list rendering, not on a component's own root element.
  return (
    <button
      type="button"
      onClick={handleCopy}
      className="px-2 py-1 rounded bg-muted text-xs font-mono cursor-pointer hover:text-foreground transition-colors"
      title="Click to copy agent ID"
    >
      {agent}
      <span className="ml-1 text-primary text-[10px]">
        {copied ? "Copied!" : ""}
      </span>
    </button>
  );
}
|
||||
|
||||
/**
 * Small colored pill for a source status. The lookup is case-insensitive;
 * any status without an entry in the palette gets the neutral muted styling.
 */
function StatusBadge({ status }: { status: string }) {
  const neutralClasses = "bg-muted text-muted-foreground";
  // NOTE(review): there is no entry for "deprecated", so it renders neutral —
  // confirm whether that is intended.
  const palette: Record<string, string> = {
    active: "bg-green-500/15 text-green-700 dark:text-green-400",
    inactive: "bg-muted text-muted-foreground",
    quarantined: "bg-red-500/15 text-red-700 dark:text-red-400",
    pending: "bg-yellow-500/15 text-yellow-700 dark:text-yellow-400",
  };
  const normalized = status.toLowerCase();
  const classes = palette[normalized] ?? neutralClasses;
  const badgeClassName = `inline-block px-2 py-0.5 rounded text-[11px] font-medium ${classes}`;

  return <span className={badgeClassName}>{status}</span>;
}
|
||||
|
||||
export function ImpactDetailPanel({
|
||||
isOpen,
|
||||
impact,
|
||||
onClose,
|
||||
}: ImpactDetailPanelProps) {
|
||||
const [sourceRecord, setSourceRecord] = useState<SourceRecordDto | null>(
|
||||
null
|
||||
);
|
||||
const [contentExpanded, setContentExpanded] = useState(false);
|
||||
|
||||
// Load the registry record for the source shown in the panel.
//
// Each run first clears the previous record and collapses the content view, so
// one source's content is never displayed under another source while the new
// request is in flight. The `cancelled` flag stops a slow response for an
// earlier source (or for a panel that has since closed) from overwriting state.
useEffect(() => {
  setSourceRecord(null);
  setContentExpanded(false);

  const sourceHash = impact?.source_hash;
  if (!isOpen || !sourceHash) return;

  let cancelled = false;
  new StemeDBClient()
    .getSource(sourceHash)
    .then((record) => {
      if (!cancelled) setSourceRecord(record);
    })
    .catch(() => {
      // Lookup failed; the panel simply renders without source content.
      if (!cancelled) setSourceRecord(null);
    });

  return () => {
    cancelled = true;
  };
}, [isOpen, impact?.source_hash]);
|
||||
|
||||
const handleExport = useCallback(
|
||||
(format: "csv" | "json") => {
|
||||
if (!impact) return;
|
||||
@ -54,6 +133,35 @@ export function ImpactDetailPanel({
|
||||
|
||||
{impact ? (
|
||||
<div className="mt-6 space-y-6">
|
||||
{/* Source Info */}
|
||||
<div className="rounded-lg border border-border p-4 space-y-2">
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-xs text-muted-foreground font-medium uppercase tracking-wide">
|
||||
Source
|
||||
</span>
|
||||
<StatusBadge status={impact.status} />
|
||||
</div>
|
||||
<div className="text-muted-foreground">
|
||||
<CopyableHash hash={impact.source_hash} />
|
||||
</div>
|
||||
<div className="flex gap-4 pt-1">
|
||||
<div className="flex items-baseline gap-1.5">
|
||||
<span className="text-xs text-muted-foreground">
|
||||
Assertions
|
||||
</span>
|
||||
<span className="text-sm font-bold">
|
||||
{impact.assertion_count}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-baseline gap-1.5">
|
||||
<span className="text-xs text-muted-foreground">Agents</span>
|
||||
<span className="text-sm font-bold">
|
||||
{impact.affected_agents.length}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Export buttons - only when there's data to export */}
|
||||
{impact.assertion_count > 0 && (
|
||||
<div className="flex items-center gap-2">
|
||||
@ -84,31 +192,60 @@ export function ImpactDetailPanel({
|
||||
<p className="text-sm text-muted-foreground">{impact.summary}</p>
|
||||
</div>
|
||||
|
||||
{/* Source Content */}
|
||||
{sourceRecord?.content && (
|
||||
<div>
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<h4 className="text-sm font-medium text-foreground">
|
||||
Source Content
|
||||
<span className="ml-2 text-xs text-muted-foreground font-normal">
|
||||
({sourceRecord.content.length.toLocaleString()} chars)
|
||||
</span>
|
||||
</h4>
|
||||
<button
|
||||
onClick={() => setContentExpanded(!contentExpanded)}
|
||||
className="flex items-center gap-1 text-xs text-muted-foreground hover:text-foreground transition-colors"
|
||||
>
|
||||
{contentExpanded ? (
|
||||
<>
|
||||
Collapse <ChevronUp className="h-3 w-3" />
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
Expand <ChevronDown className="h-3 w-3" />
|
||||
</>
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
className={`rounded border border-border bg-muted/30 overflow-y-auto ${
|
||||
contentExpanded ? "max-h-[600px]" : "max-h-96"
|
||||
}`}
|
||||
>
|
||||
<pre className="p-3 text-xs text-muted-foreground whitespace-pre-wrap font-mono leading-relaxed">
|
||||
{sourceRecord.content}
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Affected Assertions */}
|
||||
{impact.affected_assertions.length > 0 && (
|
||||
<div>
|
||||
<h4 className="text-sm font-medium text-foreground mb-3">
|
||||
Affected Assertions ({impact.affected_assertions.length})
|
||||
</h4>
|
||||
<div className="max-h-48 overflow-y-auto rounded border border-border">
|
||||
<table className="w-full text-sm">
|
||||
<thead className="sticky top-0 bg-muted/50">
|
||||
<tr>
|
||||
<th className="text-left px-3 py-2 font-medium text-muted-foreground">
|
||||
Hash
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-border">
|
||||
{impact.affected_assertions.map((hash) => (
|
||||
<tr key={hash} className="hover:bg-accent/5">
|
||||
<td className="px-3 py-2 font-mono text-xs">
|
||||
{hash}
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
<div className="max-h-48 overflow-y-auto rounded border border-border divide-y divide-border">
|
||||
{impact.affected_assertions.map((hash, idx) => (
|
||||
<div
|
||||
key={hash}
|
||||
className={`flex items-center px-3 py-2 ${
|
||||
idx % 2 === 0 ? "bg-background" : "bg-muted/30"
|
||||
} hover:bg-accent/10 transition-colors`}
|
||||
>
|
||||
<CopyableHash hash={hash} />
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
@ -121,12 +258,7 @@ export function ImpactDetailPanel({
|
||||
</h4>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{impact.affected_agents.map((agent) => (
|
||||
<span
|
||||
key={agent}
|
||||
className="px-2 py-1 rounded bg-muted text-xs font-mono"
|
||||
>
|
||||
{agent}
|
||||
</span>
|
||||
<CopyableAgent key={agent} agent={agent} />
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import Link from "next/link";
|
||||
import { ExternalLink, Eye, Ban, RotateCcw } from "lucide-react";
|
||||
import type { SourceRecordDto } from "@/lib/api/types";
|
||||
import { Button } from "@/components/ui/button";
|
||||
@ -8,6 +9,7 @@ import { TierBadge } from "./tier-badge";
|
||||
|
||||
interface SourceRowProps {
|
||||
source: SourceRecordDto;
|
||||
assertionCount?: number;
|
||||
onViewImpact: (source: SourceRecordDto) => void;
|
||||
onBlock: (source: SourceRecordDto) => void;
|
||||
onRestore: (source: SourceRecordDto) => void;
|
||||
@ -15,6 +17,7 @@ interface SourceRowProps {
|
||||
|
||||
export function SourceRow({
|
||||
source,
|
||||
assertionCount,
|
||||
onViewImpact,
|
||||
onBlock,
|
||||
onRestore,
|
||||
@ -43,6 +46,17 @@ export function SourceRow({
|
||||
{updatedDate !== createdDate && (
|
||||
<span>Updated: {updatedDate}</span>
|
||||
)}
|
||||
{assertionCount !== undefined && (
|
||||
<span className="text-xs text-muted-foreground">
|
||||
{assertionCount.toLocaleString()} assertions
|
||||
</span>
|
||||
)}
|
||||
<Link
|
||||
href="/"
|
||||
className="text-xs text-blue-600 dark:text-blue-400 hover:underline"
|
||||
>
|
||||
View Feed →
|
||||
</Link>
|
||||
</div>
|
||||
|
||||
{source.url && (
|
||||
|
||||
@ -35,6 +35,9 @@ export function SourcesPanel() {
|
||||
const [impact, setImpact] = useState<SourceImpactResponse | null>(null);
|
||||
const [isLoadingImpact, setIsLoadingImpact] = useState(false);
|
||||
const [isProcessing, setIsProcessing] = useState(false);
|
||||
const [assertionCounts, setAssertionCounts] = useState<Map<string, number>>(
|
||||
new Map()
|
||||
);
|
||||
|
||||
const fetchData = useCallback(async () => {
|
||||
setState({ status: "loading" });
|
||||
@ -66,6 +69,31 @@ export function SourcesPanel() {
|
||||
fetchData();
|
||||
}, [fetchData]);
|
||||
|
||||
// Lazily fetch assertion counts for all sources after list loads
|
||||
// After the source list loads, fetch each source's impact to get its assertion
// count. One request is issued per source; failures are tolerated and that
// source is left without a count.
//
// The `cancelled` flag prevents results computed for an outdated list (e.g.
// the list was reloaded while requests were in flight) from overwriting state.
useEffect(() => {
  if (state.status !== "success" || state.data.sources.length === 0) return;

  const sources = state.data.sources;
  const client = new StemeDBClient();
  let cancelled = false;

  // Every promise resolves (errors are mapped to null), so Promise.all never
  // rejects here.
  const lookups = sources.map((source) =>
    client
      .getSourceImpact(source.hash)
      .then((data) => ({ hash: source.hash, count: data.assertion_count }))
      .catch(() => null)
  );

  Promise.all(lookups).then((entries) => {
    if (cancelled) return;
    const counts = new Map<string, number>();
    for (const entry of entries) {
      if (entry !== null) counts.set(entry.hash, entry.count);
    }
    setAssertionCounts(counts);
  });

  return () => {
    cancelled = true;
  };
}, [state]);
|
||||
|
||||
// Fetch impact when block dialog opens
|
||||
useEffect(() => {
|
||||
if (dialogState.type === "block") {
|
||||
@ -221,6 +249,7 @@ export function SourcesPanel() {
|
||||
<SourceRow
|
||||
key={source.hash}
|
||||
source={source}
|
||||
assertionCount={assertionCounts.get(source.hash)}
|
||||
onViewImpact={handleViewImpact}
|
||||
onBlock={handleBlock}
|
||||
onRestore={handleRestore}
|
||||
|
||||
@ -7,6 +7,7 @@ import {
|
||||
type CircuitBreakerResponse,
|
||||
type AuditResponse,
|
||||
type ListSourcesResponse,
|
||||
type SourceRecordDto,
|
||||
type SourceImpactResponse,
|
||||
type QuarantineSourceResponse,
|
||||
type RestoreSourceResponse,
|
||||
@ -15,6 +16,8 @@ import {
|
||||
type ScanResponse,
|
||||
type ListScansResponse,
|
||||
type FeedResponse,
|
||||
type ListSubjectsResponse,
|
||||
type ListPredicatesResponse,
|
||||
type ListClaimsRequest,
|
||||
type ListClaimsResponse,
|
||||
type CreateClaimRequest,
|
||||
@ -76,6 +79,18 @@ export class StemeDBClient {
|
||||
return this.fetch<FeedResponse>(`/v1/feed?${params}`);
|
||||
}
|
||||
|
||||
/**
 * Lists subjects from `/v1/subjects`.
 *
 * @param q     Optional search string; sent as the `q` query parameter only when non-empty.
 * @param limit Value sent as the `limit` query parameter (default 100).
 */
async listSubjects(q?: string, limit = 100): Promise<ListSubjectsResponse> {
  const query = new URLSearchParams(
    q ? { limit: String(limit), q } : { limit: String(limit) }
  );
  const path = `/v1/subjects?${query}`;
  return this.fetch<ListSubjectsResponse>(path);
}
|
||||
|
||||
/**
 * Lists the predicates for one subject from `/v1/subjects/{subject}/predicates`.
 *
 * @param subject Subject identifier; percent-encoded before being placed in the path.
 */
async listPredicates(subject: string): Promise<ListPredicatesResponse> {
  const encodedSubject = encodeURIComponent(subject);
  const path = `/v1/subjects/${encodedSubject}/predicates`;
  return this.fetch<ListPredicatesResponse>(path);
}
|
||||
|
||||
async health(): Promise<HealthResponse> {
|
||||
return this.fetch<HealthResponse>("/health");
|
||||
}
|
||||
@ -160,6 +175,10 @@ export class StemeDBClient {
|
||||
return this.fetch<ListSourcesResponse>(`/v1/sources?${params}`);
|
||||
}
|
||||
|
||||
/**
 * Fetches a single source record from `/v1/sources/{hash}`.
 *
 * @param hash Source hash; percent-encoded before being placed in the path.
 */
async getSource(hash: string): Promise<SourceRecordDto> {
  const encodedHash = encodeURIComponent(hash);
  const path = `/v1/sources/${encodedHash}`;
  return this.fetch<SourceRecordDto>(path);
}
|
||||
|
||||
/**
 * Fetches the impact report for a source from `/v1/sources/{hash}/impact`.
 *
 * @param hash Source hash; percent-encoded before being placed in the path so
 *             that reserved characters (`/`, `?`, `#`, ...) cannot alter the
 *             request URL, matching how `getSource` builds its path.
 */
async getSourceImpact(hash: string): Promise<SourceImpactResponse> {
  return this.fetch<SourceImpactResponse>(
    `/v1/sources/${encodeURIComponent(hash)}/impact`
  );
}
|
||||
|
||||
@ -62,6 +62,7 @@ export interface AssertionObject {
|
||||
timestamp: number;
|
||||
version: number;
|
||||
}>;
|
||||
narrative?: string;
|
||||
}
|
||||
|
||||
export interface LayeredTier {
|
||||
@ -209,6 +210,7 @@ export interface SourceRecordDto {
|
||||
status: "active" | "deprecated" | "quarantined";
|
||||
url?: string;
|
||||
notes?: string;
|
||||
content?: string;
|
||||
created_at: number;
|
||||
updated_at: number;
|
||||
}
|
||||
@ -347,6 +349,17 @@ export interface FeedResponse {
|
||||
has_more: boolean;
|
||||
}
|
||||
|
||||
// Discovery types (subject/predicate autocomplete)
|
||||
export interface ListSubjectsResponse {
|
||||
subjects: string[];
|
||||
total_count: number;
|
||||
}
|
||||
|
||||
export interface ListPredicatesResponse {
|
||||
subject: string;
|
||||
predicates: string[];
|
||||
}
|
||||
|
||||
export class ApiError extends Error {
|
||||
public userMessage: string;
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -95,10 +95,8 @@ impl AdminClient {
|
||||
}
|
||||
|
||||
// Gateway returns different format than /admin/ranges, so convert it
|
||||
let shard_response: ShardInfoResponse = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse shard info response")?;
|
||||
let shard_response: ShardInfoResponse =
|
||||
response.json().await.context("Failed to parse shard info response")?;
|
||||
|
||||
Ok(shard_response.into())
|
||||
}
|
||||
@ -125,10 +123,8 @@ impl AdminClient {
|
||||
}
|
||||
|
||||
// Gateway returns {"ranges": [...]} so we need to unwrap it
|
||||
let wrapper: RangesWrapper = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse ranges response")?;
|
||||
let wrapper: RangesWrapper =
|
||||
response.json().await.context("Failed to parse ranges response")?;
|
||||
|
||||
Ok(wrapper.ranges)
|
||||
}
|
||||
|
||||
@ -132,6 +132,14 @@ pub struct CreateAssertionRequest {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_metadata: Option<String>,
|
||||
|
||||
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
|
||||
///
|
||||
/// Makes the assertion self-contained: pick it up, read it, understand the
|
||||
/// full claim without dereferencing anything. Max 64 KB.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[schema(example = "Based on STEP 1 trial (n=1961). Limitation: 68-week duration only.")]
|
||||
pub narrative: Option<String>,
|
||||
|
||||
/// Unix timestamp when the assertion was created.
|
||||
/// If not provided, defaults to the current time.
|
||||
/// **Important for v2 signatures:** Provide this field to preserve the
|
||||
|
||||
@ -29,6 +29,7 @@ pub mod responses;
|
||||
pub mod skeptic;
|
||||
pub mod source_registry;
|
||||
pub mod stemedb_claims;
|
||||
pub mod subjects;
|
||||
|
||||
// Re-export all public types for backward compatibility
|
||||
// This allows existing code to use `use crate::dto::*;` without changes
|
||||
@ -51,7 +52,7 @@ pub use query_params::{FeedParams, QueryParams};
|
||||
// From responses module
|
||||
pub use responses::{
|
||||
AssertionResponse, ChangeEntryDto, ErrorResponse, HealthResponse, LayeredQueryResponse,
|
||||
ProvenanceResponse, QueryResponse, SourceWarningDto, TierResolutionDto,
|
||||
ProvenanceResponse, QueryResponse, RebuildIndexesResponse, SourceWarningDto, TierResolutionDto,
|
||||
};
|
||||
|
||||
// From audit module
|
||||
@ -131,4 +132,9 @@ pub use aphoria::{
|
||||
};
|
||||
|
||||
// From stemedb_claims module
|
||||
pub use stemedb_claims::{AuthoredClaimDto, AuthoredValueDto, CreateClaimRequest, CreateClaimResponse};
|
||||
pub use stemedb_claims::{
|
||||
AuthoredClaimDto, AuthoredValueDto, CreateClaimRequest, CreateClaimResponse,
|
||||
};
|
||||
|
||||
// From subjects module
|
||||
pub use subjects::{ListPredicatesResponse, ListSubjectsParams, ListSubjectsResponse};
|
||||
|
||||
@ -88,6 +88,10 @@ pub struct AssertionResponse {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_metadata: Option<String>,
|
||||
|
||||
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub narrative: Option<String>,
|
||||
|
||||
/// Warning if this assertion cites a quarantined or deprecated source.
|
||||
///
|
||||
/// Present when the assertion's source has a non-Active status in the
|
||||
@ -217,6 +221,30 @@ pub struct TierResolutionDto {
|
||||
pub resolution_confidence: f32,
|
||||
}
|
||||
|
||||
/// Response from the admin rebuild-indexes endpoint.
|
||||
///
|
||||
/// Reports how many assertion indexes were rebuilt, how many keys were
|
||||
/// skipped (e.g., malformed keys or deserialization failures), and how long the
|
||||
/// operation took.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||
pub struct RebuildIndexesResponse {
|
||||
/// Number of assertions whose subject/predicate (S:, SP:) indexes were rebuilt.
|
||||
pub rebuilt_count: u64,
|
||||
|
||||
/// Number of keys that were skipped (unparseable key, undecodable hash, deserialization failure, or S:/SP: index write failure).
|
||||
pub skipped_count: u64,
|
||||
|
||||
/// Wall-clock time for the operation in milliseconds.
|
||||
pub elapsed_ms: u64,
|
||||
|
||||
/// Human-readable status message summarising the counts and elapsed time.
|
||||
pub status: String,
|
||||
|
||||
/// First error encountered (for diagnostics). Absent when all succeed.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub first_error: Option<String>,
|
||||
}
|
||||
|
||||
/// Response from a LayeredConsensus query.
|
||||
///
|
||||
/// Provides per-tier resolution results plus an overall winner.
|
||||
|
||||
@ -31,6 +31,10 @@ pub struct RegisterSourceRequest {
|
||||
/// Optional curator notes about the source.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub notes: Option<String>,
|
||||
|
||||
/// Optional full-text content of the source document.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub content: Option<String>,
|
||||
}
|
||||
|
||||
/// Response from registering a source.
|
||||
@ -78,6 +82,10 @@ pub struct SourceRecordDto {
|
||||
/// Optional curator notes.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub notes: Option<String>,
|
||||
|
||||
/// Optional full-text content of the source document.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub content: Option<String>,
|
||||
}
|
||||
|
||||
impl From<SourceRecord> for SourceRecordDto {
|
||||
@ -92,6 +100,7 @@ impl From<SourceRecord> for SourceRecordDto {
|
||||
created_at: record.created_at,
|
||||
updated_at: record.updated_at,
|
||||
notes: record.notes.clone(),
|
||||
content: record.content.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
34
crates/stemedb-api/src/dto/subjects.rs
Normal file
34
crates/stemedb-api/src/dto/subjects.rs
Normal file
@ -0,0 +1,34 @@
|
||||
//! DTOs for subject and predicate discovery endpoints.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utoipa::{IntoParams, ToSchema};
|
||||
|
||||
/// Query parameters for `GET /v1/subjects`. Both fields are optional.
|
||||
#[derive(Debug, Deserialize, IntoParams)]
|
||||
pub struct ListSubjectsParams {
|
||||
/// Optional prefix filter for subject names; `None` when `q` is not supplied.
|
||||
#[param(example = "sema")]
|
||||
pub q: Option<String>,
|
||||
|
||||
/// Maximum number of subjects to return (default 100, max 1000). NOTE(review): this type does not apply the default or the cap itself — presumably the handler does; confirm.
|
||||
#[param(example = 100)]
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
/// Response for `GET /v1/subjects`. Serialize-only (no `Deserialize` derive).
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
pub struct ListSubjectsResponse {
|
||||
/// List of matching subject strings returned for this request.
|
||||
pub subjects: Vec<String>,
|
||||
/// Total number of subjects matching the filter (before limit), so it may exceed `subjects.len()`.
|
||||
pub total_count: usize,
|
||||
}
|
||||
|
||||
/// Response for `GET /v1/subjects/:subject/predicates`. Serialize-only (no `Deserialize` derive).
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
pub struct ListPredicatesResponse {
|
||||
/// The subject these predicates belong to (presumably echoed from the `:subject` path segment — confirm against the handler).
|
||||
pub subject: String,
|
||||
/// List of predicate strings for this subject.
|
||||
pub predicates: Vec<String>,
|
||||
}
|
||||
@ -1,14 +1,14 @@
|
||||
//! Admin handlers for maintenance operations.
|
||||
|
||||
use axum::{extract::State, Json};
|
||||
use tracing::instrument;
|
||||
use tracing::{info, instrument, warn};
|
||||
|
||||
use crate::{
|
||||
dto::{DecayTrustRanksRequest, DecayTrustRanksResponse},
|
||||
dto::{DecayTrustRanksRequest, DecayTrustRanksResponse, RebuildIndexesResponse},
|
||||
error::Result,
|
||||
state::AppState,
|
||||
};
|
||||
use stemedb_storage::{GenericTrustRankStore, TrustRankStore};
|
||||
use stemedb_storage::{GenericIndexStore, GenericTrustRankStore, IndexStore, KVStore, TrustRankStore, key_codec};
|
||||
|
||||
/// Default half-life for trust rank decay (30 days in seconds).
|
||||
const DEFAULT_HALF_LIFE_SECONDS: u64 = 30 * 24 * 60 * 60;
|
||||
@ -68,3 +68,215 @@ pub async fn decay_trust_ranks(
|
||||
status: "Decay operation completed".to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Rebuild secondary indexes (Redb) from assertion data (Fjall).
|
||||
///
|
||||
/// This is a repair operation for when Redb indexes are missing or stale
|
||||
/// while Fjall assertion data is intact. It scans all assertion data from
|
||||
/// Fjall and reconstructs the S:, SP:, SUBJECTS:, HASH_SUBJECT:, and SRC:
|
||||
/// indexes in Redb, then corrects the META:assertion_count.
|
||||
///
|
||||
/// This endpoint is idempotent — running it multiple times is safe because
|
||||
/// the index store uses append-with-dedup semantics.
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/v1/admin/rebuild-indexes",
|
||||
responses(
|
||||
(status = 200, description = "Index rebuild completed", body = RebuildIndexesResponse),
|
||||
(status = 500, description = "Internal server error", body = crate::dto::ErrorResponse),
|
||||
),
|
||||
tag = "admin"
|
||||
)]
|
||||
#[instrument(skip(state))]
|
||||
pub async fn rebuild_indexes(
|
||||
State(state): State<AppState>,
|
||||
) -> Result<Json<RebuildIndexesResponse>> {
|
||||
let start = std::time::Instant::now();
|
||||
metrics::counter!("stemedb_http_requests_total", "method" => "POST", "path" => "/v1/admin/rebuild-indexes").increment(1);
|
||||
|
||||
info!("Starting index rebuild: scanning Fjall for all assertions");
|
||||
|
||||
// Capture current time once for FEED index fallback (timestamp:0 assertions)
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_secs())
|
||||
.unwrap_or(0);
|
||||
|
||||
// Scan all assertion key-value pairs from Fjall
|
||||
let assertion_kvs = state.store.scan_fjall_assertions().await?;
|
||||
let total_keys = assertion_kvs.len();
|
||||
info!(total_keys, "Found assertion keys in Fjall");
|
||||
|
||||
// Create an IndexStore backed by the same HybridStore
|
||||
let index_store = GenericIndexStore::new(state.store.clone());
|
||||
|
||||
let mut rebuilt_count: u64 = 0;
|
||||
let mut skipped_count: u64 = 0;
|
||||
let mut first_error: Option<String> = None;
|
||||
|
||||
for (key, value) in &assertion_kvs {
|
||||
// Extract subject from key
|
||||
let subject = match key_codec::extract_subject(key) {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
let msg = format!(
|
||||
"extract_subject failed: key_len={}, first_bytes={:?}",
|
||||
key.len(),
|
||||
&key[..key.len().min(40)]
|
||||
);
|
||||
warn!("{}", msg);
|
||||
if first_error.is_none() {
|
||||
first_error = Some(msg);
|
||||
}
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Extract hash_hex from tag (tag is "H:{hash_hex}")
|
||||
let tag = key_codec::extract_tag(key);
|
||||
let hash_hex = match tag.strip_prefix(b"H:") {
|
||||
Some(hex_bytes) => match std::str::from_utf8(hex_bytes) {
|
||||
Ok(s) => s.to_string(),
|
||||
Err(e) => {
|
||||
let msg = format!("hash_hex UTF-8 error for subject={subject}: {e}");
|
||||
warn!("{}", msg);
|
||||
if first_error.is_none() {
|
||||
first_error = Some(msg);
|
||||
}
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
},
|
||||
None => {
|
||||
let msg = format!(
|
||||
"tag strip_prefix H: failed for subject={subject}: tag={:?}",
|
||||
String::from_utf8_lossy(tag)
|
||||
);
|
||||
warn!("{}", msg);
|
||||
if first_error.is_none() {
|
||||
first_error = Some(msg);
|
||||
}
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Deserialize the assertion to get predicate and source_hash.
|
||||
// Uses compat deserialization to handle legacy data (pre-narrative schema).
|
||||
let assertion: stemedb_core::types::Assertion =
|
||||
match stemedb_core::serde::deserialize_assertion_compat(value) {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
let msg = format!(
|
||||
"deserialize failed for subject={subject} hash={hash_hex}: {e} (value_len={})",
|
||||
value.len()
|
||||
);
|
||||
warn!("{}", msg);
|
||||
if first_error.is_none() {
|
||||
first_error = Some(msg);
|
||||
}
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Decode assertion hash from hex
|
||||
let hash_bytes: [u8; 32] = match hex::decode(&hash_hex) {
|
||||
Ok(bytes) if bytes.len() == 32 => {
|
||||
let mut arr = [0u8; 32];
|
||||
arr.copy_from_slice(&bytes);
|
||||
arr
|
||||
}
|
||||
Ok(bytes) => {
|
||||
let msg = format!(
|
||||
"hash decode wrong length for subject={subject} hash={hash_hex}: got {} bytes",
|
||||
bytes.len()
|
||||
);
|
||||
warn!("{}", msg);
|
||||
if first_error.is_none() {
|
||||
first_error = Some(msg);
|
||||
}
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
Err(e) => {
|
||||
let msg = format!(
|
||||
"hex decode failed for subject={subject} hash={hash_hex}: {e}"
|
||||
);
|
||||
warn!("{}", msg);
|
||||
if first_error.is_none() {
|
||||
first_error = Some(msg);
|
||||
}
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Rebuild S: and SP: indexes (includes SUBJECTS: discovery index)
|
||||
if let Err(e) =
|
||||
index_store.add_to_indexes(&subject, &assertion.predicate, &hash_bytes).await
|
||||
{
|
||||
warn!(%subject, %hash_hex, error = %e, "Failed to add to indexes");
|
||||
skipped_count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rebuild HASH_SUBJECT: reverse index
|
||||
let hs_key = key_codec::hash_subject_key(&hash_hex);
|
||||
if let Err(e) = state.store.put(&hs_key, subject.as_bytes()).await {
|
||||
warn!(%subject, %hash_hex, error = %e, "Failed to write hash_subject index");
|
||||
}
|
||||
|
||||
// Rebuild SRC: source index
|
||||
if let Err(e) =
|
||||
index_store.add_to_source_index(&assertion.source_hash, &hash_bytes).await
|
||||
{
|
||||
warn!(%subject, %hash_hex, error = %e, "Failed to add to source index");
|
||||
}
|
||||
|
||||
// Rebuild FEED index: use assertion.timestamp as best-available proxy
|
||||
// for ingestion time. Fall back to current time for timestamp:0 assertions.
|
||||
let feed_ts = if assertion.timestamp > 0 { assertion.timestamp } else { now };
|
||||
let feed_idx_key = key_codec::feed_key(feed_ts, &hash_hex);
|
||||
if let Err(e) = state.store.put(&feed_idx_key, subject.as_bytes()).await {
|
||||
warn!(%subject, %hash_hex, error = %e, "Failed to write feed index");
|
||||
}
|
||||
|
||||
rebuilt_count += 1;
|
||||
}
|
||||
|
||||
// Correct the assertion count: total = rebuilt + skipped (both are real assertions).
|
||||
// The count key stores a u64 in little-endian format.
|
||||
let total_assertions = rebuilt_count + skipped_count;
|
||||
let count_key = key_codec::assertion_count_key();
|
||||
let count_bytes = total_assertions.to_le_bytes();
|
||||
state.store.put(&count_key, &count_bytes).await?;
|
||||
|
||||
let elapsed_ms = start.elapsed().as_millis() as u64;
|
||||
|
||||
info!(
|
||||
rebuilt_count,
|
||||
skipped_count,
|
||||
elapsed_ms,
|
||||
"Index rebuild complete"
|
||||
);
|
||||
|
||||
metrics::histogram!("stemedb_http_request_duration_seconds",
|
||||
"method" => "POST",
|
||||
"path" => "/v1/admin/rebuild-indexes",
|
||||
"status" => "200"
|
||||
)
|
||||
.record(start.elapsed().as_secs_f64());
|
||||
|
||||
Ok(Json(RebuildIndexesResponse {
|
||||
rebuilt_count,
|
||||
skipped_count,
|
||||
elapsed_ms,
|
||||
status: format!(
|
||||
"Rebuilt indexes for {} assertions ({} skipped) in {}ms",
|
||||
rebuilt_count, skipped_count, elapsed_ms
|
||||
),
|
||||
first_error,
|
||||
}))
|
||||
}
|
||||
|
||||
@ -143,6 +143,7 @@ pub fn observation_dto_to_assertion(
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures,
|
||||
confidence: dto.confidence,
|
||||
|
||||
@ -10,6 +10,7 @@ use crate::{
|
||||
state::AppState,
|
||||
};
|
||||
|
||||
use stemedb_core::limits::MAX_NARRATIVE_LEN;
|
||||
use stemedb_core::types::{
|
||||
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
|
||||
};
|
||||
@ -44,6 +45,18 @@ pub async fn create_assertion(
|
||||
// Convert DTO to internal Assertion type
|
||||
let assertion = dto_to_assertion(req)?;
|
||||
|
||||
// Verify Ed25519 signatures BEFORE writing to WAL.
|
||||
// This prevents poison records that would permanently block the IngestWorker.
|
||||
stemedb_core::signing::verify_assertion_signatures(&assertion).map_err(|e| {
|
||||
metrics::counter!("stemedb_assertions_rejected_total", "reason" => "invalid_signature")
|
||||
.increment(1);
|
||||
ApiError::InvalidRequest(format!("Signature verification failed: {}", e))
|
||||
})?;
|
||||
|
||||
// Validate subject does not contain null byte separator (mirrors IngestWorker check)
|
||||
stemedb_storage::key_codec::validate_subject(&assertion.subject)
|
||||
.map_err(|e| ApiError::InvalidRequest(format!("Invalid subject: {}", e)))?;
|
||||
|
||||
// Serialize to WAL format (includes record type header)
|
||||
let payload = serialize_assertion(&assertion)
|
||||
.map_err(|e| ApiError::Serialization(format!("Failed to serialize assertion: {}", e)))?;
|
||||
@ -93,14 +106,33 @@ fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
|
||||
return Err(ApiError::InvalidRequest("At least one signature is required".to_string()));
|
||||
}
|
||||
|
||||
// Validate narrative length
|
||||
if let Some(ref narrative) = req.narrative {
|
||||
if narrative.len() > MAX_NARRATIVE_LEN {
|
||||
return Err(ApiError::InvalidRequest(format!(
|
||||
"narrative exceeds {} bytes (got {})",
|
||||
MAX_NARRATIVE_LEN,
|
||||
narrative.len()
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Use provided timestamp or generate a new one
|
||||
// IMPORTANT: For v2 signatures, the timestamp must match what was signed
|
||||
let timestamp = req.timestamp.unwrap_or_else(|| {
|
||||
std::time::SystemTime::now()
|
||||
let timestamp = match req.timestamp {
|
||||
Some(0) => {
|
||||
return Err(ApiError::InvalidRequest(
|
||||
"timestamp must be a valid Unix epoch (> 0). \
|
||||
Omit the field to use server time."
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
Some(t) => t,
|
||||
None => std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_secs())
|
||||
.unwrap_or(0)
|
||||
});
|
||||
.unwrap_or(0),
|
||||
};
|
||||
|
||||
// Use provided HLC timestamp or default
|
||||
// IMPORTANT: For v2 signatures, the HLC timestamp must match what was signed
|
||||
@ -122,6 +154,7 @@ fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
|
||||
visual_hash,
|
||||
epoch,
|
||||
source_metadata: req.source_metadata.map(|s| s.into_bytes()),
|
||||
narrative: req.narrative,
|
||||
lifecycle: req.lifecycle.map(Into::into).unwrap_or(LifecycleStage::Proposed),
|
||||
signatures,
|
||||
confidence: req.confidence,
|
||||
|
||||
@ -1,4 +1,7 @@
|
||||
//! Handler for the `/v1/feed` endpoint (newest-first assertion browsing).
|
||||
//!
|
||||
//! Uses a dedicated FEED index (`\x00FEED:{inverted_ts}:{hash_hex}`) for
|
||||
//! O(page_size) reads instead of loading all assertions into memory.
|
||||
|
||||
use axum::{extract::State, Json};
|
||||
use tracing::{instrument, warn};
|
||||
@ -10,15 +13,16 @@ use crate::{
|
||||
state::AppState,
|
||||
};
|
||||
|
||||
use stemedb_query::Query;
|
||||
use stemedb_core::serde::deserialize_assertion_compat;
|
||||
use stemedb_storage::{KVStore, key_codec};
|
||||
|
||||
use super::query::assertion_to_dto_with_warning;
|
||||
|
||||
/// Browse all assertions in newest-first order with pagination.
|
||||
///
|
||||
/// Returns assertions sorted by timestamp descending, useful for
|
||||
/// "what was just written?" dashboards and dev workflows. No lens
|
||||
/// resolution is applied — this is a raw chronological feed.
|
||||
/// Returns assertions ordered by ingestion time descending (when the system
|
||||
/// received each assertion), useful for "what was just written?" dashboards
|
||||
/// and dev workflows. No lens resolution is applied — this is a raw feed.
|
||||
///
|
||||
/// # Pagination
|
||||
///
|
||||
@ -45,36 +49,57 @@ pub async fn feed(
|
||||
metrics::counter!("stemedb_queries_total", "endpoint" => "feed").increment(1);
|
||||
let query_start = std::time::Instant::now();
|
||||
|
||||
// Fetch all assertions (no subject filter)
|
||||
let query = Query::builder().limit(usize::MAX).build();
|
||||
let query_engine = state.query_engine();
|
||||
let result = query_engine.execute(&query).await?;
|
||||
// Scan the FEED index — keys are in newest-first order (inverted timestamp).
|
||||
let feed_prefix = key_codec::feed_scan_prefix();
|
||||
let entries = state.store.scan_prefix(&feed_prefix).await?;
|
||||
|
||||
let mut assertions = result.assertions;
|
||||
|
||||
if assertions.len() > 10_000 {
|
||||
warn!(
|
||||
count = assertions.len(),
|
||||
"Feed scanning large assertion set; consider adding index-backed pagination"
|
||||
);
|
||||
}
|
||||
|
||||
// Sort by timestamp descending (newest first)
|
||||
assertions.sort_unstable_by(|a, b| b.timestamp.cmp(&a.timestamp));
|
||||
|
||||
let total_count = assertions.len();
|
||||
let total_count = entries.len();
|
||||
let limit = params.clamped_limit();
|
||||
let offset = params.offset;
|
||||
let has_more = offset + limit < total_count;
|
||||
|
||||
// Apply offset + limit pagination
|
||||
let page: Vec<_> = assertions.into_iter().skip(offset).take(limit).collect();
|
||||
// Paginate the index entries (cheap — no assertion data loaded yet)
|
||||
let page_entries: Vec<_> = entries.into_iter().skip(offset).take(limit).collect();
|
||||
|
||||
// Convert to DTOs (no source enrichment for speed)
|
||||
let assertion_responses = page
|
||||
.into_iter()
|
||||
.map(|a| assertion_to_dto_with_warning(a, None))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
// Fetch actual assertion data only for the current page
|
||||
let mut assertion_responses = Vec::with_capacity(page_entries.len());
|
||||
for (key, value) in &page_entries {
|
||||
let hash_hex = match extract_hash_hex_from_feed_key(key) {
|
||||
Some(h) => h,
|
||||
None => {
|
||||
warn!(key_len = key.len(), "Malformed FEED index key, skipping");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let subject = match std::str::from_utf8(value) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
warn!(error = %e, "Invalid UTF-8 in FEED index value, skipping");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let assertion_data_key = key_codec::assertion_key(subject, hash_hex);
|
||||
let data = match state.store.get(&assertion_data_key).await? {
|
||||
Some(d) => d,
|
||||
None => {
|
||||
warn!(%hash_hex, %subject, "FEED index references missing assertion data, skipping");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match deserialize_assertion_compat(&data) {
|
||||
Ok(a) => match assertion_to_dto_with_warning(a, None) {
|
||||
Ok(dto) => assertion_responses.push(dto),
|
||||
Err(e) => {
|
||||
warn!(%hash_hex, error = %e, "Failed to convert assertion to DTO, skipping");
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!(%hash_hex, error = %e, "Failed to deserialize assertion, skipping");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metrics::histogram!("stemedb_query_latency_seconds", "endpoint" => "feed")
|
||||
.record(query_start.elapsed().as_secs_f64());
|
||||
@ -88,3 +113,16 @@ pub async fn feed(
|
||||
changes_since: None,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Pull the trailing `hash_hex` segment out of a FEED index key.
///
/// Expected key layout: `\x00FEED:{16 hex chars (inverted ts)}:{64 hex chars (hash)}`.
/// Everything after the fixed-width 23-byte head is returned as-is; the head
/// itself is not validated.
///
/// Returns `None` when the key has no bytes past the head or when the
/// remainder is not valid UTF-8.
fn extract_hash_hex_from_feed_key(key: &[u8]) -> Option<&str> {
    const PREFIX_LEN: usize = 6; // `\x00FEED:`
    const INVERTED_TS_LEN: usize = 16; // hex-encoded inverted timestamp
    const SEPARATOR_LEN: usize = 1; // `:`
    const HASH_OFFSET: usize = PREFIX_LEN + INVERTED_TS_LEN + SEPARATOR_LEN; // 23

    match key.get(HASH_OFFSET..) {
        Some(tail) if !tail.is_empty() => std::str::from_utf8(tail).ok(),
        _ => None,
    }
}
|
||||
|
||||
@ -178,6 +178,7 @@ fn assertion_to_dto(assertion: stemedb_core::types::Assertion) -> Result<Asserti
|
||||
timestamp: assertion.timestamp,
|
||||
vector: assertion.vector,
|
||||
source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()),
|
||||
narrative: assertion.narrative,
|
||||
source_warning: None, // LayeredConsensus doesn't do source status enrichment
|
||||
})
|
||||
}
|
||||
|
||||
@ -28,8 +28,8 @@ pub mod circuit_breaker;
|
||||
pub mod concepts;
|
||||
pub mod constraints;
|
||||
pub mod epoch;
|
||||
pub mod feed;
|
||||
pub mod escalation;
|
||||
pub mod feed;
|
||||
pub mod gold_standard;
|
||||
pub mod health;
|
||||
pub mod layered;
|
||||
@ -37,15 +37,17 @@ pub mod meter;
|
||||
pub mod metrics;
|
||||
pub mod quarantine;
|
||||
pub mod query;
|
||||
pub mod rejected;
|
||||
pub mod skeptic;
|
||||
pub mod source;
|
||||
pub mod source_registry;
|
||||
pub mod stemedb_claims;
|
||||
pub mod subjects;
|
||||
pub mod supersede;
|
||||
pub mod trace;
|
||||
pub mod vote;
|
||||
|
||||
pub use admin::decay_trust_ranks;
|
||||
pub use admin::{decay_trust_ranks, rebuild_indexes};
|
||||
pub use admission::get_admission_status;
|
||||
pub use api_keys::{create_api_key, list_api_keys, revoke_api_key, rotate_api_key, update_api_key};
|
||||
pub use assert::create_assertion;
|
||||
@ -53,8 +55,8 @@ pub use audit::{get_audit, list_audits};
|
||||
pub use circuit_breaker::{get_circuit_status, list_tripped_circuits, reset_circuit};
|
||||
pub use constraints::constraints_query;
|
||||
pub use epoch::create_epoch;
|
||||
pub use feed::feed;
|
||||
pub use escalation::{list_escalations, resolve_escalation};
|
||||
pub use feed::feed;
|
||||
pub use gold_standard::{
|
||||
create_gold_standard, list_gold_standards, remove_gold_standard, verify_agent,
|
||||
};
|
||||
@ -63,6 +65,7 @@ pub use layered::layered_query;
|
||||
pub use meter::{get_quota_status, set_quota_limit};
|
||||
pub use quarantine::{approve_quarantine, get_quarantine, list_quarantine, reject_quarantine};
|
||||
pub use query::query_assertions;
|
||||
pub use rejected::list_rejected;
|
||||
pub use skeptic::skeptic_query;
|
||||
pub use source::{get_provenance, store_source};
|
||||
pub use source_registry::{
|
||||
@ -89,3 +92,4 @@ pub use stemedb_claims::{
|
||||
create_claim as create_stemedb_claim, delete_claim as delete_stemedb_claim,
|
||||
get_claim as get_stemedb_claim, list_claims as list_stemedb_claims,
|
||||
};
|
||||
pub use subjects::{list_predicates, list_subjects};
|
||||
|
||||
@ -490,6 +490,7 @@ pub(crate) fn assertion_to_dto_with_warning(
|
||||
timestamp: assertion.timestamp,
|
||||
vector: assertion.vector,
|
||||
source_metadata: assertion.source_metadata.and_then(|bytes| String::from_utf8(bytes).ok()),
|
||||
narrative: assertion.narrative,
|
||||
source_warning,
|
||||
})
|
||||
}
|
||||
|
||||
89
crates/stemedb-api/src/handlers/rejected.rs
Normal file
89
crates/stemedb-api/src/handlers/rejected.rs
Normal file
@ -0,0 +1,89 @@
|
||||
//! Admin endpoint for listing WAL records permanently rejected by the IngestWorker.
|
||||
//!
|
||||
//! These records passed API-level validation but were skipped during WAL replay
|
||||
//! due to permanent failures (invalid signatures, corrupt serialization, etc.).
|
||||
//! With the API-side signature verification fix, new rejected records should be rare.
|
||||
|
||||
use axum::{extract::State, Json};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use stemedb_storage::{key_codec, KVStore};
|
||||
use tracing::instrument;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
use crate::{dto::ErrorResponse, state::AppState};
|
||||
|
||||
/// Query parameters for listing rejected records (`GET /v1/admin/rejected`).
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct RejectedParams {
|
||||
/// Maximum number of records to return (default: 100). The handler applies no upper bound.
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
/// A WAL record that was permanently skipped by the IngestWorker. The list handler decodes it from the JSON value stored under each rejected-record key.
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct RejectedRecordDto {
|
||||
/// WAL offset where the record was found.
|
||||
pub offset: u64,
|
||||
/// The record type (Assertion, Vote, Epoch), carried as a free-form string.
|
||||
pub record_type: String,
|
||||
/// Why the record was rejected.
|
||||
pub reason: String,
|
||||
/// When the record was skipped (Unix timestamp).
|
||||
pub timestamp: u64,
|
||||
}
|
||||
|
||||
/// Response listing rejected WAL records.
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
pub struct RejectedRecordsResponse {
|
||||
/// List of rejected records.
|
||||
pub rejected: Vec<RejectedRecordDto>,
|
||||
/// Number of records in `rejected` (after `limit` is applied and unparseable entries are dropped); not the total stored.
|
||||
pub count: usize,
|
||||
}
|
||||
|
||||
/// GET /v1/admin/rejected
|
||||
///
|
||||
/// List WAL records that were permanently rejected by the IngestWorker.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/v1/admin/rejected",
|
||||
params(
|
||||
("limit" = Option<usize>, Query, description = "Maximum records to return (default: 100)")
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "Rejected records listed", body = RejectedRecordsResponse),
|
||||
(status = 500, description = "Internal server error", body = ErrorResponse)
|
||||
),
|
||||
tag = "admin"
|
||||
)]
|
||||
#[instrument(skip(state))]
|
||||
pub async fn list_rejected(
|
||||
State(state): State<AppState>,
|
||||
axum::extract::Query(params): axum::extract::Query<RejectedParams>,
|
||||
) -> std::result::Result<Json<RejectedRecordsResponse>, (axum::http::StatusCode, Json<ErrorResponse>)>
|
||||
{
|
||||
let limit = params.limit.unwrap_or(100);
|
||||
let prefix = key_codec::rejected_records_scan_prefix();
|
||||
|
||||
let entries = state.store.scan_prefix(&prefix).await.map_err(|e| {
|
||||
tracing::error!(error = %e, "Failed to scan rejected records");
|
||||
(
|
||||
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(ErrorResponse {
|
||||
error: "Failed to retrieve rejected records".to_string(),
|
||||
code: "REJECTED_SCAN_ERROR".to_string(),
|
||||
}),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut rejected = Vec::new();
|
||||
for (_key, value) in entries.into_iter().take(limit) {
|
||||
let json_str = String::from_utf8_lossy(&value);
|
||||
if let Ok(dto) = serde_json::from_str::<RejectedRecordDto>(&json_str) {
|
||||
rejected.push(dto);
|
||||
}
|
||||
}
|
||||
|
||||
let count = rejected.len();
|
||||
Ok(Json(RejectedRecordsResponse { rejected, count }))
|
||||
}
|
||||
@ -7,6 +7,7 @@ use axum::{
|
||||
response::{IntoResponse, Response},
|
||||
Json,
|
||||
};
|
||||
use stemedb_core::limits::MAX_SOURCE_CONTENT_LEN;
|
||||
use stemedb_core::types::{SourceRecord, SourceStatus};
|
||||
use stemedb_storage::{GenericIndexStore, GenericSourceRegistry, IndexStore, SourceRegistry};
|
||||
use tracing::instrument;
|
||||
@ -56,12 +57,24 @@ pub async fn register_source(
|
||||
return Err(ApiError::InvalidRequest("Label cannot be empty".to_string()));
|
||||
}
|
||||
|
||||
// Validate content size
|
||||
if let Some(ref content) = req.content {
|
||||
if content.len() > MAX_SOURCE_CONTENT_LEN {
|
||||
return Err(ApiError::InvalidRequest(format!(
|
||||
"Content too large: {} bytes (max {})",
|
||||
content.len(),
|
||||
MAX_SOURCE_CONTENT_LEN
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Get timestamp
|
||||
let timestamp = current_timestamp();
|
||||
|
||||
// Create the record
|
||||
let mut record = SourceRecord::new(hash, req.label.clone(), req.url, req.tier, timestamp);
|
||||
record.notes = req.notes;
|
||||
record.content = req.content;
|
||||
|
||||
// Register in the store
|
||||
let registry = GenericSourceRegistry::new(state.store.clone());
|
||||
@ -206,7 +219,7 @@ pub async fn list_sources(
|
||||
|
||||
let registry = GenericSourceRegistry::new(state.store.clone());
|
||||
|
||||
let sources: Vec<SourceRecordDto> = if let Some(query) = ¶ms.query {
|
||||
let mut sources: Vec<SourceRecordDto> = if let Some(query) = ¶ms.query {
|
||||
// Search by label
|
||||
registry.search(query, limit).await?.into_iter().map(Into::into).collect()
|
||||
} else if let Some(tier) = params.tier {
|
||||
@ -227,6 +240,11 @@ pub async fn list_sources(
|
||||
all.into_iter().map(Into::into).collect()
|
||||
};
|
||||
|
||||
// Strip content from list responses to avoid returning megabytes
|
||||
for dto in &mut sources {
|
||||
dto.content = None;
|
||||
}
|
||||
|
||||
let count = sources.len();
|
||||
Ok(Json(ListSourcesResponse { sources, count }))
|
||||
}
|
||||
@ -629,7 +647,7 @@ async fn build_impact_response(
|
||||
if let Ok(Some(data)) = store_get_with_timeout(&*state.store, &assertion_key).await
|
||||
{
|
||||
if let Ok(assertion) =
|
||||
stemedb_core::serde::deserialize::<stemedb_core::types::Assertion>(&data)
|
||||
stemedb_core::serde::deserialize_assertion_compat(&data)
|
||||
{
|
||||
for sig in &assertion.signatures {
|
||||
let agent_hex = hex::encode(sig.agent_id);
|
||||
|
||||
@ -3,12 +3,16 @@
|
||||
//! These endpoints provide claim storage DIRECTLY in StemeDB (not `.aphoria/claims.toml`).
|
||||
//! Used for remote/hosted mode where claims are stored in the knowledge graph.
|
||||
|
||||
use axum::{extract::{Path, State}, http::StatusCode, Json};
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
http::StatusCode,
|
||||
Json,
|
||||
};
|
||||
use ed25519_dalek::{Signer, SigningKey, VerifyingKey};
|
||||
use tracing::info;
|
||||
use ed25519_dalek::{SigningKey, Signer, VerifyingKey};
|
||||
|
||||
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
|
||||
use stemedb_core::signing::compute_content_hash_v2;
|
||||
use stemedb_core::types::{Assertion, LifecycleStage, ObjectValue, SignatureEntry};
|
||||
use stemedb_ingest::worker::serialize_assertion;
|
||||
use stemedb_storage::{key_codec, KVStore};
|
||||
|
||||
@ -86,10 +90,7 @@ pub async fn create_claim(
|
||||
|
||||
state.commit_buffer.append(payload).await?;
|
||||
|
||||
Ok((
|
||||
StatusCode::CREATED,
|
||||
Json(CreateClaimResponse { id: req.claim.id.clone(), stored: true }),
|
||||
))
|
||||
Ok((StatusCode::CREATED, Json(CreateClaimResponse { id: req.claim.id.clone(), stored: true })))
|
||||
}
|
||||
|
||||
/// List all claims, optionally filtered.
|
||||
@ -129,7 +130,7 @@ pub async fn list_claims(
|
||||
let hash_hex = hex::encode(&hash_bytes);
|
||||
let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
|
||||
if let Some(data) = state.store.get(&assertion_key).await? {
|
||||
if let Ok(assertion) = stemedb_core::serde::deserialize::<Assertion>(&data) {
|
||||
if let Ok(assertion) = stemedb_core::serde::deserialize_assertion_compat(&data) {
|
||||
if let Ok(dto) = assertion_to_dto(&assertion) {
|
||||
claims.push(dto);
|
||||
}
|
||||
@ -189,10 +190,11 @@ pub async fn get_claim(
|
||||
let hash_hex = hex::encode(hash_bytes);
|
||||
let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
|
||||
|
||||
let data = state.store.get(&assertion_key).await?
|
||||
.ok_or_else(|| ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate)))?;
|
||||
let data = state.store.get(&assertion_key).await?.ok_or_else(|| {
|
||||
ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate))
|
||||
})?;
|
||||
|
||||
let assertion = stemedb_core::serde::deserialize::<Assertion>(&data)
|
||||
let assertion = stemedb_core::serde::deserialize_assertion_compat(&data)
|
||||
.map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?;
|
||||
|
||||
assertion_to_dto(&assertion)
|
||||
@ -237,10 +239,11 @@ pub async fn delete_claim(
|
||||
let hash_hex = hex::encode(hash_bytes);
|
||||
let assertion_key = key_codec::assertion_key(&subject, &hash_hex);
|
||||
|
||||
let data = state.store.get(&assertion_key).await?
|
||||
.ok_or_else(|| ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate)))?;
|
||||
let data = state.store.get(&assertion_key).await?.ok_or_else(|| {
|
||||
ApiError::NotFound(format!("Claim not found: {}/{}", concept_path, predicate))
|
||||
})?;
|
||||
|
||||
let mut assertion = stemedb_core::serde::deserialize::<Assertion>(&data)
|
||||
let mut assertion = stemedb_core::serde::deserialize_assertion_compat(&data)
|
||||
.map_err(|e| ApiError::Serialization(format!("Failed to deserialize assertion: {e}")))?;
|
||||
|
||||
// Mark as deprecated (append-only: create new version)
|
||||
@ -328,12 +331,13 @@ fn dto_to_assertion(dto: &AuthoredClaimDto) -> Result<Assertion> {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: serde_json::to_vec(&metadata).ok(),
|
||||
narrative: None,
|
||||
lifecycle,
|
||||
signatures: vec![], // Signatures added by ingestion pipeline
|
||||
confidence: 1.0, // Authored claims have full confidence
|
||||
timestamp: std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.unwrap_or_default()
|
||||
.as_secs(),
|
||||
hlc_timestamp: Default::default(),
|
||||
vector: None,
|
||||
@ -360,10 +364,14 @@ fn assertion_to_dto(assertion: &Assertion) -> Result<AuthoredClaimDto> {
|
||||
let concept_path = assertion
|
||||
.subject
|
||||
.strip_prefix("claim://")
|
||||
.ok_or_else(|| ApiError::Internal("Invalid subject format: missing claim:// prefix".to_string()))?
|
||||
.ok_or_else(|| {
|
||||
ApiError::Internal("Invalid subject format: missing claim:// prefix".to_string())
|
||||
})?
|
||||
.rsplit_once('/')
|
||||
.map(|(cp, _)| cp)
|
||||
.ok_or_else(|| ApiError::Internal("Invalid subject format: missing predicate separator".to_string()))?
|
||||
.ok_or_else(|| {
|
||||
ApiError::Internal("Invalid subject format: missing predicate separator".to_string())
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
// Convert object value
|
||||
@ -393,11 +401,7 @@ fn assertion_to_dto(assertion: &Assertion) -> Result<AuthoredClaimDto> {
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("equals")
|
||||
.to_string(),
|
||||
provenance: metadata
|
||||
.get("provenance")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
provenance: metadata.get("provenance").and_then(|v| v.as_str()).unwrap_or("").to_string(),
|
||||
invariant: metadata.get("invariant").and_then(|v| v.as_str()).unwrap_or("").to_string(),
|
||||
consequence: metadata.get("consequence").and_then(|v| v.as_str()).unwrap_or("").to_string(),
|
||||
authority_tier: source_class_to_tier_string(assertion.source_class),
|
||||
|
||||
97
crates/stemedb-api/src/handlers/subjects.rs
Normal file
97
crates/stemedb-api/src/handlers/subjects.rs
Normal file
@ -0,0 +1,97 @@
|
||||
//! Handlers for subject and predicate discovery endpoints.
|
||||
//!
|
||||
//! These endpoints scan existing Redb indexes to expose the subjects
|
||||
//! and predicates known to the system, enabling autocomplete/typeahead
|
||||
//! in the dashboard.
|
||||
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
Json,
|
||||
};
|
||||
use tracing::instrument;
|
||||
|
||||
use crate::{
|
||||
dto::subjects::{ListPredicatesResponse, ListSubjectsParams, ListSubjectsResponse},
|
||||
error::Result,
|
||||
extractors::QsQuery,
|
||||
state::AppState,
|
||||
};
|
||||
|
||||
use stemedb_storage::{key_codec, KVStore};
|
||||
|
||||
/// List all known subjects, with optional prefix filtering.
|
||||
///
|
||||
/// Scans the `\x00SUBJECTS:` index in Redb. Supports prefix filtering
|
||||
/// via the `q` parameter for typeahead/autocomplete use cases.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/v1/subjects",
|
||||
params(
|
||||
("q" = Option<String>, Query, description = "Prefix filter for subject names"),
|
||||
("limit" = Option<usize>, Query, description = "Max results (default 100, max 1000)")
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "List of subjects", body = ListSubjectsResponse),
|
||||
(status = 500, description = "Internal server error", body = crate::dto::ErrorResponse)
|
||||
),
|
||||
tag = "discovery"
|
||||
)]
|
||||
#[instrument(skip(state), fields(q = ?params.q, limit = ?params.limit))]
|
||||
pub async fn list_subjects(
|
||||
State(state): State<AppState>,
|
||||
QsQuery(params): QsQuery<ListSubjectsParams>,
|
||||
) -> Result<Json<ListSubjectsResponse>> {
|
||||
metrics::counter!("stemedb_queries_total", "endpoint" => "list_subjects").increment(1);
|
||||
|
||||
let prefix = if let Some(ref q) = params.q {
|
||||
key_codec::subjects_index_key(q)
|
||||
} else {
|
||||
key_codec::subjects_scan_prefix()
|
||||
};
|
||||
|
||||
let entries = state.store.scan_prefix(&prefix).await?;
|
||||
let total_count = entries.len();
|
||||
let limit = params.limit.unwrap_or(100).min(1000);
|
||||
|
||||
let subjects: Vec<String> = entries
|
||||
.iter()
|
||||
.filter_map(|(k, _)| key_codec::extract_subject_from_subjects_key(k))
|
||||
.take(limit)
|
||||
.collect();
|
||||
|
||||
Ok(Json(ListSubjectsResponse { subjects, total_count }))
|
||||
}
|
||||
|
||||
/// List all predicates for a given subject.
|
||||
///
|
||||
/// Scans the `{subject}\x00SP:` index in Redb to find all predicates
|
||||
/// that have been asserted for this subject.
|
||||
#[utoipa::path(
|
||||
get,
|
||||
path = "/v1/subjects/{subject}/predicates",
|
||||
params(
|
||||
("subject" = String, Path, description = "The subject to list predicates for")
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "List of predicates for the subject", body = ListPredicatesResponse),
|
||||
(status = 500, description = "Internal server error", body = crate::dto::ErrorResponse)
|
||||
),
|
||||
tag = "discovery"
|
||||
)]
|
||||
#[instrument(skip(state), fields(%subject))]
|
||||
pub async fn list_predicates(
|
||||
State(state): State<AppState>,
|
||||
Path(subject): Path<String>,
|
||||
) -> Result<Json<ListPredicatesResponse>> {
|
||||
metrics::counter!("stemedb_queries_total", "endpoint" => "list_predicates").increment(1);
|
||||
|
||||
let prefix = key_codec::subject_predicate_scan_prefix(&subject);
|
||||
let entries = state.store.scan_prefix(&prefix).await?;
|
||||
|
||||
let predicates: Vec<String> = entries
|
||||
.iter()
|
||||
.filter_map(|(k, _)| key_codec::extract_sp_key(k).map(|(_, p)| p))
|
||||
.collect();
|
||||
|
||||
Ok(Json(ListPredicatesResponse { subject, predicates }))
|
||||
}
|
||||
@ -66,7 +66,7 @@ pub use state::AppState;
|
||||
|
||||
// Re-export the path items for OpenAPI
|
||||
use handlers::{
|
||||
admin::__path_decay_trust_ranks,
|
||||
admin::{__path_decay_trust_ranks, __path_rebuild_indexes},
|
||||
admission::__path_get_admission_status,
|
||||
api_keys::{
|
||||
__path_create_api_key, __path_list_api_keys, __path_revoke_api_key, __path_rotate_api_key,
|
||||
@ -83,8 +83,8 @@ use handlers::{
|
||||
},
|
||||
constraints::__path_constraints_query,
|
||||
epoch::__path_create_epoch,
|
||||
feed::__path_feed,
|
||||
escalation::{__path_list_escalations, __path_resolve_escalation},
|
||||
feed::__path_feed,
|
||||
gold_standard::{
|
||||
__path_create_gold_standard, __path_list_gold_standards, __path_remove_gold_standard,
|
||||
__path_verify_agent,
|
||||
@ -104,6 +104,7 @@ use handlers::{
|
||||
__path_list_sources, __path_quarantine_source, __path_register_source,
|
||||
__path_restore_source, __path_update_source_status,
|
||||
},
|
||||
subjects::{__path_list_predicates, __path_list_subjects},
|
||||
supersede::__path_supersede,
|
||||
trace::__path_trace,
|
||||
vote::__path_create_vote,
|
||||
@ -132,6 +133,7 @@ use handlers::{
|
||||
store_source,
|
||||
get_provenance,
|
||||
decay_trust_ranks,
|
||||
rebuild_indexes,
|
||||
list_escalations,
|
||||
resolve_escalation,
|
||||
create_gold_standard,
|
||||
@ -168,6 +170,9 @@ use handlers::{
|
||||
revoke_api_key,
|
||||
rotate_api_key,
|
||||
update_api_key,
|
||||
// Discovery (subject/predicate autocomplete)
|
||||
list_subjects,
|
||||
list_predicates,
|
||||
),
|
||||
components(
|
||||
schemas(
|
||||
@ -215,6 +220,7 @@ use handlers::{
|
||||
dto::ProvenanceResponse,
|
||||
dto::DecayTrustRanksRequest,
|
||||
dto::DecayTrustRanksResponse,
|
||||
dto::RebuildIndexesResponse,
|
||||
dto::EscalationEventDto,
|
||||
dto::EscalationLevelDto,
|
||||
dto::EscalationListResponse,
|
||||
@ -284,6 +290,9 @@ use handlers::{
|
||||
dto::RotateApiKeyResponse,
|
||||
dto::UpdateApiKeyRequest,
|
||||
dto::UpdateApiKeyResponse,
|
||||
// Discovery (subject/predicate autocomplete)
|
||||
dto::ListSubjectsResponse,
|
||||
dto::ListPredicatesResponse,
|
||||
)
|
||||
),
|
||||
tags(
|
||||
@ -302,6 +311,7 @@ use handlers::{
|
||||
(name = "quarantine", description = "Content defense quarantine management"),
|
||||
(name = "circuit_breaker", description = "Per-agent circuit breaker management"),
|
||||
(name = "source-registry", description = "Source metadata registry and impact analysis"),
|
||||
(name = "discovery", description = "Subject and predicate discovery for autocomplete"),
|
||||
),
|
||||
info(
|
||||
title = "Episteme (StemeDB) API",
|
||||
|
||||
@ -276,11 +276,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
info!("API server listening on {} (plaintext)", config.bind_addr);
|
||||
info!("Swagger UI available at http://{}/swagger-ui", config.bind_addr);
|
||||
|
||||
axum::serve(
|
||||
listener,
|
||||
app.into_make_service_with_connect_info::<SocketAddr>(),
|
||||
)
|
||||
.await?;
|
||||
axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>()).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@ -410,6 +410,7 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
|
||||
.route("/v1/claims", post(handlers::create_stemedb_claim))
|
||||
// Admin write endpoints
|
||||
.route("/v1/admin/decay-trust-ranks", post(handlers::decay_trust_ranks))
|
||||
.route("/v1/admin/rebuild-indexes", post(handlers::rebuild_indexes))
|
||||
.route("/v1/admin/escalations/:id/resolve", post(handlers::resolve_escalation))
|
||||
.route("/v1/admin/gold-standards", post(handlers::create_gold_standard))
|
||||
.route(
|
||||
@ -449,7 +450,10 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
|
||||
// Claims endpoints (StemeDB-backed)
|
||||
.route("/v1/claims", get(handlers::list_stemedb_claims))
|
||||
.route("/v1/claims/:concept_path/:predicate", get(handlers::get_stemedb_claim))
|
||||
.route("/v1/claims/:concept_path/:predicate", axum::routing::delete(handlers::delete_stemedb_claim))
|
||||
.route(
|
||||
"/v1/claims/:concept_path/:predicate",
|
||||
axum::routing::delete(handlers::delete_stemedb_claim),
|
||||
)
|
||||
.route("/v1/admin/escalations", get(handlers::list_escalations))
|
||||
.route("/v1/admin/gold-standards", get(handlers::list_gold_standards))
|
||||
.route("/v1/concepts/resolve", get(handlers::resolve_alias))
|
||||
@ -459,6 +463,7 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
|
||||
.route("/v1/admission/status", get(handlers::get_admission_status))
|
||||
.route("/v1/admin/quarantine", get(handlers::list_quarantine))
|
||||
.route("/v1/admin/quarantine/:hash", get(handlers::get_quarantine))
|
||||
.route("/v1/admin/rejected", get(handlers::list_rejected))
|
||||
.route("/v1/admin/circuit-breaker/:agent_id", get(handlers::get_circuit_status))
|
||||
.route("/v1/admin/circuit-breakers/tripped", get(handlers::list_tripped_circuits))
|
||||
.route("/v1/admin/api-keys", get(handlers::list_api_keys))
|
||||
@ -466,6 +471,9 @@ fn build_api_routes(config: &SecurityConfig) -> Router<AppState> {
|
||||
.route("/v1/sources/:hash", get(handlers::get_source))
|
||||
.route("/v1/sources/:hash/impact", get(handlers::get_source_impact))
|
||||
.route("/v1/sources/:hash/impact/export", get(handlers::export_source_impact))
|
||||
// Discovery endpoints (subject/predicate autocomplete)
|
||||
.route("/v1/subjects", get(handlers::list_subjects))
|
||||
.route("/v1/subjects/:subject/predicates", get(handlers::list_predicates))
|
||||
.layer(RequestBodyLimitLayer::new(config.read_body_limit)); // P5.1: Configurable limit
|
||||
|
||||
// Add Aphoria endpoints when feature is enabled
|
||||
|
||||
@ -61,12 +61,9 @@ async fn test_health_check_over_tcp() {
|
||||
|
||||
// Serve with ConnectInfo injection (the fix for the 500 bug)
|
||||
tokio::spawn(async move {
|
||||
axum::serve(
|
||||
listener,
|
||||
app.into_make_service_with_connect_info::<SocketAddr>(),
|
||||
)
|
||||
.await
|
||||
.expect("server");
|
||||
axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>())
|
||||
.await
|
||||
.expect("server");
|
||||
});
|
||||
|
||||
// Give the server a moment to start
|
||||
@ -74,10 +71,7 @@ async fn test_health_check_over_tcp() {
|
||||
|
||||
// Make a raw HTTP/1.1 request over TCP
|
||||
let mut stream = tokio::net::TcpStream::connect(addr).await.expect("connect");
|
||||
let request = format!(
|
||||
"GET /v1/health HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n",
|
||||
addr
|
||||
);
|
||||
let request = format!("GET /v1/health HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n", addr);
|
||||
stream.write_all(request.as_bytes()).await.expect("write");
|
||||
|
||||
let mut response = String::new();
|
||||
@ -95,3 +89,111 @@ async fn test_health_check_over_tcp() {
|
||||
let json: serde_json::Value = serde_json::from_str(body).expect("json parse");
|
||||
assert_eq!(json["status"], "healthy");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Signature Verification Tests (pre-WAL validation)
|
||||
// ============================================================================
|
||||
|
||||
/// Test: POST /v1/assert with invalid signatures returns 400 (not 201).
|
||||
///
|
||||
/// Regression test for the "assert returns 201 but data is silently dropped" bug.
|
||||
/// Previously, the API accepted structurally valid but cryptographically invalid
|
||||
/// signatures, wrote them to the WAL, and returned 201. The IngestWorker would
|
||||
/// then silently reject them, permanently blocking the ingestion pipeline.
|
||||
#[tokio::test]
|
||||
async fn test_assert_invalid_signature_returns_400() {
|
||||
use serde_json::json;
|
||||
|
||||
let env = common::create_test_env().await;
|
||||
let app = create_router(env.state);
|
||||
|
||||
// Construct assertion with structurally valid but cryptographically invalid signature.
|
||||
// agent_id is a SHA-256 hash (not a valid Ed25519 public key).
|
||||
// signature is random 64 bytes.
|
||||
let body = json!({
|
||||
"subject": "test/bug_regression",
|
||||
"predicate": "has_value",
|
||||
"object": {"type": "Text", "value": "hello"},
|
||||
"confidence": 0.9,
|
||||
"source_hash": "0".repeat(64),
|
||||
"signatures": [{
|
||||
"agent_id": "a".repeat(64),
|
||||
"signature": "b".repeat(128),
|
||||
"timestamp": 1700000000
|
||||
}],
|
||||
"timestamp": 1700000000
|
||||
});
|
||||
|
||||
let request = Request::builder()
|
||||
.uri("/v1/assert")
|
||||
.method("POST")
|
||||
.header("Content-Type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&body).expect("json")))
|
||||
.expect("Request");
|
||||
|
||||
let response = app.oneshot(request).await.expect("Request");
|
||||
assert_eq!(
|
||||
response.status(),
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Invalid signature should return 400, not 201"
|
||||
);
|
||||
|
||||
let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.expect("Body");
|
||||
let json: serde_json::Value = serde_json::from_slice(&body).expect("JSON");
|
||||
|
||||
// Verify error message mentions signature
|
||||
let error_msg = json["error"].as_str().unwrap_or("");
|
||||
assert!(
|
||||
error_msg.contains("Signature") || error_msg.contains("signature"),
|
||||
"Error should mention signature failure, got: {}",
|
||||
error_msg
|
||||
);
|
||||
}
|
||||
|
||||
/// Test: POST /v1/assert with valid Ed25519 signature returns 201.
|
||||
#[tokio::test]
|
||||
async fn test_assert_valid_signature_returns_201() {
|
||||
let env = common::create_test_env().await;
|
||||
let app = create_router(env.state);
|
||||
|
||||
let body = common::create_signed_assertion_json("test/valid", "has_value", 42.0);
|
||||
|
||||
let request = Request::builder()
|
||||
.uri("/v1/assert")
|
||||
.method("POST")
|
||||
.header("Content-Type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&body).expect("json")))
|
||||
.expect("Request");
|
||||
|
||||
let response = app.oneshot(request).await.expect("Request");
|
||||
assert_eq!(response.status(), StatusCode::CREATED, "Valid signature should return 201");
|
||||
|
||||
let body = axum::body::to_bytes(response.into_body(), usize::MAX).await.expect("Body");
|
||||
let json: serde_json::Value = serde_json::from_slice(&body).expect("JSON");
|
||||
assert_eq!(json["status"], "created");
|
||||
}
|
||||
|
||||
/// Test: POST /v1/assert with null byte in subject returns 400.
|
||||
#[tokio::test]
|
||||
async fn test_assert_null_byte_subject_returns_400() {
|
||||
let env = common::create_test_env().await;
|
||||
let app = create_router(env.state);
|
||||
|
||||
// Use a properly signed assertion but with null byte in subject
|
||||
let body = common::create_signed_assertion_json("test\x00injected", "has_value", 1.0);
|
||||
|
||||
let request = Request::builder()
|
||||
.uri("/v1/assert")
|
||||
.method("POST")
|
||||
.header("Content-Type", "application/json")
|
||||
.body(Body::from(serde_json::to_vec(&body).expect("json")))
|
||||
.expect("Request");
|
||||
|
||||
let response = app.oneshot(request).await.expect("Request");
|
||||
// Should fail with 400 due to null byte in subject
|
||||
assert_eq!(
|
||||
response.status(),
|
||||
StatusCode::BAD_REQUEST,
|
||||
"Null byte in subject should return 400"
|
||||
);
|
||||
}
|
||||
|
||||
@ -48,6 +48,7 @@ mod tests {
|
||||
visual_hash: Some([1u8; 8]),
|
||||
epoch: Some([2u8; 32]),
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![SignatureEntry {
|
||||
agent_id: [2u8; 32],
|
||||
@ -103,6 +104,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: stage,
|
||||
signatures: vec![],
|
||||
confidence: 1.0,
|
||||
|
||||
@ -55,6 +55,25 @@ pub const MAX_OBJECT_LEN: usize = 4096;
|
||||
/// in the source metadata instead of the raw bytes.
|
||||
pub const MAX_SOURCE_SIZE: usize = 10 * 1024 * 1024;
|
||||
|
||||
/// Upper bound on the length of an assertion narrative, in bytes (64 KB).
///
/// A narrative is the free-text explanation (methodology, limitations, bias,
/// caveats) that makes an assertion self-contained. Capping it keeps memory
/// use bounded while still leaving room for rich context.
///
/// # Example
/// - Valid: A 2 KB explanation of trial methodology
/// - Valid: A 10 KB narrative covering bias, limitations, and caveats
/// - Invalid: A 100 KB embedded document in the narrative field
pub const MAX_NARRATIVE_LEN: usize = 64 * 1024;
|
||||
|
||||
/// Upper bound on the length of stored source content, in bytes (1 MB).
///
/// Source content is the full text extracted from a PDF or other document.
/// The cap keeps memory use bounded while still accommodating typical
/// research papers and regulatory documents.
pub const MAX_SOURCE_CONTENT_LEN: usize = 1024 * 1024;
|
||||
|
||||
/// Default limit for paginated query results.
|
||||
///
|
||||
/// Applied when no explicit limit is provided in the query parameters.
|
||||
|
||||
@ -44,6 +44,11 @@ use rkyv::validation::validators::DefaultValidator;
|
||||
use rkyv::{Archive, CheckBytes, Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::types::{
|
||||
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
|
||||
SourceRecord, SourceStatus,
|
||||
};
|
||||
|
||||
/// Default scratch buffer size for serialization.
|
||||
///
|
||||
/// 4KB is sufficient for most assertions. Larger payloads will trigger
|
||||
@ -88,6 +93,7 @@ pub enum SerdeError {
|
||||
/// visual_hash: None,
|
||||
/// epoch: None,
|
||||
/// source_metadata: None,
|
||||
/// narrative: None,
|
||||
/// lifecycle: LifecycleStage::Proposed,
|
||||
/// signatures: vec![],
|
||||
/// confidence: 1.0,
|
||||
@ -156,6 +162,131 @@ where
|
||||
.map_err(|e| SerdeError::Deserialization(e.to_string()))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Legacy Assertion (pre-narrative schema)
|
||||
// ============================================================================
|
||||
|
||||
/// Assertion struct matching the pre-narrative rkyv layout.
|
||||
///
|
||||
/// The `narrative: Option<String>` field was added between `source_metadata`
|
||||
/// and `lifecycle`. rkyv doesn't support schema evolution, so data serialized
|
||||
/// before that change needs this struct to deserialize correctly.
|
||||
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
|
||||
#[archive(check_bytes)]
|
||||
struct LegacyAssertion {
|
||||
pub subject: String,
|
||||
pub predicate: String,
|
||||
pub object: ObjectValue,
|
||||
pub parent_hash: Option<[u8; 32]>,
|
||||
pub source_hash: [u8; 32],
|
||||
pub source_class: SourceClass,
|
||||
pub visual_hash: Option<[u8; 8]>,
|
||||
pub epoch: Option<[u8; 32]>,
|
||||
pub source_metadata: Option<Vec<u8>>,
|
||||
// narrative: Option<String> did NOT exist in this version
|
||||
pub lifecycle: LifecycleStage,
|
||||
pub signatures: Vec<SignatureEntry>,
|
||||
pub confidence: f32,
|
||||
pub timestamp: u64,
|
||||
pub hlc_timestamp: HlcTimestamp,
|
||||
pub vector: Option<Vec<f32>>,
|
||||
}
|
||||
|
||||
impl From<LegacyAssertion> for Assertion {
|
||||
fn from(legacy: LegacyAssertion) -> Self {
|
||||
Self {
|
||||
subject: legacy.subject,
|
||||
predicate: legacy.predicate,
|
||||
object: legacy.object,
|
||||
parent_hash: legacy.parent_hash,
|
||||
source_hash: legacy.source_hash,
|
||||
source_class: legacy.source_class,
|
||||
visual_hash: legacy.visual_hash,
|
||||
epoch: legacy.epoch,
|
||||
source_metadata: legacy.source_metadata,
|
||||
narrative: None,
|
||||
lifecycle: legacy.lifecycle,
|
||||
signatures: legacy.signatures,
|
||||
confidence: legacy.confidence,
|
||||
timestamp: legacy.timestamp,
|
||||
hlc_timestamp: legacy.hlc_timestamp,
|
||||
vector: legacy.vector,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserialize an assertion with backward compatibility.
|
||||
///
|
||||
/// Tries the current `Assertion` layout first. If that fails, tries the
|
||||
/// legacy layout (before `narrative` field was added) and converts.
|
||||
///
|
||||
/// This allows the system to read assertions written before schema changes
|
||||
/// without requiring a data migration.
|
||||
pub fn deserialize_assertion_compat(data: &[u8]) -> Result<Assertion, SerdeError> {
|
||||
// Try current format first (fast path for new data)
|
||||
if let Ok(assertion) = deserialize::<Assertion>(data) {
|
||||
return Ok(assertion);
|
||||
}
|
||||
|
||||
// Fallback: try legacy format (no narrative field)
|
||||
let legacy: LegacyAssertion = deserialize(data)?;
|
||||
Ok(legacy.into())
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Legacy SourceRecord (pre-content schema)
|
||||
// ============================================================================
|
||||
|
||||
/// SourceRecord struct matching the pre-content rkyv layout.
|
||||
///
|
||||
/// The `content: Option<String>` field was added after `notes`.
|
||||
/// rkyv doesn't support schema evolution, so data serialized
|
||||
/// before that change needs this struct to deserialize correctly.
|
||||
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
|
||||
#[archive(check_bytes)]
|
||||
struct LegacySourceRecord {
|
||||
pub hash: [u8; 32],
|
||||
pub label: String,
|
||||
pub url: Option<String>,
|
||||
pub tier: u8,
|
||||
pub status: SourceStatus,
|
||||
pub created_at: u64,
|
||||
pub updated_at: u64,
|
||||
pub notes: Option<String>,
|
||||
// content: Option<String> did NOT exist in this version
|
||||
}
|
||||
|
||||
impl From<LegacySourceRecord> for SourceRecord {
|
||||
fn from(legacy: LegacySourceRecord) -> Self {
|
||||
Self {
|
||||
hash: legacy.hash,
|
||||
label: legacy.label,
|
||||
url: legacy.url,
|
||||
tier: legacy.tier,
|
||||
status: legacy.status,
|
||||
created_at: legacy.created_at,
|
||||
updated_at: legacy.updated_at,
|
||||
notes: legacy.notes,
|
||||
content: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserialize a source record with backward compatibility.
|
||||
///
|
||||
/// Tries the current `SourceRecord` layout first. If that fails, tries the
|
||||
/// legacy layout (before `content` field was added) and converts.
|
||||
pub fn deserialize_source_record_compat(data: &[u8]) -> Result<SourceRecord, SerdeError> {
|
||||
// Try current format first (fast path for new data)
|
||||
if let Ok(record) = deserialize::<SourceRecord>(data) {
|
||||
return Ok(record);
|
||||
}
|
||||
|
||||
// Fallback: try legacy format (no content field)
|
||||
let legacy: LegacySourceRecord = deserialize(data)?;
|
||||
Ok(legacy.into())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -176,6 +307,7 @@ mod tests {
|
||||
visual_hash: Some([1u8; 8]),
|
||||
epoch: Some([2u8; 32]),
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![SignatureEntry {
|
||||
agent_id: [2u8; 32],
|
||||
@ -303,6 +435,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![],
|
||||
confidence: 0.0,
|
||||
@ -330,6 +463,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: Some(metadata.as_bytes().to_vec()),
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![],
|
||||
confidence: 0.85,
|
||||
@ -357,6 +491,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![],
|
||||
confidence: 1.0,
|
||||
@ -371,4 +506,127 @@ mod tests {
|
||||
assert_eq!(assertion, recovered);
|
||||
assert!(recovered.source_metadata.is_none());
|
||||
}
|
||||
|
||||
#[test]
fn test_legacy_assertion_compat_deserialize() {
    // Bytes produced from the pre-narrative layout stand in for data written
    // before the schema change.
    let signer = SignatureEntry {
        agent_id: [2u8; 32],
        signature: [3u8; 64],
        timestamp: 1000,
        version: 1,
    };
    let old_layout = LegacyAssertion {
        subject: "Semaglutide".to_string(),
        predicate: "reduces_weight".to_string(),
        object: ObjectValue::Text("significant".to_string()),
        parent_hash: None,
        source_hash: [1u8; 32],
        source_class: SourceClass::Clinical,
        visual_hash: None,
        epoch: None,
        source_metadata: Some(b"{}".to_vec()),
        lifecycle: LifecycleStage::Approved,
        signatures: vec![signer],
        confidence: 0.95,
        timestamp: 1700000000,
        hlc_timestamp: HlcTimestamp::default(),
        vector: Some(vec![0.1, 0.2]),
    };
    let encoded = serialize(&old_layout).expect("serialize legacy");

    // Reading the bytes directly as the current layout must be rejected...
    let direct = deserialize::<Assertion>(&encoded);
    assert!(direct.is_err());

    // ...while the compat entry point recovers the assertion.
    let upgraded =
        deserialize_assertion_compat(&encoded).expect("compat deserialize should succeed");

    assert_eq!(upgraded.subject, "Semaglutide");
    assert_eq!(upgraded.predicate, "reduces_weight");
    assert_eq!(upgraded.confidence, 0.95);
    assert_eq!(upgraded.signatures.len(), 1);
    // The legacy layout had no narrative, so the upgrade must leave it empty.
    assert!(upgraded.narrative.is_none());
    assert!(upgraded.source_metadata.is_some());
    assert_eq!(upgraded.timestamp, 1700000000);
}
|
||||
|
||||
#[test]
fn test_current_assertion_also_works_via_compat() {
    // Data already in the current layout must round-trip through the compat
    // entry point unchanged, narrative included.
    let narrative_text = "This is a narrative.".to_string();
    let original = Assertion {
        subject: "test".to_string(),
        predicate: "works".to_string(),
        object: ObjectValue::Boolean(true),
        parent_hash: None,
        source_hash: [0u8; 32],
        source_class: SourceClass::Expert,
        visual_hash: None,
        epoch: None,
        source_metadata: None,
        narrative: Some(narrative_text),
        lifecycle: LifecycleStage::Proposed,
        signatures: vec![],
        confidence: 1.0,
        timestamp: 0,
        hlc_timestamp: HlcTimestamp::default(),
        vector: None,
    };

    let encoded = serialize(&original).expect("serialize");
    let decoded = deserialize_assertion_compat(&encoded)
        .expect("compat deserialize should succeed for current format");

    assert_eq!(decoded, original);
    assert_eq!(decoded.narrative, Some("This is a narrative.".to_string()));
}
|
||||
|
||||
#[test]
fn test_legacy_source_record_compat_deserialize() {
    // Bytes produced from the pre-content layout stand in for records written
    // before the schema change.
    let old_layout = LegacySourceRecord {
        hash: [42u8; 32],
        label: "RFC 7519".to_string(),
        url: Some("https://tools.ietf.org/html/rfc7519".to_string()),
        tier: 0,
        status: SourceStatus::Active,
        created_at: 1000,
        updated_at: 2000,
        notes: Some("JWT spec".to_string()),
    };
    let encoded = serialize(&old_layout).expect("serialize legacy");

    // Reading the bytes directly as the current layout must be rejected...
    let direct = deserialize::<SourceRecord>(&encoded);
    assert!(direct.is_err());

    // ...while the compat entry point recovers the record.
    let upgraded =
        deserialize_source_record_compat(&encoded).expect("compat deserialize should succeed");

    assert_eq!(upgraded.hash, [42u8; 32]);
    assert_eq!(upgraded.label, "RFC 7519");
    assert_eq!(upgraded.tier, 0);
    assert_eq!(upgraded.notes, Some("JWT spec".to_string()));
    // The legacy layout had no content, so the upgrade must leave it empty.
    assert!(upgraded.content.is_none());
}
|
||||
|
||||
#[test]
fn test_current_source_record_also_works_via_compat() {
    // A record already in the current layout (with content) must round-trip
    // through the compat deserializer without loss.
    let text = "Full text content";
    let original = SourceRecord::new([1u8; 32], "Test".to_string(), None, 2, 1000)
        .with_content(Some(text.to_string()));

    let encoded = serialize(&original).expect("serialize");
    let decoded = deserialize_source_record_compat(&encoded)
        .expect("compat deserialize should succeed for current format");

    assert_eq!(decoded, original);
    assert_eq!(decoded.content.as_deref(), Some(text));
}
|
||||
}
|
||||
|
||||
@ -21,6 +21,140 @@
|
||||
//! ```
|
||||
|
||||
use crate::types::{Assertion, ObjectValue};
|
||||
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
|
||||
|
||||
/// Reasons an assertion's signature list can be rejected.
///
/// The `Display` text is aimed at API consumers who may be unfamiliar with
/// Ed25519: each message states what the offending field must contain and
/// lists the usual mistakes.
#[derive(Debug)]
pub enum SignatureError {
    /// The assertion carries no signatures at all.
    Empty,

    /// A signature entry declares a version this code does not handle.
    UnknownVersion {
        /// The version number found on the entry.
        version: u8,
        /// Position of the entry in the signature list.
        index: usize,
    },

    /// The entry's `agent_id` could not be turned into an Ed25519 public key.
    InvalidPublicKey {
        /// Position of the entry in the signature list.
        index: usize,
        /// Text of the underlying key-parsing error.
        detail: String,
    },

    /// The signature did not verify against the expected message.
    VerificationFailed {
        /// Position of the entry in the signature list.
        index: usize,
        /// Version declared on the entry.
        version: u8,
        /// Text of the underlying verification error.
        detail: String,
    },
}

/// Human-readable description of the message a signature of `version` covers.
fn signed_message_description(version: u8) -> &'static str {
    match version {
        1 => "'{subject}:{predicate}' (UTF-8 bytes)",
        2 => "the BLAKE3 content hash of the assertion",
        _ => "unknown",
    }
}

impl std::fmt::Display for SignatureError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // No interpolation needed, so write the literal directly.
            Self::Empty => f.write_str(
                "Assertion must have at least one signature. \
                 Each signature requires: agent_id (32-byte Ed25519 public key, hex-encoded as 64 chars), \
                 signature (64-byte Ed25519 signature, hex-encoded as 128 chars). \
                 SHA-256/SHA-512 hashes cannot be used as agent_id or signature",
            ),
            Self::UnknownVersion { version, index } => write!(
                f,
                "Signature {index}: unknown version {version}. \
                 Supported versions: 1 (signs '{{subject}}:{{predicate}}'), 2 (signs BLAKE3 content hash)"
            ),
            Self::InvalidPublicKey { index, detail } => write!(
                f,
                "Signature {index}: agent_id is not a valid Ed25519 public key ({detail}). \
                 agent_id must be a 32-byte Ed25519 public key (hex-encoded as 64 chars). \
                 Common mistake: using SHA-256 or other hashes as agent_id. \
                 Generate a keypair with Ed25519 (e.g., ed25519-dalek, crypto/ed25519, or openssl)"
            ),
            Self::VerificationFailed { index, version, detail } => {
                let message_desc = signed_message_description(*version);
                write!(
                    f,
                    "Signature {index}: Ed25519 verification failed ({detail}). \
                     For v{version} signatures, signature must be Ed25519_sign(private_key, {message_desc}). \
                     Common mistakes: (1) using SHA hashes instead of Ed25519 signatures, \
                     (2) signing the wrong message, (3) agent_id doesn't match the signing key"
                )
            }
        }
    }
}

impl std::error::Error for SignatureError {}
|
||||
|
||||
/// Verify all Ed25519 signatures on an assertion.
|
||||
///
|
||||
/// Supports two signature versions:
|
||||
/// - **Version 1 (legacy):** signs `"{subject}:{predicate}"` — only protects those fields
|
||||
/// - **Version 2 (enterprise):** signs the BLAKE3 content hash — protects ALL fields
|
||||
///
|
||||
/// All signatures must be valid for the assertion to be accepted.
|
||||
///
|
||||
/// This function is used at both the API boundary (fail fast with 400) and in the
|
||||
/// IngestWorker (defense in depth). Keeping it in `stemedb-core` avoids duplication.
|
||||
pub fn verify_assertion_signatures(
|
||||
assertion: &Assertion,
|
||||
) -> std::result::Result<(), SignatureError> {
|
||||
if assertion.signatures.is_empty() {
|
||||
return Err(SignatureError::Empty);
|
||||
}
|
||||
|
||||
// Pre-compute v1 message (subject:predicate) — only used if v1 signatures exist
|
||||
let v1_message = format!("{}:{}", assertion.subject, assertion.predicate);
|
||||
|
||||
// Pre-compute v2 content hash — only if any v2 signature exists
|
||||
let v2_content_hash: Option<[u8; 32]> = if assertion.signatures.iter().any(|s| s.version == 2) {
|
||||
Some(compute_content_hash_v2(assertion))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
for (idx, sig_entry) in assertion.signatures.iter().enumerate() {
|
||||
let message_bytes: &[u8] = match sig_entry.version {
|
||||
1 => v1_message.as_bytes(),
|
||||
2 => v2_content_hash
|
||||
.as_ref()
|
||||
.ok_or(SignatureError::UnknownVersion { version: 2, index: idx })?,
|
||||
v => {
|
||||
return Err(SignatureError::UnknownVersion { version: v, index: idx });
|
||||
}
|
||||
};
|
||||
|
||||
let verifying_key = VerifyingKey::from_bytes(&sig_entry.agent_id)
|
||||
.map_err(|e| SignatureError::InvalidPublicKey { index: idx, detail: e.to_string() })?;
|
||||
|
||||
let signature = Signature::from_bytes(&sig_entry.signature);
|
||||
|
||||
verifying_key.verify(message_bytes, &signature).map_err(|e| {
|
||||
SignatureError::VerificationFailed {
|
||||
index: idx,
|
||||
version: sig_entry.version,
|
||||
detail: e.to_string(),
|
||||
}
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the canonical content hash for v2 (enterprise) signing.
|
||||
///
|
||||
@ -37,6 +171,10 @@ use crate::types::{Assertion, ObjectValue};
|
||||
/// - `source_metadata`: Variable-length, domain-specific
|
||||
/// - `lifecycle`: Can change over time
|
||||
///
|
||||
/// **Narrative IS included** because it is content-bearing (methodology, limitations).
|
||||
/// Changing the narrative changes the assertion's meaning. When `None`, no bytes
|
||||
/// are added, preserving backward compatibility with pre-narrative hashes.
|
||||
///
|
||||
/// # Format
|
||||
///
|
||||
/// The hash is computed over:
|
||||
@ -93,6 +231,12 @@ pub fn compute_content_hash_v2(assertion: &Assertion) -> [u8; 32] {
|
||||
hasher.update(b":");
|
||||
hasher.update(&assertion.timestamp.to_le_bytes());
|
||||
|
||||
// Narrative (only when present, so None preserves backward-compat hash)
|
||||
if let Some(ref narrative) = assertion.narrative {
|
||||
hasher.update(b":narrative:");
|
||||
hasher.update(narrative.as_bytes());
|
||||
}
|
||||
|
||||
*hasher.finalize().as_bytes()
|
||||
}
|
||||
|
||||
@ -123,6 +267,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![],
|
||||
confidence: 0.95,
|
||||
@ -246,4 +391,46 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_content_hash_changes_with_narrative() {
    // Hash the fixture as-is (no narrative), then again after attaching one.
    let mut subject_under_test = test_assertion();
    let without_narrative = compute_content_hash_v2(&subject_under_test);

    subject_under_test.narrative =
        Some("This drug carries a boxed warning for thyroid C-cell tumors.".to_string());
    let with_narrative = compute_content_hash_v2(&subject_under_test);

    assert_ne!(
        without_narrative, with_narrative,
        "Narrative should change the content hash"
    );
}
|
||||
|
||||
#[test]
fn test_content_hash_backward_compat_narrative_none() {
    // Hash of the shared fixture, which carries `narrative: None`.
    let from_fixture = compute_content_hash_v2(&test_assertion());

    // The same assertion spelled out field by field, independent of the fixture.
    let handwritten = Assertion {
        subject: "Semaglutide".to_string(),
        predicate: "has_boxed_warning".to_string(),
        object: ObjectValue::Boolean(true),
        parent_hash: None,
        source_hash: [1u8; 32],
        source_class: SourceClass::Regulatory,
        visual_hash: None,
        epoch: None,
        source_metadata: None,
        narrative: None,
        lifecycle: LifecycleStage::Approved,
        signatures: Vec::new(),
        confidence: 0.95,
        timestamp: 1704067200,
        hlc_timestamp: HlcTimestamp::default(),
        vector: None,
    };
    let from_handwritten = compute_content_hash_v2(&handwritten);

    assert_eq!(
        from_fixture, from_handwritten,
        "narrative: None must produce identical hash for backward compat"
    );
}
|
||||
}
|
||||
|
||||
@ -49,6 +49,7 @@ pub struct AssertionBuilder {
|
||||
visual_hash: Option<[u8; 8]>,
|
||||
epoch: Option<[u8; 32]>,
|
||||
source_metadata: Option<Vec<u8>>,
|
||||
narrative: Option<String>,
|
||||
lifecycle: LifecycleStage,
|
||||
signatures: Option<Vec<SignatureEntry>>,
|
||||
agent_id: [u8; 32],
|
||||
@ -77,6 +78,7 @@ impl AssertionBuilder {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: None, // Will use agent_id to build default
|
||||
agent_id: [1u8; 32],
|
||||
@ -199,6 +201,12 @@ impl AssertionBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the narrative (free-text methodology, limitations, caveats).
|
||||
pub fn narrative(mut self, narrative: &str) -> Self {
|
||||
self.narrative = Some(narrative.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Provide explicit signatures (overrides the default single-signature behavior).
|
||||
pub fn signatures(mut self, signatures: Vec<SignatureEntry>) -> Self {
|
||||
self.signatures = Some(signatures);
|
||||
@ -226,6 +234,7 @@ impl AssertionBuilder {
|
||||
visual_hash: self.visual_hash,
|
||||
epoch: self.epoch,
|
||||
source_metadata: self.source_metadata,
|
||||
narrative: self.narrative,
|
||||
lifecycle: self.lifecycle,
|
||||
signatures,
|
||||
confidence: self.confidence,
|
||||
|
||||
@ -33,6 +33,15 @@ pub struct Assertion {
|
||||
/// Schema is domain-specific (journal info, social metrics, etc.).
|
||||
/// Use `Vec<u8>` for rkyv zero-copy compatibility.
|
||||
pub source_metadata: Option<Vec<u8>>,
|
||||
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
|
||||
///
|
||||
/// Makes the assertion self-contained: pick it up, read it, understand the
|
||||
/// full claim without dereferencing anything. Not structured into categories
|
||||
/// because there are too many kinds of information to pre-categorize.
|
||||
///
|
||||
/// Included in v2 content hash (narrative is content-bearing).
|
||||
/// Max length: `limits::MAX_NARRATIVE_LEN` (64 KB).
|
||||
pub narrative: Option<String>,
|
||||
/// The lifecycle stage (Proposed, UnderReview, Approved, Deprecated, Rejected).
|
||||
pub lifecycle: LifecycleStage,
|
||||
|
||||
|
||||
@ -102,6 +102,10 @@ pub struct SourceRecord {
|
||||
/// Optional curator notes about the source.
|
||||
/// Examples: "Deprecated in favor of RFC 9068", "Under review for accuracy"
|
||||
pub notes: Option<String>,
|
||||
|
||||
/// Optional full-text content of the source document.
|
||||
/// Populated by pipelines that extract text from PDFs or other formats.
|
||||
pub content: Option<String>,
|
||||
}
|
||||
|
||||
impl SourceRecord {
|
||||
@ -122,6 +126,7 @@ impl SourceRecord {
|
||||
created_at: timestamp,
|
||||
updated_at: timestamp,
|
||||
notes: None,
|
||||
content: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -137,7 +142,13 @@ impl SourceRecord {
|
||||
updated_at: u64,
|
||||
notes: Option<String>,
|
||||
) -> Self {
|
||||
Self { hash, label, url, tier: tier.min(5), status, created_at, updated_at, notes }
|
||||
Self { hash, label, url, tier: tier.min(5), status, created_at, updated_at, notes, content: None }
|
||||
}
|
||||
|
||||
/// Set the full-text content of the source document.
|
||||
pub fn with_content(mut self, content: Option<String>) -> Self {
|
||||
self.content = content;
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns the tier label based on the tier number.
|
||||
@ -186,6 +197,7 @@ mod tests {
|
||||
assert_eq!(record.created_at, 1000);
|
||||
assert_eq!(record.updated_at, 1000);
|
||||
assert!(record.notes.is_none());
|
||||
assert!(record.content.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -263,5 +275,38 @@ mod tests {
|
||||
crate::serde::deserialize(&bytes).expect("Failed to deserialize SourceRecord");
|
||||
|
||||
assert_eq!(record, recovered);
|
||||
assert!(recovered.content.is_none());
|
||||
}
|
||||
|
||||
#[test]
fn test_rkyv_roundtrip_with_content() {
    // A record carrying content must serialize and deserialize losslessly.
    let body = "Full text of the FDA approval letter...";
    let original = SourceRecord::new([42u8; 32], "FDA Approval Letter".to_string(), None, 0, 1000)
        .with_content(Some(body.to_string()));

    let encoded = crate::serde::serialize(&original).expect("Failed to serialize SourceRecord");
    let decoded: SourceRecord =
        crate::serde::deserialize(&encoded).expect("Failed to deserialize SourceRecord");

    assert_eq!(original, decoded);
    assert_eq!(decoded.content.as_deref(), Some(body));
}
|
||||
|
||||
#[test]
fn test_with_content_builder() {
    // Fresh record with identical base fields for each case.
    let fresh = || SourceRecord::new([1u8; 32], "Test".to_string(), None, 0, 1000);

    // `Some` is stored as given.
    let with_text = fresh().with_content(Some("content".to_string()));
    assert_eq!(with_text.content.as_deref(), Some("content"));

    // `None` leaves the record without content.
    let without_text = fresh().with_content(None);
    assert!(without_text.content.is_none());
}
|
||||
}
|
||||
|
||||
@ -23,6 +23,8 @@ ed25519-dalek = { version = "2.1", features = ["rand_core"] }
|
||||
uhlc = "0.7"
|
||||
# Async traits
|
||||
async-trait = "0.1"
|
||||
# Metrics
|
||||
metrics = "0.23"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.10"
|
||||
|
||||
@ -32,3 +32,23 @@ pub enum IngestError {
|
||||
#[error("Input validation failed: {0}")]
|
||||
InputValidation(String),
|
||||
}
|
||||
|
||||
impl IngestError {
|
||||
/// Returns true if retrying this exact WAL record could succeed.
|
||||
///
|
||||
/// Transient errors (I/O, storage engine) may resolve on retry.
|
||||
/// Permanent errors (invalid signature, bad input, corrupt serialization)
|
||||
/// will never succeed — the bytes in the WAL are immutable.
|
||||
pub fn is_retryable(&self) -> bool {
|
||||
match self {
|
||||
// I/O and storage errors: disk might recover, RocksDB might unblock
|
||||
IngestError::Wal(_) | IngestError::Storage(_) => true,
|
||||
// The WAL record bytes are immutable — these will never pass
|
||||
IngestError::InvalidSignature(_)
|
||||
| IngestError::InputValidation(_)
|
||||
| IngestError::Serialization(_) => false,
|
||||
// Worker errors are ambiguous; treat as retryable to be safe
|
||||
IngestError::Worker(_) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,9 +4,8 @@
|
||||
use super::record_types::RECORD_HEADER_SIZE;
|
||||
use super::{IngestWorker, RecordType};
|
||||
use crate::error::{IngestError, Result};
|
||||
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
|
||||
use stemedb_core::serde::deserialize;
|
||||
use stemedb_core::signing::compute_content_hash_v2;
|
||||
use stemedb_core::signing;
|
||||
use stemedb_core::types::{Assertion, Epoch, Hash, Vote};
|
||||
use stemedb_storage::key_codec;
|
||||
use stemedb_storage::{IndexStore, KVStore, VoteStore};
|
||||
@ -82,10 +81,77 @@ impl<S: KVStore + 'static> IngestWorker<S> {
|
||||
let record_type = RecordType::try_from(record.payload[0])?;
|
||||
let data = &record.payload[RECORD_HEADER_SIZE..];
|
||||
|
||||
match record_type {
|
||||
RecordType::Assertion => self.ingest_assertion(data).await?,
|
||||
RecordType::Vote => self.ingest_vote(data).await?,
|
||||
RecordType::Epoch => self.ingest_epoch(data).await?,
|
||||
let ingest_result = match record_type {
|
||||
RecordType::Assertion => self.ingest_assertion(data).await,
|
||||
RecordType::Vote => self.ingest_vote(data).await,
|
||||
RecordType::Epoch => self.ingest_epoch(data).await,
|
||||
};
|
||||
|
||||
if let Err(e) = ingest_result {
|
||||
if !e.is_retryable() {
|
||||
// Permanent failure: the WAL record bytes are immutable and will
|
||||
// never pass validation. Advance the cursor past this poison record
|
||||
// so it doesn't block all subsequent ingestion.
|
||||
let skip_offset = self.current_offset;
|
||||
self.current_offset += bytes_read;
|
||||
|
||||
let cursor_key = key_codec::cursor_key();
|
||||
// Best-effort cursor persist. If this fails, on restart we will
|
||||
// re-encounter this record, classify it as permanent again, and
|
||||
// skip it. No data is lost.
|
||||
if let Err(persist_err) =
|
||||
self.store.put(&cursor_key, &self.current_offset.to_le_bytes()).await
|
||||
{
|
||||
warn!(
|
||||
offset = skip_offset,
|
||||
error = %persist_err,
|
||||
"Failed to persist cursor after skipping poison record"
|
||||
);
|
||||
}
|
||||
|
||||
warn!(
|
||||
record_type = ?record_type,
|
||||
offset = skip_offset,
|
||||
new_offset = self.current_offset,
|
||||
error = %e,
|
||||
"Skipped permanently invalid WAL record"
|
||||
);
|
||||
|
||||
// Store rejection metadata for admin observability.
|
||||
// Best-effort: failure to persist metadata should not block ingestion.
|
||||
let rejection_key = key_codec::rejected_record_key(skip_offset);
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_secs())
|
||||
.unwrap_or(0);
|
||||
let rejection_json = format!(
|
||||
r#"{{"offset":{},"record_type":"{:?}","reason":"{}","timestamp":{}}}"#,
|
||||
skip_offset,
|
||||
record_type,
|
||||
e.to_string().replace('"', "'"),
|
||||
now
|
||||
);
|
||||
if let Err(store_err) =
|
||||
self.store.put(&rejection_key, rejection_json.as_bytes()).await
|
||||
{
|
||||
warn!(
|
||||
offset = skip_offset,
|
||||
error = %store_err,
|
||||
"Failed to store rejection metadata"
|
||||
);
|
||||
}
|
||||
|
||||
metrics::counter!(
|
||||
"stemedb_ingest_records_skipped_total",
|
||||
"reason" => e.to_string()
|
||||
)
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
// Return the error so the run loop can log it.
|
||||
// For permanent errors the cursor has already advanced;
|
||||
// for transient errors the cursor is unchanged (will retry).
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
let prev_offset = self.current_offset;
|
||||
@ -170,6 +236,16 @@ impl<S: KVStore + 'static> IngestWorker<S> {
|
||||
// This enables O(1) lookup of "which assertions cite this source?"
|
||||
self.index_store.add_to_source_index(&assertion.source_hash, &assertion_hash).await?;
|
||||
|
||||
// Write feed index: \x00FEED:{inverted_ts}:{hash_hex} -> subject
|
||||
// Uses server clock (not assertion.timestamp) for ingestion ordering.
|
||||
// This separates "when the claim was made" from "when the system learned about it".
|
||||
let ingested_at = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_secs())
|
||||
.unwrap_or(0);
|
||||
let feed_idx_key = key_codec::feed_key(ingested_at, &hash_hex);
|
||||
self.store.put(&feed_idx_key, assertion.subject.as_bytes()).await?;
|
||||
|
||||
// Insert into vector index if present and assertion has a vector
|
||||
if let (Some(ref vector_index), Some(ref vector)) = (&self.vector_index, &assertion.vector)
|
||||
{
|
||||
@ -282,6 +358,17 @@ impl<S: KVStore + 'static> IngestWorker<S> {
|
||||
)));
|
||||
}
|
||||
|
||||
// Validate narrative length
|
||||
if let Some(ref narrative) = assertion.narrative {
|
||||
if narrative.len() > stemedb_core::limits::MAX_NARRATIVE_LEN {
|
||||
return Err(IngestError::InputValidation(format!(
|
||||
"narrative exceeds {} bytes (got {})",
|
||||
stemedb_core::limits::MAX_NARRATIVE_LEN,
|
||||
narrative.len()
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Validate timestamp: reject if more than 1 hour in future (clock skew protection)
|
||||
let now = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
@ -300,109 +387,19 @@ impl<S: KVStore + 'static> IngestWorker<S> {
|
||||
|
||||
/// Verify all Ed25519 signatures on an assertion.
|
||||
///
|
||||
/// Supports two signature versions:
|
||||
/// - Version 1 (legacy): signs `"{subject}:{predicate}"` - only protects those fields
|
||||
/// - Version 2 (enterprise): signs the BLAKE3 content hash - protects ALL fields
|
||||
///
|
||||
/// For v2 signatures, the content hash is computed from the assertion with
|
||||
/// empty signatures (canonical form), so tampering with any field except
|
||||
/// signatures will invalidate the signature.
|
||||
///
|
||||
/// All signatures must be valid for the assertion to be accepted.
|
||||
/// Delegates to `stemedb_core::signing::verify_assertion_signatures` which
|
||||
/// is the single source of truth for signature verification logic, shared
|
||||
/// between the API handler (fail fast) and this worker (defense in depth).
|
||||
fn verify_assertion_signatures(&self, assertion: &Assertion) -> Result<()> {
|
||||
if assertion.signatures.is_empty() {
|
||||
signing::verify_assertion_signatures(assertion).map_err(|e| {
|
||||
warn!(
|
||||
subject = %assertion.subject,
|
||||
predicate = %assertion.predicate,
|
||||
"Assertion has no signatures"
|
||||
error = %e,
|
||||
"Signature verification failed"
|
||||
);
|
||||
return Err(IngestError::InvalidSignature(
|
||||
"Assertion must have at least one signature".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Pre-compute v1 message (subject:predicate) - only used if v1 signatures exist
|
||||
let v1_message = format!("{}:{}", assertion.subject, assertion.predicate);
|
||||
|
||||
// Pre-compute v2 content hash using the shared utility from stemedb-core.
|
||||
// This must match exactly what the signing code uses in compute_content_hash_v2().
|
||||
// The hash covers: subject, predicate, object, source_hash, source_class, confidence, timestamp.
|
||||
let v2_content_hash: Option<[u8; 32]> =
|
||||
if assertion.signatures.iter().any(|s| s.version == 2) {
|
||||
// Debug: show exact number format for comparison with signing
|
||||
let object_str = match &assertion.object {
|
||||
stemedb_core::types::ObjectValue::Number(n) => format!("Number({:.17})", n),
|
||||
other => format!("{:?}", other),
|
||||
};
|
||||
let confidence_str = format!("{:.17}", assertion.confidence);
|
||||
let hash = compute_content_hash_v2(assertion);
|
||||
debug!(
|
||||
subject = %assertion.subject,
|
||||
predicate = %assertion.predicate,
|
||||
object = %object_str,
|
||||
source_hash = %hex::encode(assertion.source_hash),
|
||||
source_class = ?assertion.source_class,
|
||||
confidence = %confidence_str,
|
||||
timestamp = %assertion.timestamp,
|
||||
content_hash = %hex::encode(hash),
|
||||
"Computed v2 content hash for verification"
|
||||
);
|
||||
Some(hash)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
for (idx, sig_entry) in assertion.signatures.iter().enumerate() {
|
||||
// Determine which message was signed based on version
|
||||
let message_bytes: &[u8] = match sig_entry.version {
|
||||
1 => {
|
||||
// v1 (legacy): signs "{subject}:{predicate}"
|
||||
v1_message.as_bytes()
|
||||
}
|
||||
2 => {
|
||||
// v2 (enterprise): signs the content hash computed by compute_content_hash_v2
|
||||
v2_content_hash.as_ref().ok_or_else(|| {
|
||||
IngestError::InvalidSignature(
|
||||
"v2 signature present but v2_content_hash was not computed".to_string(),
|
||||
)
|
||||
})?
|
||||
}
|
||||
v => {
|
||||
return Err(IngestError::InvalidSignature(format!(
|
||||
"Unknown signature version {} for signature {}",
|
||||
v, idx
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
// Reconstruct the verifying key from the stored agent_id
|
||||
let verifying_key = VerifyingKey::from_bytes(&sig_entry.agent_id).map_err(|e| {
|
||||
IngestError::InvalidSignature(format!(
|
||||
"Invalid public key for signature {}: {}",
|
||||
idx, e
|
||||
))
|
||||
})?;
|
||||
|
||||
// Reconstruct the signature
|
||||
let signature = Signature::from_bytes(&sig_entry.signature);
|
||||
|
||||
// Verify the signature
|
||||
verifying_key.verify(message_bytes, &signature).map_err(|e| {
|
||||
IngestError::InvalidSignature(format!(
|
||||
"Signature {} (v{}) failed verification: {}",
|
||||
idx, sig_entry.version, e
|
||||
))
|
||||
})?;
|
||||
|
||||
debug!(
|
||||
agent_id = %hex::encode(&sig_entry.agent_id[..8]),
|
||||
signature_idx = idx,
|
||||
version = sig_entry.version,
|
||||
"Signature verified"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
IngestError::InvalidSignature(e.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
/// Ingest a vote into the KV store via VoteStore.
|
||||
|
||||
@ -3,7 +3,6 @@
|
||||
//! Contains the continuous ingestion loop that tails the WAL.
|
||||
|
||||
use super::IngestWorker;
|
||||
use crate::error::IngestError;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
use stemedb_storage::KVStore;
|
||||
@ -72,18 +71,17 @@ impl<S: KVStore + 'static> IngestWorker<S> {
|
||||
debug!("Error during shutdown (expected): {:?}", e);
|
||||
break;
|
||||
}
|
||||
match &e {
|
||||
IngestError::InputValidation(msg) => {
|
||||
warn!("Rejected invalid input: {}", msg);
|
||||
}
|
||||
IngestError::InvalidSignature(msg) => {
|
||||
warn!("Rejected invalid signature: {}", msg);
|
||||
}
|
||||
_ => {
|
||||
error!("Ingestion error: {:?}", e);
|
||||
}
|
||||
|
||||
if e.is_retryable() {
|
||||
// Transient error: back off and retry the same record
|
||||
error!("Transient ingestion error (will retry): {:?}", e);
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
} else {
|
||||
// Permanent error: step() already advanced the cursor past
|
||||
// the poison record, so continue immediately to process the
|
||||
// next record without sleeping.
|
||||
warn!("Permanent ingestion error (record skipped): {}", e);
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,6 +25,7 @@ async fn test_rejects_invalid_signature() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -83,6 +84,7 @@ async fn test_rejects_unsigned_assertion() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![], // No signatures!
|
||||
confidence: 0.95,
|
||||
@ -112,6 +114,88 @@ async fn test_rejects_unsigned_assertion() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Test: Invalid signature advances cursor past poison record so subsequent records process.
|
||||
///
|
||||
/// This is the core regression test for the "assert returns 201 but data not queryable" bug.
|
||||
/// Previously, an invalid-signature record would cause the IngestWorker to retry the same
|
||||
/// offset forever, blocking all subsequent records.
|
||||
#[tokio::test]
|
||||
async fn test_invalid_signature_skips_and_continues() {
|
||||
let dir = tempdir().expect("Failed to create temp dir");
|
||||
let wal_dir = dir.path().join("wal");
|
||||
let db_dir = dir.path().join("db");
|
||||
|
||||
// Record 1: Invalid signature (poison record)
|
||||
let bad_assertion = Assertion {
|
||||
subject: "Bad".to_string(),
|
||||
predicate: "poison".to_string(),
|
||||
object: ObjectValue::Text("should be skipped".to_string()),
|
||||
parent_hash: None,
|
||||
source_hash: [0u8; 32],
|
||||
source_class: SourceClass::Expert,
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
agent_id: [1u8; 32], // Invalid Ed25519 public key
|
||||
signature: [2u8; 64], // Invalid signature
|
||||
timestamp: 1000,
|
||||
}],
|
||||
confidence: 0.95,
|
||||
timestamp: 1000,
|
||||
hlc_timestamp: HlcTimestamp::default(),
|
||||
vector: None,
|
||||
};
|
||||
|
||||
// Record 2: Valid signature (should be processed after skipping record 1)
|
||||
let good_assertion = create_signed_assertion("Good", "valid");
|
||||
|
||||
let mut journal = Journal::open(&wal_dir).expect("Failed to open journal");
|
||||
let store = HybridStore::open(&db_dir).expect("Failed to open store");
|
||||
|
||||
// Write both records to WAL
|
||||
journal.append(serialize_assertion(&bad_assertion).expect("ser")).expect("append bad");
|
||||
journal.append(serialize_assertion(&good_assertion).expect("ser")).expect("append good");
|
||||
|
||||
let journal = Arc::new(Mutex::new(journal));
|
||||
let store = Arc::new(store);
|
||||
let mut worker =
|
||||
IngestWorker::new(journal, store.clone()).await.expect("Failed to create worker");
|
||||
|
||||
// Step 1: Should fail with InvalidSignature but advance cursor past the poison record
|
||||
let result1 = worker.step().await;
|
||||
assert!(result1.is_err(), "Should reject invalid signature");
|
||||
assert!(
|
||||
matches!(result1.unwrap_err(), IngestError::InvalidSignature(_)),
|
||||
"Should be InvalidSignature"
|
||||
);
|
||||
|
||||
// Step 2: Should succeed — the cursor moved past the poison record
|
||||
let result2 = worker.step().await;
|
||||
assert!(
|
||||
result2.is_ok(),
|
||||
"Should process valid record after skipping poison, got: {:?}",
|
||||
result2
|
||||
);
|
||||
let bytes = result2.expect("step 2");
|
||||
assert!(bytes > 0, "Should have read bytes from the valid record");
|
||||
|
||||
// Verify the good assertion was stored
|
||||
let count_key = key_codec::assertion_count_key();
|
||||
let count_entry = store.get(&count_key).await.expect("get").expect("should have count");
|
||||
let count = u64::from_le_bytes(count_entry.try_into().expect("8 bytes"));
|
||||
assert_eq!(count, 1, "Exactly one assertion should be stored (the good one)");
|
||||
|
||||
// Verify rejection metadata was recorded
|
||||
use stemedb_storage::KVStore;
|
||||
let rejected_prefix = key_codec::rejected_records_scan_prefix();
|
||||
let rejected = store.scan_prefix(&rejected_prefix).await.expect("scan rejected");
|
||||
assert_eq!(rejected.len(), 1, "Should have exactly one rejected record entry");
|
||||
}
|
||||
|
||||
/// Test: Multi-signature assertions require all signatures to be valid.
|
||||
#[tokio::test]
|
||||
async fn test_multisig_all_must_be_valid() {
|
||||
@ -136,6 +220,7 @@ async fn test_multisig_all_must_be_valid() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![
|
||||
// Valid signature
|
||||
|
||||
@ -29,6 +29,7 @@ async fn test_rejects_high_confidence() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -86,6 +87,7 @@ async fn test_rejects_negative_confidence() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -213,6 +215,7 @@ async fn test_rejects_oversized_subject() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -273,6 +276,7 @@ async fn test_rejects_oversized_predicate() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -335,6 +339,7 @@ async fn test_accepts_exact_max_subject_length() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -393,6 +398,7 @@ async fn test_accepts_exact_max_predicate_length() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -446,6 +452,7 @@ async fn test_rejects_nan_confidence() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
|
||||
@ -29,6 +29,7 @@ async fn test_rejects_infinite_confidence() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -172,6 +173,7 @@ async fn test_rejects_future_timestamp() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -237,6 +239,7 @@ async fn test_accepts_near_future_timestamp() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -287,6 +290,7 @@ async fn test_accepts_zero_confidence() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
@ -337,6 +341,7 @@ async fn test_accepts_one_confidence() {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Proposed,
|
||||
signatures: vec![SignatureEntry {
|
||||
version: 1,
|
||||
|
||||
@ -24,6 +24,7 @@ pub fn assertion_to_request(assertion: &Assertion) -> CreateAssertionRequest {
|
||||
.source_metadata
|
||||
.as_ref()
|
||||
.map(|b| String::from_utf8_lossy(b).into_owned()),
|
||||
narrative: assertion.narrative.clone(),
|
||||
// Include timestamps for v2 signature verification
|
||||
timestamp: Some(assertion.timestamp),
|
||||
hlc_timestamp: Some(HlcTimestampDto {
|
||||
@ -94,6 +95,7 @@ mod tests {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle: LifecycleStage::Approved,
|
||||
signatures: vec![SignatureEntry {
|
||||
agent_id: [1u8; 32],
|
||||
|
||||
@ -41,6 +41,10 @@ pub struct CreateAssertionRequest {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_metadata: Option<String>,
|
||||
|
||||
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub narrative: Option<String>,
|
||||
|
||||
/// Unix timestamp when the assertion was created.
|
||||
/// Required for v2 signatures to verify correctly.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
|
||||
@ -66,6 +66,10 @@ pub struct AssertionDto {
|
||||
/// Structured source metadata as a JSON string (optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_metadata: Option<String>,
|
||||
|
||||
/// Free-text narrative explaining methodology, limitations, bias, and caveats.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub narrative: Option<String>,
|
||||
}
|
||||
|
||||
/// Response from a query operation.
|
||||
|
||||
@ -233,6 +233,7 @@ impl MedicalClaim {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata,
|
||||
narrative: None,
|
||||
lifecycle,
|
||||
signatures: Vec::new(),
|
||||
confidence: self.confidence,
|
||||
|
||||
@ -236,9 +236,12 @@ impl<S: KVStore + 'static> QueryEngine<S> {
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
/// Deserialize an assertion using the canonical serde module.
|
||||
/// Deserialize an assertion with backward compatibility.
|
||||
///
|
||||
/// Tries current format first, then falls back to legacy (pre-narrative)
|
||||
/// format for assertions serialized before the schema change.
|
||||
pub(super) fn deserialize_assertion(&self, data: &[u8]) -> Result<Assertion> {
|
||||
stemedb_core::serde::deserialize(data)
|
||||
stemedb_core::serde::deserialize_assertion_compat(data)
|
||||
.map_err(|e| QueryError::Deserialization(e.to_string()))
|
||||
}
|
||||
|
||||
|
||||
@ -362,7 +362,7 @@ impl<S: KVStore + 'static> Materializer<S> {
|
||||
for hash in hash_list {
|
||||
let key = key_codec::assertion_key(subject, &hex::encode(hash));
|
||||
if let Some(data) = self.store.get(&key).await? {
|
||||
match stemedb_core::serde::deserialize::<Assertion>(&data) {
|
||||
match stemedb_core::serde::deserialize_assertion_compat(&data) {
|
||||
Ok(assertion) => candidates.push(assertion),
|
||||
Err(e) => {
|
||||
debug!(
|
||||
|
||||
@ -57,6 +57,7 @@ impl Agent {
|
||||
visual_hash: None,
|
||||
epoch: None,
|
||||
source_metadata: None,
|
||||
narrative: None,
|
||||
lifecycle,
|
||||
signatures: vec![SignatureEntry {
|
||||
agent_id: self.verifying_key.to_bytes(),
|
||||
|
||||
@ -106,6 +106,26 @@ impl HybridStore {
|
||||
|
||||
Ok(Self { fjall, redb, _temp_dir: Some(temp_dir) })
|
||||
}
|
||||
|
||||
/// Scan all assertion key-value pairs from the Fjall backend.
|
||||
///
|
||||
/// This scans all keys in Fjall and returns only those with the `H:` tag
|
||||
/// (assertion data). Used by the admin rebuild-indexes endpoint to
|
||||
/// reconstruct missing Redb secondary indexes.
|
||||
///
|
||||
/// Returns `Vec<(key, value)>` where keys are `{subject}\x00H:{hash_hex}`.
|
||||
#[instrument(skip_all)]
|
||||
pub async fn scan_fjall_assertions(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
|
||||
let all_fjall = self.fjall.scan_prefix(b"").await?;
|
||||
let assertions: Vec<(Vec<u8>, Vec<u8>)> = all_fjall
|
||||
.into_iter()
|
||||
.filter(|(key, _)| {
|
||||
let tag = key_codec::extract_tag(key);
|
||||
tag.starts_with(b"H:")
|
||||
})
|
||||
.collect();
|
||||
Ok(assertions)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@ -113,3 +113,33 @@ pub fn assertion_count_key() -> Vec<u8> {
|
||||
pub fn trust_rank_scan_prefix() -> Vec<u8> {
|
||||
global_key(b"TRUST:", b"")
|
||||
}
|
||||
|
||||
/// Rejected WAL record key: `\x00REJECTED:{offset}`
|
||||
///
|
||||
/// Stores metadata about WAL records that were permanently skipped
|
||||
/// by the IngestWorker (invalid signatures, validation failures, etc.)
|
||||
pub fn rejected_record_key(offset: u64) -> Vec<u8> {
|
||||
global_key(b"REJECTED:", offset.to_string().as_bytes())
|
||||
}
|
||||
|
||||
/// Rejected records scan prefix: `\x00REJECTED:`
|
||||
pub fn rejected_records_scan_prefix() -> Vec<u8> {
|
||||
global_key(b"REJECTED:", b"")
|
||||
}
|
||||
|
||||
/// Feed index key: `\x00FEED:{inverted_ts_hex}:{hash_hex}`
|
||||
///
|
||||
/// Uses inverted timestamp (`u64::MAX - ingested_at`) so lexicographic
|
||||
/// scan order = reverse chronological (newest first).
|
||||
/// Value stores the subject so the feed handler can construct the
|
||||
/// assertion key without a reverse lookup.
|
||||
pub fn feed_key(ingested_at: u64, hash_hex: &str) -> Vec<u8> {
|
||||
let inverted = u64::MAX - ingested_at;
|
||||
let suffix = format!("{}:{}", hex::encode(inverted.to_be_bytes()), hash_hex);
|
||||
global_key(b"FEED:", suffix.as_bytes())
|
||||
}
|
||||
|
||||
/// Feed index scan prefix: `\x00FEED:`
|
||||
pub fn feed_scan_prefix() -> Vec<u8> {
|
||||
global_key(b"FEED:", b"")
|
||||
}
|
||||
|
||||
@ -57,10 +57,10 @@ pub use subject_keys::{
|
||||
// Global keys
|
||||
pub use global_keys::{
|
||||
assertion_count_key, audit_agent_index_key, audit_agent_prefix, audit_key, audit_scan_prefix,
|
||||
cursor_key, epoch_key, escalation_key, escalation_scan_prefix, gs_verified_key, quota_key,
|
||||
quota_limit_key, superseded_key, supersession_index_key, supersession_index_prefix,
|
||||
supersession_key, trust_pack_key, trust_pack_scan_prefix, trust_rank_key,
|
||||
trust_rank_scan_prefix,
|
||||
cursor_key, epoch_key, escalation_key, escalation_scan_prefix, feed_key, feed_scan_prefix,
|
||||
gs_verified_key, quota_key, quota_limit_key, rejected_record_key, rejected_records_scan_prefix,
|
||||
superseded_key, supersession_index_key, supersession_index_prefix, supersession_key,
|
||||
trust_pack_key, trust_pack_scan_prefix, trust_rank_key, trust_rank_scan_prefix,
|
||||
};
|
||||
|
||||
// Index keys
|
||||
|
||||
@ -229,3 +229,45 @@ fn test_global_keys_sort_first() {
|
||||
let subject = assertion_key("Apple", "abc");
|
||||
assert!(global < subject, "Global keys should sort before subject keys");
|
||||
}
|
||||
|
||||
/// A later ingestion time must produce a key that sorts earlier.
#[test]
fn test_feed_key_newest_first_ordering() {
    let older = feed_key(1000, "aaaa");
    let newer = feed_key(2000, "bbbb");

    // The inverted timestamp makes the newer entry the lexicographically smaller key.
    assert!(older > newer, "Newer feed keys should sort before older ones");
}
|
||||
|
||||
/// With equal timestamps, ordering falls through to the hash component.
#[test]
fn test_feed_key_same_timestamp_tiebreak() {
    let low_hash = feed_key(1000, "aaaa");
    let high_hash = feed_key(1000, "zzzz");

    // Identical timestamp field, so `hash_hex` decides the order.
    assert!(high_hash > low_hash);
}
|
||||
|
||||
/// Every feed key must be reachable through the feed scan prefix.
#[test]
fn test_feed_key_starts_with_scan_prefix() {
    let key = feed_key(1000, "abc123def456");

    assert!(
        key.starts_with(&feed_scan_prefix()),
        "Feed key should start with feed scan prefix"
    );
}
|
||||
|
||||
/// Pins the exact byte layout of a feed key, field by field.
#[test]
fn test_feed_key_format() {
    let key = feed_key(0, "deadbeef");

    // Leading `\x00FEED:` tag (6 bytes).
    let (tag, rest) = key.split_at(6);
    assert_eq!(tag, b"\x00FEED:");

    // ingested_at = 0 inverts to u64::MAX, i.e. sixteen `f` hex digits.
    let (timestamp_hex, rest) = rest.split_at(16);
    assert_eq!(timestamp_hex, b"ffffffffffffffff");

    // A single `:` separates the timestamp from the hash.
    let (separator, hash_hex) = rest.split_first().expect("separator byte");
    assert_eq!(*separator, b':');

    // Whatever remains is the hash, verbatim.
    assert_eq!(hash_hex, b"deadbeef");
}
|
||||
|
||||
/// The feed scan prefix is exactly the global-key marker plus the `FEED:` tag.
#[test]
fn test_feed_scan_prefix() {
    assert_eq!(feed_scan_prefix(), b"\x00FEED:");
}
|
||||
|
||||
@ -44,6 +44,16 @@ where
|
||||
stemedb_core::serde::deserialize(data).map_err(|e| StorageError::Serialization(e.to_string()))
|
||||
}
|
||||
|
||||
/// Deserialize a SourceRecord with backward compatibility for the pre-content layout.
|
||||
///
|
||||
/// Maps deserialization errors to [`StorageError::Serialization`].
|
||||
pub fn deserialize_source_record_compat(
|
||||
data: &[u8],
|
||||
) -> Result<stemedb_core::types::SourceRecord> {
|
||||
stemedb_core::serde::deserialize_source_record_compat(data)
|
||||
.map_err(|e| StorageError::Serialization(e.to_string()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@ -7,7 +7,7 @@ use tracing::{debug, instrument};
|
||||
use super::SourceRegistry;
|
||||
use crate::error::{Result, StorageError};
|
||||
use crate::key_codec;
|
||||
use crate::serde_helpers::{deserialize, serialize};
|
||||
use crate::serde_helpers::{deserialize_source_record_compat, serialize};
|
||||
use crate::traits::KVStore;
|
||||
|
||||
/// Generic SourceRegistry implementation backed by any KVStore.
|
||||
@ -80,7 +80,7 @@ impl<S: KVStore + 'static> SourceRegistry for GenericSourceRegistry<S> {
|
||||
|
||||
match self.store.get(&key).await? {
|
||||
Some(data) => {
|
||||
let record: SourceRecord = deserialize(&data)?;
|
||||
let record: SourceRecord = deserialize_source_record_compat(&data)?;
|
||||
Ok(Some(record))
|
||||
}
|
||||
None => Ok(None),
|
||||
|
||||
@ -9,8 +9,8 @@ use crate::error::Result;
|
||||
use metrics::{counter, gauge};
|
||||
use std::collections::HashSet;
|
||||
use std::sync::atomic::Ordering;
|
||||
use stemedb_core::serde::deserialize;
|
||||
use stemedb_core::types::{detect_clock_skew, Assertion, HlcTimestamp};
|
||||
use stemedb_core::serde::deserialize_assertion_compat;
|
||||
use stemedb_core::types::{detect_clock_skew, HlcTimestamp};
|
||||
use stemedb_rpc::proto::{FetchRequest, GetLeavesRequest, RootExchangeRequest};
|
||||
use stemedb_storage::crdt::AssertionTransfer;
|
||||
use stemedb_storage::KVStore;
|
||||
@ -201,7 +201,7 @@ impl<S: KVStore + 'static> AntiEntropyWorker<S> {
|
||||
}
|
||||
|
||||
// Extract subject and HLC timestamp from the assertion data
|
||||
let (subject, remote_hlc) = match deserialize::<Assertion>(&transfer.data) {
|
||||
let (subject, remote_hlc) = match deserialize_assertion_compat(&transfer.data) {
|
||||
Ok(assertion) => (assertion.subject.clone(), assertion.hlc_timestamp),
|
||||
Err(e) => {
|
||||
warn!(
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# StemeDB Data Structures
|
||||
|
||||
> **Last Updated:** 2026-01-31
|
||||
> **Last Updated:** 2026-02-19
|
||||
> **Source:** `crates/stemedb-core/src/types.rs`
|
||||
|
||||
This document describes the core data structures in StemeDB (Episteme). These types form the foundation of the "Git for Truth" knowledge graph.
|
||||
@ -417,6 +417,50 @@ pub struct TrustPack {
|
||||
|
||||
---
|
||||
|
||||
## The SourceRecord (Source Registry)
|
||||
|
||||
The Source Registry maps content-addressed source hashes to human-readable metadata. This enables the dashboard to show "FDA Approval Letter for Wegovy" instead of a raw BLAKE3 hash.
|
||||
|
||||
```rust
|
||||
pub struct SourceRecord {
|
||||
/// Content-addressed hash of the source (BLAKE3, 32 bytes).
|
||||
pub hash: [u8; 32],
|
||||
|
||||
/// Human-readable label.
|
||||
pub label: String,
|
||||
|
||||
/// Optional URL where the source can be accessed.
|
||||
pub url: Option<String>,
|
||||
|
||||
/// Authority tier (0-5), matching SourceClass.
|
||||
pub tier: u8,
|
||||
|
||||
/// Current status (Active, Deprecated, Quarantined).
|
||||
pub status: SourceStatus,
|
||||
|
||||
/// HLC timestamp when the record was created.
|
||||
pub created_at: u64,
|
||||
|
||||
/// HLC timestamp of the last update.
|
||||
pub updated_at: u64,
|
||||
|
||||
/// Optional curator notes about the source.
|
||||
pub notes: Option<String>,
|
||||
|
||||
/// Optional full-text content of the source document.
|
||||
/// Populated by pipelines that extract text from PDFs.
|
||||
/// Max size: 1 MB (MAX_SOURCE_CONTENT_LEN).
|
||||
pub content: Option<String>,
|
||||
}
|
||||
```
|
||||
|
||||
**Key Points:**
|
||||
- **Status lifecycle:** Active → Deprecated or Quarantined (curator-driven)
|
||||
- **Content field:** Stores extracted document text (e.g., from `pdftotext`). Stripped from list responses (`GET /v1/sources`) to avoid returning megabytes; included in single-source responses (`GET /v1/sources/{hash}`)
|
||||
- **rkyv compat:** Uses `deserialize_source_record_compat()` for backward compatibility with data written before the `content` field was added
|
||||
|
||||
---
|
||||
|
||||
## Serialization
|
||||
|
||||
All types use `rkyv` for zero-copy deserialization:
|
||||
@ -433,6 +477,17 @@ let assertion: Assertion = deserialize(&bytes)?;
|
||||
|
||||
**Critical Rule**: Never use raw `AllocSerializer` in production code. Always use `stemedb_core::serde::{serialize, deserialize}`.
|
||||
|
||||
### Schema Evolution (rkyv Compat)
|
||||
|
||||
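rkyv does **not** support schema evolution. When a field is added to a struct, old data can't be deserialized with the new struct. The solution is a legacy compat pattern: each affected type gets a `*_compat` deserializer that can also read the pre-change layout, which is described by a dedicated `Legacy*` struct: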
|
||||
|
||||
| Type | Compat Function | Legacy Struct |
|
||||
|------|----------------|---------------|
|
||||
| `Assertion` | `deserialize_assertion_compat()` | `LegacyAssertion` (pre-`narrative`) |
|
||||
| `SourceRecord` | `deserialize_source_record_compat()` | `LegacySourceRecord` (pre-`content`) |
|
||||
|
||||
All assertion deserialization should use `deserialize_assertion_compat()`. All source record deserialization should use `deserialize_source_record_compat()`. When adding fields to rkyv structs in the future, always add a legacy compat deserializer following this pattern.
|
||||
|
||||
---
|
||||
|
||||
## Relationship Diagram
|
||||
|
||||
@ -45,6 +45,9 @@ type Assertion struct {
|
||||
|
||||
// Semantic embedding vector (optional)
|
||||
Vector []float32 `json:"vector,omitempty"`
|
||||
|
||||
// Free-text narrative explaining methodology, limitations, bias, and caveats (optional)
|
||||
Narrative *string `json:"narrative,omitempty"`
|
||||
}
|
||||
|
||||
// AssertionBuilder provides a fluent API for building assertions.
|
||||
@ -150,6 +153,12 @@ func (b *AssertionBuilder) WithEpoch(epochHex string) *AssertionBuilder {
|
||||
return b
|
||||
}
|
||||
|
||||
// WithNarrative sets the free-text narrative (methodology, limitations, caveats).
|
||||
func (b *AssertionBuilder) WithNarrative(narrative string) *AssertionBuilder {
|
||||
b.assertion.Narrative = &narrative
|
||||
return b
|
||||
}
|
||||
|
||||
// WithVector sets the semantic embedding vector.
|
||||
func (b *AssertionBuilder) WithVector(vector []float32) *AssertionBuilder {
|
||||
b.assertion.Vector = vector
|
||||
|
||||
@ -174,6 +174,9 @@ type AssertionResponse struct {
|
||||
|
||||
// Semantic embedding vector (optional)
|
||||
Vector []float32 `json:"vector,omitempty"`
|
||||
|
||||
// Free-text narrative explaining methodology, limitations, bias, and caveats (optional)
|
||||
Narrative *string `json:"narrative,omitempty"`
|
||||
}
|
||||
|
||||
// CreateResponse represents the response from a create operation.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user