Major additions: - Community Next.js app (port 18187) for browsing claims with API docs - stemedb-chaos crate: Fault injection, chaos testing, CRDT properties - Latent ingestion system: Reddit/FDA ingesters with ADK-Go agents - Disputed claims handling: Manual review workflows and validation - Aphoria security scanner: New extractors (SQL injection, command injection, weak crypto, TLS version), policy-based ignores, UAT reports - Docker infrastructure: Dockerfile, docker-compose.yml for full stack - VulnBank demo: Intentionally vulnerable multi-language test corpus SDK & API enhancements: - Source registry handlers for tracking data provenance - Metrics endpoint - Skeptic filtering improvements Code quality: - Split 14 large files (>500 lines) into focused modules - All files now under 500-line limit per project guidelines Documentation: - Chaos testing guide, circuit breakers, observability docs - Phase 7 UAT documentation updates - Martin Kleppmann technical writer agent Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
504 lines
13 KiB
TypeScript
504 lines
13 KiB
TypeScript
/**
|
|
* Seed script for populating StemeDB with demo claims.
|
|
*
|
|
* This script:
|
|
* 1. Waits for the API to be healthy
|
|
* 2. Registers source documents with human-readable labels
|
|
* 3. Creates assertions that match the mock data in page.tsx
|
|
* 4. Verifies the data is queryable via SkepticLens
|
|
*
|
|
* Usage:
|
|
* npx tsx scripts/seed-claims.ts
|
|
*
|
|
* Environment:
|
|
* STEMEDB_API_URL - API base URL (default: http://127.0.0.1:18180)
|
|
*/
|
|
|
|
import * as ed from "@noble/ed25519";
|
|
import { sha512 } from "@noble/hashes/sha512";
|
|
|
|
// Route @noble/ed25519's synchronous SHA-512 hook through @noble/hashes:
// the message chunks are concatenated first, then hashed in one call.
ed.etc.sha512Sync = (...chunks) => {
  const joined = ed.etc.concatBytes(...chunks);
  return sha512(joined);
};
|
|
|
|
/** Base URL used when STEMEDB_API_URL is unset, empty, or whitespace-only. */
const DEFAULT_API_URL = "http://127.0.0.1:18180";

/**
 * Resolves the API base URL from an optional override.
 *
 * Surrounding whitespace and trailing slashes are removed so that callers can
 * append paths such as `/v1/health` without producing `//v1/health`.
 *
 * @param override - Raw value of the STEMEDB_API_URL environment variable.
 * @returns The normalized base URL, never ending in `/`.
 */
function resolveApiUrl(override: string | undefined): string {
  // `||` (not `??`) is deliberate: an empty override falls back to the default.
  const candidate = override?.trim() || DEFAULT_API_URL;
  return candidate.replace(/\/+$/, "");
}

/** API base URL for every request issued by this script. */
const API_URL = resolveApiUrl(process.env.STEMEDB_API_URL);
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/** A signing identity that authors seeded assertions. */
interface Agent {
  /** Human-readable agent name; also mixed into the key derivation input. */
  name: string;

  /** Seed string from which the private key is derived. */
  seed: string;

  /** Ed25519 private key bytes used to sign assertions. */
  privateKey: Uint8Array;

  /** Ed25519 public key bytes; sent hex-encoded as the agent id. */
  publicKey: Uint8Array;
}
|
|
|
|
/** A source document that assertions cite as provenance. */
interface Source {
  /** Hex-encoded identifier derived from the label. */
  hash: string;

  /** Human-readable title of the source. */
  label: string;

  /** Numeric tier; used as the key into the source-class lookup table. */
  tier: number;

  /** Optional link to the source document. */
  url?: string;
}
|
|
|
|
/** One claim made about a subject/predicate pair. */
interface ClaimSetEntry {
  /** Text of the claim. */
  value: string;

  /** Confidence attached to the claim. */
  confidence: number;

  /** Position of the cited source in the source list. */
  sourceIndex: number;

  /** Position of the authoring agent in the agent list. */
  agentIndex: number;
}

/** All claims seeded for a single subject/predicate pair. */
interface ClaimSet {
  subject: string;
  predicate: string;
  claims: ClaimSetEntry[];
}
|
|
|
|
// ============================================================================
|
|
// Helpers
|
|
// ============================================================================
|
|
|
|
/**
 * Encodes bytes as a lowercase hexadecimal string, two characters per byte.
 *
 * @param bytes - Bytes to encode.
 * @returns The hex string; empty when `bytes` is empty.
 */
function toHex(bytes: Uint8Array): string {
  let hex = "";
  for (const byte of bytes) {
    // Left-pad so values below 0x10 still occupy two characters.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
|
|
/**
 * Folds a string into a deterministic 32-byte digest.
 *
 * Despite its name this is NOT SHA-256 and is not cryptographically secure:
 * it is a simple XOR/add mixing of the UTF-8 bytes, used only so that seeded
 * keys and hashes are reproducible between runs.
 *
 * @param data - Text to digest.
 * @returns A 32-byte digest; all zeros for the empty string.
 */
function sha256(data: string): Uint8Array {
  const DIGEST_SIZE = 32;
  const digest = new Uint8Array(DIGEST_SIZE);

  new TextEncoder().encode(data).forEach((byte, index) => {
    const slot = index % DIGEST_SIZE;
    const neighbour = (index + 1) % DIGEST_SIZE;

    // XOR into the current slot, then add (mod 256) into the following one.
    digest[slot] ^= byte;
    digest[neighbour] = (digest[neighbour] + byte) % 256;
  });

  return digest;
}
|
|
|
|
/**
 * Polls the API health endpoint until it responds successfully.
 *
 * Each attempt issues `GET {API_URL}/v1/health`. Network errors, non-2xx
 * responses, and unparsable bodies all count as a failed attempt; the most
 * recent failure reason is kept so the final error explains why the API was
 * considered unhealthy.
 *
 * @param maxRetries - Maximum number of attempts before giving up.
 * @param delayMs - Pause between attempts, in milliseconds.
 * @throws Error when no attempt succeeds within `maxRetries` attempts.
 */
async function waitForHealth(maxRetries = 30, delayMs = 2000): Promise<void> {
  console.log(`Waiting for API at ${API_URL}...`);

  let lastFailure = "no attempt was made";

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const response = await fetch(`${API_URL}/v1/health`);

      if (response.ok) {
        const data = (await response.json()) as {
          version?: unknown;
          assertions_count?: unknown;
        };
        console.log(`API is healthy: v${data.version}, ${data.assertions_count} assertions`);
        return;
      }

      lastFailure = `HTTP ${response.status}`;
    } catch (error: unknown) {
      // Best-effort polling: remember the reason and fall through to retry.
      lastFailure = error instanceof Error ? error.message : String(error);
    }

    // No log or sleep after the final attempt; fail immediately instead.
    if (attempt < maxRetries) {
      console.log(` Retry ${attempt}/${maxRetries}...`);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  throw new Error(
    `API not healthy after ${maxRetries} retries (last failure: ${lastFailure})`
  );
}
|
|
|
|
/**
 * Builds an agent whose key pair is derived deterministically from its name
 * and seed, so repeated runs produce the same identity.
 *
 * @param name - Agent name.
 * @param seed - Seed string mixed with the name to derive the private key.
 * @returns The agent with its private key and matching Ed25519 public key.
 */
async function createAgent(name: string, seed: string): Promise<Agent> {
  // The 32-byte digest of the seed material is used directly as the private key.
  const privateKey = sha256(`agent-seed-${seed}-${name}`);

  return {
    name,
    seed,
    privateKey,
    publicKey: await ed.getPublicKeyAsync(privateKey),
  };
}
|
|
|
|
/**
 * Signs a subject/predicate pair with the agent's private key.
 *
 * The signed bytes are the UTF-8 encoding of `subject:predicate`. The
 * returned timestamp (Unix seconds) is captured alongside the signature but
 * is not part of the signed message.
 *
 * @param agent - Agent whose private key signs the message.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @returns Hex-encoded signature and the capture time in Unix seconds.
 */
async function signAssertion(
  agent: Agent,
  subject: string,
  predicate: string
): Promise<{ signature: string; timestamp: number }> {
  const timestamp = Math.floor(Date.now() / 1000);

  const payload = new TextEncoder().encode(`${subject}:${predicate}`);
  const rawSignature = await ed.signAsync(payload, agent.privateKey);

  return { signature: toHex(rawSignature), timestamp };
}
|
|
|
|
/**
 * Derives a source's hex identifier from its label.
 *
 * @param label - Human-readable source label.
 * @returns Hex encoding of the digest of `source-<label>`.
 */
function generateSourceHash(label: string): string {
  return toHex(sha256(`source-${label}`));
}
|
|
|
|
/**
 * Registers a source document via `POST {API_URL}/v1/sources`.
 *
 * A 409 response is treated as success (the source already exists). Any
 * other non-2xx response is logged as a warning and otherwise ignored.
 *
 * @param source - Source to register.
 */
async function registerSource(source: Source): Promise<void> {
  const { hash, label, tier, url } = source;
  const payload = JSON.stringify({ hash, label, tier, url });

  const response = await fetch(`${API_URL}/v1/sources`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });

  // 409 = already exists, which is fine
  if (response.ok || response.status === 409) {
    return;
  }

  const text = await response.text();
  console.warn(` Warning: Failed to register source ${source.label}: ${text}`);
}
|
|
|
|
/**
 * Signs and submits one assertion via `POST {API_URL}/v1/assert`.
 *
 * @param agent - Agent that signs the assertion.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @param value - Text value sent as the assertion object.
 * @param confidence - Confidence attached to the assertion.
 * @param sourceHash - Hash of the cited source.
 * @param sourceClass - Class name of the cited source.
 * @returns The `hash` field of the response body, or `null` when the API
 *   responds with a non-2xx status (a warning is logged in that case).
 */
async function createAssertion(
  agent: Agent,
  subject: string,
  predicate: string,
  value: string,
  confidence: number,
  sourceHash: string,
  sourceClass: string
): Promise<string | null> {
  const signed = await signAssertion(agent, subject, predicate);

  const signatureEntry = {
    agent_id: toHex(agent.publicKey),
    signature: signed.signature,
    timestamp: signed.timestamp,
    version: 1,
  };

  const payload = {
    subject,
    predicate,
    object: { type: "Text", value },
    confidence,
    source_hash: sourceHash,
    source_class: sourceClass,
    signatures: [signatureEntry],
  };

  const response = await fetch(`${API_URL}/v1/assert`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (response.ok) {
    const data = await response.json();
    return data.hash;
  }

  const text = await response.text();
  console.warn(` Warning: Failed to create assertion: ${text}`);
  return null;
}
|
|
|
|
/**
 * Queries `GET {API_URL}/v1/skeptic` for a subject/predicate pair and logs a
 * one-line summary of the result.
 *
 * Verification is best-effort: a non-2xx response, an unparsable body, or a
 * body missing the `claims` array or numeric `conflict_score` produces a
 * warning instead of an exception, so a verification hiccup cannot abort a
 * seed run whose writes already succeeded.
 *
 * @param subject - Subject to look up.
 * @param predicate - Predicate to look up.
 */
async function verifySkeptic(subject: string, predicate: string): Promise<void> {
  const url = `${API_URL}/v1/skeptic?subject=${encodeURIComponent(subject)}&predicate=${encodeURIComponent(predicate)}&include_source_metadata=true`;
  const response = await fetch(url);

  if (!response.ok) {
    console.warn(` Warning: Skeptic query failed for ${subject}/${predicate}`);
    return;
  }

  let body: { status?: unknown; claims?: unknown; conflict_score?: unknown } | null;
  try {
    body = await response.json();
  } catch {
    console.warn(` Warning: Skeptic response for ${subject}/${predicate} was not valid JSON`);
    return;
  }

  const status = body?.status;
  const claims = body?.claims;
  const conflictScore = body?.conflict_score;

  // Guard the two fields that are dereferenced below.
  if (!Array.isArray(claims) || typeof conflictScore !== "number") {
    console.warn(` Warning: Unexpected skeptic response shape for ${subject}/${predicate}`);
    return;
  }

  console.log(
    ` Verified: ${subject}/${predicate} -> ${status} (${claims.length} claims, conflict=${conflictScore.toFixed(2)})`
  );
}
|
|
|
|
// ============================================================================
|
|
// Data Definitions (matching page.tsx mock data)
|
|
// ============================================================================
|
|
|
|
/**
 * Source documents to seed, listed without their hashes.
 *
 * Order matters: claim definitions refer to sources by position in this list.
 */
const SOURCE_SEEDS: Omit<Source, "hash">[] = [
  {
    label: "PostgreSQL 16 Documentation - DDL Constraints",
    tier: 0,
    url: "https://www.postgresql.org/docs/current/ddl-constraints.html",
  },
  {
    label: "Snodgrass - Developing Time-Oriented Database Applications",
    tier: 1,
    url: "https://www2.cs.arizona.edu/~rts/tdbbook.pdf",
  },
  {
    label: "Shapiro et al. - Conflict-free Replicated Data Types (SSS 2011)",
    tier: 1,
    url: "https://hal.inria.fr/inria-00609399/document",
  },
  {
    label: "Almeida et al. - Delta State Replicated Data Types (2018)",
    tier: 1,
    url: "https://arxiv.org/abs/1603.01529",
  },
  {
    label: "StemeDB Design Notes - Why Not CRDTs",
    tier: 3,
  },
  {
    label: "BLAKE3 Specification - One Function, Fast Everywhere",
    tier: 0,
    url: "https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf",
  },
  {
    label: "StemeDB Source Code - lens/recency.rs",
    tier: 0,
    url: "https://github.com/orchard9/stemedb/blob/main/crates/stemedb-lens/src/recency.rs",
  },
  {
    label: "GitHub Issue #142 - Optimize RecencyLens",
    tier: 4,
    url: "https://github.com/orchard9/stemedb/issues/142",
  },
  {
    label: "Kleppmann - Designing Data-Intensive Applications",
    tier: 1,
    url: "https://dataintensive.net/",
  },
  {
    label: "StemeDB Implementation Notes",
    tier: 3,
  },
  {
    label: "Kamvar et al. - The EigenTrust Algorithm (WWW 2003)",
    tier: 1,
    url: "https://nlp.stanford.edu/pubs/eigentrust.pdf",
  },
  {
    label: "Medical AI Safety Working Group - Trust Calibration Report",
    tier: 2,
  },
];

/** Seed sources with their hashes derived from each label. */
const SOURCES: Source[] = SOURCE_SEEDS.map((seed) => ({
  hash: generateSourceHash(seed.label),
  ...seed,
}));
|
|
|
|
/** Source class names ordered by tier: index 0 is tier 0, index 5 is tier 5. */
const SOURCE_CLASS_NAMES = [
  "Regulatory",
  "Clinical",
  "Observational",
  "Expert",
  "Community",
  "Anecdotal",
];

/** Lookup from a source's numeric tier to its class name. */
const SOURCE_CLASS_MAP: Record<number, string> = Object.fromEntries(
  SOURCE_CLASS_NAMES.map((className, tier): [number, string] => [tier, className])
);
|
|
|
|
/**
 * Builds one claim entry.
 *
 * @param value - Claim text.
 * @param confidence - Confidence attached to the claim.
 * @param sourceIndex - Position of the cited source in the source list.
 * @param agentIndex - Position of the authoring agent in the agent list.
 */
function seedClaim(
  value: string,
  confidence: number,
  sourceIndex: number,
  agentIndex: number
): ClaimSet["claims"][number] {
  return { value, confidence, sourceIndex, agentIndex };
}

/** Claims to seed, grouped by subject/predicate pair. */
const CLAIM_SETS: ClaimSet[] = [
  // Single value claim (agreed)
  {
    subject: "Episteme",
    predicate: "storage_model",
    claims: [
      seedClaim("Single value per key is the dominant paradigm", 0.92, 0, 0), // PostgreSQL docs
      seedClaim("Bitemporal and event stores are exceptions", 0.78, 1, 1), // Snodgrass
    ],
  },

  // CRDT claim (contested)
  {
    subject: "CRDT",
    predicate: "replica_assumption",
    claims: [
      seedClaim("CRDTs assume replicas are authoritative copies of same data", 0.94, 2, 0), // Shapiro
      seedClaim("CRDTs can model multi-source disagreement with delta states", 0.76, 3, 1), // Almeida
      seedClaim("CRDTs don't preserve provenance of conflicting sources", 0.65, 4, 2), // StemeDB notes
    ],
  },

  // Content addressing (unanimous)
  {
    subject: "Episteme",
    predicate: "content_addressing",
    claims: [
      seedClaim(
        "Content-addressing provides deduplication, integrity, and efficient comparison",
        0.96,
        5, // BLAKE3 spec
        0
      ),
    ],
  },

  // Recency complexity (agreed)
  {
    subject: "RecencyLens",
    predicate: "complexity",
    claims: [
      seedClaim("RecencyLens is O(n) where n = candidates", 0.88, 6, 0), // Source code
      seedClaim("Could be O(log n) with a heap-based implementation", 0.52, 7, 3), // GitHub issue
    ],
  },

  // Storage growth (unanimous)
  {
    subject: "Episteme",
    predicate: "storage_growth",
    claims: [
      seedClaim("Append-only storage grows without bound", 0.97, 8, 0), // Kleppmann
    ],
  },

  // Trust parameters (contested)
  {
    subject: "EigenTrust",
    predicate: "parameters",
    claims: [
      seedClaim(
        "Trust parameters (0.5 start, +0.05/-0.1) are reasonable heuristics",
        0.72,
        9, // Implementation notes
        2
      ),
      seedClaim(
        "EigenTrust provides theoretical foundation for trust propagation",
        0.89,
        10, // Kamvar paper
        0
      ),
      seedClaim(
        "Heuristics without formal verification are dangerous for high-stakes domains",
        0.61,
        11, // Medical AI Safety
        1
      ),
    ],
  },
];
|
|
|
|
// ============================================================================
|
|
// Main
|
|
// ============================================================================
|
|
|
|
/**
 * Runs the seed workflow end to end:
 *
 * 1. waits for the API to report healthy,
 * 2. creates the four signing agents with deterministic keys,
 * 3. registers every source,
 * 4. creates one assertion per claim,
 * 5. pauses briefly, then queries each subject/predicate pair back.
 *
 * Individual registration or assertion failures are logged and do not stop
 * the run; the number of failed assertions is reported at the end.
 *
 * @throws Error when the API never becomes healthy, or when the seed data
 *   references a source index, agent index, or tier that does not exist.
 */
async function main(): Promise<void> {
  console.log("StemeDB Seed Script");
  console.log("===================\n");

  // Wait for API
  await waitForHealth();
  console.log();

  // Create agents with deterministic keys
  console.log("Creating agents...");
  const agents: Agent[] = await Promise.all([
    createAgent("regulatory_authority", "fda-agent-seed-001"),
    createAgent("clinical_researcher", "clinical-agent-seed-002"),
    createAgent("expert_opinion", "expert-agent-seed-003"),
    createAgent("community_voice", "community-agent-seed-004"),
  ]);

  for (const agent of agents) {
    console.log(` ${agent.name}: ${toHex(agent.publicKey).slice(0, 16)}...`);
  }
  console.log();

  // Register sources (sequentially, so log lines follow definition order)
  console.log("Registering sources...");
  for (const source of SOURCES) {
    await registerSource(source);
    console.log(` ${source.label.slice(0, 50)}...`);
  }
  console.log();

  // Create assertions
  console.log("Creating assertions...");
  let totalAssertions = 0;
  let failedAssertions = 0;

  for (const claimSet of CLAIM_SETS) {
    console.log(`\n ${claimSet.subject}/${claimSet.predicate}:`);

    for (const claim of claimSet.claims) {
      const source = SOURCES[claim.sourceIndex];
      const agent = agents[claim.agentIndex];

      // Fail with a precise message rather than an opaque TypeError when the
      // static seed data points outside the source or agent lists.
      if (source === undefined || agent === undefined) {
        throw new Error(
          `Invalid seed data for ${claimSet.subject}/${claimSet.predicate}: ` +
            `sourceIndex=${claim.sourceIndex}, agentIndex=${claim.agentIndex}`
        );
      }

      // An unmapped tier would otherwise serialize as a missing source_class.
      const sourceClass = SOURCE_CLASS_MAP[source.tier];
      if (sourceClass === undefined) {
        throw new Error(
          `No source class mapped for tier ${source.tier} (source "${source.label}")`
        );
      }

      const hash = await createAssertion(
        agent,
        claimSet.subject,
        claimSet.predicate,
        claim.value,
        claim.confidence,
        source.hash,
        sourceClass
      );

      if (hash) {
        totalAssertions++;
        console.log(` + "${claim.value.slice(0, 40)}..." (${sourceClass})`);
      } else {
        failedAssertions++;
      }
    }
  }

  console.log(`\nCreated ${totalAssertions} assertions.`);
  if (failedAssertions > 0) {
    console.warn(` Warning: ${failedAssertions} assertions could not be created.`);
  }
  console.log();

  // Wait a moment for materialization
  console.log("Waiting for materialization...");
  await new Promise((resolve) => setTimeout(resolve, 2000));
  console.log();

  // Verify with Skeptic
  console.log("Verifying via SkepticLens...");
  for (const claimSet of CLAIM_SETS) {
    await verifySkeptic(claimSet.subject, claimSet.predicate);
  }

  console.log("\nDone! Seeded claims are ready for the Community app.");
}
|
|
|
|
/** Logs the failure that aborted the seed run and exits with status 1. */
function abortSeed(reason: unknown): never {
  console.error("Seed failed:", reason);
  process.exit(1);
}

// Script entry point: any rejection from main() ends the process with a failure code.
main().catch(abortSeed);
|