/**
 * Seed script for populating StemeDB with demo claims.
 *
 * This script:
 * 1. Waits for the API to be healthy
 * 2. Registers source documents with human-readable labels
 * 3. Creates assertions that match the mock data in page.tsx
 * 4. Verifies the data is queryable via SkepticLens
 *
 * Usage:
 *   npx tsx scripts/seed-claims.ts
 *
 * Environment:
 *   STEMEDB_API_URL - API base URL (default: http://127.0.0.1:18180)
 */

import * as ed from "@noble/ed25519";
import { sha512 } from "@noble/hashes/sha512";

// Configure ed25519 to use sha512
ed.etc.sha512Sync = (...m) => sha512(ed.etc.concatBytes(...m));

const API_URL = process.env.STEMEDB_API_URL || "http://127.0.0.1:18180";

// ============================================================================
// Types
// ============================================================================

/** A signing identity: display name, the seed it was derived from, and its ed25519 key pair. */
interface Agent {
  name: string;
  seed: string;
  privateKey: Uint8Array;
  publicKey: Uint8Array;
}

/** A source document as sent to `POST /v1/sources`. */
interface Source {
  hash: string;
  label: string;
  /** Numeric tier; translated to a source class name through SOURCE_CLASS_MAP. */
  tier: number;
  url?: string;
}

/** All claims made about one subject/predicate pair. */
interface ClaimSet {
  subject: string;
  predicate: string;
  claims: {
    value: string;
    confidence: number;
    /** Index into SOURCES. */
    sourceIndex: number;
    /** Index into the agents array built in main(). */
    agentIndex: number;
  }[];
}

// ============================================================================
// Helpers
// ============================================================================

/** Encodes bytes as a lowercase, zero-padded hexadecimal string. */
function toHex(bytes: Uint8Array): string {
  return Array.from(bytes)
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
}

/**
 * Folds a string into 32 deterministic bytes.
 *
 * NOTE: despite its name this is NOT SHA-256. It is a simple XOR/add fold used
 * only to derive stable seed material (agent keys, source hashes) for demo
 * data. The algorithm is kept unchanged so previously derived values stay the
 * same; do not use it where collision or preimage resistance matters.
 */
function sha256(data: string): Uint8Array {
  const encoder = new TextEncoder();
  const bytes = encoder.encode(data);
  // Simple deterministic hash for seed purposes (not crypto-secure, but deterministic)
  const hash = new Uint8Array(32);
  for (let i = 0; i < bytes.length; i++) {
    hash[i % 32] ^= bytes[i];
    hash[(i + 1) % 32] = (hash[(i + 1) % 32] + bytes[i]) % 256;
  }
  return hash;
}

/**
 * Polls `GET /v1/health` until it answers with a 2xx status.
 *
 * @param maxRetries - Maximum number of attempts.
 * @param delayMs - Pause between attempts, in milliseconds.
 * @throws Error when no attempt succeeds.
 */
async function waitForHealth(maxRetries = 30, delayMs = 2000): Promise<void> {
  console.log(`Waiting for API at ${API_URL}...`);

  for (let i = 0; i < maxRetries; i++) {
    try {
      const response = await fetch(`${API_URL}/v1/health`);
      if (response.ok) {
        const data = await response.json();
        console.log(`API is healthy: v${data.version}, ${data.assertions_count} assertions`);
        return;
      }
    } catch {
      // Deliberately ignored: a failed request or unparsable body just means
      // "not healthy yet", so fall through and retry.
    }

    // No point sleeping after the final attempt.
    if (i < maxRetries - 1) {
      console.log(` Retry ${i + 1}/${maxRetries}...`);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  throw new Error(`API not healthy after ${maxRetries} retries`);
}

/**
 * Builds an agent whose key pair is derived deterministically from `seed` and
 * `name`, so repeated runs produce the same agent ids.
 */
async function createAgent(name: string, seed: string): Promise<Agent> {
  // Generate deterministic private key from seed
  const seedHash = sha256(`agent-seed-${seed}-${name}`);
  const privateKey = seedHash;
  const publicKey = await ed.getPublicKeyAsync(privateKey);

  return { name, seed, privateKey, publicKey };
}

/**
 * Signs the message `subject:predicate` with the agent's private key.
 *
 * The returned timestamp (current Unix time in seconds) travels alongside the
 * signature but is not part of the signed message.
 *
 * @returns The hex-encoded signature and the timestamp.
 */
async function signAssertion(
  agent: Agent,
  subject: string,
  predicate: string
): Promise<{ signature: string; timestamp: number }> {
  const timestamp = Math.floor(Date.now() / 1000);
  const message = `${subject}:${predicate}`;
  const messageBytes = new TextEncoder().encode(message);
  const signature = await ed.signAsync(messageBytes, agent.privateKey);

  return {
    signature: toHex(signature),
    timestamp,
  };
}

/** Derives the hex hash of a source from its label. */
function generateSourceHash(label: string): string {
  const hash = sha256(`source-${label}`);
  return toHex(hash);
}

/**
 * Registers a source via `POST /v1/sources`.
 *
 * Best effort: a non-2xx response other than 409 is logged as a warning and
 * does not abort the run.
 */
async function registerSource(source: Source): Promise<void> {
  const response = await fetch(`${API_URL}/v1/sources`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      hash: source.hash,
      label: source.label,
      tier: source.tier,
      url: source.url,
    }),
  });

  if (!response.ok && response.status !== 409) {
    // 409 = already exists, which is fine
    const text = await response.text();
    console.warn(` Warning: Failed to register source ${source.label}: ${text}`);
  }
}

/**
 * Signs and submits one assertion via `POST /v1/assert`.
 *
 * @returns The `hash` field of the response body, or `null` when the API
 *   rejects the request (the rejection is logged as a warning).
 */
async function createAssertion(
  agent: Agent,
  subject: string,
  predicate: string,
  value: string,
  confidence: number,
  sourceHash: string,
  sourceClass: string
): Promise<string | null> {
  const { signature, timestamp } = await signAssertion(agent, subject, predicate);

  const request = {
    subject,
    predicate,
    object: { type: "Text", value },
    confidence,
    source_hash: sourceHash,
    source_class: sourceClass,
    signatures: [
      {
        agent_id: toHex(agent.publicKey),
        signature,
        timestamp,
        version: 1,
      },
    ],
  };

  const response = await fetch(`${API_URL}/v1/assert`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });

  if (!response.ok) {
    const text = await response.text();
    console.warn(` Warning: Failed to create assertion: ${text}`);
    return null;
  }

  const data = await response.json();
  return data.hash;
}

/**
 * Queries `GET /v1/skeptic` for a subject/predicate pair and logs a one-line
 * summary of the result.
 *
 * This is a reporting step only: a failed query is logged as a warning, and
 * missing or unexpectedly typed fields in the body are reported as `0` claims
 * / `n/a` conflict rather than throwing.
 */
async function verifySkeptic(subject: string, predicate: string): Promise<void> {
  const url = `${API_URL}/v1/skeptic?subject=${encodeURIComponent(subject)}&predicate=${encodeURIComponent(predicate)}&include_source_metadata=true`;

  const response = await fetch(url);
  if (!response.ok) {
    console.warn(` Warning: Skeptic query failed for ${subject}/${predicate}`);
    return;
  }

  // Only the fields read below are described; the body is not validated
  // further, so every field is treated as unknown until checked.
  const data: { status?: unknown; claims?: unknown; conflict_score?: unknown } =
    (await response.json()) ?? {};

  const claimCount = Array.isArray(data.claims) ? data.claims.length : 0;
  const conflict =
    typeof data.conflict_score === "number" ? data.conflict_score.toFixed(2) : "n/a";

  console.log(
    ` Verified: ${subject}/${predicate} -> ${String(data.status)} (${claimCount} claims, conflict=${conflict})`
  );
}

// ============================================================================
// Data Definitions (matching page.tsx mock data)
// ============================================================================

/** Source documents without their hash; the hash is derived from the label below. */
const SOURCE_DEFINITIONS: Omit<Source, "hash">[] = [
  {
    label: "PostgreSQL 16 Documentation - DDL Constraints",
    tier: 0,
    url: "https://www.postgresql.org/docs/current/ddl-constraints.html",
  },
  {
    label: "Snodgrass - Developing Time-Oriented Database Applications",
    tier: 1,
    url: "https://www2.cs.arizona.edu/~rts/tdbbook.pdf",
  },
  {
    label: "Shapiro et al. - Conflict-free Replicated Data Types (SSS 2011)",
    tier: 1,
    url: "https://hal.inria.fr/inria-00609399/document",
  },
  {
    label: "Almeida et al. - Delta State Replicated Data Types (2018)",
    tier: 1,
    url: "https://arxiv.org/abs/1603.01529",
  },
  {
    label: "StemeDB Design Notes - Why Not CRDTs",
    tier: 3,
  },
  {
    label: "BLAKE3 Specification - One Function, Fast Everywhere",
    tier: 0,
    url: "https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf",
  },
  {
    label: "StemeDB Source Code - lens/recency.rs",
    tier: 0,
    url: "https://github.com/orchard9/stemedb/blob/main/crates/stemedb-lens/src/recency.rs",
  },
  {
    label: "GitHub Issue #142 - Optimize RecencyLens",
    tier: 4,
    url: "https://github.com/orchard9/stemedb/issues/142",
  },
  {
    label: "Kleppmann - Designing Data-Intensive Applications",
    tier: 1,
    url: "https://dataintensive.net/",
  },
  {
    label: "StemeDB Implementation Notes",
    tier: 3,
  },
  {
    label: "Kamvar et al. - The EigenTrust Algorithm (WWW 2003)",
    tier: 1,
    url: "https://nlp.stanford.edu/pubs/eigentrust.pdf",
  },
  {
    label: "Medical AI Safety Working Group - Trust Calibration Report",
    tier: 2,
  },
];

// Generate hashes for sources
const SOURCES: Source[] = SOURCE_DEFINITIONS.map((definition) => ({
  ...definition,
  hash: generateSourceHash(definition.label),
}));

/** Maps a source tier to the `source_class` string sent with each assertion. */
const SOURCE_CLASS_MAP: Record<number, string> = {
  0: "Regulatory",
  1: "Clinical",
  2: "Observational",
  3: "Expert",
  4: "Community",
  5: "Anecdotal",
};

const CLAIM_SETS: ClaimSet[] = [
  // Single value claim (agreed)
  {
    subject: "Episteme",
    predicate: "storage_model",
    claims: [
      {
        value: "Single value per key is the dominant paradigm",
        confidence: 0.92,
        sourceIndex: 0, // PostgreSQL docs
        agentIndex: 0,
      },
      {
        value: "Bitemporal and event stores are exceptions",
        confidence: 0.78,
        sourceIndex: 1, // Snodgrass
        agentIndex: 1,
      },
    ],
  },
  // CRDT claim (contested)
  {
    subject: "CRDT",
    predicate: "replica_assumption",
    claims: [
      {
        value: "CRDTs assume replicas are authoritative copies of same data",
        confidence: 0.94,
        sourceIndex: 2, // Shapiro
        agentIndex: 0,
      },
      {
        value: "CRDTs can model multi-source disagreement with delta states",
        confidence: 0.76,
        sourceIndex: 3, // Almeida
        agentIndex: 1,
      },
      {
        value: "CRDTs don't preserve provenance of conflicting sources",
        confidence: 0.65,
        sourceIndex: 4, // StemeDB notes
        agentIndex: 2,
      },
    ],
  },
  // Content addressing (unanimous)
  {
    subject: "Episteme",
    predicate: "content_addressing",
    claims: [
      {
        value: "Content-addressing provides deduplication, integrity, and efficient comparison",
        confidence: 0.96,
        sourceIndex: 5, // BLAKE3 spec
        agentIndex: 0,
      },
    ],
  },
  // Recency complexity (agreed)
  {
    subject: "RecencyLens",
    predicate: "complexity",
    claims: [
      {
        value: "RecencyLens is O(n) where n = candidates",
        confidence: 0.88,
        sourceIndex: 6, // Source code
        agentIndex: 0,
      },
      {
        value: "Could be O(log n) with a heap-based implementation",
        confidence: 0.52,
        sourceIndex: 7, // GitHub issue
        agentIndex: 3,
      },
    ],
  },
  // Storage growth (unanimous)
  {
    subject: "Episteme",
    predicate: "storage_growth",
    claims: [
      {
        value: "Append-only storage grows without bound",
        confidence: 0.97,
        sourceIndex: 8, // Kleppmann
        agentIndex: 0,
      },
    ],
  },
  // Trust parameters (contested)
  {
    subject: "EigenTrust",
    predicate: "parameters",
    claims: [
      {
        value: "Trust parameters (0.5 start, +0.05/-0.1) are reasonable heuristics",
        confidence: 0.72,
        sourceIndex: 9, // Implementation notes
        agentIndex: 2,
      },
      {
        value: "EigenTrust provides theoretical foundation for trust propagation",
        confidence: 0.89,
        sourceIndex: 10, // Kamvar paper
        agentIndex: 0,
      },
      {
        value: "Heuristics without formal verification are dangerous for high-stakes domains",
        confidence: 0.61,
        sourceIndex: 11, // Medical AI Safety
        agentIndex: 1,
      },
    ],
  },
];

// ============================================================================
// Main
// ============================================================================

/**
 * Runs the seed: waits for the API, creates agents, registers sources, submits
 * every claim in CLAIM_SETS, then queries each subject/predicate pair back.
 *
 * @throws Error when the API never becomes healthy, or when a claim refers to
 *   a source index, agent index, or tier that has no definition.
 */
async function main(): Promise<void> {
  console.log("StemeDB Seed Script");
  console.log("===================\n");

  // Wait for API
  await waitForHealth();
  console.log();

  // Create agents with deterministic keys
  console.log("Creating agents...");
  const agents: Agent[] = await Promise.all([
    createAgent("regulatory_authority", "fda-agent-seed-001"),
    createAgent("clinical_researcher", "clinical-agent-seed-002"),
    createAgent("expert_opinion", "expert-agent-seed-003"),
    createAgent("community_voice", "community-agent-seed-004"),
  ]);

  for (const agent of agents) {
    console.log(` ${agent.name}: ${toHex(agent.publicKey).slice(0, 16)}...`);
  }
  console.log();

  // Register sources
  console.log("Registering sources...");
  for (const source of SOURCES) {
    await registerSource(source);
    console.log(` ${source.label.slice(0, 50)}...`);
  }
  console.log();

  // Create assertions
  console.log("Creating assertions...");
  let totalAssertions = 0;

  for (const claimSet of CLAIM_SETS) {
    console.log(`\n ${claimSet.subject}/${claimSet.predicate}:`);

    for (const claim of claimSet.claims) {
      const source = SOURCES[claim.sourceIndex];
      const agent = agents[claim.agentIndex];
      // A dangling index is a mistake in the data tables above; fail with a
      // message that names the offending claim instead of an opaque TypeError.
      if (!source || !agent) {
        throw new Error(
          `Invalid claim definition for ${claimSet.subject}/${claimSet.predicate}: ` +
            `sourceIndex=${claim.sourceIndex}, agentIndex=${claim.agentIndex}`
        );
      }

      const sourceClass = SOURCE_CLASS_MAP[source.tier];
      // Without this check an unmapped tier would silently drop source_class
      // from the JSON request body.
      if (sourceClass === undefined) {
        throw new Error(`No source class mapped for tier ${source.tier} (${source.label})`);
      }

      const hash = await createAssertion(
        agent,
        claimSet.subject,
        claimSet.predicate,
        claim.value,
        claim.confidence,
        source.hash,
        sourceClass
      );

      if (hash) {
        totalAssertions++;
        console.log(` + "${claim.value.slice(0, 40)}..." (${sourceClass})`);
      }
    }
  }

  console.log(`\nCreated ${totalAssertions} assertions.`);
  console.log();

  // Wait a moment for materialization
  console.log("Waiting for materialization...");
  await new Promise((resolve) => setTimeout(resolve, 2000));
  console.log();

  // Verify with Skeptic
  console.log("Verifying via SkepticLens...");
  for (const claimSet of CLAIM_SETS) {
    await verifySkeptic(claimSet.subject, claimSet.predicate);
  }

  console.log("\nDone! Seeded claims are ready for the Community app.");
}

main().catch((error) => {
  console.error("Seed failed:", error);
  process.exit(1);
});