#!/usr/bin/env npx tsx
/**
 * Seed whitepaper claims to StemeDB.
 *
 * This script:
 * 1. Takes the hand-curated claims in WHITEPAPER_CLAIMS (nothing is read from disk)
 * 2. Creates an agent with a deterministic key
 * 3. Registers each distinct source and submits one assertion per claim to StemeDB
 *
 * Usage:
 *   npx tsx scripts/seed-whitepaper.ts
 *   npx tsx scripts/seed-whitepaper.ts --dry-run   (or -d)
 *
 * Environment:
 *   STEMEDB_API_URL - API base URL (default: http://127.0.0.1:18180)
 */

import * as ed from "@noble/ed25519";
import { sha512 } from "@noble/hashes/sha512";
// NOTE(review): readFileSync and join are not referenced anywhere in this file.
import { readFileSync } from "fs";
import { join } from "path";

// Configure ed25519 to use sha512
ed.etc.sha512Sync = (...m) => sha512(ed.etc.concatBytes(...m));

// `||` (not `??`) so that an empty STEMEDB_API_URL also falls back to the default.
const API_URL = process.env.STEMEDB_API_URL || "http://127.0.0.1:18180";

// ============================================================================
// Types
// ============================================================================

/** A signing identity: display name plus an Ed25519 key pair. */
interface Agent {
  name: string;
  privateKey: Uint8Array;
  publicKey: Uint8Array;
}

type SourceClass = "Regulatory" | "Clinical" | "Observational" | "Expert" | "Community" | "Anecdotal";

type ObjectType = "Text" | "Number" | "Boolean" | "Reference";

/** Tagged object value of a claim; `type` names the kind carried in `value`. */
interface ObjectValue {
  type: ObjectType;
  value: string | number | boolean;
}

/** One hand-written subject/predicate/object claim together with its provenance. */
interface CuratedClaim {
  subject: string;
  predicate: string;
  object: ObjectValue;
  confidence: number;
  sourceClass: SourceClass;
  sourceLabel: string;
  sourceUrl?: string;
  note?: string;
}

/** Registration payload collected per distinct source hash in `main`. */
interface SourceRecord {
  label: string;
  tier: number;
  url?: string;
}

// ============================================================================
// Helpers
// ============================================================================

/** Encodes bytes as a lowercase hex string, two digits per byte. */
function toHex(bytes: Uint8Array): string {
  return Array.from(bytes)
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
}

/**
 * Shortens `text` to at most `max` characters for log output, appending "..."
 * only when something was actually cut off.
 */
function truncate(text: string, max: number): string {
  return text.length > max ? `${text.slice(0, max)}...` : text;
}

/**
 * Folds the UTF-8 bytes of `data` into a deterministic 32-byte digest.
 *
 * WARNING: despite its name this is NOT SHA-256 and is not cryptographically
 * secure; it is a simple XOR/add fold. The algorithm is kept exactly as-is
 * because the agent key (see createAgent) and every source hash (see
 * generateSourceHash) are derived from its output, so changing it would change
 * those identifiers.
 */
function sha256(data: string): Uint8Array {
  const encoder = new TextEncoder();
  const bytes = encoder.encode(data);
  const hash = new Uint8Array(32);
  for (let i = 0; i < bytes.length; i++) {
    hash[i % 32] ^= bytes[i];
    hash[(i + 1) % 32] = (hash[(i + 1) % 32] + bytes[i]) % 256;
  }
  return hash;
}

/** Returns the hex source hash for a source label (digest of `source-whitepaper-<label>`). */
function generateSourceHash(label: string): string {
  return toHex(sha256(`source-whitepaper-${label}`));
}

/**
 * Creates an agent whose key pair is derived deterministically from `name`:
 * the private key is the 32-byte digest of `whitepaper-seed-agent-<name>`, so
 * the same name always yields the same public key.
 */
async function createAgent(name: string): Promise<Agent> {
  const seedHash = sha256(`whitepaper-seed-agent-${name}`);
  const privateKey = seedHash;
  const publicKey = await ed.getPublicKeyAsync(privateKey);
  return { name, privateKey, publicKey };
}

/**
 * Signs the UTF-8 bytes of `<subject>:<predicate>` with the agent's private key.
 *
 * @returns the hex-encoded signature and the current Unix time in seconds.
 *
 * NOTE(review): the returned timestamp is not part of the signed message —
 * confirm this matches what the server verifies.
 */
async function signAssertion(
  agent: Agent,
  subject: string,
  predicate: string
): Promise<{ signature: string; timestamp: number }> {
  const timestamp = Math.floor(Date.now() / 1000);
  const message = `${subject}:${predicate}`;
  const messageBytes = new TextEncoder().encode(message);
  const signature = await ed.signAsync(messageBytes, agent.privateKey);
  return { signature: toHex(signature), timestamp };
}

// ============================================================================
// Curated Claims from Whitepaper
// These are hand-curated to ensure quality and relevance
// ============================================================================

const WHITEPAPER_CLAIMS: CuratedClaim[] = [
  // Storage & Architecture
  {
    subject: "StemeDB",
    predicate: "storage_model",
    object: { type: "Text", value: "append-only Merkle DAG" },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 5.1",
    note: "Core architectural claim",
  },
  {
    subject: "StemeDB",
    predicate: "hash_algorithm",
    object: { type: "Text", value: "BLAKE3" },
    confidence: 0.99,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.3",
    note: "Content-addressing algorithm",
  },
  {
    subject: "StemeDB",
    predicate: "signature_algorithm",
    object: { type: "Text", value: "Ed25519" },
    confidence: 0.99,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.4",
    note: "Cryptographic signature algorithm",
  },
  {
    subject: "StemeDB",
    predicate: "serialization_format",
    object: { type: "Text", value: "rkyv (zero-copy)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 5.5",
  },
  {
    subject: "StemeDB",
    predicate: "data_model",
    object: { type: "Text", value: "subject-predicate-object triples with provenance" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.1",
  },

  // Lens Complexity Claims
  {
    subject: "RecencyLens",
    predicate: "time_complexity",
    object: { type: "Text", value: "O(n)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.1",
    note: "Where n = number of candidates",
  },
  {
    subject: "RecencyLens",
    predicate: "space_complexity",
    object: { type: "Text", value: "O(1)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.1",
  },
  {
    subject: "ConsensusLens",
    predicate: "time_complexity",
    object: { type: "Text", value: "O(n)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.2",
  },
  {
    subject: "ConsensusLens",
    predicate: "space_complexity",
    object: { type: "Text", value: "O(k)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.2",
    note: "Where k = distinct object values",
  },
  {
    subject: "AuthorityLens",
    predicate: "time_complexity",
    object: { type: "Text", value: "O(n)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.3",
  },
  {
    subject: "SkepticLens",
    predicate: "resolution_type",
    object: { type: "Text", value: "conflict analysis without winner selection" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.4",
  },
  {
    subject: "SkepticLens",
    predicate: "conflict_metric",
    object: { type: "Text", value: "normalized Shannon entropy" },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.4",
  },

  // Lens Properties
  {
    subject: "Lens",
    predicate: "property_stateless",
    object: { type: "Boolean", value: true },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4",
  },
  {
    subject: "Lens",
    predicate: "property_deterministic",
    object: { type: "Boolean", value: true },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4",
  },
  {
    subject: "Lens",
    predicate: "property_composable",
    object: { type: "Boolean", value: true },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4",
  },

  // Trust Parameters (contested - honest limitation)
  {
    subject: "EigenTrust",
    predicate: "initial_trust_score",
    object: { type: "Number", value: 0.5 },
    confidence: 0.72,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 7.1",
    note: "Heuristic without theoretical foundation",
  },
  {
    subject: "EigenTrust",
    predicate: "reward_delta",
    object: { type: "Number", value: 0.05 },
    confidence: 0.72,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 7.1",
    note: "Heuristic for correct assertions",
  },
  {
    subject: "EigenTrust",
    predicate: "penalty_delta",
    object: { type: "Number", value: 0.1 },
    confidence: 0.72,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 7.1",
    note: "Heuristic for incorrect assertions",
  },

  // Source Tier Weights
  {
    subject: "SourceClass",
    predicate: "tier_0_weight",
    object: { type: "Number", value: 1.0 },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.2",
    note: "Regulatory tier",
  },
  {
    subject: "SourceClass",
    predicate: "tier_1_weight",
    object: { type: "Number", value: 0.9 },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.2",
    note: "Clinical tier",
  },
  {
    subject: "SourceClass",
    predicate: "tier_2_weight",
    object: { type: "Number", value: 0.7 },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.2",
    note: "Observational tier",
  },
  {
    subject: "SourceClass",
    predicate: "tier_3_weight",
    object: { type: "Number", value: 0.5 },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.2",
    note: "Expert tier",
  },
  {
    subject: "SourceClass",
    predicate: "tier_4_weight",
    object: { type: "Number", value: 0.2 },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.2",
    note: "Community tier",
  },
  {
    subject: "SourceClass",
    predicate: "tier_5_weight",
    object: { type: "Number", value: 0.1 },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.2",
    note: "Anecdotal tier",
  },

  // MaterializedView
  {
    subject: "MaterializedView",
    predicate: "read_complexity",
    object: { type: "Text", value: "O(1)" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.4",
  },
  {
    subject: "MaterializedView",
    predicate: "consistency_model",
    object: { type: "Text", value: "eventual consistency" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 6.3",
  },

  // Content Addressing Properties
  {
    subject: "content_addressing",
    predicate: "provides_deduplication",
    object: { type: "Boolean", value: true },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.3",
  },
  {
    subject: "content_addressing",
    predicate: "provides_integrity",
    object: { type: "Boolean", value: true },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.3",
  },
  {
    subject: "content_addressing",
    predicate: "enables_efficient_comparison",
    object: { type: "Boolean", value: true },
    confidence: 0.98,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 3.3",
  },

  // Tradeoffs
  {
    subject: "StemeDB",
    predicate: "storage_tradeoff",
    object: { type: "Text", value: "append-only storage grows without bound" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 6.1",
  },
  {
    subject: "StemeDB",
    predicate: "not_suitable_for",
    object: { type: "Text", value: "ACID transactions" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 6.4",
  },
  {
    subject: "StemeDB",
    predicate: "not_suitable_for",
    object: { type: "Text", value: "high-frequency CRUD" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 6.4",
  },

  // Write/Read Paths
  {
    subject: "StemeDB",
    predicate: "write_path_includes",
    object: { type: "Text", value: "WAL with fsync" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 5.2",
  },
  {
    subject: "StemeDB",
    predicate: "fast_read_path",
    object: { type: "Text", value: "O(1) via materialized views" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 5.3",
  },
  {
    subject: "StemeDB",
    predicate: "full_resolution_path",
    object: { type: "Text", value: "O(n) for custom lenses" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 5.3",
  },

  // Conflict Status Thresholds
  {
    subject: "SkepticLens",
    predicate: "unanimous_threshold",
    object: { type: "Text", value: "conflict_score < 0.1" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.4",
  },
  {
    subject: "SkepticLens",
    predicate: "agreed_threshold",
    object: { type: "Text", value: "conflict_score < 0.4" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.4",
  },
  {
    subject: "SkepticLens",
    predicate: "contested_threshold",
    object: { type: "Text", value: "conflict_score >= 0.4" },
    confidence: 0.95,
    sourceClass: "Expert",
    sourceLabel: "StemeDB Whitepaper - Section 4.2.4",
  },
];

// ============================================================================
// API Functions
// ============================================================================

/**
 * POSTs a source record to `/v1/sources`.
 *
 * Best-effort: a non-OK response other than 409 only logs a warning, and a 409
 * is silently accepted. Network errors thrown by `fetch` propagate to the caller.
 *
 * @param hash  hex source hash used as the source identifier
 * @param label human-readable source label
 * @param tier  numeric tier (0-5); also sent as its class name in `tier_label`
 * @param url   optional source URL (dropped from the JSON body when undefined)
 */
async function registerSource(hash: string, label: string, tier: number, url?: string): Promise<void> {
  const SOURCE_CLASS_MAP: Record<number, SourceClass> = {
    0: "Regulatory",
    1: "Clinical",
    2: "Observational",
    3: "Expert",
    4: "Community",
    5: "Anecdotal",
  };

  const response = await fetch(`${API_URL}/v1/sources`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      hash,
      label,
      tier,
      tier_label: SOURCE_CLASS_MAP[tier],
      url,
    }),
  });

  if (!response.ok && response.status !== 409) {
    const text = await response.text();
    console.warn(` Warning: Failed to register source ${label}: ${text}`);
  }
}

/**
 * Signs `claim` as `agent` and POSTs it to `/v1/assert`.
 *
 * @returns the `hash` string from the JSON response, or `null` (after logging
 *          a warning) when the request is rejected or the response carries no
 *          usable hash. Network errors thrown by `fetch` propagate to the caller.
 */
async function createAssertion(
  agent: Agent,
  claim: CuratedClaim,
  sourceHash: string
): Promise<string | null> {
  const { signature, timestamp } = await signAssertion(agent, claim.subject, claim.predicate);

  const request = {
    subject: claim.subject,
    predicate: claim.predicate,
    object: claim.object,
    confidence: claim.confidence,
    source_hash: sourceHash,
    source_class: claim.sourceClass,
    signatures: [
      {
        agent_id: toHex(agent.publicKey),
        signature,
        timestamp,
        version: 1,
      },
    ],
  };

  const response = await fetch(`${API_URL}/v1/assert`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });

  if (!response.ok) {
    const text = await response.text();
    console.warn(` Warning: Failed to create assertion: ${text}`);
    return null;
  }

  // The body is untrusted JSON: narrow it instead of assuming `{ hash: string }`.
  const data: unknown = await response.json();
  const hash = typeof data === "object" && data !== null ? (data as { hash?: unknown }).hash : undefined;
  if (typeof hash !== "string" || hash.length === 0) {
    console.warn(` Warning: Assertion response did not include a hash`);
    return null;
  }
  return hash;
}

// ============================================================================
// Main
// ============================================================================

/**
 * Script entry point: creates the agent, registers every distinct source and
 * submits one assertion per curated claim. With `--dry-run` / `-d` nothing is
 * sent to the API; the planned work is only printed.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const dryRun = args.includes("--dry-run") || args.includes("-d");

  console.log("StemeDB Whitepaper Seed Script");
  console.log("==============================\n");

  if (dryRun) {
    console.log("DRY RUN MODE - No data will be submitted\n");
  }

  // Create agent
  console.log("Creating agent...");
  const agent = await createAgent("whitepaper-author");
  console.log(` Agent: ${toHex(agent.publicKey).slice(0, 16)}...`);
  console.log();

  // Group claims by source for registration; the first claim seen for a given
  // source hash decides that source's tier and URL.
  const sourceMap = new Map<string, SourceRecord>();
  const SOURCE_CLASS_TO_TIER: Record<SourceClass, number> = {
    Regulatory: 0,
    Clinical: 1,
    Observational: 2,
    Expert: 3,
    Community: 4,
    Anecdotal: 5,
  };

  for (const claim of WHITEPAPER_CLAIMS) {
    const hash = generateSourceHash(claim.sourceLabel);
    if (!sourceMap.has(hash)) {
      sourceMap.set(hash, {
        label: claim.sourceLabel,
        tier: SOURCE_CLASS_TO_TIER[claim.sourceClass],
        url: claim.sourceUrl,
      });
    }
  }

  // Register sources
  console.log(`Registering ${sourceMap.size} sources...`);
  if (!dryRun) {
    for (const [hash, source] of sourceMap) {
      await registerSource(hash, source.label, source.tier, source.url);
      console.log(` + ${truncate(source.label, 50)}`);
    }
  } else {
    for (const source of sourceMap.values()) {
      console.log(` [DRY] Would register: ${truncate(source.label, 50)}`);
    }
  }
  console.log();

  // Create assertions (sequentially, in declaration order)
  console.log(`Creating ${WHITEPAPER_CLAIMS.length} assertions...`);
  let created = 0;
  let failed = 0;

  for (const claim of WHITEPAPER_CLAIMS) {
    const sourceHash = generateSourceHash(claim.sourceLabel);

    if (dryRun) {
      console.log(` [DRY] ${claim.subject}/${claim.predicate} = "${truncate(String(claim.object.value), 30)}"`);
      created++;
    } else {
      const hash = await createAssertion(agent, claim, sourceHash);
      if (hash) {
        created++;
        console.log(` + ${claim.subject}/${claim.predicate} -> ${hash.slice(0, 16)}...`);
      } else {
        failed++;
      }
    }
  }

  console.log(`\nCreated ${created} assertions${failed > 0 ? ` (${failed} failed)` : ""}`);

  if (!dryRun) {
    // Wait for materialization: a fixed 2-second pause; the server is not polled.
    console.log("\nWaiting for materialization...");
    await new Promise((resolve) => setTimeout(resolve, 2000));
    console.log("Done!");
  }
}

main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error instance; narrow before reading `.message`.
  console.error("Error:", error instanceof Error ? error.message : error);
  process.exit(1);
});