stemedb/community/scripts/seed-claims.ts
jordan b3e8a9a058 feat: Multi-application expansion with chaos testing and community UI
Major additions:
- Community Next.js app (port 18187) for browsing claims with API docs
- stemedb-chaos crate: Fault injection, chaos testing, CRDT properties
- Latent ingestion system: Reddit/FDA ingesters with ADK-Go agents
- Disputed claims handling: Manual review workflows and validation
- Aphoria security scanner: New extractors (SQL injection, command
  injection, weak crypto, TLS version), policy-based ignores, UAT reports
- Docker infrastructure: Dockerfile, docker-compose.yml for full stack
- VulnBank demo: Intentionally vulnerable multi-language test corpus

SDK & API enhancements:
- Source registry handlers for tracking data provenance
- Metrics endpoint
- Skeptic filtering improvements

Code quality:
- Split 14 large files (>500 lines) into focused modules
- All files now under 500-line limit per project guidelines

Documentation:
- Chaos testing guide, circuit breakers, observability docs
- Phase 7 UAT documentation updates
- Martin Kleppmann technical writer agent

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 01:24:14 -07:00

504 lines
13 KiB
TypeScript

/**
* Seed script for populating StemeDB with demo claims.
*
* This script:
* 1. Waits for the API to be healthy
* 2. Registers source documents with human-readable labels
* 3. Creates assertions that match the mock data in page.tsx
* 4. Verifies the data is queryable via SkepticLens
*
* Usage:
* npx tsx scripts/seed-claims.ts
*
* Environment:
* STEMEDB_API_URL - API base URL (default: http://127.0.0.1:18180)
*/
import * as ed from "@noble/ed25519";
import { sha512 } from "@noble/hashes/sha512";
// Configure ed25519 to use sha512: install the library's synchronous SHA-512
// hook, concatenating all message chunks into one buffer before hashing.
ed.etc.sha512Sync = (...m) => sha512(ed.etc.concatBytes(...m));
// Base URL used for every API request in this script. Because `||` is used
// (not `??`), an empty STEMEDB_API_URL also falls back to the local default.
const API_URL = process.env.STEMEDB_API_URL || "http://127.0.0.1:18180";
// ============================================================================
// Types
// ============================================================================
/**
 * A signing identity used to author seeded assertions.
 * Instances are built by createAgent(), which derives the key pair from
 * `seed` and `name`.
 */
interface Agent {
/** Human-readable agent name; also mixed into the key-derivation input. */
name: string;
/** Seed string the private key is derived from. */
seed: string;
/** 32-byte private key (the output of this file's sha256() helper). */
privateKey: Uint8Array;
/** Public key derived from `privateKey`; sent hex-encoded as `agent_id`. */
publicKey: Uint8Array;
}
/** A source document registered with the API via POST /v1/sources. */
interface Source {
/** Hex identifier; filled in at module load from `label` by generateSourceHash(). */
hash: string;
/** Human-readable title of the source. */
label: string;
/** Numeric tier; main() maps it to a source-class name through SOURCE_CLASS_MAP. */
tier: number;
/** Optional link to the source document. */
url?: string;
}
/** A group of claims that all share one subject/predicate pair. */
interface ClaimSet {
/** Subject every claim in the set is asserted about. */
subject: string;
/** Predicate every claim in the set is asserted under. */
predicate: string;
/** Individual claims; main() submits each one as a separate assertion. */
claims: {
/** Claim text, sent to the API as a `Text` object value. */
value: string;
/** Confidence forwarded unchanged to the API. */
confidence: number;
/** Index into SOURCES selecting the backing source. */
sourceIndex: number;
/** Index into the agents array built in main() selecting the signer. */
agentIndex: number;
}[];
}
// ============================================================================
// Helpers
// ============================================================================
/**
 * Encodes a byte array as a lowercase hexadecimal string.
 *
 * @param bytes - Bytes to encode.
 * @returns Two hex digits per input byte (zero-padded), in input order;
 *          the empty string for empty input.
 */
function toHex(bytes: Uint8Array): string {
  let hex = "";
  for (const byte of bytes) {
    // padStart keeps values below 0x10 at two digits ("0f", not "f").
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Folds a string into a deterministic 32-byte digest.
 *
 * NOTE: despite its name this is NOT SHA-256 and is not cryptographically
 * secure. It is a simple XOR/add mixing function whose only guarantee is that
 * the same input always yields the same 32 bytes, which is what the seed
 * script relies on for stable keys and source hashes.
 *
 * @param data - Text to digest; it is UTF-8 encoded first.
 * @returns A 32-byte array (all zeros for the empty string).
 */
function sha256(data: string): Uint8Array {
  const DIGEST_LENGTH = 32;
  const digest = new Uint8Array(DIGEST_LENGTH);
  const input = new TextEncoder().encode(data);
  input.forEach((byte, index) => {
    const slot = index % DIGEST_LENGTH;
    const nextSlot = (index + 1) % DIGEST_LENGTH;
    // XOR the byte into its own slot, then add it (mod 256) into the next one.
    digest[slot] ^= byte;
    digest[nextSlot] = (digest[nextSlot] + byte) % 256;
  });
  return digest;
}
/**
 * Polls `GET /v1/health` until the API answers with a successful status.
 *
 * Each probe is bounded by its own timeout so that a connection which is
 * accepted but never answered cannot stall the whole retry budget, and the
 * reason for the most recent failure is kept so the final error is actionable.
 *
 * @param maxRetries - Maximum number of probes before giving up.
 * @param delayMs - Pause between consecutive probes, in milliseconds.
 * @param requestTimeoutMs - Upper bound for a single probe (request plus body
 *        read), in milliseconds.
 * @throws Error when no probe succeeded within `maxRetries` attempts.
 */
async function waitForHealth(
  maxRetries = 30,
  delayMs = 2000,
  requestTimeoutMs = 5000
): Promise<void> {
  console.log(`Waiting for API at ${API_URL}...`);
  let lastFailure = "no attempts were made";
  for (let i = 0; i < maxRetries; i++) {
    // A fresh controller per attempt: an aborted signal cannot be reused.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), requestTimeoutMs);
    try {
      const response = await fetch(`${API_URL}/v1/health`, { signal: controller.signal });
      if (response.ok) {
        const data = await response.json();
        console.log(`API is healthy: v${data.version}, ${data.assertions_count} assertions`);
        return;
      }
      lastFailure = `HTTP ${response.status}`;
    } catch (error) {
      // Connection refused, timeout/abort, or an unparsable body: remember why and retry.
      lastFailure = error instanceof Error ? error.message : String(error);
    } finally {
      clearTimeout(timer);
    }
    // No point sleeping after the final attempt.
    if (i < maxRetries - 1) {
      console.log(` Retry ${i + 1}/${maxRetries}...`);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
  throw new Error(`API not healthy after ${maxRetries} retries (last failure: ${lastFailure})`);
}
/**
 * Builds a deterministic signing identity for a named agent.
 *
 * The private key is the 32-byte digest of `agent-seed-<seed>-<name>` produced
 * by this file's sha256() helper, so the same name/seed pair always yields the
 * same key pair across runs.
 *
 * @param name - Agent name stored on the result and mixed into the key input.
 * @param seed - Seed string stored on the result and mixed into the key input.
 * @returns The agent with its private key and derived public key.
 */
async function createAgent(name: string, seed: string): Promise<Agent> {
  const privateKey = sha256(`agent-seed-${seed}-${name}`);
  return {
    name,
    seed,
    privateKey,
    publicKey: await ed.getPublicKeyAsync(privateKey),
  };
}
/**
 * Signs the `<subject>:<predicate>` string with the agent's private key.
 *
 * The returned timestamp is the current Unix time in whole seconds, captured
 * before signing. It is NOT part of the signed bytes; only subject and
 * predicate are.
 * NOTE(review): confirm this matches what the server verifies.
 *
 * @param agent - Identity whose private key produces the signature.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @returns The hex-encoded signature and the capture timestamp.
 */
async function signAssertion(
  agent: Agent,
  subject: string,
  predicate: string
): Promise<{ signature: string; timestamp: number }> {
  const nowSeconds = Math.floor(Date.now() / 1000);
  const payload = new TextEncoder().encode(`${subject}:${predicate}`);
  const rawSignature = await ed.signAsync(payload, agent.privateKey);
  return { signature: toHex(rawSignature), timestamp: nowSeconds };
}
/**
 * Derives the hex identifier for a source from its label.
 *
 * @param label - Human-readable source label.
 * @returns Hex encoding of the 32-byte digest of `source-<label>`.
 */
function generateSourceHash(label: string): string {
  return toHex(sha256(`source-${label}`));
}
/**
 * Registers one source document through `POST /v1/sources`.
 *
 * A 409 response is treated like success because it means the source already
 * exists. Any other failure status is logged as a warning and otherwise
 * ignored; network-level errors from fetch still propagate to the caller.
 *
 * @param source - Source to register; `hash`, `label`, `tier` and `url` are sent.
 */
async function registerSource(source: Source): Promise<void> {
  const { hash, label, tier, url } = source;
  const response = await fetch(`${API_URL}/v1/sources`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ hash, label, tier, url }),
  });
  // 409 = already exists, which is fine
  if (response.ok || response.status === 409) {
    return;
  }
  const detail = await response.text();
  console.warn(` Warning: Failed to register source ${label}: ${detail}`);
}
/**
 * Signs and submits a single text assertion through `POST /v1/assert`.
 *
 * @param agent - Signer; its public key is sent hex-encoded as `agent_id`.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @param value - Claim text, sent as `{ type: "Text", value }`.
 * @param confidence - Confidence forwarded unchanged to the API.
 * @param sourceHash - Hash of the backing source (`source_hash`).
 * @param sourceClass - Source class name (`source_class`).
 * @returns The `hash` field of the API response, or `null` when the API
 *          answered with a non-success status (a warning is logged).
 */
async function createAssertion(
  agent: Agent,
  subject: string,
  predicate: string,
  value: string,
  confidence: number,
  sourceHash: string,
  sourceClass: string
): Promise<string | null> {
  const signed = await signAssertion(agent, subject, predicate);
  const payload = {
    subject,
    predicate,
    object: { type: "Text", value },
    confidence,
    source_hash: sourceHash,
    source_class: sourceClass,
    signatures: [
      {
        agent_id: toHex(agent.publicKey),
        signature: signed.signature,
        timestamp: signed.timestamp,
        version: 1,
      },
    ],
  };
  const response = await fetch(`${API_URL}/v1/assert`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (response.ok) {
    const body = await response.json();
    return body.hash;
  }
  const detail = await response.text();
  console.warn(` Warning: Failed to create assertion: ${detail}`);
  return null;
}
/**
 * Best-effort check that a subject/predicate pair is queryable through
 * `GET /v1/skeptic`, logging a one-line summary of the result.
 *
 * Verification never aborts the seed because of the response content: a
 * non-success status, an unparsable body, or a body lacking the `claims`
 * array / numeric `conflict_score` is reported as a warning. Network-level
 * errors from fetch still propagate to the caller.
 *
 * @param subject - Subject to query.
 * @param predicate - Predicate to query.
 */
async function verifySkeptic(subject: string, predicate: string): Promise<void> {
  const url = `${API_URL}/v1/skeptic?subject=${encodeURIComponent(subject)}&predicate=${encodeURIComponent(predicate)}&include_source_metadata=true`;
  const response = await fetch(url);
  if (!response.ok) {
    console.warn(` Warning: Skeptic query failed for ${subject}/${predicate}`);
    return;
  }
  let body: unknown;
  try {
    body = await response.json();
  } catch {
    console.warn(` Warning: Skeptic response for ${subject}/${predicate} was not valid JSON`);
    return;
  }
  // Narrow the untyped payload before touching its fields.
  const record =
    typeof body === "object" && body !== null ? (body as Record<string, unknown>) : undefined;
  const claims = record?.claims;
  const conflictScore = record?.conflict_score;
  if (!Array.isArray(claims) || typeof conflictScore !== "number") {
    console.warn(` Warning: Skeptic response for ${subject}/${predicate} had an unexpected shape`);
    return;
  }
  console.log(
    ` Verified: ${subject}/${predicate} -> ${record?.status} (${claims.length} claims, conflict=${conflictScore.toFixed(2)})`
  );
}
// ============================================================================
// Data Definitions (matching page.tsx mock data)
// ============================================================================
/**
 * Source documents registered before any assertion is created.
 * Claims refer to these entries by array position (ClaimSet `sourceIndex`),
 * so the order must stay in sync with CLAIM_SETS. Every `hash` starts empty
 * and is filled in from the label right after this declaration.
 */
const SOURCES: Source[] = [
{
hash: "", // Will be generated
label: "PostgreSQL 16 Documentation - DDL Constraints",
tier: 0,
url: "https://www.postgresql.org/docs/current/ddl-constraints.html",
},
{
hash: "",
label: "Snodgrass - Developing Time-Oriented Database Applications",
tier: 1,
url: "https://www2.cs.arizona.edu/~rts/tdbbook.pdf",
},
{
hash: "",
label: "Shapiro et al. - Conflict-free Replicated Data Types (SSS 2011)",
tier: 1,
url: "https://hal.inria.fr/inria-00609399/document",
},
{
hash: "",
label: "Almeida et al. - Delta State Replicated Data Types (2018)",
tier: 1,
url: "https://arxiv.org/abs/1603.01529",
},
{
hash: "",
label: "StemeDB Design Notes - Why Not CRDTs",
tier: 3,
},
{
hash: "",
label: "BLAKE3 Specification - One Function, Fast Everywhere",
tier: 0,
url: "https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf",
},
{
hash: "",
label: "StemeDB Source Code - lens/recency.rs",
tier: 0,
url: "https://github.com/orchard9/stemedb/blob/main/crates/stemedb-lens/src/recency.rs",
},
{
hash: "",
label: "GitHub Issue #142 - Optimize RecencyLens",
tier: 4,
url: "https://github.com/orchard9/stemedb/issues/142",
},
{
hash: "",
label: "Kleppmann - Designing Data-Intensive Applications",
tier: 1,
url: "https://dataintensive.net/",
},
{
hash: "",
label: "StemeDB Implementation Notes",
tier: 3,
},
{
hash: "",
label: "Kamvar et al. - The EigenTrust Algorithm (WWW 2003)",
tier: 1,
url: "https://nlp.stanford.edu/pubs/eigentrust.pdf",
},
{
hash: "",
label: "Medical AI Safety Working Group - Trust Calibration Report",
tier: 2,
},
];
// Replace each placeholder `hash` with the identifier derived from the label.
// Runs once at module load and mutates the SOURCES entries in place.
SOURCES.forEach((entry) => {
  entry.hash = generateSourceHash(entry.label);
});
/**
 * Maps a Source `tier` number to the class name sent as `source_class` when
 * an assertion is created. Tiers without an entry here yield `undefined`.
 */
const SOURCE_CLASS_MAP: Record<number, string> = {
0: "Regulatory",
1: "Clinical",
2: "Observational",
3: "Expert",
4: "Community",
5: "Anecdotal",
};
/**
 * Demo claims to seed, grouped by subject/predicate.
 * `sourceIndex` is a position in SOURCES and `agentIndex` a position in the
 * agents array created in main(); the trailing comments name the intended
 * source for each index.
 */
const CLAIM_SETS: ClaimSet[] = [
// Single value claim (agreed)
{
subject: "Episteme",
predicate: "storage_model",
claims: [
{
value: "Single value per key is the dominant paradigm",
confidence: 0.92,
sourceIndex: 0, // PostgreSQL docs
agentIndex: 0,
},
{
value: "Bitemporal and event stores are exceptions",
confidence: 0.78,
sourceIndex: 1, // Snodgrass
agentIndex: 1,
},
],
},
// CRDT claim (contested)
{
subject: "CRDT",
predicate: "replica_assumption",
claims: [
{
value: "CRDTs assume replicas are authoritative copies of same data",
confidence: 0.94,
sourceIndex: 2, // Shapiro
agentIndex: 0,
},
{
value: "CRDTs can model multi-source disagreement with delta states",
confidence: 0.76,
sourceIndex: 3, // Almeida
agentIndex: 1,
},
{
value: "CRDTs don't preserve provenance of conflicting sources",
confidence: 0.65,
sourceIndex: 4, // StemeDB notes
agentIndex: 2,
},
],
},
// Content addressing (unanimous)
{
subject: "Episteme",
predicate: "content_addressing",
claims: [
{
value: "Content-addressing provides deduplication, integrity, and efficient comparison",
confidence: 0.96,
sourceIndex: 5, // BLAKE3 spec
agentIndex: 0,
},
],
},
// Recency complexity (agreed)
{
subject: "RecencyLens",
predicate: "complexity",
claims: [
{
value: "RecencyLens is O(n) where n = candidates",
confidence: 0.88,
sourceIndex: 6, // Source code
agentIndex: 0,
},
{
value: "Could be O(log n) with a heap-based implementation",
confidence: 0.52,
sourceIndex: 7, // GitHub issue
agentIndex: 3,
},
],
},
// Storage growth (unanimous)
{
subject: "Episteme",
predicate: "storage_growth",
claims: [
{
value: "Append-only storage grows without bound",
confidence: 0.97,
sourceIndex: 8, // Kleppmann
agentIndex: 0,
},
],
},
// Trust parameters (contested)
{
subject: "EigenTrust",
predicate: "parameters",
claims: [
{
value: "Trust parameters (0.5 start, +0.05/-0.1) are reasonable heuristics",
confidence: 0.72,
sourceIndex: 9, // Implementation notes
agentIndex: 2,
},
{
value: "EigenTrust provides theoretical foundation for trust propagation",
confidence: 0.89,
sourceIndex: 10, // Kamvar paper
agentIndex: 0,
},
{
value: "Heuristics without formal verification are dangerous for high-stakes domains",
confidence: 0.61,
sourceIndex: 11, // Medical AI Safety
agentIndex: 1,
},
],
},
];
// ============================================================================
// Main
// ============================================================================
/**
 * Runs the full seed: waits for the API, creates the four deterministic
 * agents, registers every entry of SOURCES, submits every claim in
 * CLAIM_SETS, pauses briefly, then queries each subject/predicate pair back
 * through the skeptic endpoint.
 *
 * Claims whose `sourceIndex`, `agentIndex` or source tier do not resolve are
 * treated as a defect in the seed data and abort the run with a descriptive
 * error. Assertions the API refuses are skipped, and a summary warning is
 * printed so partial seeding does not go unnoticed.
 *
 * @throws Error when the API never becomes healthy, when seed data references
 *         an unknown source/agent/tier, or when a request fails at the
 *         network level.
 */
async function main(): Promise<void> {
  console.log("StemeDB Seed Script");
  console.log("===================\n");
  // Wait for API
  await waitForHealth();
  console.log();
  // Create agents with deterministic keys
  console.log("Creating agents...");
  const agents: Agent[] = await Promise.all([
    createAgent("regulatory_authority", "fda-agent-seed-001"),
    createAgent("clinical_researcher", "clinical-agent-seed-002"),
    createAgent("expert_opinion", "expert-agent-seed-003"),
    createAgent("community_voice", "community-agent-seed-004"),
  ]);
  for (const agent of agents) {
    console.log(` ${agent.name}: ${toHex(agent.publicKey).slice(0, 16)}...`);
  }
  console.log();
  // Register sources
  console.log("Registering sources...");
  for (const source of SOURCES) {
    await registerSource(source);
    console.log(` ${source.label.slice(0, 50)}...`);
  }
  console.log();
  // Create assertions
  console.log("Creating assertions...");
  let totalAssertions = 0;
  let attemptedAssertions = 0;
  for (const claimSet of CLAIM_SETS) {
    console.log(`\n ${claimSet.subject}/${claimSet.predicate}:`);
    for (const claim of claimSet.claims) {
      const source = SOURCES[claim.sourceIndex];
      const agent = agents[claim.agentIndex];
      // Fail with a message that names the offending claim instead of an
      // opaque "cannot read properties of undefined" further down.
      if (source === undefined || agent === undefined) {
        throw new Error(
          `Invalid seed data for ${claimSet.subject}/${claimSet.predicate}: sourceIndex=${claim.sourceIndex}, agentIndex=${claim.agentIndex}`
        );
      }
      const sourceClass = SOURCE_CLASS_MAP[source.tier];
      if (sourceClass === undefined) {
        throw new Error(`No source class is defined for tier ${source.tier} (source "${source.label}")`);
      }
      attemptedAssertions++;
      const hash = await createAssertion(
        agent,
        claimSet.subject,
        claimSet.predicate,
        claim.value,
        claim.confidence,
        source.hash,
        sourceClass
      );
      if (hash) {
        totalAssertions++;
        console.log(` + "${claim.value.slice(0, 40)}..." (${sourceClass})`);
      }
    }
  }
  console.log(`\nCreated ${totalAssertions} assertions.`);
  if (totalAssertions < attemptedAssertions) {
    console.warn(
      ` Warning: ${attemptedAssertions - totalAssertions} of ${attemptedAssertions} assertions could not be created`
    );
  }
  console.log();
  // Wait a moment for materialization
  console.log("Waiting for materialization...");
  await new Promise((resolve) => setTimeout(resolve, 2000));
  console.log();
  // Verify with Skeptic
  console.log("Verifying via SkepticLens...");
  for (const claimSet of CLAIM_SETS) {
    await verifySkeptic(claimSet.subject, claimSet.predicate);
  }
  console.log("\nDone! Seeded claims are ready for the Community app.");
}
// Entry point: run the seed; any rejection from main() is logged and the
// process exits with status 1.
main().catch((error) => {
console.error("Seed failed:", error);
process.exit(1);
});