- Add PolicySourceStore for tracking where policies come from - Implement claim extraction skill and API endpoints - Add community UI text selection extractor component - Create Go SDK aphoria client for policy operations - Document patent specifications and legal disclosures - Add guides: golden path loop, policy audit trails, pre-flight checks - Expand Unreal Engine config extractor with source tracking - Add UAT reports for policy source tracking validation - Refactor tests.rs into modular test files Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
612 lines
17 KiB
TypeScript
612 lines
17 KiB
TypeScript
#!/usr/bin/env npx tsx
|
|
/**
|
|
* Seed whitepaper claims to StemeDB.
|
|
*
|
|
* This script:
|
|
* 1. Uses the hand-curated WHITEPAPER_CLAIMS array defined in this file (data/whitepaper-sections.json is not read by this script)
|
|
* 2. Groups the curated claims by source label (claims are hardcoded, not extracted at runtime)
|
|
* 3. Creates agents with deterministic keys
|
|
* 4. Registers sources and submits assertions to StemeDB
|
|
*
|
|
* Usage:
|
|
* npx tsx scripts/seed-whitepaper.ts
|
|
* npx tsx scripts/seed-whitepaper.ts --dry-run
|
|
*
|
|
* Environment:
|
|
* STEMEDB_API_URL - API base URL (default: http://127.0.0.1:18180)
|
|
*/
|
|
|
|
import * as ed from "@noble/ed25519";
|
|
import { sha512 } from "@noble/hashes/sha512";
|
|
import { readFileSync } from "fs";
|
|
import { join } from "path";
|
|
|
|
// Plug the SHA-512 implementation from @noble/hashes into the sha512Sync hook
// exposed by @noble/ed25519.
ed.etc.sha512Sync = (...messages) => sha512(ed.etc.concatBytes(...messages));

// Base URL of the StemeDB API. Because `||` is used, an unset OR empty
// STEMEDB_API_URL falls back to the local default.
const API_URL = process.env.STEMEDB_API_URL || "http://127.0.0.1:18180";
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/** A signing identity used to submit assertions. */
interface Agent {
  /** Human-readable agent name; also the input from which the key is derived. */
  name: string;
  /** Ed25519 private key bytes, passed to `ed.signAsync` / `ed.getPublicKeyAsync`. */
  privateKey: Uint8Array;
  /** Ed25519 public key bytes; its hex form is sent as `agent_id`. */
  publicKey: Uint8Array;
}
|
|
|
|
/** Source classes accepted by this script; `main` maps them to tiers 0-5 in this order. */
type SourceClass =
  | "Regulatory"
  | "Clinical"
  | "Observational"
  | "Expert"
  | "Community"
  | "Anecdotal";

/** Tag describing how an assertion's object value should be interpreted. */
type ObjectType =
  | "Text"
  | "Number"
  | "Boolean"
  | "Reference";

/** The object part of a claim: a tagged primitive value. */
interface ObjectValue {
  /** Kind of value carried in `value`. */
  type: ObjectType;
  /** The raw value; the type system does not tie it to `type`. */
  value: string | number | boolean;
}

/** One hand-written claim together with its provenance. */
interface CuratedClaim {
  /** Entity the claim is about. */
  subject: string;
  /** Property of the subject being asserted. */
  predicate: string;
  /** Asserted value. */
  object: ObjectValue;
  /** Confidence attached to the assertion when it is submitted. */
  confidence: number;
  /** Class of the backing source; also determines the tier used at registration. */
  sourceClass: SourceClass;
  /** Label of the backing source; the source hash is derived from it. */
  sourceLabel: string;
  /** Optional URL sent along when the source is registered. */
  sourceUrl?: string;
  /** Optional free-form remark; not sent to the API by this script. */
  note?: string;
}
|
|
|
|
// ============================================================================
|
|
// Helpers
|
|
// ============================================================================
|
|
|
|
/**
 * Encodes bytes as a lowercase hexadecimal string, two characters per byte.
 *
 * @param bytes - Bytes to encode.
 * @returns Hex string of length `2 * bytes.length` (empty for empty input).
 */
function toHex(bytes: Uint8Array): string {
  let hex = "";
  for (const byte of bytes) {
    // Left-pad so values below 0x10 still occupy two characters.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
|
|
/**
 * Folds the UTF-8 bytes of `data` into a deterministic 32-byte value.
 *
 * NOTE: despite its name this is NOT the SHA-256 algorithm. It is a simple
 * XOR/add mix and offers no cryptographic strength. Its output is used by
 * `createAgent` (as the private key) and `generateSourceHash`, so changing the
 * mixing would change every derived key and source hash.
 *
 * @param data - Text to digest.
 * @returns A 32-byte array; all zeros for the empty string.
 */
function sha256(data: string): Uint8Array {
  const digest = new Uint8Array(32);
  const input = new TextEncoder().encode(data);

  input.forEach((byte, position) => {
    const here = position % 32;
    const next = (position + 1) % 32;
    // XOR the byte into its own slot, then add it (mod 256) into the following slot.
    digest[here] ^= byte;
    digest[next] = (digest[next] + byte) % 256;
  });

  return digest;
}
|
|
|
|
/**
 * Derives the deterministic hex identifier used for a source label.
 *
 * @param label - Source label as it appears on a claim.
 * @returns Hex encoding of the digest of `source-whitepaper-<label>`.
 */
function generateSourceHash(label: string): string {
  const digest = sha256("source-whitepaper-" + label);
  return toHex(digest);
}
|
|
|
|
/**
 * Builds an agent whose key pair is derived deterministically from its name.
 *
 * The private key is the 32-byte digest of `whitepaper-seed-agent-<name>`, so
 * the same name always yields the same identity.
 *
 * @param name - Agent name.
 * @returns The agent with its private and public key bytes.
 */
async function createAgent(name: string): Promise<Agent> {
  const privateKey = sha256(`whitepaper-seed-agent-${name}`);
  return {
    name,
    privateKey,
    publicKey: await ed.getPublicKeyAsync(privateKey),
  };
}
|
|
|
|
/**
 * Signs an assertion's `subject:predicate` pair with the agent's private key.
 *
 * The returned timestamp (Unix seconds, taken before signing) is NOT part of
 * the signed message; only the UTF-8 bytes of `subject:predicate` are signed.
 *
 * @param agent - Agent whose private key signs the message.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @returns Hex-encoded signature and the timestamp captured for it.
 */
async function signAssertion(
  agent: Agent,
  subject: string,
  predicate: string
): Promise<{ signature: string; timestamp: number }> {
  const timestamp = Math.floor(Date.now() / 1000);
  const payload = new TextEncoder().encode([subject, predicate].join(":"));
  const rawSignature = await ed.signAsync(payload, agent.privateKey);
  return { signature: toHex(rawSignature), timestamp };
}
|
|
|
|
// ============================================================================
|
|
// Curated Claims from Whitepaper
|
|
// These are hand-curated to ensure quality and relevance
|
|
// ============================================================================
|
|
|
|
/**
 * Hand-curated claims submitted by this script.
 *
 * Every claim has source class "Expert" and a source label of the form
 * `StemeDB Whitepaper - Section <id>`; the small constructors below only remove
 * that repetition. Entries keep the key order subject, predicate, object,
 * confidence, sourceClass, sourceLabel, and `note` only when one is given.
 */
const WHITEPAPER_CLAIMS: CuratedClaim[] = (() => {
  const text = (value: string): ObjectValue => ({ type: "Text", value });
  const num = (value: number): ObjectValue => ({ type: "Number", value });
  const flag = (value: boolean): ObjectValue => ({ type: "Boolean", value });

  const claim = (
    subject: string,
    predicate: string,
    object: ObjectValue,
    confidence: number,
    section: string,
    note?: string
  ): CuratedClaim => {
    const entry: CuratedClaim = {
      subject,
      predicate,
      object,
      confidence,
      sourceClass: "Expert",
      sourceLabel: `StemeDB Whitepaper - Section ${section}`,
    };
    if (note !== undefined) {
      entry.note = note;
    }
    return entry;
  };

  return [
    // Storage & Architecture
    claim("StemeDB", "storage_model", text("append-only Merkle DAG"), 0.98, "5.1", "Core architectural claim"),
    claim("StemeDB", "hash_algorithm", text("BLAKE3"), 0.99, "3.3", "Content-addressing algorithm"),
    claim("StemeDB", "signature_algorithm", text("Ed25519"), 0.99, "3.4", "Cryptographic signature algorithm"),
    claim("StemeDB", "serialization_format", text("rkyv (zero-copy)"), 0.95, "5.5"),
    claim("StemeDB", "data_model", text("subject-predicate-object triples with provenance"), 0.95, "3.1"),

    // Lens Complexity Claims
    claim("RecencyLens", "time_complexity", text("O(n)"), 0.95, "4.2.1", "Where n = number of candidates"),
    claim("RecencyLens", "space_complexity", text("O(1)"), 0.95, "4.2.1"),
    claim("ConsensusLens", "time_complexity", text("O(n)"), 0.95, "4.2.2"),
    claim("ConsensusLens", "space_complexity", text("O(k)"), 0.95, "4.2.2", "Where k = distinct object values"),
    claim("AuthorityLens", "time_complexity", text("O(n)"), 0.95, "4.2.3"),
    claim("SkepticLens", "resolution_type", text("conflict analysis without winner selection"), 0.95, "4.2.4"),
    claim("SkepticLens", "conflict_metric", text("normalized Shannon entropy"), 0.98, "4.2.4"),

    // Lens Properties
    claim("Lens", "property_stateless", flag(true), 0.98, "4"),
    claim("Lens", "property_deterministic", flag(true), 0.98, "4"),
    claim("Lens", "property_composable", flag(true), 0.98, "4"),

    // Trust Parameters (contested - honest limitation)
    claim("EigenTrust", "initial_trust_score", num(0.5), 0.72, "7.1", "Heuristic without theoretical foundation"),
    claim("EigenTrust", "reward_delta", num(0.05), 0.72, "7.1", "Heuristic for correct assertions"),
    claim("EigenTrust", "penalty_delta", num(0.1), 0.72, "7.1", "Heuristic for incorrect assertions"),

    // Source Tier Weights
    claim("SourceClass", "tier_0_weight", num(1.0), 0.95, "3.2", "Regulatory tier"),
    claim("SourceClass", "tier_1_weight", num(0.9), 0.95, "3.2", "Clinical tier"),
    claim("SourceClass", "tier_2_weight", num(0.7), 0.95, "3.2", "Observational tier"),
    claim("SourceClass", "tier_3_weight", num(0.5), 0.95, "3.2", "Expert tier"),
    claim("SourceClass", "tier_4_weight", num(0.2), 0.95, "3.2", "Community tier"),
    claim("SourceClass", "tier_5_weight", num(0.1), 0.95, "3.2", "Anecdotal tier"),

    // MaterializedView
    claim("MaterializedView", "read_complexity", text("O(1)"), 0.95, "4.4"),
    claim("MaterializedView", "consistency_model", text("eventual consistency"), 0.95, "6.3"),

    // Content Addressing Properties
    claim("content_addressing", "provides_deduplication", flag(true), 0.98, "3.3"),
    claim("content_addressing", "provides_integrity", flag(true), 0.98, "3.3"),
    claim("content_addressing", "enables_efficient_comparison", flag(true), 0.98, "3.3"),

    // Tradeoffs
    claim("StemeDB", "storage_tradeoff", text("append-only storage grows without bound"), 0.95, "6.1"),
    claim("StemeDB", "not_suitable_for", text("ACID transactions"), 0.95, "6.4"),
    claim("StemeDB", "not_suitable_for", text("high-frequency CRUD"), 0.95, "6.4"),

    // Write/Read Paths
    claim("StemeDB", "write_path_includes", text("WAL with fsync"), 0.95, "5.2"),
    claim("StemeDB", "fast_read_path", text("O(1) via materialized views"), 0.95, "5.3"),
    claim("StemeDB", "full_resolution_path", text("O(n) for custom lenses"), 0.95, "5.3"),

    // Conflict Status Thresholds
    claim("SkepticLens", "unanimous_threshold", text("conflict_score < 0.1"), 0.95, "4.2.4"),
    claim("SkepticLens", "agreed_threshold", text("conflict_score < 0.4"), 0.95, "4.2.4"),
    claim("SkepticLens", "contested_threshold", text("conflict_score >= 0.4"), 0.95, "4.2.4"),
  ];
})();
|
|
|
|
// ============================================================================
|
|
// API Functions
|
|
// ============================================================================
|
|
|
|
/**
 * Registers a source by POSTing it to `/v1/sources`.
 *
 * Best-effort: a non-OK response is logged as a warning rather than thrown.
 * HTTP 409 is accepted silently (presumably the source already exists; confirm
 * against the API). Network-level failures from `fetch` still reject.
 *
 * @param hash - Source identifier.
 * @param label - Human-readable source label.
 * @param tier - Numeric tier (0-5); also sent as its class name in `tier_label`.
 * @param url - Optional source URL; omitted from the JSON body when undefined.
 */
async function registerSource(hash: string, label: string, tier: number, url?: string): Promise<void> {
  const tierLabels: Record<number, string> = {
    0: "Regulatory",
    1: "Clinical",
    2: "Observational",
    3: "Expert",
    4: "Community",
    5: "Anecdotal",
  };

  const payload = {
    hash,
    label,
    tier,
    tier_label: tierLabels[tier],
    url,
  };

  const response = await fetch(`${API_URL}/v1/sources`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (response.ok || response.status === 409) {
    return;
  }

  const text = await response.text();
  console.warn(` Warning: Failed to register source ${label}: ${text}`);
}
|
|
|
|
/**
 * Signs a claim and submits it to `/v1/assert`.
 *
 * Best-effort: a non-OK response, or a success response whose body lacks a
 * string `hash`, is logged as a warning and reported as `null`. Network-level
 * failures from `fetch` and unparseable JSON bodies still reject.
 *
 * @param agent - Agent that signs the assertion; its public key is sent as `agent_id`.
 * @param claim - Claim to submit.
 * @param sourceHash - Identifier of the source backing the claim.
 * @returns The assertion hash returned by the API, or `null` on failure.
 */
async function createAssertion(
  agent: Agent,
  claim: CuratedClaim,
  sourceHash: string
): Promise<string | null> {
  const { signature, timestamp } = await signAssertion(agent, claim.subject, claim.predicate);

  const request = {
    subject: claim.subject,
    predicate: claim.predicate,
    object: claim.object,
    confidence: claim.confidence,
    source_hash: sourceHash,
    source_class: claim.sourceClass,
    signatures: [
      {
        agent_id: toHex(agent.publicKey),
        signature,
        timestamp,
        version: 1,
      },
    ],
  };

  const response = await fetch(`${API_URL}/v1/assert`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });

  if (!response.ok) {
    const text = await response.text();
    console.warn(` Warning: Failed to create assertion: ${text}`);
    return null;
  }

  // The body is untyped JSON: verify it carries a string hash instead of
  // letting `undefined` escape through the declared `string | null` return type.
  const data: unknown = await response.json();
  const hash =
    typeof data === "object" && data !== null
      ? (data as { hash?: unknown }).hash
      : undefined;

  if (typeof hash !== "string") {
    console.warn(
      ` Warning: Assertion response for ${claim.subject}/${claim.predicate} did not include a hash`
    );
    return null;
  }

  return hash;
}
|
|
|
|
// ============================================================================
|
|
// Main
|
|
// ============================================================================
|
|
|
|
/**
 * Script entry point.
 *
 * Creates the "whitepaper-author" agent, registers one source per distinct
 * source label found in WHITEPAPER_CLAIMS, then submits every claim as an
 * assertion. With `--dry-run` (or `-d`) nothing is sent; the planned actions
 * are only printed and every claim is counted as created.
 */
async function main(): Promise<void> {
  const cliArgs = process.argv.slice(2);
  const dryRun = ["--dry-run", "-d"].some((flag) => cliArgs.includes(flag));

  console.log("StemeDB Whitepaper Seed Script");
  console.log("==============================\n");
  if (dryRun) {
    console.log("DRY RUN MODE - No data will be submitted\n");
  }

  // Create agent
  console.log("Creating agent...");
  const agent = await createAgent("whitepaper-author");
  console.log(` Agent: ${toHex(agent.publicKey).slice(0, 16)}...`);
  console.log();

  // Collect distinct sources keyed by their hash; the first claim that
  // mentions a label decides that source's tier and URL.
  const tierByClass: Record<SourceClass, number> = {
    Regulatory: 0,
    Clinical: 1,
    Observational: 2,
    Expert: 3,
    Community: 4,
    Anecdotal: 5,
  };
  const sourceMap = new Map<string, { label: string; tier: number; url?: string }>();
  for (const { sourceLabel, sourceClass, sourceUrl } of WHITEPAPER_CLAIMS) {
    const key = generateSourceHash(sourceLabel);
    if (sourceMap.has(key)) {
      continue;
    }
    sourceMap.set(key, {
      label: sourceLabel,
      tier: tierByClass[sourceClass],
      url: sourceUrl,
    });
  }

  // Register sources
  console.log(`Registering ${sourceMap.size} sources...`);
  for (const [hash, source] of sourceMap) {
    const shownLabel = source.label.slice(0, 50);
    if (dryRun) {
      console.log(` [DRY] Would register: ${shownLabel}...`);
    } else {
      await registerSource(hash, source.label, source.tier, source.url);
      console.log(` + ${shownLabel}...`);
    }
  }
  console.log();

  // Create assertions
  console.log(`Creating ${WHITEPAPER_CLAIMS.length} assertions...`);
  let created = 0;
  let failed = 0;

  for (const claim of WHITEPAPER_CLAIMS) {
    const sourceHash = generateSourceHash(claim.sourceLabel);

    if (dryRun) {
      console.log(` [DRY] ${claim.subject}/${claim.predicate} = "${String(claim.object.value).slice(0, 30)}..."`);
      created++;
      continue;
    }

    const hash = await createAssertion(agent, claim, sourceHash);
    if (!hash) {
      failed++;
      continue;
    }
    created++;
    console.log(` + ${claim.subject}/${claim.predicate} -> ${hash.slice(0, 16)}...`);
  }

  const failureNote = failed > 0 ? ` (${failed} failed)` : "";
  console.log(`\nCreated ${created} assertions${failureNote}`);

  if (dryRun) {
    return;
  }

  // Fixed two-second pause after submitting; the log message describes it as
  // waiting for materialization.
  console.log("\nWaiting for materialization...");
  await new Promise((resolve) => setTimeout(resolve, 2000));
  console.log("Done!");
}
|
|
|
|
// Run the script; any rejection is reported on stderr and exits with status 1.
main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error, so fall back to its string
  // form instead of printing `undefined`.
  const message = error instanceof Error ? error.message : String(error);
  console.error("Error:", message);
  process.exit(1);
});
|