stemedb/community/scripts/seed-external.ts
jordan 1cc453c97b feat: Aphoria policy source tracking + claim extraction pipeline
- Add PolicySourceStore for tracking where policies come from
- Implement claim extraction skill and API endpoints
- Add community UI text selection extractor component
- Create Go SDK aphoria client for policy operations
- Document patent specifications and legal disclosures
- Add guides: golden path loop, policy audit trails, pre-flight checks
- Expand Unreal Engine config extractor with source tracking
- Add UAT reports for policy source tracking validation
- Refactor tests.rs into modular test files

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 02:35:02 -07:00

347 lines
9.5 KiB
TypeScript

#!/usr/bin/env npx tsx
/**
* Seed external source claims to StemeDB.
*
* This script:
* 1. Loads external sources from data/external-sources.json
* 2. Creates agents with varying trust levels
* 3. Registers sources and submits assertions to StemeDB
* 4. Prints the curated conflicts listed in the data file for demo reference
*
* Usage:
* npx tsx scripts/seed-external.ts
* npx tsx scripts/seed-external.ts --dry-run (or -d: log what would be sent, submit nothing)
*
* Environment:
* STEMEDB_API_URL - API base URL (default: http://127.0.0.1:18180)
*/
import * as ed from "@noble/ed25519";
import { sha512 } from "@noble/hashes/sha512";
import { readFileSync } from "fs";
import { join } from "path";
// Configure ed25519 to use sha512: install the SHA-512 implementation from
// @noble/hashes on the library's `etc.sha512Sync` hook.
ed.etc.sha512Sync = (...m) => sha512(ed.etc.concatBytes(...m));
// Base URL for every StemeDB API request made by this script. Because `||` is
// used (not `??`), an empty STEMEDB_API_URL also falls back to the local default.
const API_URL = process.env.STEMEDB_API_URL || "http://127.0.0.1:18180";
// ============================================================================
// Types
// ============================================================================
/**
 * A signing identity used to submit assertions.
 *
 * Built by `createAgent`, which derives `publicKey` from `privateKey` via
 * `ed.getPublicKeyAsync`.
 */
interface Agent {
// Human-readable agent name; also part of the key-derivation seed string.
name: string;
// Private key bytes passed to `ed.signAsync` when signing.
privateKey: Uint8Array;
// Public key bytes; sent hex-encoded as `agent_id` in assertion requests.
publicKey: Uint8Array;
}
/** Tag naming the kind of value carried by an `ObjectValue`. */
type ObjectType = "Text" | "Number" | "Boolean" | "Reference";
/**
 * The object part of a claim: a type tag plus the raw value.
 *
 * NOTE(review): nothing here ties `value`'s runtime type to `type`
 * (e.g. "Number" with a string value type-checks) — confirm what the API expects.
 */
interface ObjectValue {
type: ObjectType;
value: string | number | boolean;
}
/**
 * One claim listed under an external source in the data file.
 *
 * `subject`, `predicate`, `object` and `confidence` are forwarded unchanged to
 * `createAssertion`.
 */
interface ExternalClaim {
subject: string;
predicate: string;
object: ObjectValue;
// Forwarded as-is in the assertion request; valid range is not checked here.
confidence: number;
// Optional annotation; not read anywhere in this script.
note?: string;
}
/**
 * An external source entry from the data file together with its claims.
 */
interface ExternalSource {
// Identifier fed to `generateSourceHash` to derive the source hash.
id: string;
// Display label; sent on registration and used in console output.
label: string;
// Sent on registration as `url`.
url: string;
// Numeric tier; selects the signing agent via `getAgentForTier`.
tier: number;
// Sent as `tier_label` on registration and as `source_class` on each assertion.
tierLabel: string;
// Not read anywhere in this script.
category: string;
claims: ExternalClaim[];
}
/**
 * A hand-picked conflict description from the data file.
 *
 * This script only prints `id`, `subject`, `predicate`, `description` and a
 * truncated `demoNote`; `sources` and `values` are not read here.
 */
interface CuratedConflict {
id: string;
subject: string;
predicate: string;
description: string;
// presumably ids of the sources that disagree — verify against the data file
sources: string[];
// Per-source value for the conflicting subject/predicate, with an interpretation.
values: Array<{
source: string;
value: string;
interpretation: string;
}>;
demoNote: string;
}
/**
 * Expected top-level shape of data/external-sources.json.
 *
 * The parsed JSON is assigned to this type without runtime validation.
 */
interface ExternalSourcesData {
sources: ExternalSource[];
curatedConflicts: CuratedConflict[];
}
// ============================================================================
// Helpers
// ============================================================================
/**
 * Encode bytes as a lowercase hexadecimal string, two characters per byte.
 *
 * @param bytes - Bytes to encode.
 * @returns Hex string of length `bytes.length * 2` (empty for empty input).
 */
function toHex(bytes: Uint8Array): string {
  let hex = "";
  for (const byte of bytes) {
    // Zero-pad so values below 0x10 still occupy two characters.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Fold a string into a deterministic 32-byte digest.
 *
 * NOTE(review): despite the name, this is NOT SHA-256. It is a simple
 * XOR/add mixing of the UTF-8 bytes into 32 slots and is not
 * collision-resistant. Its output feeds agent keys and source hashes, so
 * swapping in a real hash would change every derived identity.
 *
 * @param data - Input text; encoded as UTF-8 before mixing.
 * @returns A 32-byte array (all zeros for the empty string).
 */
function sha256(data: string): Uint8Array {
  const DIGEST_SIZE = 32;
  const digest = new Uint8Array(DIGEST_SIZE);
  new TextEncoder().encode(data).forEach((byte, index) => {
    const here = index % DIGEST_SIZE;
    const next = (index + 1) % DIGEST_SIZE;
    // XOR the byte into its own slot, then add it (mod 256) into the next one.
    digest[here] ^= byte;
    digest[next] = (digest[next] + byte) % 256;
  });
  return digest;
}
/**
 * Derive the hex-encoded hash under which a source is registered.
 *
 * The digest is taken over the string `external-source-<sourceId>` using this
 * file's `sha256` helper.
 *
 * @param sourceId - The source's `id` from the data file.
 * @returns Hex string of the 32-byte digest.
 */
function generateSourceHash(sourceId: string): string {
  const digest = sha256(`external-source-${sourceId}`);
  return toHex(digest);
}
/**
 * Build a deterministic agent from a name and a seed.
 *
 * The private key is the 32-byte digest of
 * `external-seed-agent-<name>-<seed>` (via this file's `sha256` helper), so
 * the same inputs always yield the same key pair.
 *
 * @param name - Agent name; stored on the result and mixed into the key seed.
 * @param seed - Extra seed text mixed into the key seed.
 * @returns The agent with its private key and derived public key.
 */
async function createAgent(name: string, seed: string): Promise<Agent> {
  const privateKey = sha256(`external-seed-agent-${name}-${seed}`);
  return {
    name,
    privateKey,
    publicKey: await ed.getPublicKeyAsync(privateKey),
  };
}
/**
 * Sign a subject/predicate pair with the agent's private key.
 *
 * The signed bytes are the UTF-8 encoding of `<subject>:<predicate>`. The
 * returned timestamp (whole seconds since the Unix epoch) is captured before
 * signing and is not part of the signed bytes.
 *
 * @param agent - Agent whose private key signs the message.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @returns Hex-encoded signature and the capture timestamp.
 */
async function signAssertion(
  agent: Agent,
  subject: string,
  predicate: string
): Promise<{ signature: string; timestamp: number }> {
  const timestamp = Math.floor(Date.now() / 1000);
  const payload = new TextEncoder().encode(`${subject}:${predicate}`);
  const rawSignature = await ed.signAsync(payload, agent.privateKey);
  return { signature: toHex(rawSignature), timestamp };
}
// ============================================================================
// API Functions
// ============================================================================
/**
 * Register a source with the API via POST /v1/sources.
 *
 * Failures are best-effort: a non-OK response other than 409 is logged as a
 * warning and the function still resolves. A 409 is silently accepted
 * (presumably "already registered" — confirm against the API).
 *
 * @param hash - Source hash.
 * @param label - Display label.
 * @param tier - Numeric tier.
 * @param tierLabel - Tier label; sent as `tier_label`.
 * @param url - Optional source URL; omitted from the JSON body when undefined.
 */
async function registerSource(
  hash: string,
  label: string,
  tier: number,
  tierLabel: string,
  url?: string
): Promise<void> {
  const payload = { hash, label, tier, tier_label: tierLabel, url };
  const response = await fetch(`${API_URL}/v1/sources`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (response.ok || response.status === 409) {
    return;
  }
  const text = await response.text();
  console.warn(` Warning: Failed to register source ${label}: ${text}`);
}
/**
 * Sign a claim and submit it via POST /v1/assert.
 *
 * Every failure mode is best-effort: a non-OK response, a success body that is
 * not valid JSON, or a body without a string `hash` logs a warning and
 * resolves to `null`, so one bad response does not abort the seed run.
 *
 * @param agent - Agent that signs the assertion; its public key is sent as `agent_id`.
 * @param subject - Assertion subject.
 * @param predicate - Assertion predicate.
 * @param object - Assertion object value.
 * @param confidence - Confidence forwarded unchanged.
 * @param sourceHash - Hash of the source; sent as `source_hash`.
 * @param sourceClass - Sent as `source_class`.
 * @returns The `hash` string from the response, or `null` on any failure.
 */
async function createAssertion(
  agent: Agent,
  subject: string,
  predicate: string,
  object: ObjectValue,
  confidence: number,
  sourceHash: string,
  sourceClass: string
): Promise<string | null> {
  const { signature, timestamp } = await signAssertion(agent, subject, predicate);
  const request = {
    subject,
    predicate,
    object,
    confidence,
    source_hash: sourceHash,
    source_class: sourceClass,
    signatures: [
      {
        agent_id: toHex(agent.publicKey),
        signature,
        timestamp,
        version: 1,
      },
    ],
  };
  const response = await fetch(`${API_URL}/v1/assert`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(request),
  });
  if (!response.ok) {
    const text = await response.text();
    console.warn(` Warning: Failed to create assertion: ${text}`);
    return null;
  }

  // Treat the body as untrusted: parse failures and missing fields are
  // reported like any other failure instead of rejecting or leaking undefined.
  let payload: unknown;
  try {
    payload = await response.json();
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.warn(` Warning: Assertion response was not valid JSON: ${reason}`);
    return null;
  }
  const hash =
    typeof payload === "object" && payload !== null
      ? (payload as { hash?: unknown }).hash
      : undefined;
  if (typeof hash !== "string") {
    console.warn(" Warning: Assertion response did not include a hash");
    return null;
  }
  return hash;
}
// ============================================================================
// Agent Pool
// ============================================================================
/**
 * The fixed set of agents used for seeding, one per role.
 *
 * `getAgentForTier` maps a source's numeric tier onto one of these roles.
 */
interface AgentPool {
regulatory: Agent;
clinical: Agent;
observational: Agent;
expert: Agent;
community: Agent;
}
/**
 * Create the five seeding agents, one after another, from fixed name/seed pairs.
 *
 * @returns The pool keyed by role.
 */
async function createAgentPool(): Promise<AgentPool> {
  const regulatory = await createAgent("regulatory_authority", "reg-001");
  const clinical = await createAgent("clinical_researcher", "clin-002");
  const observational = await createAgent("observational_analyst", "obs-003");
  const expert = await createAgent("domain_expert", "exp-004");
  const community = await createAgent("community_contributor", "comm-005");
  return { regulatory, clinical, observational, expert, community };
}
/**
 * Pick the agent that signs claims for a source of the given tier.
 *
 * Tiers 0, 1 and 2 map to the regulatory, clinical and observational agents;
 * tiers 4 and 5 map to the community agent; tier 3 and any other value map to
 * the expert agent.
 *
 * @param pool - Agent pool to select from.
 * @param tier - Source tier.
 * @returns The agent for that tier.
 */
function getAgentForTier(pool: AgentPool, tier: number): Agent {
  if (tier === 0) {
    return pool.regulatory;
  }
  if (tier === 1) {
    return pool.clinical;
  }
  if (tier === 2) {
    return pool.observational;
  }
  if (tier === 4 || tier === 5) {
    return pool.community;
  }
  // Tier 3 and every unrecognized tier share the expert agent.
  return pool.expert;
}
// ============================================================================
// Main
// ============================================================================
/**
 * Script entry point.
 *
 * Reads data/external-sources.json relative to the current working directory,
 * builds the agent pool, registers every source, submits every claim as an
 * assertion, then prints the curated conflicts. With `--dry-run` or `-d`,
 * nothing is sent to the API: the script only logs what it would do, and the
 * final two-second wait is skipped.
 */
async function main(): Promise<void> {
  const cliArgs = process.argv.slice(2);
  const dryRun = cliArgs.some((arg) => arg === "--dry-run" || arg === "-d");

  console.log("StemeDB External Sources Seed Script");
  console.log("====================================\n");
  if (dryRun) {
    console.log("DRY RUN MODE - No data will be submitted\n");
  }

  // Load external sources data (the parsed JSON is trusted, not validated).
  const dataPath = join(process.cwd(), "data", "external-sources.json");
  const rawJson = readFileSync(dataPath, "utf-8");
  const data: ExternalSourcesData = JSON.parse(rawJson);
  console.log(`Loaded ${data.sources.length} external sources`);
  console.log(`Loaded ${data.curatedConflicts.length} curated conflicts\n`);

  // Create the agent pool and show a short prefix of each public key.
  console.log("Creating agent pool...");
  const agents = await createAgentPool();
  const roles = ["regulatory", "clinical", "observational", "expert", "community"] as const;
  for (const role of roles) {
    console.log(` ${role}: ${toHex(agents[role].publicKey).slice(0, 16)}...`);
  }
  console.log();

  // Register all sources.
  console.log("Registering sources...");
  for (const source of data.sources) {
    const hash = generateSourceHash(source.id);
    if (dryRun) {
      console.log(` [DRY] Would register: ${source.label.slice(0, 50)}...`);
      continue;
    }
    await registerSource(hash, source.label, source.tier, source.tierLabel, source.url);
    console.log(` + T${source.tier} ${source.label.slice(0, 50)}...`);
  }
  console.log();

  // Create assertions from all sources, one request at a time.
  console.log("Creating assertions from external sources...");
  let created = 0;
  let failed = 0;
  for (const source of data.sources) {
    const sourceHash = generateSourceHash(source.id);
    const signer = getAgentForTier(agents, source.tier);
    console.log(`\n ${source.label}:`);

    for (const claim of source.claims) {
      if (dryRun) {
        console.log(
          ` [DRY] ${claim.subject}/${claim.predicate} = "${String(claim.object.value).slice(0, 25)}..."`
        );
        // Dry-run claims are counted as created.
        created++;
        continue;
      }

      const assertionHash = await createAssertion(
        signer,
        claim.subject,
        claim.predicate,
        claim.object,
        claim.confidence,
        sourceHash,
        source.tierLabel
      );
      if (!assertionHash) {
        failed++;
        continue;
      }
      created++;
      console.log(` + ${claim.subject}/${claim.predicate} -> ${assertionHash.slice(0, 12)}...`);
    }
  }

  let summary = `\nCreated ${created} assertions`;
  if (failed > 0) {
    summary += ` (${failed} failed)`;
  }
  console.log(summary);

  // Log curated conflicts for reference; nothing is submitted for them here.
  console.log("\n--- Curated Conflicts for Demo ---");
  for (const { id, subject, predicate, description, demoNote } of data.curatedConflicts) {
    console.log(`\n ${id}:`);
    console.log(` Subject: ${subject}/${predicate}`);
    console.log(` ${description}`);
    console.log(` Demo note: ${demoNote.slice(0, 60)}...`);
  }

  if (dryRun) {
    return;
  }
  // Fixed two-second pause before reporting completion.
  console.log("\nWaiting for materialization...");
  await new Promise((resolve) => setTimeout(resolve, 2000));
  console.log("Done!");
}
// Run the script. Any rejection is reported on stderr and the process exits
// with status 1. The rejection value is narrowed first so that a non-Error
// value (string, null, ...) is printed as-is instead of as `undefined`, and
// cannot throw inside the handler.
main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error("Error:", message);
  process.exit(1);
});