stemedb/latent/ingest-reddit/adk-agent/main.py
jordan b3e8a9a058 feat: Multi-application expansion with chaos testing and community UI
Major additions:
- Community Next.js app (port 18187) for browsing claims with API docs
- stemedb-chaos crate: Fault injection, chaos testing, CRDT properties
- Latent ingestion system: Reddit/FDA ingesters with ADK-Go agents
- Disputed claims handling: Manual review workflows and validation
- Aphoria security scanner: New extractors (SQL injection, command
  injection, weak crypto, TLS version), policy-based ignores, UAT reports
- Docker infrastructure: Dockerfile, docker-compose.yml for full stack
- VulnBank demo: Intentionally vulnerable multi-language test corpus

SDK & API enhancements:
- Source registry handlers for tracking data provenance
- Metrics endpoint
- Skeptic filtering improvements

Code quality:
- Split 14 large files (>500 lines) into focused modules
- All files now under 500-line limit per project guidelines

Documentation:
- Chaos testing guide, circuit breakers, observability docs
- Phase 7 UAT documentation updates
- Martin Kleppmann technical writer agent

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 01:24:14 -07:00

317 lines
9.1 KiB
Python

#!/usr/bin/env python3
"""
Entry point for the Reddit Adverse Event Agent.
Usage:
python main.py --generate-key # Generate new Ed25519 keypair
python main.py --check # Validate environment and connectivity
python main.py --batch # Run batch extraction from all subreddits
adk web # Start interactive ADK web UI
"""
import argparse
import os
import sys
import time
# Put this file's own directory (not the process working directory) first on
# sys.path so the bare-name imports used inside the functions below
# (`signer`, `config`, `tools`, `stemedb_client`) can be resolved from there
# no matter where the script is launched from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from dotenv import load_dotenv
# Runs at import time, before any function below calls os.getenv().
# NOTE(review): presumably this picks up the `.env` file that --generate-key
# tells the user to edit — confirm against python-dotenv's lookup rules.
load_dotenv()
def generate_key():
    """Create a fresh Ed25519 signing keypair and print it with setup hints.

    The public key is shown as the agent id; the private seed is shown twice:
    once on its own and once as a ready-to-paste ``.env`` line.
    """
    from signer import Signer

    keypair = Signer.generate()
    rule = "=" * 60

    # Header banner followed by one blank line.
    print(rule, "New Ed25519 Keypair Generated", rule, "", sep="\n")

    print(f"Public Key (agent_id): {keypair.public_key}")
    print()

    # The seed is the secret half: whoever holds it can sign as this agent.
    print("Private Seed (SAVE THIS SECURELY):")
    print(f" {keypair.seed}")
    print()

    print("Add to .env:")
    print(f" STEMEDB_AGENT_SEED={keypair.seed}")
    print(rule)
def check_environment():
    """Run the pre-flight checks and report whether all of them passed.

    Checks, in order:
      1. the agent seed environment variable is set, is 64 characters long
         and is accepted by ``Signer.from_hex``;
      2. the Google API key environment variable is set;
      3. ``StemeDBClient(STEMEDB_URL).health()`` succeeds;
      4. ``fetch_reddit_posts("Ozempic", limit=1)`` returns without an
         ``"error"`` entry.

    Every outcome is printed as an ``[OK]`` / ``[FAIL]`` line; a failing check
    never stops the remaining ones from running.

    Returns:
        True when no check failed, False otherwise.
    """
    from config import STEMEDB_URL, ENV_STEMEDB_AGENT_SEED, ENV_GOOGLE_API_KEY

    rule = "=" * 60
    print(rule, "Environment Check", rule, "", sep="\n")

    failures = []

    # --- 1. Agent seed -----------------------------------------------------
    agent_seed = os.getenv(ENV_STEMEDB_AGENT_SEED)
    if agent_seed and len(agent_seed) == 64:
        # Right length; let the signer decide whether it is a usable seed.
        try:
            from signer import Signer
            agent = Signer.from_hex(agent_seed)
            print(f"[OK] Agent ID: {agent.public_key[:16]}...")
        except Exception as exc:
            print(f"[FAIL] Invalid agent seed: {exc}")
            failures.append("Invalid agent seed")
    elif agent_seed:
        print(f"[FAIL] {ENV_STEMEDB_AGENT_SEED} wrong length ({len(agent_seed)}, expected 64)")
        failures.append("Invalid agent seed length")
    else:
        print(f"[FAIL] {ENV_STEMEDB_AGENT_SEED} not set")
        failures.append("Missing agent seed")

    # --- 2. Google API key (presence only) ---------------------------------
    if os.getenv(ENV_GOOGLE_API_KEY):
        print(f"[OK] {ENV_GOOGLE_API_KEY} is set")
    else:
        print(f"[FAIL] {ENV_GOOGLE_API_KEY} not set")
        failures.append("Missing Google API key")

    # --- 3. StemeDB connectivity -------------------------------------------
    print()
    print(f"Checking StemeDB at {STEMEDB_URL}...")
    try:
        from stemedb_client import StemeDBClient
        status = StemeDBClient(STEMEDB_URL).health()
        print(f"[OK] StemeDB {status.version} - {status.assertions_count} assertions")
    except Exception as exc:
        # Import problems and request failures are reported the same way.
        print(f"[FAIL] Cannot connect to StemeDB: {exc}")
        failures.append("StemeDB connection failed")

    # --- 4. Reddit connectivity --------------------------------------------
    print()
    print("Checking Reddit API...")
    try:
        from tools import fetch_reddit_posts
        probe = fetch_reddit_posts("Ozempic", limit=1)
        if "error" not in probe:
            print(f"[OK] Reddit API accessible (fetched {probe['total_fetched']} posts)")
        else:
            print(f"[FAIL] Reddit API error: {probe['error']}")
            failures.append("Reddit API failed")
    except Exception as exc:
        print(f"[FAIL] Reddit API error: {exc}")
        failures.append("Reddit API failed")

    # --- Summary -----------------------------------------------------------
    print()
    if not failures:
        print("[RESULT] All checks passed!")
        return True

    print(f"[RESULT] {len(failures)} errors found")
    for failure in failures:
        print(f" - {failure}")
    return False
def run_batch():
    """Run batch extraction from all configured subreddits.

    For every subreddit in ``TARGET_SUBREDDITS`` this fetches up to 25 posts
    with ``fetch_reddit_posts``, runs the keyword heuristics of
    ``extract_simple`` on each returned post and hands every extraction to
    ``store_assertion``. Progress and a final summary are printed to stdout.

    A subreddit whose fetch result contains an ``"error"`` entry is reported
    and skipped. If ``Signer.from_env`` raises ``ValueError`` the function
    prints the error plus a hint and returns before fetching anything.

    Returns:
        None.
    """
    from config import TARGET_SUBREDDITS, ENV_STEMEDB_AGENT_SEED
    from tools import fetch_reddit_posts, store_assertion
    from signer import Signer

    print("=" * 60)
    print("Reddit Adverse Event Extraction - Batch Mode")
    print("=" * 60)
    print()

    # Validate signer up front so a bad or missing seed is reported once,
    # before any request is made.
    try:
        Signer.from_env(ENV_STEMEDB_AGENT_SEED)
    except ValueError as e:
        print(f"[ERROR] {e}")
        print("Run: python main.py --generate-key")
        return

    total_posts = 0
    total_extractions = 0

    for index, subreddit in enumerate(TARGET_SUBREDDITS):
        if index:
            # Rate limit politeness: pause between consecutive fetches.
            # Doing it here (instead of at the end of the loop body) keeps the
            # pause in place when the previous fetch failed and was skipped
            # with `continue`, and avoids a pointless wait after the last
            # subreddit.
            time.sleep(2)

        print(f"\n[*] Scanning r/{subreddit}...")
        result = fetch_reddit_posts(subreddit, limit=25)
        if "error" in result:
            print(f" [ERROR] {result['error']}")
            continue

        print(f" Found {result['matched_posts']} posts with keywords")
        total_posts += result["matched_posts"]

        for post in result["posts"]:
            # Simple heuristic extraction (without LLM for batch mode)
            for extraction in extract_simple(post):
                stored = store_assertion(
                    subject=extraction["subject"],
                    predicate=extraction["predicate"],
                    object_value=extraction["object"],
                    confidence=extraction["confidence"],
                    source_url=post["url"],
                    severity=extraction.get("severity"),
                    reddit_post_id=post["id"],
                )
                if stored["success"]:
                    total_extractions += 1
                    print(f" -> {extraction['predicate']}:{extraction['object']} @ {extraction['confidence']:.2f}")
                else:
                    print(f" [FAIL] {stored['error']}")

    print()
    print("=" * 60)
    print(f"[DONE] Processed {total_posts} posts, stored {total_extractions} assertions")
    print("=" * 60)
def extract_simple(post: dict) -> list:
    """
    Simple heuristic extraction for batch mode.
    For full LLM-powered extraction, use the agent via `adk web`.

    The post's title and text are joined, lower-cased and scanned for a fixed
    set of keywords; every rule that matches contributes one extraction.

    Args:
        post: Post dictionary. Reads ``title``, ``text`` and
            ``detected_drug``. A missing or ``None`` title/text is treated as
            empty; a missing or ``None`` ``detected_drug`` falls back to
            ``"glp1_agonist"``.

    Returns:
        A list of dicts with the keys ``subject``, ``predicate``, ``object``,
        ``confidence`` and ``severity``, in fixed rule order (gastroparesis,
        vomiting, nausea, hair_loss, hospitalization, tolerance, headache,
        fatigue). Empty when nothing matches.
    """
    import re  # function-scoped, like the other lazy imports in this file

    # `or ""` keeps a None value from being rendered as the word "none".
    title = post.get("title") or ""
    body = post.get("text") or ""
    text = f"{title} {body}".lower()
    subject = post.get("detected_drug") or "glp1_agonist"

    def mentions(*phrases):
        """Return True if any phrase occurs in the text as a substring."""
        return any(phrase in text for phrase in phrases)

    def has_word(pattern):
        """Return True if the regex pattern is found in the text."""
        return re.search(pattern, text) is not None

    # Computed once: nausea is only reported when vomiting is NOT detected,
    # whichever vomiting keyword triggered it.
    vomiting = mentions("vomit", "throw up", "throwing up")

    # (matched, predicate, object, confidence, severity), in output order.
    rules = [
        # NOTE(review): bare "paralysis" also fires on e.g. "sleep paralysis";
        # confirm whether it should be narrowed to stomach-related phrases.
        (mentions("paralysis", "gastroparesis"),
         "adverse_event", "gastroparesis", 0.55, "high"),
        (vomiting,
         "side_effect", "vomiting", 0.5, "medium"),
        (mentions("nausea") and not vomiting,
         "side_effect", "nausea", 0.45, "low"),
        (mentions("hair loss", "losing hair"),
         "side_effect", "hair_loss", 0.5, "medium"),
        # Word boundaries so "ER." / "(ER)" / a leading or trailing "ER" count
        # too, not just " er " surrounded by spaces.
        (mentions("hospital", "emergency") or has_word(r"\ber\b"),
         "adverse_event", "hospitalization", 0.55, "high"),
        (mentions("stopped working", "not working"),
         "efficacy_issue", "tolerance", 0.45, "medium"),
        (mentions("headache"),
         "side_effect", "headache", 0.4, "low"),
        # Leading boundary keeps "retired" from being read as "tired" while
        # still accepting "tiredness".
        (mentions("fatigue", "exhausted") or has_word(r"\btired"),
         "side_effect", "fatigue", 0.4, "low"),
    ]

    return [
        {
            "subject": subject,
            "predicate": predicate,
            "object": obj,
            "confidence": confidence,
            "severity": severity,
        }
        for matched, predicate, obj, confidence, severity in rules
        if matched
    ]
def main():
    """Parse the command line and dispatch to the selected mode.

    ``--generate-key``, ``--check`` and ``--batch`` are tried in that order
    and only the first one given is acted on. ``--check`` terminates the
    process with status 0 on success and 1 on failure. With no flag the help
    text is printed, followed by a pointer to ``adk web``.
    """
    cli = argparse.ArgumentParser(
        description="Reddit Adverse Event Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python main.py --generate-key Generate new signing keypair
python main.py --check Check environment and connectivity
python main.py --batch Run batch extraction
adk web Start interactive web UI
""",
    )

    # All options are plain on/off switches, so register them from a table.
    switches = (
        ("--generate-key", "Generate a new Ed25519 keypair"),
        ("--check", "Validate environment and connectivity"),
        ("--batch", "Run batch extraction from all subreddits"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    options = cli.parse_args()

    if options.generate_key:
        generate_key()
        return

    if options.check:
        # Exit status mirrors the check result so scripts can act on it.
        sys.exit(0 if check_environment() else 1)

    if options.batch:
        run_batch()
        return

    # No mode selected: show usage and point at the interactive entry point.
    cli.print_help()
    print()
    print("For interactive mode, run: adk web")


if __name__ == "__main__":
    main()