Major additions: - Community Next.js app (port 18187) for browsing claims with API docs - stemedb-chaos crate: Fault injection, chaos testing, CRDT properties - Latent ingestion system: Reddit/FDA ingesters with ADK-Go agents - Disputed claims handling: Manual review workflows and validation - Aphoria security scanner: New extractors (SQL injection, command injection, weak crypto, TLS version), policy-based ignores, UAT reports - Docker infrastructure: Dockerfile, docker-compose.yml for full stack - VulnBank demo: Intentionally vulnerable multi-language test corpus SDK & API enhancements: - Source registry handlers for tracking data provenance - Metrics endpoint - Skeptic filtering improvements Code quality: - Split 14 large files (>500 lines) into focused modules - All files now under 500-line limit per project guidelines Documentation: - Chaos testing guide, circuit breakers, observability docs - Phase 7 UAT documentation updates - Martin Kleppmann technical writer agent Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
317 lines
9.1 KiB
Python
317 lines
9.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Entry point for the Reddit Adverse Event Agent.
|
|
|
|
Usage:
|
|
python main.py --generate-key # Generate new Ed25519 keypair
|
|
python main.py --check # Validate environment and connectivity
|
|
python main.py --batch # Run batch extraction from all subreddits
|
|
adk web # Start interactive ADK web UI
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
# Add the current directory to path for local imports
# (the functions below import sibling modules such as config, signer, tools
# and stemedb_client by bare name, so this script's own directory must be
# searched first, whatever the caller's working directory is).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from dotenv import load_dotenv

# Load settings from a .env file into the environment at import time, so the
# os.getenv() lookups performed later can see them.
load_dotenv()
|
|
|
|
|
|
def generate_key():
    """Create a fresh Ed25519 keypair and print it for the operator.

    The report shows the public key (labelled as the agent id), the private
    seed, and the line to add to ``.env`` so the seed is available as
    ``STEMEDB_AGENT_SEED``. Nothing is written to disk and nothing is
    returned.
    """
    from signer import Signer

    keypair = Signer.generate()
    rule = "=" * 60

    # Assemble the whole report first, then emit it line by line; an empty
    # entry prints as a blank line.
    report = [
        rule,
        "New Ed25519 Keypair Generated",
        rule,
        "",
        f"Public Key (agent_id): {keypair.public_key}",
        "",
        "Private Seed (SAVE THIS SECURELY):",
        f" {keypair.seed}",
        "",
        "Add to .env:",
        f" STEMEDB_AGENT_SEED={keypair.seed}",
        rule,
    ]
    for line in report:
        print(line)
|
|
|
|
|
|
def check_environment():
    """Run the pre-flight checks and report the outcome on stdout.

    Four checks run in order, and every one runs even if an earlier one
    failed:

    1. the agent seed environment variable is set, is 64 characters long and
       is accepted by ``Signer.from_hex``;
    2. the Google API key environment variable is set;
    3. ``StemeDBClient(STEMEDB_URL).health()`` succeeds;
    4. ``fetch_reddit_posts("Ozempic", limit=1)`` returns without an
       ``"error"`` entry.

    Returns:
        ``True`` when no check failed, ``False`` otherwise.
    """
    from config import STEMEDB_URL, ENV_STEMEDB_AGENT_SEED, ENV_GOOGLE_API_KEY

    rule = "=" * 60
    print(rule)
    print("Environment Check")
    print(rule)
    print()

    problems = []

    def fail(detail, summary):
        # Print the detailed failure line now, keep the short label for the
        # final summary.
        print(f"[FAIL] {detail}")
        problems.append(summary)

    # Check STEMEDB_AGENT_SEED
    agent_seed = os.getenv(ENV_STEMEDB_AGENT_SEED)
    if not agent_seed:
        fail(f"{ENV_STEMEDB_AGENT_SEED} not set", "Missing agent seed")
    elif len(agent_seed) != 64:
        fail(
            f"{ENV_STEMEDB_AGENT_SEED} wrong length ({len(agent_seed)}, expected 64)",
            "Invalid agent seed length",
        )
    else:
        try:
            from signer import Signer

            agent = Signer.from_hex(agent_seed)
            # Only a 16-character prefix of the public key is displayed.
            print(f"[OK] Agent ID: {agent.public_key[:16]}...")
        except Exception as exc:
            fail(f"Invalid agent seed: {exc}", "Invalid agent seed")

    # Check GOOGLE_API_KEY (presence only; the value is never printed)
    if os.getenv(ENV_GOOGLE_API_KEY):
        print(f"[OK] {ENV_GOOGLE_API_KEY} is set")
    else:
        fail(f"{ENV_GOOGLE_API_KEY} not set", "Missing Google API key")

    # Check StemeDB connectivity
    print()
    print(f"Checking StemeDB at {STEMEDB_URL}...")
    try:
        from stemedb_client import StemeDBClient

        status = StemeDBClient(STEMEDB_URL).health()
        print(f"[OK] StemeDB {status.version} - {status.assertions_count} assertions")
    except Exception as exc:
        fail(f"Cannot connect to StemeDB: {exc}", "StemeDB connection failed")

    # Check Reddit connectivity
    print()
    print("Checking Reddit API...")
    try:
        from tools import fetch_reddit_posts

        probe = fetch_reddit_posts("Ozempic", limit=1)
        if "error" in probe:
            fail(f"Reddit API error: {probe['error']}", "Reddit API failed")
        else:
            print(f"[OK] Reddit API accessible (fetched {probe['total_fetched']} posts)")
    except Exception as exc:
        fail(f"Reddit API error: {exc}", "Reddit API failed")

    print()
    if not problems:
        print("[RESULT] All checks passed!")
        return True

    print(f"[RESULT] {len(problems)} errors found")
    for summary in problems:
        print(f" - {summary}")
    return False
|
|
|
|
|
|
def run_batch():
    """Scan every configured subreddit and store heuristic extractions.

    For each entry of ``TARGET_SUBREDDITS`` this fetches up to 25 posts, runs
    ``extract_simple`` on each returned post and hands every extraction to
    ``store_assertion``. Progress, per-item failures and a final tally are
    printed to stdout.

    Returns ``None`` in all cases. If ``Signer.from_env`` raises
    ``ValueError`` the batch is abandoned before any subreddit is scanned.
    """
    from config import TARGET_SUBREDDITS, ENV_STEMEDB_AGENT_SEED
    from tools import fetch_reddit_posts, store_assertion
    from signer import Signer

    rule = "=" * 60
    print(rule)
    print("Reddit Adverse Event Extraction - Batch Mode")
    print(rule)
    print()

    # Validate signer: the instance is discarded, only the failure matters.
    try:
        Signer.from_env(ENV_STEMEDB_AGENT_SEED)
    except ValueError as exc:
        print(f"[ERROR] {exc}")
        print("Run: python main.py --generate-key")
        return

    posts_seen = 0
    assertions_stored = 0

    for subreddit in TARGET_SUBREDDITS:
        print(f"\n[*] Scanning r/{subreddit}...")
        listing = fetch_reddit_posts(subreddit, limit=25)

        # A failed fetch is reported and skipped; the batch carries on.
        if "error" in listing:
            print(f" [ERROR] {listing['error']}")
            continue

        matched = listing["matched_posts"]
        print(f" Found {matched} posts with keywords")
        posts_seen += matched

        for post in listing["posts"]:
            # Simple heuristic extraction (without LLM for batch mode)
            for item in extract_simple(post):
                outcome = store_assertion(
                    subject=item["subject"],
                    predicate=item["predicate"],
                    object_value=item["object"],
                    confidence=item["confidence"],
                    source_url=post["url"],
                    severity=item.get("severity"),
                    reddit_post_id=post["id"],
                )

                if not outcome["success"]:
                    print(f" [FAIL] {outcome['error']}")
                    continue

                assertions_stored += 1
                print(f" -> {item['predicate']}:{item['object']} @ {item['confidence']:.2f}")

        # Rate limit politeness
        # NOTE(review): the pause is assumed to run once per successfully
        # scanned subreddit; confirm against the original indentation.
        time.sleep(2)

    print()
    print(rule)
    print(f"[DONE] Processed {posts_seen} posts, stored {assertions_stored} assertions")
    print(rule)
|
|
|
|
|
|
def extract_simple(post: dict) -> list:
    """
    Simple heuristic extraction for batch mode.

    Lowercases the post's title and body, checks the combined text against a
    fixed set of keyword rules and returns one extraction per rule that
    fires, always in the same rule order.

    For full LLM-powered extraction, use the agent via `adk web`.

    Args:
        post: Post dictionary. Reads ``title``, ``text`` and
            ``detected_drug``. A missing or ``None`` title/text is treated as
            empty; a missing, ``None`` or empty ``detected_drug`` falls back
            to ``"glp1_agonist"``.

    Returns:
        A list of dicts, each with the keys ``subject``, ``predicate``,
        ``object``, ``confidence`` and ``severity``. Empty when nothing
        matches.
    """
    import re  # function-scope import keeps this block self-contained

    title = post.get("title") or ""
    body = post.get("text") or ""
    text = f"{title} {body}".lower()
    subject = post.get("detected_drug") or "glp1_agonist"

    def mentions(*phrases: str) -> bool:
        # Plain substring search, so "vomit" also covers "vomiting"/"vomited".
        return any(phrase in text for phrase in phrases)

    vomiting = mentions("vomit", "throw up", "throwing up")

    # "er" must be a standalone word: this matches "the ER." or a leading
    # "ER visit" without firing on words such as "better" or "over".
    er_visit = re.search(r"\ber\b", text) is not None

    # (rule fired?, predicate, object, confidence, severity) in output order.
    rules = (
        (mentions("paralysis", "gastroparesis"),
         "adverse_event", "gastroparesis", 0.55, "high"),
        (vomiting,
         "side_effect", "vomiting", 0.5, "medium"),
        # Nausea is only reported when vomiting was not, whichever vomiting
        # phrase was used.
        (mentions("nausea") and not vomiting,
         "side_effect", "nausea", 0.45, "low"),
        (mentions("hair loss", "losing hair"),
         "side_effect", "hair_loss", 0.5, "medium"),
        (mentions("hospital", "emergency") or er_visit,
         "adverse_event", "hospitalization", 0.55, "high"),
        (mentions("stopped working", "not working"),
         "efficacy_issue", "tolerance", 0.45, "medium"),
        (mentions("headache"),
         "side_effect", "headache", 0.4, "low"),
        (mentions("fatigue", "tired", "exhausted"),
         "side_effect", "fatigue", 0.4, "low"),
    )

    return [
        {
            "subject": subject,
            "predicate": predicate,
            "object": obj,
            "confidence": confidence,
            "severity": severity,
        }
        for fired, predicate, obj, confidence, severity in rules
        if fired
    ]
|
|
|
|
|
|
def main():
    """Parse the command line and dispatch to the selected mode.

    Recognised flags are ``--generate-key``, ``--check`` and ``--batch``.
    When several are given, the first one in that order wins. ``--check``
    terminates the process with exit status 0 on success and 1 on failure.
    With no flag the help text is printed, followed by a pointer to the
    interactive ``adk web`` mode.
    """
    parser = argparse.ArgumentParser(
        description="Reddit Adverse Event Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python main.py --generate-key Generate new signing keypair
python main.py --check Check environment and connectivity
python main.py --batch Run batch extraction
adk web Start interactive web UI
""",
    )

    # All three options are plain boolean switches; register them in order.
    switches = (
        ("--generate-key", "Generate a new Ed25519 keypair"),
        ("--check", "Validate environment and connectivity"),
        ("--batch", "Run batch extraction from all subreddits"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)

    options = parser.parse_args()

    if options.generate_key:
        generate_key()
        return

    if options.check:
        # sys.exit raises SystemExit, so nothing below runs for --check.
        sys.exit(0 if check_environment() else 1)

    if options.batch:
        run_batch()
        return

    parser.print_help()
    print()
    print("For interactive mode, run: adk web")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|