stemedb/latent/ingest-reddit/adk-agent/main.py

#!/usr/bin/env python3
"""
Entry point for the Reddit Adverse Event Agent.

Usage:
    python main.py --generate-key    # Generate new Ed25519 keypair
    python main.py --check           # Validate environment and connectivity
    python main.py --batch           # Run batch extraction from all subreddits
    adk web                          # Start interactive ADK web UI
"""

import argparse
import os
import sys
import time

# Add the current directory to path for local imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from dotenv import load_dotenv

load_dotenv()


def generate_key():
    """Create a fresh Ed25519 keypair and print it for the operator.

    Prints the public key (used as the agent id), the private seed, and
    the ``.env`` line that configures the seed. Nothing is written to disk.
    """
    from signer import Signer

    keypair = Signer.generate()
    rule = "=" * 60

    report = [
        rule,
        "New Ed25519 Keypair Generated",
        rule,
        "",
        f"Public Key (agent_id): {keypair.public_key}",
        "",
        "Private Seed (SAVE THIS SECURELY):",
        f"  {keypair.seed}",
        "",
        "Add to .env:",
        f"  STEMEDB_AGENT_SEED={keypair.seed}",
        rule,
    ]
    for line in report:
        print(line)


def check_environment():
    """Run the environment and connectivity checks and report each result.

    Checks, in order: the agent seed environment variable (present,
    64 characters long, accepted by ``Signer.from_hex``), the Google API
    key environment variable (present), the StemeDB health endpoint, and
    a one-post Reddit fetch. Every check prints an ``[OK]`` or ``[FAIL]``
    line; a failing check never stops the later ones from running.

    Returns:
        True when every check passed, False otherwise.
    """
    from config import STEMEDB_URL, ENV_STEMEDB_AGENT_SEED, ENV_GOOGLE_API_KEY

    rule = "=" * 60
    print(rule)
    print("Environment Check")
    print(rule)
    print()

    problems = []

    # --- Agent seed ---
    agent_seed = os.getenv(ENV_STEMEDB_AGENT_SEED)
    if agent_seed and len(agent_seed) == 64:
        # The import stays inside the try so that an import failure is
        # reported as a failed check rather than aborting the whole run.
        try:
            from signer import Signer

            agent = Signer.from_hex(agent_seed)
            print(f"[OK] Agent ID: {agent.public_key[:16]}...")
        except Exception as exc:
            print(f"[FAIL] Invalid agent seed: {exc}")
            problems.append("Invalid agent seed")
    elif agent_seed:
        print(f"[FAIL] {ENV_STEMEDB_AGENT_SEED} wrong length ({len(agent_seed)}, expected 64)")
        problems.append("Invalid agent seed length")
    else:
        print(f"[FAIL] {ENV_STEMEDB_AGENT_SEED} not set")
        problems.append("Missing agent seed")

    # --- Google API key ---
    if os.getenv(ENV_GOOGLE_API_KEY):
        print(f"[OK] {ENV_GOOGLE_API_KEY} is set")
    else:
        print(f"[FAIL] {ENV_GOOGLE_API_KEY} not set")
        problems.append("Missing Google API key")

    # --- StemeDB connectivity ---
    print()
    print(f"Checking StemeDB at {STEMEDB_URL}...")
    try:
        from stemedb_client import StemeDBClient

        status = StemeDBClient(STEMEDB_URL).health()
        print(f"[OK] StemeDB {status.version} - {status.assertions_count} assertions")
    except Exception as exc:
        print(f"[FAIL] Cannot connect to StemeDB: {exc}")
        problems.append("StemeDB connection failed")

    # --- Reddit connectivity ---
    print()
    print("Checking Reddit API...")
    try:
        from tools import fetch_reddit_posts

        probe = fetch_reddit_posts("Ozempic", limit=1)
        if "error" not in probe:
            print(f"[OK] Reddit API accessible (fetched {probe['total_fetched']} posts)")
        else:
            print(f"[FAIL] Reddit API error: {probe['error']}")
            problems.append("Reddit API failed")
    except Exception as exc:
        print(f"[FAIL] Reddit API error: {exc}")
        problems.append("Reddit API failed")

    # --- Summary ---
    print()
    if not problems:
        print("[RESULT] All checks passed!")
        return True

    print(f"[RESULT] {len(problems)} errors found")
    for problem in problems:
        print(f"  - {problem}")
    return False


def run_batch():
    """Run batch extraction from all configured subreddits.

    For every subreddit in ``TARGET_SUBREDDITS`` this fetches up to 25
    posts, runs the keyword heuristics of ``extract_simple`` on each
    returned post, and stores every extraction through ``store_assertion``.
    Progress and a final summary are printed to stdout.

    The agent seed is validated up front via ``Signer.from_env``; if that
    raises ``ValueError`` the error is printed and the function returns
    without contacting Reddit.

    Returns:
        None. Results are reported on stdout only.
    """
    from config import TARGET_SUBREDDITS, ENV_STEMEDB_AGENT_SEED
    from tools import fetch_reddit_posts, store_assertion
    from signer import Signer

    print("=" * 60)
    print("Reddit Adverse Event Extraction - Batch Mode")
    print("=" * 60)
    print()

    # Validate signer
    try:
        Signer.from_env(ENV_STEMEDB_AGENT_SEED)
    except ValueError as e:
        print(f"[ERROR] {e}")
        print("Run: python main.py --generate-key")
        return

    total_posts = 0
    total_extractions = 0

    for index, subreddit in enumerate(TARGET_SUBREDDITS):
        # Rate limit politeness: pause before every fetch after the first.
        # Sleeping at the top of the loop means a failed fetch (which
        # `continue`s below) is still followed by a delay, and no time is
        # wasted sleeping after the final subreddit.
        if index > 0:
            time.sleep(2)

        print(f"\n[*] Scanning r/{subreddit}...")
        result = fetch_reddit_posts(subreddit, limit=25)

        if "error" in result:
            print(f"    [ERROR] {result['error']}")
            continue

        print(f"    Found {result['matched_posts']} posts with keywords")
        total_posts += result["matched_posts"]

        for post in result["posts"]:
            # Simple heuristic extraction (without LLM for batch mode)
            extractions = extract_simple(post)

            for extraction in extractions:
                stored = store_assertion(
                    subject=extraction["subject"],
                    predicate=extraction["predicate"],
                    object_value=extraction["object"],
                    confidence=extraction["confidence"],
                    source_url=post["url"],
                    severity=extraction.get("severity"),
                    reddit_post_id=post["id"],
                )

                if stored["success"]:
                    total_extractions += 1
                    print(f"    -> {extraction['predicate']}:{extraction['object']} @ {extraction['confidence']:.2f}")
                else:
                    print(f"    [FAIL] {stored['error']}")

    print()
    print("=" * 60)
    print(f"[DONE] Processed {total_posts} posts, stored {total_extractions} assertions")
    print("=" * 60)


def extract_simple(post: dict) -> list:
    """
    Simple heuristic extraction for batch mode.

    Scans the lower-cased title and text of ``post`` for fixed keywords
    and returns one extraction per rule that matches.

    For full LLM-powered extraction, use the agent via `adk web`.

    Args:
        post: Mapping with ``"title"`` and ``"text"`` entries and an
            optional ``"detected_drug"`` entry used as the subject. A
            missing or empty ``"detected_drug"`` falls back to
            ``"glp1_agonist"``.

    Returns:
        A list of dicts with the keys ``subject``, ``predicate``,
        ``object``, ``confidence`` and ``severity``, in rule order. Empty
        when no keyword matches.

    Raises:
        KeyError: if ``post`` has no ``"title"`` or ``"text"`` key.
    """
    # Local import, matching this file's function-scope import style.
    import re

    text = f"{post['title']} {post['text']}".lower()
    # `or` also covers a key that is present but None/empty.
    subject = post.get("detected_drug") or "glp1_agonist"

    def has_any(*keywords):
        """Return True if any keyword occurs as a substring of the text."""
        return any(keyword in text for keyword in keywords)

    # Computed once so the nausea rule is suppressed by the same test that
    # triggers the vomiting rule (including "throw up" / "throwing up").
    vomiting = has_any("vomit", "throw up", "throwing up")

    # Word-boundary match so "ER" is also found at the start or end of the
    # text and next to punctuation ("went to the ER."), without matching
    # the letters "er" inside longer words.
    er_mentioned = re.search(r"\ber\b", text) is not None

    # (matched, predicate, object, confidence, severity), in output order.
    rules = [
        (has_any("paralysis", "gastroparesis"),
         "adverse_event", "gastroparesis", 0.55, "high"),
        (vomiting,
         "side_effect", "vomiting", 0.5, "medium"),
        ("nausea" in text and not vomiting,
         "side_effect", "nausea", 0.45, "low"),
        (has_any("hair loss", "losing hair"),
         "side_effect", "hair_loss", 0.5, "medium"),
        (has_any("hospital", "emergency") or er_mentioned,
         "adverse_event", "hospitalization", 0.55, "high"),
        (has_any("stopped working", "not working"),
         "efficacy_issue", "tolerance", 0.45, "medium"),
        ("headache" in text,
         "side_effect", "headache", 0.4, "low"),
        (has_any("fatigue", "tired", "exhausted"),
         "side_effect", "fatigue", 0.4, "low"),
    ]

    return [
        {
            "subject": subject,
            "predicate": predicate,
            "object": obj,
            "confidence": confidence,
            "severity": severity,
        }
        for matched, predicate, obj, confidence, severity in rules
        if matched
    ]


def main():
    """Parse the command line and dispatch to the selected mode.

    Recognised flags are ``--generate-key``, ``--check`` and ``--batch``;
    when several are given the first one in that order wins. ``--check``
    terminates the process with status 0 on success and 1 on failure.
    With no flag the help text is printed, followed by a hint about the
    interactive ``adk web`` mode.
    """
    usage_examples = """
Examples:
    python main.py --generate-key    Generate new signing keypair
    python main.py --check           Check environment and connectivity
    python main.py --batch           Run batch extraction
    adk web                          Start interactive web UI
        """

    parser = argparse.ArgumentParser(
        description="Reddit Adverse Event Agent",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # All options are plain boolean switches; register them in display order.
    switches = (
        ("--generate-key", "Generate a new Ed25519 keypair"),
        ("--check", "Validate environment and connectivity"),
        ("--batch", "Run batch extraction from all subreddits"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)

    options = parser.parse_args()

    if options.generate_key:
        generate_key()
        return

    if options.check:
        # sys.exit raises SystemExit, so nothing below runs in this mode.
        sys.exit(0 if check_environment() else 1)

    if options.batch:
        run_batch()
        return

    parser.print_help()
    print()
    print("For interactive mode, run: adk web")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()