""" ADK tools for the Reddit Adverse Event Agent. These are plain functions that will be registered with the ADK Agent. """ import json import time from typing import Optional import requests # Support both package and script imports try: from .config import ( REDDIT_HEADERS, ADVERSE_EVENT_KEYWORDS, DRUG_MAP, MIN_CONFIDENCE, MAX_CONFIDENCE, SOURCE_CLASS_SOCIAL, DEFAULT_LIFECYCLE, STEMEDB_URL, ENV_STEMEDB_AGENT_SEED, ) from .signer import Signer from .stemedb_client import StemeDBClient except ImportError: from config import ( REDDIT_HEADERS, ADVERSE_EVENT_KEYWORDS, DRUG_MAP, MIN_CONFIDENCE, MAX_CONFIDENCE, SOURCE_CLASS_SOCIAL, DEFAULT_LIFECYCLE, STEMEDB_URL, ENV_STEMEDB_AGENT_SEED, ) from signer import Signer from stemedb_client import StemeDBClient # Module-level client (initialized lazily) _client: Optional[StemeDBClient] = None def _get_client() -> StemeDBClient: """Get or create the StemeDB client.""" global _client if _client is None: signer = Signer.from_env(ENV_STEMEDB_AGENT_SEED) _client = StemeDBClient(STEMEDB_URL, signer) return _client def fetch_reddit_posts(subreddit: str, limit: int = 25) -> dict: """ Fetch recent posts from a subreddit matching adverse event keywords. This function scrapes Reddit's public JSON API for posts in GLP-1 medication subreddits that mention potential adverse events or side effects. Args: subreddit: Name of the subreddit to scan (e.g., "Ozempic", "Mounjaro") limit: Maximum number of posts to fetch (default: 25, max: 100) Returns: A dictionary with: - subreddit: The scanned subreddit name - total_fetched: Number of posts retrieved from Reddit - matched_posts: Number of posts matching adverse event keywords - posts: List of matching posts with id, title, text, url, created_utc, score, author, and detected_drug fields Example: result = fetch_reddit_posts("Ozempic", limit=50) for post in result["posts"]: print(f"{post['title']} - {post['detected_drug']}") """ limit = min(limit, 100) # Reddit API limit url = f"https://www.reddit.com/r/{subreddit}/new.json?limit={limit}" try: response = requests.get(url, headers=REDDIT_HEADERS, timeout=10) response.raise_for_status() data = response.json() except requests.exceptions.RequestException as e: return { "subreddit": subreddit, "error": str(e), "total_fetched": 0, "matched_posts": 0, "posts": [], } matched_posts = [] children = data.get("data", {}).get("children", []) for child in children: post_data = child.get("data", {}) content = f"{post_data.get('title', '')} {post_data.get('selftext', '')}".lower() # Check for adverse event keywords if not any(keyword in content for keyword in ADVERSE_EVENT_KEYWORDS): continue # Detect the drug from subreddit name detected_drug = DRUG_MAP.get(subreddit.lower(), "glp1_agonist") matched_posts.append( { "id": post_data.get("id"), "title": post_data.get("title", ""), "text": post_data.get("selftext", "")[:2000], # Truncate long posts "url": f"https://reddit.com{post_data.get('permalink', '')}", "created_utc": int(post_data.get("created_utc", 0)), "score": post_data.get("score", 0), "author": post_data.get("author", "deleted"), "detected_drug": detected_drug, } ) return { "subreddit": subreddit, "total_fetched": len(children), "matched_posts": len(matched_posts), "posts": matched_posts, } def store_assertion( subject: str, predicate: str, object_value: str, confidence: float, source_url: str, severity: Optional[str] = None, reddit_post_id: Optional[str] = None, ) -> dict: """ Store a signed assertion in StemeDB. This function creates an assertion representing an extracted adverse event and signs it with the agent's Ed25519 key before submitting to StemeDB. Args: subject: The drug or entity (e.g., "semaglutide", "tirzepatide") predicate: The type of assertion (e.g., "side_effect", "adverse_event") object_value: The specific effect (e.g., "nausea", "gastroparesis") confidence: Confidence score (0.0-1.0, will be clamped to 0.3-0.7 for anecdotal data) source_url: Reddit post URL for provenance severity: Optional severity level ("low", "medium", "high") reddit_post_id: Optional Reddit post ID for tracking Returns: A dictionary with: - success: Boolean indicating if the assertion was stored - hash: Content-addressed hash of the assertion (if successful) - subject, predicate, object, confidence: The stored values - source_hash: BLAKE3 hash of the source URL - error: Error message (if failed) Example: result = store_assertion( subject="semaglutide", predicate="side_effect", object_value="nausea", confidence=0.5, source_url="https://reddit.com/r/Ozempic/comments/abc123/...", severity="low" ) if result["success"]: print(f"Stored with hash: {result['hash']}") """ # Clamp confidence to allowed range for anecdotal data clamped_confidence = max(MIN_CONFIDENCE, min(MAX_CONFIDENCE, confidence)) # Build source metadata metadata = {"type": "reddit_post", "severity": severity} if reddit_post_id: metadata["reddit_id"] = reddit_post_id metadata_json = json.dumps(metadata) try: client = _get_client() result = client.assert_fact( subject=subject, predicate=predicate, object_value=object_value, confidence=clamped_confidence, source_url=source_url, source_class=SOURCE_CLASS_SOCIAL, lifecycle=DEFAULT_LIFECYCLE, source_metadata=metadata_json, ) return { "success": True, "hash": result.hash, "subject": subject, "predicate": predicate, "object": object_value, "confidence": clamped_confidence, "source_hash": result.hash[:16] + "...", # Truncated for display } except Exception as e: return { "success": False, "error": str(e), "subject": subject, "predicate": predicate, "object": object_value, }