Major additions:
- Community Next.js app (port 18187) for browsing claims with API docs
- stemedb-chaos crate: Fault injection, chaos testing, CRDT properties
- Latent ingestion system: Reddit/FDA ingesters with ADK-Go agents
- Disputed claims handling: Manual review workflows and validation
- Aphoria security scanner: New extractors (SQL injection, command injection, weak crypto, TLS version), policy-based ignores, UAT reports
- Docker infrastructure: Dockerfile, docker-compose.yml for full stack
- VulnBank demo: Intentionally vulnerable multi-language test corpus

SDK & API enhancements:
- Source registry handlers for tracking data provenance
- Metrics endpoint
- Skeptic filtering improvements

Code quality:
- Split 14 large files (>500 lines) into focused modules
- All files now under 500-line limit per project guidelines

Documentation:
- Chaos testing guide, circuit breakers, observability docs
- Phase 7 UAT documentation updates
- Martin Kleppmann technical writer agent

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
145 lines
4.5 KiB
Python
145 lines
4.5 KiB
Python
import requests
|
|
import json
|
|
import time
|
|
import uuid
|
|
from datetime import datetime
|
|
|
|
# Configuration

# Generic drug names to ingest. Each entry is matched against the
# `openfda.generic_name` field of the label endpoint by fetch_label().
TARGET_MOLECULES = [
    "semaglutide",
    "tirzepatide",
    "liraglutide"
]

# openFDA drug-label search endpoint (queried with search/limit/sort params).
API_BASE = "https://api.fda.gov/drug/label.json"

# StemeDB Source Class for Regulatory
# NOTE(review): the value 0 presumably matches the "Tier 0" wording used in
# this script's log output -- confirm against the StemeDB source-class enum.
SOURCE_CLASS_REGULATORY = 0
|
|
|
|
def fetch_label(molecule, timeout=30):
    """
    Fetches the most recent FDA label for a given generic molecule name.

    Args:
        molecule: Generic drug name, matched against `openfda.generic_name`.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the whole
            ingestion run indefinitely.

    Returns:
        The first result object from the API response (the newest label, as
        results are requested sorted by `effective_time` descending), or
        None when the request fails or no result is returned. Failures are
        reported on stdout rather than raised.
    """
    print(f"[*] Fetching FDA label for: {molecule}...")

    # Query for the generic name, prioritize recent effective_time
    params = {
        "search": f'openfda.generic_name:"{molecule}"',
        "limit": 1,
        "sort": "effective_time:desc",
    }

    # Keep the try body limited to the calls that can actually fail on I/O
    # or decoding; ValueError covers JSON decode errors raised by older
    # versions of requests.
    try:
        response = requests.get(API_BASE, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"[!] Error fetching {molecule}: {e}")
        return None

    # Guard against an unexpected payload shape instead of relying on a
    # blanket exception handler.
    results = data.get("results") if isinstance(data, dict) else None
    if results:
        return results[0]

    print(f"[!] No results found for {molecule}")
    return None
|
|
|
|
def extract_assertions(label_data, molecule):
    """
    Parses raw OpenFDA JSON into StemeDB Assertions.

    Args:
        label_data: One label record (a dict) as returned by the openFDA
            drug-label endpoint.
        molecule: Generic drug name; used as the `subject` of every assertion.

    Returns:
        A list of assertion dicts, one per label section that is present in
        `label_data`, in the order boxed_warning, adverse_reactions,
        warnings_and_precautions. The list is empty when none are present.
    """
    # (label section key, assertion predicate), in emission order:
    # 1. Boxed Warnings (Critical Safety Info)
    # 2. Adverse Reactions (The "Side Effects")
    # 3. Warnings and Precautions
    section_predicates = (
        ("boxed_warning", "has_boxed_warning"),
        ("adverse_reactions", "has_adverse_reactions_section"),
        ("warnings_and_precautions", "has_warnings"),
    )

    # Metadata for the Source
    set_id = label_data.get("set_id", "unknown")
    effective_time = label_data.get("effective_time", datetime.now().strftime("%Y%m%d"))

    # `openfda` may be missing or null and `brand_name` may be missing or an
    # empty list; fall back to "Unknown" in every case instead of raising.
    brand_names = (label_data.get("openfda") or {}).get("brand_name") or ["Unknown"]
    brand_name = brand_names if isinstance(brand_names, str) else brand_names[0]

    assertions = []
    for section, predicate in section_predicates:
        if section not in label_data:
            continue

        raw = label_data[section]
        # Sections are lists of paragraphs; a bare string is used as-is so it
        # is not joined character by character.
        text = raw if isinstance(raw, str) else "\n".join(raw)

        assertions.append({
            "id": str(uuid.uuid4()),
            "subject": molecule,
            "predicate": predicate,
            "object": text,  # Raw text for now, NLP would extract specific risks
            "confidence": 1.0,
            "source_class": SOURCE_CLASS_REGULATORY,
            "source_metadata": {
                "type": "openfda_label",
                "set_id": set_id,
                "section": section,
                "effective_date": effective_time,
                "brand_name": brand_name,
            },
            "timestamp": int(time.time()),
        })

    return assertions
|
|
|
|
def main():
    """
    Fetch a label for every target molecule, convert each into assertions,
    and dump the combined result as one JSON object per line.
    """
    all_assertions = []

    for molecule in TARGET_MOLECULES:
        record = fetch_label(molecule)
        if record:
            drug_assertions = extract_assertions(record, molecule)
            all_assertions += drug_assertions
            print(f"[+] Extracted {len(drug_assertions)} Tier 0 assertions for {molecule}")

        # Throttle: pause between consecutive API requests.
        time.sleep(1)

    # JSONL output stands in for a real StemeDB write.
    output_file = "tier0_regulatory_graph.jsonl"
    with open(output_file, "w") as sink:
        sink.writelines(json.dumps(entry) + "\n" for entry in all_assertions)

    print(f"\n[OK] Successfully generated {len(all_assertions)} regulatory assertions.")
    print(f"[->] Graph data written to {output_file}")
|
|
|
|
# Run the ingestion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|