//! Research-related CLI command implementations. //! //! These functions power the research agent commands (research, research-status). use crate::bridge; use crate::episteme::{self, ConceptIndex}; use crate::research::{self, GapRecord, GapStore, ResearchConfig, ResearchOutcome, Researcher}; use crate::{AphoriaConfig, AphoriaError, Observation}; use tracing::{info, instrument}; /// Arguments for the research command. #[derive(Debug, Clone, Default)] pub struct ResearchArgs { /// Gap threshold: minimum number of projects before researching. pub threshold: Option, /// Maximum age of gaps to consider (days). pub max_age_days: Option, /// Whether to use strict quality validation. pub strict: bool, /// Prune old gaps before researching. pub prune: bool, } /// Run the research agent to fill gaps in authoritative coverage. /// /// This command: /// 1. Loads the gap store /// 2. Finds gaps eligible for research (seen in N+ projects) /// 3. Researches official documentation for each gap /// 4. Validates extracted claims for quality /// 5. Ingests high-quality claims into the corpus #[instrument(skip(config), fields(threshold = ?args.threshold, strict = args.strict))] pub async fn run_research( args: ResearchArgs, config: &AphoriaConfig, ) -> Result { use research::{DEFAULT_GAP_MAX_AGE_DAYS, DEFAULT_GAP_THRESHOLD}; info!("Starting research agent"); let threshold = args.threshold.unwrap_or(DEFAULT_GAP_THRESHOLD); let max_age_days = args.max_age_days.unwrap_or(DEFAULT_GAP_MAX_AGE_DAYS); // Open gap store let gap_store_path = config.episteme.data_dir.join("gaps.json"); let mut gap_store = GapStore::open(&gap_store_path)?; // Prune old gaps if requested if args.prune { gap_store.prune_old_gaps(max_age_days); } // Get research candidates - clone the records to avoid borrow issues let candidates: Vec = gap_store.get_research_candidates(threshold).into_iter().cloned().collect(); if candidates.is_empty() { info!("No gaps eligible for research (threshold: {})", threshold); return Ok(ResearchOutcome::empty()); } info!(candidates = candidates.len(), threshold, "Found research candidates"); // Create researcher let research_config = if args.strict { ResearchConfig { strict_validation: true, min_confidence: 0.85, ..Default::default() } } else { ResearchConfig::default() }; let researcher = Researcher::with_config(research_config); // Research gaps - pass references to our cloned records let candidate_refs: Vec<&GapRecord> = candidates.iter().collect(); let outcome = researcher.research_gaps(&candidate_refs); // Mark gaps as researched for result in &outcome.results { if let Some(record) = gap_store.get_mut(&result.gap) { record.mark_research_attempted(result.success); } } // Save gap store gap_store.save()?; // If we have validated claims, ingest them if outcome.assertions_created > 0 { info!(assertions = outcome.assertions_created, "Ingesting researched claims"); // Get validated claims for ingestion let validated_claims = researcher.get_validated_claims(&candidate_refs); if !validated_claims.is_empty() { let project_root = std::env::current_dir()?; let signing_key = bridge::load_or_generate_key(&project_root)?; let timestamp = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_secs()) .unwrap_or(0); // Convert researched claims to assertions let assertions: Vec<_> = validated_claims .into_iter() .map(|claim| { let source_class = match claim.tier { 0 => stemedb_core::types::SourceClass::Regulatory, 1 => stemedb_core::types::SourceClass::Clinical, _ => stemedb_core::types::SourceClass::Observational, }; episteme::create_authoritative_assertion( &signing_key, &claim.subject, &claim.predicate, claim.value, source_class, &claim.description, timestamp, ) }) .collect(); // Ingest assertions let mut episteme_instance = episteme::LocalEpisteme::open(config, &project_root).await?; let ingested = episteme_instance.ingest_authoritative(&assertions).await?; episteme_instance.shutdown().await; info!(ingested, "Research claims ingested"); } } Ok(outcome) } /// Record gaps detected during a scan. /// /// This should be called after each scan to track gaps for research. #[instrument(skip(config, claims, index), fields(claim_count = claims.len()))] pub async fn record_scan_gaps( claims: &[Observation], index: &ConceptIndex, project_id: &str, config: &AphoriaConfig, ) -> Result { // Detect gaps let gaps = research::detect_gaps(claims, index); if gaps.is_empty() { return Ok(0); } // Open gap store and record let gap_store_path = config.episteme.data_dir.join("gaps.json"); let mut gap_store = GapStore::open(&gap_store_path)?; gap_store.record_gaps(&gaps, project_id); gap_store.save()?; info!(gaps_recorded = gaps.len(), project = project_id, "Recorded gaps for research"); Ok(gaps.len()) } /// Show research status including gap statistics. #[instrument(skip(config))] pub async fn show_research_status(config: &AphoriaConfig) -> Result { let gap_store_path = config.episteme.data_dir.join("gaps.json"); let mut output = String::new(); output.push_str("Research Agent Status:\n\n"); if !gap_store_path.exists() { output.push_str(" Gap store: not initialized\n"); output.push_str(" Run scans to start collecting gap data.\n"); return Ok(output); } let gap_store = GapStore::open(&gap_store_path)?; output.push_str(&format!(" Gap store: {}\n", gap_store_path.display())); output.push_str(&format!(" Total gaps tracked: {}\n", gap_store.len())); // Count by project threshold let threshold_3 = gap_store.gaps_by_project_count(3).len(); let threshold_5 = gap_store.gaps_by_project_count(5).len(); output.push_str(&format!(" Gaps seen in 3+ projects: {}\n", threshold_3)); output.push_str(&format!(" Gaps seen in 5+ projects: {}\n", threshold_5)); // Count research status let mut researched = 0; let mut successful = 0; for record in gap_store.all_records() { if record.research_attempted { researched += 1; if record.research_successful { successful += 1; } } } output.push_str(&format!(" Gaps researched: {}\n", researched)); output.push_str(&format!(" Research successful: {}\n", successful)); // Show top gaps ready for research let candidates: Vec<_> = gap_store.get_research_candidates(3); if !candidates.is_empty() { output.push_str("\n Top gaps ready for research:\n"); for record in candidates.iter().take(5) { output.push_str(&format!( " - {} (seen in {} projects)\n", record.topic, record.project_count )); } } Ok(output) }