// stemedb/applications/aphoria/src/research_commands.rs

//! Research-related CLI command implementations.
//!
//! These functions power the research agent commands (research, research-status).

use crate::bridge;
use crate::episteme::{self, ConceptIndex};
use crate::research::{self, GapRecord, GapStore, ResearchConfig, ResearchOutcome, Researcher};
use crate::{AphoriaConfig, AphoriaError, Observation};
use tracing::{info, instrument};

/// Arguments for the research command.
///
/// Every field has a usable default (`None` / `false`), so
/// `ResearchArgs::default()` runs the research agent with the library defaults
/// and without pruning.
#[derive(Debug, Clone, Default)]
pub struct ResearchArgs {
    /// Gap threshold: minimum number of projects before researching.
    ///
    /// When `None`, `run_research` falls back to
    /// `research::DEFAULT_GAP_THRESHOLD`.
    pub threshold: Option<u32>,
    /// Maximum age of gaps to consider (days).
    ///
    /// When `None`, `run_research` falls back to
    /// `research::DEFAULT_GAP_MAX_AGE_DAYS`. In `run_research` this value is
    /// only passed to the pruning step, so it has no effect unless `prune` is
    /// also set.
    pub max_age_days: Option<u64>,
    /// Whether to use strict quality validation.
    ///
    /// When `true`, `run_research` builds a `ResearchConfig` with
    /// `strict_validation` enabled and `min_confidence` set to `0.85`;
    /// otherwise the default `ResearchConfig` is used.
    pub strict: bool,
    /// Prune old gaps before researching.
    pub prune: bool,
}

/// Run the research agent to fill gaps in authoritative coverage.
///
/// This command:
/// 1. Loads the gap store (pruning old gaps first when `args.prune` is set)
/// 2. Finds gaps eligible for research (seen in N+ projects)
/// 3. Researches official documentation for each gap
/// 4. Records the research attempt on every researched gap and saves the store
/// 5. Converts validated claims into signed assertions and ingests them into
///    the corpus
///
/// # Arguments
///
/// * `args` - Threshold, maximum gap age, strictness and pruning options.
///   Unset optional values fall back to `DEFAULT_GAP_THRESHOLD` and
///   `DEFAULT_GAP_MAX_AGE_DAYS`.
/// * `config` - Application configuration; the gap store is read from
///   `gaps.json` inside `config.episteme.data_dir`.
///
/// # Returns
///
/// The [`ResearchOutcome`] produced by the researcher, or
/// `ResearchOutcome::empty()` when no gap meets the threshold.
///
/// # Errors
///
/// Returns an error if the gap store cannot be opened or saved, if the current
/// working directory cannot be determined, if the signing key cannot be loaded
/// or generated, or if opening the local Episteme instance or ingesting the
/// assertions fails.
#[instrument(skip(config), fields(threshold = ?args.threshold, strict = args.strict))]
pub async fn run_research(
    args: ResearchArgs,
    config: &AphoriaConfig,
) -> Result<ResearchOutcome, AphoriaError> {
    use research::{DEFAULT_GAP_MAX_AGE_DAYS, DEFAULT_GAP_THRESHOLD};

    info!("Starting research agent");

    let threshold = args.threshold.unwrap_or(DEFAULT_GAP_THRESHOLD);
    let max_age_days = args.max_age_days.unwrap_or(DEFAULT_GAP_MAX_AGE_DAYS);

    // Open gap store
    let gap_store_path = config.episteme.data_dir.join("gaps.json");
    let mut gap_store = GapStore::open(&gap_store_path)?;

    // Prune old gaps if requested
    if args.prune {
        gap_store.prune_old_gaps(max_age_days);
    }

    // Get research candidates. The records are cloned so the store can be
    // mutably borrowed later when marking gaps as researched.
    let candidates: Vec<GapRecord> =
        gap_store.get_research_candidates(threshold).into_iter().cloned().collect();

    if candidates.is_empty() {
        // This early return skips the save further down, so persist the pruned
        // store here; otherwise a requested prune would be silently discarded.
        // NOTE(review): assumes `prune_old_gaps` does not save on its own, as
        // other store mutations in this file are followed by an explicit save.
        if args.prune {
            gap_store.save()?;
        }

        info!("No gaps eligible for research (threshold: {})", threshold);
        return Ok(ResearchOutcome::empty());
    }

    info!(candidates = candidates.len(), threshold, "Found research candidates");

    // Create researcher; strict mode tightens validation and raises the
    // minimum confidence.
    let research_config = if args.strict {
        ResearchConfig { strict_validation: true, min_confidence: 0.85, ..Default::default() }
    } else {
        ResearchConfig::default()
    };

    let researcher = Researcher::with_config(research_config);

    // Research gaps - pass references to our cloned records
    let candidate_refs: Vec<&GapRecord> = candidates.iter().collect();
    let outcome = researcher.research_gaps(&candidate_refs);

    // Mark gaps as researched (successful or not) so the attempt is recorded.
    for result in &outcome.results {
        if let Some(record) = gap_store.get_mut(&result.gap) {
            record.mark_research_attempted(result.success);
        }
    }

    // Save gap store
    gap_store.save()?;

    // If we have validated claims, ingest them
    if outcome.assertions_created > 0 {
        info!(assertions = outcome.assertions_created, "Ingesting researched claims");

        // Get validated claims for ingestion
        let validated_claims = researcher.get_validated_claims(&candidate_refs);

        if !validated_claims.is_empty() {
            let project_root = std::env::current_dir()?;
            let signing_key = bridge::load_or_generate_key(&project_root)?;
            // Seconds since the Unix epoch; falls back to 0 if the system clock
            // reports a time before the epoch.
            let timestamp = std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_secs())
                .unwrap_or(0);

            // Convert researched claims to assertions
            let assertions: Vec<_> = validated_claims
                .into_iter()
                .map(|claim| {
                    // Tier 0 and 1 map to dedicated source classes; every other
                    // tier is treated as observational.
                    let source_class = match claim.tier {
                        0 => stemedb_core::types::SourceClass::Regulatory,
                        1 => stemedb_core::types::SourceClass::Clinical,
                        _ => stemedb_core::types::SourceClass::Observational,
                    };

                    episteme::create_authoritative_assertion(
                        &signing_key,
                        &claim.subject,
                        &claim.predicate,
                        claim.value,
                        source_class,
                        &claim.description,
                        timestamp,
                    )
                })
                .collect();

            // Ingest assertions. The result is captured first so the instance
            // is always shut down, even when ingestion fails.
            let mut episteme_instance =
                episteme::LocalEpisteme::open(config, &project_root).await?;
            let ingest_result = episteme_instance.ingest_authoritative(&assertions).await;
            episteme_instance.shutdown().await;
            let ingested = ingest_result?;

            info!(ingested, "Research claims ingested");
        }
    }

    Ok(outcome)
}

/// Detect coverage gaps in a scan's observations and persist them.
///
/// Intended to run after each scan so that gaps accumulate per project and can
/// later be picked up by the research agent.
///
/// # Arguments
///
/// * `claims` - Observations produced by the scan.
/// * `index` - Concept index the observations are checked against.
/// * `project_id` - Identifier of the project the gaps are attributed to.
/// * `config` - Application configuration; the gap store lives in `gaps.json`
///   inside `config.episteme.data_dir`.
///
/// # Returns
///
/// The number of gaps detected (and recorded); `0` when none were found, in
/// which case the gap store is not touched.
///
/// # Errors
///
/// Returns an error if the gap store cannot be opened or saved.
#[instrument(skip(config, claims, index), fields(claim_count = claims.len()))]
pub async fn record_scan_gaps(
    claims: &[Observation],
    index: &ConceptIndex,
    project_id: &str,
    config: &AphoriaConfig,
) -> Result<usize, AphoriaError> {
    let detected = research::detect_gaps(claims, index);

    // Nothing to record: leave the store on disk untouched.
    if detected.is_empty() {
        return Ok(0);
    }
    let recorded = detected.len();

    // Load the store, add this project's gaps, and write it back.
    let store_file = config.episteme.data_dir.join("gaps.json");
    let mut store = GapStore::open(&store_file)?;
    store.record_gaps(&detected, project_id);
    store.save()?;

    info!(gaps_recorded = recorded, project = project_id, "Recorded gaps for research");

    Ok(recorded)
}

/// Build a human-readable report of the research agent's state.
///
/// The report lists the gap store location, how many gaps are tracked, how many
/// were seen in 3+ and 5+ projects, how many have been researched (and how many
/// of those succeeded), and up to five gaps that are ready for research at a
/// threshold of 3 projects. When the gap store file does not exist yet, a short
/// "not initialized" message is returned instead.
///
/// # Errors
///
/// Returns an error if the gap store exists but cannot be opened.
#[instrument(skip(config))]
pub async fn show_research_status(config: &AphoriaConfig) -> Result<String, AphoriaError> {
    let store_file = config.episteme.data_dir.join("gaps.json");

    let mut report = String::from("Research Agent Status:\n\n");

    // No store on disk yet: explain how to get started and stop here.
    if !store_file.exists() {
        report += "  Gap store: not initialized\n";
        report += "  Run scans to start collecting gap data.\n";
        return Ok(report);
    }

    let store = GapStore::open(&store_file)?;

    report += &format!("  Gap store: {}\n", store_file.display());
    report += &format!("  Total gaps tracked: {}\n", store.len());

    // Gap counts at the two fixed project thresholds.
    let seen_in_three = store.gaps_by_project_count(3).len();
    let seen_in_five = store.gaps_by_project_count(5).len();

    report += &format!("  Gaps seen in 3+ projects: {}\n", seen_in_three);
    report += &format!("  Gaps seen in 5+ projects: {}\n", seen_in_five);

    // Tally research attempts; success is only counted for attempted gaps.
    let mut attempted = 0;
    let mut succeeded = 0;
    for entry in store.all_records() {
        if !entry.research_attempted {
            continue;
        }
        attempted += 1;
        if entry.research_successful {
            succeeded += 1;
        }
    }

    report += &format!("  Gaps researched: {}\n", attempted);
    report += &format!("  Research successful: {}\n", succeeded);

    // List at most five gaps that currently qualify for research.
    let ready: Vec<_> = store.get_research_candidates(3);
    if !ready.is_empty() {
        report += "\n  Top gaps ready for research:\n";
        for entry in ready.iter().take(5) {
            report += &format!(
                "    - {} (seen in {} projects)\n",
                entry.topic, entry.project_count
            );
        }
    }

    Ok(report)
}