stemedb/applications/aphoria/src/research_commands.rs
jml 3b5f88b4f0 feat(aphoria): implement claims architecture (A1-A5) with verify engine, corpus, coverage, and explain
Complete Aphoria claims system overhaul:
- A1: Rename ExtractedClaim to Observation (extractors produce observations, not claims)
- A2: Add AuthoredClaim with full provenance, invariants, and authority tiers
- A3: Verify engine comparing observations against authored claims, CLI + formatters
- A4: Corpus as first-class assertions with predicate indexing, authority lens, trust packs
- A5: Coverage analysis, explain/docs generation, self-audit extractor, claim suggester skill

Also includes: 42 extractors updated for Observation type, verifiable_predicates trait,
conflict detection with comparison modes, claims TOML persistence, Grafana dashboard,
backup/restore scripts, and comprehensive test coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 09:11:47 +00:00

222 lines
7.6 KiB
Rust

//! Research-related CLI command implementations.
//!
//! These functions power the research agent commands (research, research-status).
use crate::bridge;
use crate::episteme::{self, ConceptIndex};
use crate::research::{self, GapRecord, GapStore, ResearchConfig, ResearchOutcome, Researcher};
use crate::{AphoriaConfig, AphoriaError, Observation};
use tracing::{info, instrument};
/// Arguments for the research command.
///
/// Consumed by `run_research`. Every field has a usable default, so
/// `ResearchArgs::default()` runs research with the crate-level default
/// threshold, non-strict validation, and no pruning.
#[derive(Debug, Clone, Default)]
pub struct ResearchArgs {
/// Gap threshold: minimum number of projects before researching.
///
/// `None` falls back to `research::DEFAULT_GAP_THRESHOLD`.
pub threshold: Option<u32>,
/// Maximum age of gaps to consider (days).
///
/// `None` falls back to `research::DEFAULT_GAP_MAX_AGE_DAYS`. In
/// `run_research` this value is only passed to the pruning step, so it has
/// no effect unless `prune` is set.
pub max_age_days: Option<u64>,
/// Whether to use strict quality validation.
///
/// When set, `run_research` builds a `ResearchConfig` with
/// `strict_validation: true` and `min_confidence: 0.85`; otherwise the
/// default `ResearchConfig` is used.
pub strict: bool,
/// Prune old gaps before researching.
pub prune: bool,
}
/// Run the research agent to fill gaps in authoritative coverage.
///
/// This command:
/// 1. Loads the gap store (`gaps.json` under the configured episteme data directory)
/// 2. Optionally prunes old gaps (only when `args.prune` is set)
/// 3. Finds gaps eligible for research at the requested threshold
/// 4. Runs the researcher over each candidate and records the attempt on its gap record
/// 5. Saves the gap store
/// 6. Converts validated claims into authoritative assertions and ingests them
///
/// Returns `ResearchOutcome::empty()` when no gap meets the threshold, otherwise the
/// outcome reported by the researcher.
///
/// The gap store is saved *before* ingestion, so a failed ingest still leaves the
/// affected gaps marked as attempted.
///
/// # Errors
///
/// Propagates failures from opening or saving the gap store, resolving the current
/// working directory, loading the signing key, and opening or ingesting into the local
/// episteme store. The episteme store is shut down even when ingestion fails.
#[instrument(skip(config), fields(threshold = ?args.threshold, strict = args.strict))]
pub async fn run_research(
    args: ResearchArgs,
    config: &AphoriaConfig,
) -> Result<ResearchOutcome, AphoriaError> {
    use research::{DEFAULT_GAP_MAX_AGE_DAYS, DEFAULT_GAP_THRESHOLD};

    info!("Starting research agent");

    let threshold = args.threshold.unwrap_or(DEFAULT_GAP_THRESHOLD);
    let max_age_days = args.max_age_days.unwrap_or(DEFAULT_GAP_MAX_AGE_DAYS);

    // Open gap store
    let gap_store_path = config.episteme.data_dir.join("gaps.json");
    let mut gap_store = GapStore::open(&gap_store_path)?;

    // Prune old gaps if requested
    if args.prune {
        gap_store.prune_old_gaps(max_age_days);
    }

    // Clone the candidate records so the store can be mutably borrowed later
    // (when marking research attempts) without conflicting with these borrows.
    let candidates: Vec<GapRecord> =
        gap_store.get_research_candidates(threshold).into_iter().cloned().collect();

    if candidates.is_empty() {
        // NOTE(review): this early return happens before `gap_store.save()`, so a prune
        // performed above is not saved by this function on this path — confirm whether
        // `prune_old_gaps` persists on its own.
        info!("No gaps eligible for research (threshold: {})", threshold);
        return Ok(ResearchOutcome::empty());
    }

    info!(candidates = candidates.len(), threshold, "Found research candidates");

    // Strict mode tightens validation and raises the confidence floor.
    let research_config = if args.strict {
        ResearchConfig { strict_validation: true, min_confidence: 0.85, ..Default::default() }
    } else {
        ResearchConfig::default()
    };
    let researcher = Researcher::with_config(research_config);

    // Research gaps - pass references to our cloned records
    let candidate_refs: Vec<&GapRecord> = candidates.iter().collect();
    let outcome = researcher.research_gaps(&candidate_refs);

    // Mark gaps as researched; results whose gap is no longer in the store are skipped.
    for result in &outcome.results {
        if let Some(record) = gap_store.get_mut(&result.gap) {
            record.mark_research_attempted(result.success);
        }
    }

    // Save gap store
    gap_store.save()?;

    // Nothing to ingest unless the researcher reported created assertions.
    if outcome.assertions_created == 0 {
        return Ok(outcome);
    }

    info!(assertions = outcome.assertions_created, "Ingesting researched claims");

    // Get validated claims for ingestion
    let validated_claims = researcher.get_validated_claims(&candidate_refs);
    if validated_claims.is_empty() {
        return Ok(outcome);
    }

    let project_root = std::env::current_dir()?;
    let signing_key = bridge::load_or_generate_key(&project_root)?;

    // Seconds since the Unix epoch; falls back to 0 if the system clock reads
    // earlier than the epoch.
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);

    // Convert researched claims to assertions.
    // Tier mapping: 0 -> Regulatory, 1 -> Clinical, anything else -> Observational.
    let assertions: Vec<_> = validated_claims
        .into_iter()
        .map(|claim| {
            let source_class = match claim.tier {
                0 => stemedb_core::types::SourceClass::Regulatory,
                1 => stemedb_core::types::SourceClass::Clinical,
                _ => stemedb_core::types::SourceClass::Observational,
            };
            episteme::create_authoritative_assertion(
                &signing_key,
                &claim.subject,
                &claim.predicate,
                claim.value,
                source_class,
                &claim.description,
                timestamp,
            )
        })
        .collect();

    // Ingest assertions. The result is captured rather than propagated with `?`
    // immediately so that the store is always shut down, even when ingestion fails.
    let mut episteme_instance = episteme::LocalEpisteme::open(config, &project_root).await?;
    let ingest_result = episteme_instance.ingest_authoritative(&assertions).await;
    episteme_instance.shutdown().await;
    let ingested = ingest_result?;

    info!(ingested, "Research claims ingested");

    Ok(outcome)
}
/// Detect gaps in a scan's observations and record them in the gap store.
///
/// Meant to be invoked after each scan so gaps accumulate for later research.
/// Returns the number of gaps detected. When none are detected the gap store
/// is not opened and `Ok(0)` is returned.
///
/// # Errors
///
/// Propagates failures from opening or saving the gap store.
#[instrument(skip(config, claims, index), fields(claim_count = claims.len()))]
pub async fn record_scan_gaps(
    claims: &[Observation],
    index: &ConceptIndex,
    project_id: &str,
    config: &AphoriaConfig,
) -> Result<usize, AphoriaError> {
    let detected = research::detect_gaps(claims, index);

    if detected.is_empty() {
        Ok(0)
    } else {
        let detected_count = detected.len();

        // Persist the detected gaps against this project.
        let store_path = config.episteme.data_dir.join("gaps.json");
        let mut store = GapStore::open(&store_path)?;
        store.record_gaps(&detected, project_id);
        store.save()?;

        info!(gaps_recorded = detected_count, project = project_id, "Recorded gaps for research");
        Ok(detected_count)
    }
}
/// Build a text report describing the state of the research gap store.
///
/// The report covers the store location, the total number of tracked gaps,
/// how many gaps were seen in 3+ and 5+ projects, how many were researched
/// (and how many of those succeeded), and up to five gaps returned by
/// `get_research_candidates(3)`. If the gap store file does not exist yet,
/// a short "not initialized" report is returned instead.
///
/// # Errors
///
/// Propagates failures from opening the gap store.
#[instrument(skip(config))]
pub async fn show_research_status(config: &AphoriaConfig) -> Result<String, AphoriaError> {
    let store_path = config.episteme.data_dir.join("gaps.json");
    let mut report = String::from("Research Agent Status:\n\n");

    // Without a gap store file there is nothing to summarize.
    if !store_path.exists() {
        report.push_str(concat!(
            " Gap store: not initialized\n",
            " Run scans to start collecting gap data.\n",
        ));
        return Ok(report);
    }

    let store = GapStore::open(&store_path)?;

    // Count by project threshold
    let seen_in_3 = store.gaps_by_project_count(3).len();
    let seen_in_5 = store.gaps_by_project_count(5).len();

    // Tally (attempted, successful); success is only counted for attempted gaps.
    let (attempted, succeeded) =
        store.all_records().into_iter().fold((0, 0), |(attempted, succeeded), record| {
            match (record.research_attempted, record.research_successful) {
                (true, true) => (attempted + 1, succeeded + 1),
                (true, false) => (attempted + 1, succeeded),
                (false, _) => (attempted, succeeded),
            }
        });

    let summary = [
        format!(" Gap store: {}\n", store_path.display()),
        format!(" Total gaps tracked: {}\n", store.len()),
        format!(" Gaps seen in 3+ projects: {}\n", seen_in_3),
        format!(" Gaps seen in 5+ projects: {}\n", seen_in_5),
        format!(" Gaps researched: {}\n", attempted),
        format!(" Research successful: {}\n", succeeded),
    ];
    report.push_str(&summary.concat());

    // Show top gaps ready for research
    let ready = store.get_research_candidates(3);
    if !ready.is_empty() {
        report.push_str("\n Top gaps ready for research:\n");
        let listing: String = ready
            .iter()
            .take(5)
            .map(|gap| format!(" - {} (seen in {} projects)\n", gap.topic, gap.project_count))
            .collect();
        report.push_str(&listing);
    }

    Ok(report)
}