stemedb/crates/stemedb-api/src/main.rs
jml bb0c33f8d3 fix(api): enable querying of CLI-created community corpus items
## Problem
CLI-created community corpus items (tier 3) were stored correctly but
invisible via API queries. Two issues blocked discoverability:

1. **Prefix mismatch**: API hardcoded 'community://pattern/' for
   aggregated patterns, but CLI creates 'community://rust/http/...' URIs
2. **Query parameter parsing**: Axum's default parser doesn't support
   bracket notation (?sources[]=value) used by the dashboard

Result: 0/22 CLI-created items were queryable.

## Solution

### Fix 1: Broaden Community Prefix
- Changed: 'community://pattern/' → 'community://' in corpus handler
- Impact: Now matches both aggregated patterns AND CLI-created items
- Backward compatible: Broader prefix includes narrower results

### Fix 2: Add QsQuery Extractor
- Added: serde_qs dependency + custom QsQuery extractor
- Supports: Bracket notation for array parameters (?sources[]=a&sources[]=b)
- Compatible: Works with JavaScript URLSearchParams standard
- Tested: 3 new unit tests for extractor behavior

## Verification
- All 22 CLI-created community items now queryable (was 0)
- Source filtering works: community (22), RFC (2), vendor (5)
- Multi-source queries work: ?sources[]=community&sources[]=rfc → 24
- All 89 API tests pass + 3 new extractor tests
- Clippy clean (0 warnings)
- No regressions in existing functionality

## Files Changed
- crates/stemedb-api/Cargo.toml: Add serde_qs dependency
- crates/stemedb-api/src/extractors.rs: New QsQuery extractor (117 lines)
- crates/stemedb-api/src/handlers/aphoria/corpus.rs: Use QsQuery, broaden prefix
- crates/stemedb-api/src/lib.rs: Export extractors module

Also includes: Scale-adaptive thresholds, wiki corpus extraction,
documentation updates, and dashboard UI improvements from prior work.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-09 15:54:35 +00:00

184 lines
6.4 KiB
Rust

//! Episteme (StemeDB) API server binary.
//!
//! This starts the HTTP API server with the following components:
//! 1. Opens the Journal (WAL) twice: once for writes (via GroupCommitBuffer) and once for reads
//! 2. Opens HybridStore (KV storage)
//! 3. Spawns IngestWorker background task to tail WAL
//! 4. Starts axum HTTP server with OpenAPI documentation
//! 5. Enables The Meter (economic throttling) unless turned off via `STEMEDB_METER_ENABLED`
//!
//! # Environment Variables
//!
//! | Variable | Default | Description |
//! |----------|---------|-------------|
//! | `STEMEDB_WAL_DIR` | `data/wal` | Directory for WAL files |
//! | `STEMEDB_DB_DIR` | `data/db` | Directory for KV store |
//! | `STEMEDB_BIND_ADDR` | `127.0.0.1:18180` | HTTP server bind address |
//! | `STEMEDB_METER_ENABLED` | `true` | Enable economic throttling |
//! | `STEMEDB_CORPUS_DB_DIR` | (none) | Optional: Directory for Aphoria corpus DB |
use std::path::PathBuf;
use std::sync::Arc;
use tracing::{error, info};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use axum::Extension;
use metrics_exporter_prometheus::PrometheusBuilder;
use stemedb_api::{create_router, create_router_with_meter, AppState};
use stemedb_ingest::worker::IngestWorker;
use stemedb_storage::HybridStore;
use stemedb_wal::Journal;
/// Server configuration.
///
/// Built by [`Config::from_env`], which starts from [`Config::default`] and
/// applies any `STEMEDB_*` environment variable that is set.
#[derive(Debug, Clone)]
struct Config {
    /// Directory for WAL files (`STEMEDB_WAL_DIR`).
    wal_dir: PathBuf,
    /// Directory for the KV store (`STEMEDB_DB_DIR`).
    db_dir: PathBuf,
    /// HTTP server bind address (`STEMEDB_BIND_ADDR`).
    bind_addr: String,
    /// Enable economic throttling, a.k.a. The Meter (`STEMEDB_METER_ENABLED`).
    meter_enabled: bool,
    /// Optional corpus database directory for the Aphoria corpus
    /// (`STEMEDB_CORPUS_DB_DIR`); `None` when the variable is unset.
    corpus_db_dir: Option<PathBuf>,
}

impl Default for Config {
    /// Values used for every setting whose environment variable is not set.
    fn default() -> Self {
        Self {
            wal_dir: PathBuf::from("data/wal"),
            db_dir: PathBuf::from("data/db"),
            bind_addr: "127.0.0.1:18180".to_string(),
            meter_enabled: true,
            corpus_db_dir: None,
        }
    }
}

impl Config {
    /// Load configuration from environment variables.
    ///
    /// Unset variables keep their [`Default`] value. Path-valued variables are
    /// read with `var_os` so that a path which is not valid Unicode is used
    /// as-is instead of being silently replaced by the default.
    fn from_env() -> Self {
        let mut config = Self::default();

        if let Some(wal_dir) = std::env::var_os("STEMEDB_WAL_DIR") {
            config.wal_dir = PathBuf::from(wal_dir);
        }
        if let Some(db_dir) = std::env::var_os("STEMEDB_DB_DIR") {
            config.db_dir = PathBuf::from(db_dir);
        }
        if let Ok(bind_addr) = std::env::var("STEMEDB_BIND_ADDR") {
            config.bind_addr = bind_addr;
        }
        if let Ok(raw) = std::env::var("STEMEDB_METER_ENABLED") {
            config.meter_enabled = Self::parse_meter_flag(&raw);
        }
        if let Some(corpus_db_dir) = std::env::var_os("STEMEDB_CORPUS_DB_DIR") {
            config.corpus_db_dir = Some(PathBuf::from(corpus_db_dir));
        }

        config
    }

    /// Interpret the raw `STEMEDB_METER_ENABLED` value.
    ///
    /// The meter stays enabled unless the value is `false` (any ASCII case)
    /// or `0`. Surrounding whitespace is ignored so that a stray space or a
    /// trailing `\r` from a CRLF env file does not leave the meter on when the
    /// operator asked for it to be off.
    fn parse_meter_flag(raw: &str) -> bool {
        let value = raw.trim();
        !(value.eq_ignore_ascii_case("false") || value == "0")
    }
}
/// Start the Episteme (StemeDB) API server.
///
/// Startup sequence: tracing, Prometheus recorder, configuration, storage
/// directories, write/read Journals, HybridStore(s), application state, the
/// background IngestWorker task, the router, and finally the HTTP listener.
///
/// # Errors
///
/// Returns an error if the Prometheus recorder cannot be installed, a data
/// directory cannot be created, a Journal or store fails to open, the bind
/// address cannot be bound, or the server terminates with an error.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize tracing; fall back to a debug filter for this crate and
    // tower_http when no usable filter is found in the environment.
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| "stemedb_api=debug,tower_http=debug".into());
    tracing_subscriber::registry()
        .with(env_filter)
        .with(tracing_subscriber::fmt::layer())
        .init();

    // Initialize Prometheus metrics recorder (must be done before any metrics are recorded)
    let prometheus_handle = PrometheusBuilder::new()
        .install_recorder()
        .map_err(|e| format!("Failed to install Prometheus recorder: {e}"))?;
    let prometheus_handle = Arc::new(prometheus_handle);
    info!("Prometheus metrics recorder initialized");

    let config = Config::from_env();
    info!("Starting Episteme (StemeDB) API server");
    info!(?config, "Configuration loaded");

    // Ensure directories exist. The bare io::Error does not name the path, so
    // add it here; otherwise a failure is just "Permission denied".
    std::fs::create_dir_all(&config.wal_dir)
        .map_err(|e| format!("Failed to create WAL directory {:?}: {e}", config.wal_dir))?;
    std::fs::create_dir_all(&config.db_dir)
        .map_err(|e| format!("Failed to create DB directory {:?}: {e}", config.db_dir))?;

    // Open write Journal (owned by GroupCommitBuffer)
    info!("Opening write Journal at {:?}", config.wal_dir);
    let write_journal = Journal::open(&config.wal_dir)?;

    // Open read Journal (for IngestWorker to tail)
    info!("Opening read Journal at {:?}", config.wal_dir);
    let read_journal = Journal::open(&config.wal_dir)?;

    info!("Opening HybridStore at {:?}", config.db_dir);
    let store = Arc::new(HybridStore::open(&config.db_dir)?);

    // Open optional corpus store (for Aphoria corpus)
    let corpus_store = if let Some(ref corpus_dir) = config.corpus_db_dir {
        // Ensure corpus directory exists
        std::fs::create_dir_all(corpus_dir)
            .map_err(|e| format!("Failed to create corpus directory {corpus_dir:?}: {e}"))?;
        info!("Opening corpus HybridStore at {:?}", corpus_dir);
        Some(Arc::new(HybridStore::open(corpus_dir)?))
    } else {
        info!("No separate corpus DB configured, using main store for corpus queries");
        None
    };

    // Create application state (initializes GroupCommitBuffer)
    let state = AppState::new(write_journal, read_journal, Arc::clone(&store), corpus_store);

    // Spawn IngestWorker background task.
    // NOTE(review): `state.journal` is presumably the read journal passed above — confirm.
    info!("Spawning IngestWorker background task");
    let worker_journal = state.journal.clone();
    let worker_store = store;
    let worker_flush_notify = Arc::clone(&state.flush_notify);
    // The task is detached: its JoinHandle is dropped.
    // NOTE(review): if the worker cannot be created the error is only logged
    // and the HTTP server still starts without ingestion — confirm this is intended.
    tokio::spawn(async move {
        let worker_result = IngestWorker::new(worker_journal, worker_store).await;
        match worker_result {
            Ok(worker) => {
                // Wire up flush notification so IngestWorker reacts immediately to new data
                let mut worker = worker.with_flush_notify(worker_flush_notify);
                info!("IngestWorker started with flush notification, entering run loop");
                worker.run().await;
            }
            Err(e) => {
                error!("Failed to create IngestWorker: {:?}", e);
            }
        }
    });

    // Build router (with or without metering)
    let app = if config.meter_enabled {
        info!("The Meter enabled: economic throttling active (10K tokens/agent/hour)");
        create_router_with_meter(state)
    } else {
        info!("The Meter disabled: no quota enforcement");
        create_router(state)
    };

    // Attach the Prometheus handle as a request extension. No route is added
    // here. NOTE(review): the /metrics route is presumably registered inside
    // create_router / create_router_with_meter — confirm.
    let app = app.layer(Extension(prometheus_handle));

    // Start server. Include the address in the error: the OS error alone
    // ("Address already in use") does not say which address failed.
    let listener = tokio::net::TcpListener::bind(&config.bind_addr)
        .await
        .map_err(|e| format!("Failed to bind {}: {e}", config.bind_addr))?;
    info!("API server listening on {}", config.bind_addr);
    info!("Swagger UI available at http://{}/swagger-ui", config.bind_addr);

    axum::serve(listener, app).await?;
    Ok(())
}