stemedb/crates/stemedb-api/src/handlers/assert.rs
jordan 02ecac9a07 fix: merge upstream 10 commits, fix DashMap deadlock, deterministic sim ingestion
Merged 10 upstream commits (MemTable, read-your-writes tests, feed endpoint,
security hardening, signed assertions, source registry, dashboard enhancements)
and fixed all test failures across the full workspace (2656/2656 passing).

Key fixes:
- fix(cluster): DashMap deadlock in swim.rs suspect_node/fail_node/alive_node
  - DashMap::get_mut RefMut + iter() on same map = non-reentrant write lock deadlock
  - Fix: extract clone in scoped block to drop RefMut before calling update_node_gauges()
  - 6 previously-hanging SWIM tests now pass in <2s
- fix(sim): replace background-task+polling ingestion with synchronous process_pending()
  - smoke_high_volume_simulation was CPU-starved under 2656 parallel tests
  - Removed ingestor.start() + wait_until_ingested() pattern throughout sim
  - All arena functions now call ingestor.process_pending() directly (deterministic)
- fix(test): v2 signature helper used wrong hash (rkyv vs canonical compute_content_hash_v2)
- fix(test): quota test signed "test" but v1 requires "subject:predicate" format
- fix(test): http_validation now accepts 400 for valid-format-but-invalid-crypto hex
- fix(test): scale_adaptive micro tier assertions updated (auto_promote upstream change)
- config: add nextest.toml with slow-timeout for background-task-tests group

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-20 20:27:32 -07:00

185 lines
6.8 KiB
Rust

//! Handler for creating assertions.
use axum::{extract::State, http::StatusCode, Json};
use tracing::instrument;
use crate::{
dto::{CreateAssertionRequest, CreateResponse, ErrorResponse, SignatureDto},
error::{ApiError, Result},
hex,
state::AppState,
};
use stemedb_storage::MemTableEntry;
use stemedb_core::limits::MAX_NARRATIVE_LEN;
use stemedb_core::types::{
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
};
use stemedb_ingest::worker::serialize_assertion;
/// Create a new assertion in the knowledge graph.
///
/// This endpoint accepts an assertion DTO, validates confidence bounds (0.0-1.0),
/// converts hex-encoded fields to binary, serializes the assertion to WAL format,
/// and appends it to the journal. Returns the content-addressed BLAKE3 hash.
///
/// # Validation
/// - Confidence must be between 0.0 and 1.0
/// - At least one signature is required
/// - All hex fields must have correct lengths (32 bytes for hashes, 64 bytes for signatures)
// NOTE: the doc comment above is picked up by `#[utoipa::path]` for the generated
// OpenAPI summary/description, so edits to it change the published API docs.
#[utoipa::path(
    post,
    path = "/v1/assert",
    request_body = CreateAssertionRequest,
    responses(
        (status = 201, description = "Assertion created successfully", body = CreateResponse),
        (status = 400, description = "Invalid request", body = ErrorResponse),
        (status = 500, description = "Internal server error", body = ErrorResponse)
    ),
    tag = "assertions"
)]
#[instrument(skip(state), fields(subject = %req.subject, predicate = %req.predicate))]
pub async fn create_assertion(
    State(state): State<AppState>,
    Json(req): Json<CreateAssertionRequest>,
) -> Result<(StatusCode, Json<CreateResponse>)> {
    // Field-level validation and hex decoding happen during DTO conversion.
    let assertion = dto_to_assertion(req)?;

    // Signatures are checked up front: a record with a bad signature must never
    // reach the WAL, where it would become a poison record for the IngestWorker.
    if let Err(err) = stemedb_core::signing::verify_assertion_signatures(&assertion) {
        metrics::counter!("stemedb_assertions_rejected_total", "reason" => "invalid_signature")
            .increment(1);
        return Err(ApiError::InvalidRequest(format!(
            "Signature verification failed: {}",
            err
        )));
    }

    // Same subject check the IngestWorker applies (no null-byte separator allowed).
    if let Err(err) = stemedb_storage::key_codec::validate_subject(&assertion.subject) {
        return Err(ApiError::InvalidRequest(format!("Invalid subject: {}", err)));
    }

    // WAL record: assertion bytes prefixed with the record type header.
    let wal_record = serialize_assertion(&assertion).map_err(|err| {
        ApiError::Serialization(format!("Failed to serialize assertion: {}", err))
    })?;

    // Content address: BLAKE3 over the plain serialized assertion. The ingest
    // worker derives the hash the same way, so the two must stay in sync.
    let canonical_bytes = stemedb_core::serde::serialize(&assertion).map_err(|err| {
        ApiError::Serialization(format!("Failed to serialize for hash: {}", err))
    })?;
    let digest = blake3::hash(&canonical_bytes);

    // Durability first: the record goes through the group commit buffer into the WAL.
    let wal_offset = state.commit_buffer.append(wal_record).await?;

    // Only after the WAL append succeeds is the assertion published to the
    // MemTable, giving read-your-writes without weakening durability.
    state.memtable.insert(MemTableEntry::new(assertion, *digest.as_bytes(), wal_offset));

    metrics::counter!("stemedb_assertions_ingested_total").increment(1);

    let body = CreateResponse { hash: digest.to_hex().to_string(), status: "created".to_string() };
    Ok((StatusCode::CREATED, Json(body)))
}
/// Convert a `CreateAssertionRequest` DTO into the internal `Assertion` type.
///
/// Checks are applied in this order, and the first failure is returned:
/// 1. `confidence` must lie in `0.0..=1.0`; NaN is rejected.
/// 2. `parent_hash`, `source_hash`, `visual_hash`, `epoch` and every signature
///    must decode from hex via the shared `hex` module.
/// 3. At least one signature must be present.
/// 4. `narrative`, when present, must not exceed `MAX_NARRATIVE_LEN` bytes.
/// 5. `timestamp`, when present, must be non-zero.
///
/// Defaults applied for omitted fields: `source_class` → `SourceClass::Expert`,
/// `lifecycle` → `LifecycleStage::Proposed`, `timestamp` → current server time in
/// seconds since the Unix epoch, `hlc_timestamp` → `HlcTimestamp::default()`.
///
/// # Errors
/// Returns `ApiError::InvalidRequest` for the checks performed directly in this
/// function; errors from the `hex` decoding helpers are propagated unchanged.
fn dto_to_assertion(req: CreateAssertionRequest) -> Result<Assertion> {
    // Validate confidence bounds (0.0 to 1.0).
    // A range check is used instead of `< 0.0 || > 1.0` because NaN compares
    // false against both bounds and would otherwise slip through as "valid".
    if !(0.0..=1.0).contains(&req.confidence) {
        return Err(ApiError::InvalidRequest(format!(
            "Confidence must be between 0.0 and 1.0, got {}",
            req.confidence
        )));
    }

    // Decode hex fields using shared hex module
    let parent_hash = req.parent_hash.map(|h| hex::decode_hash_32(&h)).transpose()?;
    let source_hash = hex::decode_hash_32(&req.source_hash)?;
    let visual_hash = req.visual_hash.map(|h| hex::decode_hash_8(&h)).transpose()?;
    let epoch = req.epoch.map(|e| hex::decode_hash_32(&e)).transpose()?;

    // Convert signatures; the first undecodable entry aborts the whole request.
    let signatures =
        req.signatures.into_iter().map(decode_signature).collect::<Result<Vec<_>>>()?;

    // Validate signatures are not empty
    if signatures.is_empty() {
        return Err(ApiError::InvalidRequest("At least one signature is required".to_string()));
    }

    // Validate narrative length (measured in bytes, not characters)
    if let Some(ref narrative) = req.narrative {
        if narrative.len() > MAX_NARRATIVE_LEN {
            return Err(ApiError::InvalidRequest(format!(
                "narrative exceeds {} bytes (got {})",
                MAX_NARRATIVE_LEN,
                narrative.len()
            )));
        }
    }

    // Use provided timestamp or generate a new one
    // IMPORTANT: For v2 signatures, the timestamp must match what was signed
    let timestamp = match req.timestamp {
        Some(0) => {
            return Err(ApiError::InvalidRequest(
                "timestamp must be a valid Unix epoch (> 0). \
                 Omit the field to use server time."
                    .to_string(),
            ));
        }
        Some(t) => t,
        // NOTE(review): if the system clock reads earlier than the Unix epoch this
        // falls back to 0 — the same value rejected above when client-supplied.
        // Consider surfacing a server error here instead.
        None => std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0),
    };

    // Use provided HLC timestamp or default
    // IMPORTANT: For v2 signatures, the HLC timestamp must match what was signed
    let hlc_timestamp = match req.hlc_timestamp {
        Some(dto) => {
            let node_id = hex::decode_node_id(&dto.node_id)?;
            HlcTimestamp { time_ntp64: dto.time_ntp64, node_id }
        }
        None => HlcTimestamp::default(),
    };

    Ok(Assertion {
        subject: req.subject,
        predicate: req.predicate,
        object: ObjectValue::from(req.object),
        parent_hash,
        source_hash,
        source_class: req.source_class.map(Into::into).unwrap_or(SourceClass::Expert),
        visual_hash,
        epoch,
        source_metadata: req.source_metadata.map(|s| s.into_bytes()),
        narrative: req.narrative,
        lifecycle: req.lifecycle.map(Into::into).unwrap_or(LifecycleStage::Proposed),
        signatures,
        confidence: req.confidence,
        timestamp,
        hlc_timestamp,
        vector: req.vector,
    })
}
/// Turn a hex-encoded `SignatureDto` into a binary `SignatureEntry`.
///
/// The agent id is decoded first, then the signature bytes; a decoding failure
/// in either is returned to the caller. A missing `version` defaults to 1.
fn decode_signature(dto: SignatureDto) -> Result<SignatureEntry> {
    // Field initialisers run top to bottom, so agent_id errors take precedence.
    Ok(SignatureEntry {
        agent_id: hex::decode_hash_32(&dto.agent_id)?,
        signature: hex::decode_signature(&dto.signature)?,
        timestamp: dto.timestamp,
        version: dto.version.unwrap_or(1),
    })
}