stemedb/crates/stemedb-core/src/serde.rs
jordan cde30b9213 chore: apply rustfmt formatting across API handlers and core types
Reformats import blocks, function signatures, and expression line wrapping
in stemedb-api handlers, stemedb-core serde/source_record, and serde_helpers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 16:43:45 -07:00

627 lines
21 KiB
Rust

//! Zero-copy serialization utilities for StemeDB types.
//!
//! This module provides the canonical serialization/deserialization functions
//! for all rkyv-enabled types in the system. **All production code must use
//! these helpers instead of raw `AllocSerializer` usage.**
//!
//! # Design Philosophy
//!
//! Following the "Deep Module" principle, these functions hide the complexity
//! of rkyv's serialization machinery behind simple interfaces.
//!
//! # Example
//!
//! ```
//! use stemedb_core::serde::{serialize, deserialize};
//! use stemedb_core::types::Vote;
//!
//! let vote = Vote {
//! assertion_hash: [0u8; 32],
//! agent_id: [1u8; 32],
//! weight: 0.8,
//! signature: [2u8; 64],
//! timestamp: 1000,
//! source_url: None,
//! observed_context: None,
//! };
//!
//! // Serialize
//! let bytes = serialize(&vote).expect("serialization failed");
//!
//! // Deserialize
//! let recovered: Vote = deserialize(&bytes).expect("deserialization failed");
//! assert_eq!(vote, recovered);
//! ```
//!
//! # Performance
//!
//! Uses a 4096-byte scratch buffer which is sufficient for most assertions.
//! Larger payloads will cause reallocation but still work correctly.
use rkyv::ser::serializers::AllocSerializer;
use rkyv::ser::Serializer;
use rkyv::validation::validators::DefaultValidator;
use rkyv::{Archive, CheckBytes, Deserialize, Serialize};
use thiserror::Error;
use crate::types::{
Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
SourceRecord, SourceStatus,
};
/// Default scratch buffer size, in bytes, for serialization.
///
/// This value is the const parameter given to `AllocSerializer` by
/// [`serialize`], so every call to that helper uses a serializer configured
/// with it.
///
/// 4 KiB is sufficient for most assertions. Larger payloads will trigger
/// reallocation but the operation will still succeed.
pub const DEFAULT_SCRATCH_SIZE: usize = 4096;
/// Errors that can occur during serialization/deserialization.
///
/// Both variants carry the underlying failure rendered as a `String`; the
/// helpers in this module build them from `to_string()` on the rkyv error,
/// so the original error value is not retained.
#[derive(Debug, Error)]
pub enum SerdeError {
/// Failed to serialize the value. The payload is the serializer's error
/// message.
#[error("Serialization error: {0}")]
Serialization(String),
/// Failed to validate or deserialize the archived data. The payload is the
/// validation or deserialization error message.
#[error("Deserialization error: {0}")]
Deserialization(String),
}
/// Encode `value` into an owned byte vector with rkyv.
///
/// This is the canonical serialization entry point for StemeDB types;
/// production code should call it rather than driving `AllocSerializer`
/// by hand.
///
/// The serializer is created with [`DEFAULT_SCRATCH_SIZE`] bytes of scratch
/// space, and the finished buffer is copied into a plain `Vec<u8>`.
///
/// # Type Requirements
///
/// `T` must implement rkyv's `Serialize` for
/// `AllocSerializer<DEFAULT_SCRATCH_SIZE>`, which all StemeDB core types do.
///
/// # Errors
///
/// Returns [`SerdeError::Serialization`] carrying the serializer's error
/// message when the value cannot be written.
///
/// # Example
///
/// ```
/// use stemedb_core::serde::serialize;
/// use stemedb_core::types::Assertion;
/// # use stemedb_core::types::{ObjectValue, LifecycleStage, SourceClass};
///
/// let assertion = Assertion {
///     subject: "test".to_string(),
///     predicate: "is".to_string(),
///     object: ObjectValue::Boolean(true),
///     parent_hash: None,
///     source_hash: [0u8; 32],
///     source_class: SourceClass::Expert,
///     visual_hash: None,
///     epoch: None,
///     source_metadata: None,
///     narrative: None,
///     lifecycle: LifecycleStage::Proposed,
///     signatures: vec![],
///     confidence: 1.0,
///     timestamp: 0,
///     hlc_timestamp: stemedb_core::types::HlcTimestamp::default(),
///     vector: None,
/// };
///
/// let bytes = serialize(&assertion).expect("serialize");
/// assert!(!bytes.is_empty());
/// ```
pub fn serialize<T>(value: &T) -> Result<Vec<u8>, SerdeError>
where
    T: Serialize<AllocSerializer<DEFAULT_SCRATCH_SIZE>>,
{
    let mut writer = AllocSerializer::<DEFAULT_SCRATCH_SIZE>::default();

    // Bail out early with the stringified serializer error.
    if let Err(err) = writer.serialize_value(value) {
        return Err(SerdeError::Serialization(err.to_string()));
    }

    // Unwrap the composite serializer down to its output buffer, then copy
    // the bytes into an ordinary `Vec<u8>` for the caller.
    let archive_buffer = writer.into_serializer().into_inner();
    Ok(archive_buffer.to_vec())
}
/// Deserialize bytes back to an owned value using rkyv.
///
/// This is the canonical way to deserialize StemeDB types. All production
/// code should use this instead of raw `check_archived_root`.
///
/// # Type Requirements
///
/// The type `T` must implement rkyv's `Archive` trait, and its archived form
/// must implement `CheckBytes` (for validation) and `Deserialize`.
///
/// # Validation
///
/// The archived data is validated with `check_archived_root` before it is
/// deserialized, so malformed or untrusted input yields an error instead of
/// being interpreted blindly.
///
/// # Alignment
///
/// rkyv validation rejects buffers that are not sufficiently aligned, and
/// [`serialize`] returns a plain `Vec<u8>` that callers may store and later
/// hand back as an arbitrary sub-slice. To keep such input readable, a failed
/// attempt on a slice that is not 16-byte aligned is retried once on an
/// aligned copy of the bytes. Aligned input never pays for the copy.
///
/// # Errors
///
/// Returns [`SerdeError::Deserialization`] when the bytes do not validate as
/// an archived `T`. If the aligned retry ran, the error describes that retry.
///
/// # Example
///
/// ```
/// use stemedb_core::serde::{serialize, deserialize};
/// use stemedb_core::types::Vote;
///
/// let vote = Vote {
///     assertion_hash: [0u8; 32],
///     agent_id: [1u8; 32],
///     weight: 0.8,
///     signature: [2u8; 64],
///     timestamp: 1000,
///     source_url: None,
///     observed_context: None,
/// };
///
/// let bytes = serialize(&vote).expect("serialize");
/// let recovered: Vote = deserialize(&bytes).expect("deserialize");
/// assert_eq!(vote, recovered);
/// ```
pub fn deserialize<T>(data: &[u8]) -> Result<T, SerdeError>
where
    T: Archive,
    T::Archived: for<'a> CheckBytes<DefaultValidator<'a>> + Deserialize<T, rkyv::Infallible>,
{
    // Alignment provided by `rkyv::AlignedVec`, the buffer type the
    // serializer writes into.
    const ARCHIVE_ALIGNMENT: usize = 16;

    // Validate `bytes` as an archived `U`, then build the owned value.
    fn validate_and_deserialize<U>(bytes: &[u8]) -> Result<U, SerdeError>
    where
        U: Archive,
        U::Archived: for<'a> CheckBytes<DefaultValidator<'a>> + Deserialize<U, rkyv::Infallible>,
    {
        let archived = rkyv::check_archived_root::<U>(bytes)
            .map_err(|e| SerdeError::Deserialization(e.to_string()))?;
        archived
            .deserialize(&mut rkyv::Infallible)
            .map_err(|e| SerdeError::Deserialization(e.to_string()))
    }

    // Fast path: validate the caller's buffer in place.
    let first_failure = match validate_and_deserialize::<T>(data) {
        Ok(value) => return Ok(value),
        Err(err) => err,
    };

    // An aligned (or empty) buffer cannot have failed because of alignment,
    // so the first error is the real answer.
    let already_aligned = (data.as_ptr() as usize) % ARCHIVE_ALIGNMENT == 0;
    if data.is_empty() || already_aligned {
        return Err(first_failure);
    }

    // Slow path: the slice may be valid but under-aligned. Retry on an
    // aligned copy; archive offsets are relative, so the copy is equivalent.
    let mut aligned = rkyv::AlignedVec::with_capacity(data.len());
    aligned.extend_from_slice(data);
    validate_and_deserialize::<T>(aligned.as_slice())
}
// ============================================================================
// Legacy Assertion (pre-narrative schema)
// ============================================================================
/// Assertion struct matching the pre-narrative rkyv layout.
///
/// The `narrative: Option<String>` field was added between `source_metadata`
/// and `lifecycle`. rkyv doesn't support schema evolution, so data serialized
/// before that change needs this struct to deserialize correctly.
///
/// The struct is private to this module; it is only reached through
/// `deserialize_assertion_compat` and the `From<LegacyAssertion>` conversion.
///
/// NOTE(review): the field list (names, types, order) is meant to mirror the
/// old `Assertion` exactly — presumably any reordering would change the
/// archived layout and break reading of old data; confirm before editing.
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
#[archive(check_bytes)]
struct LegacyAssertion {
pub subject: String,
pub predicate: String,
pub object: ObjectValue,
pub parent_hash: Option<[u8; 32]>,
pub source_hash: [u8; 32],
pub source_class: SourceClass,
pub visual_hash: Option<[u8; 8]>,
pub epoch: Option<[u8; 32]>,
pub source_metadata: Option<Vec<u8>>,
// `narrative: Option<String>` sits here in the current `Assertion`; it did
// NOT exist in this version.
pub lifecycle: LifecycleStage,
pub signatures: Vec<SignatureEntry>,
pub confidence: f32,
pub timestamp: u64,
pub hlc_timestamp: HlcTimestamp,
pub vector: Option<Vec<f32>>,
}
impl From<LegacyAssertion> for Assertion {
fn from(legacy: LegacyAssertion) -> Self {
Self {
subject: legacy.subject,
predicate: legacy.predicate,
object: legacy.object,
parent_hash: legacy.parent_hash,
source_hash: legacy.source_hash,
source_class: legacy.source_class,
visual_hash: legacy.visual_hash,
epoch: legacy.epoch,
source_metadata: legacy.source_metadata,
narrative: None,
lifecycle: legacy.lifecycle,
signatures: legacy.signatures,
confidence: legacy.confidence,
timestamp: legacy.timestamp,
hlc_timestamp: legacy.hlc_timestamp,
vector: legacy.vector,
}
}
}
/// Deserialize an assertion with backward compatibility.
///
/// Tries the current `Assertion` layout first. If that fails, tries the
/// legacy layout (before `narrative` field was added) and converts.
///
/// This allows the system to read assertions written before schema changes
/// without requiring a data migration.
pub fn deserialize_assertion_compat(data: &[u8]) -> Result<Assertion, SerdeError> {
// Try current format first (fast path for new data)
if let Ok(assertion) = deserialize::<Assertion>(data) {
return Ok(assertion);
}
// Fallback: try legacy format (no narrative field)
let legacy: LegacyAssertion = deserialize(data)?;
Ok(legacy.into())
}
// ============================================================================
// Legacy SourceRecord (pre-content schema)
// ============================================================================
/// SourceRecord struct matching the pre-content rkyv layout.
///
/// The `content: Option<String>` field was added after `notes`.
/// rkyv doesn't support schema evolution, so data serialized
/// before that change needs this struct to deserialize correctly.
///
/// The struct is private to this module; it is only reached through
/// `deserialize_source_record_compat` and the `From<LegacySourceRecord>`
/// conversion.
///
/// NOTE(review): the field list (names, types, order) is meant to mirror the
/// old `SourceRecord` exactly — presumably any reordering would change the
/// archived layout and break reading of old data; confirm before editing.
#[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
#[archive(check_bytes)]
struct LegacySourceRecord {
pub hash: [u8; 32],
pub label: String,
pub url: Option<String>,
pub tier: u8,
pub status: SourceStatus,
pub created_at: u64,
pub updated_at: u64,
pub notes: Option<String>,
// `content: Option<String>` follows `notes` in the current `SourceRecord`;
// it did NOT exist in this version.
}
impl From<LegacySourceRecord> for SourceRecord {
fn from(legacy: LegacySourceRecord) -> Self {
Self {
hash: legacy.hash,
label: legacy.label,
url: legacy.url,
tier: legacy.tier,
status: legacy.status,
created_at: legacy.created_at,
updated_at: legacy.updated_at,
notes: legacy.notes,
content: None,
}
}
}
/// Deserialize a source record with backward compatibility.
///
/// Tries the current `SourceRecord` layout first. If that fails, tries the
/// legacy layout (before `content` field was added) and converts.
pub fn deserialize_source_record_compat(data: &[u8]) -> Result<SourceRecord, SerdeError> {
// Try current format first (fast path for new data)
if let Ok(record) = deserialize::<SourceRecord>(data) {
return Ok(record);
}
// Fallback: try legacy format (no content field)
let legacy: LegacySourceRecord = deserialize(data)?;
Ok(legacy.into())
}
#[cfg(test)]
mod tests {
    //! Round-trip and backward-compatibility tests for the serde helpers.
    //!
    //! Fixtures are built from small helper functions plus struct-update
    //! syntax so each test states only the fields it cares about.

    use super::*;
    use crate::types::{
        Assertion, Epoch, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass,
        Vote,
    };

    /// URL used by the vote provenance tests.
    const ARTICLE_URL: &str = "https://example.com/article";
    /// Observed-context payload used by the vote provenance tests.
    const OBSERVED_CONTEXT: &[u8] = b"The study found that...";

    /// Serialize `value` and deserialize the resulting bytes back into `T`.
    fn round_trip<T>(value: &T) -> T
    where
        T: Serialize<AllocSerializer<DEFAULT_SCRATCH_SIZE>>,
        T::Archived: for<'a> CheckBytes<DefaultValidator<'a>> + Deserialize<T, rkyv::Infallible>,
    {
        let bytes = serialize(value).expect("serialize");
        deserialize(&bytes).expect("deserialize")
    }

    /// A vote without provenance; tests override the optional fields.
    fn base_vote() -> Vote {
        Vote {
            assertion_hash: [1u8; 32],
            agent_id: [2u8; 32],
            weight: 0.8,
            signature: [3u8; 64],
            timestamp: 123456789,
            source_url: None,
            observed_context: None,
        }
    }

    /// A minimal assertion with every optional field unset.
    fn base_assertion() -> Assertion {
        Assertion {
            subject: "test".to_string(),
            predicate: "test".to_string(),
            object: ObjectValue::Boolean(true),
            parent_hash: None,
            source_hash: [0u8; 32],
            source_class: SourceClass::Expert,
            visual_hash: None,
            epoch: None,
            source_metadata: None,
            narrative: None,
            lifecycle: LifecycleStage::Proposed,
            signatures: vec![],
            confidence: 1.0,
            timestamp: 0,
            hlc_timestamp: HlcTimestamp::default(),
            vector: None,
        }
    }

    /// A signature entry with fixed agent/signature bytes and the given time.
    fn signature_at(timestamp: u64) -> SignatureEntry {
        SignatureEntry { agent_id: [2u8; 32], signature: [3u8; 64], timestamp, version: 1 }
    }

    #[test]
    fn test_serialize_deserialize_assertion() {
        let assertion = Assertion {
            subject: "Tesla_Inc".to_string(),
            predicate: "has_revenue".to_string(),
            object: ObjectValue::Number(96.7),
            source_class: SourceClass::Clinical,
            visual_hash: Some([1u8; 8]),
            epoch: Some([2u8; 32]),
            lifecycle: LifecycleStage::Approved,
            signatures: vec![signature_at(123456789)],
            confidence: 0.95,
            timestamp: 123456789,
            vector: Some(vec![0.1, 0.2, 0.3]),
            ..base_assertion()
        };

        assert_eq!(assertion, round_trip(&assertion));
    }

    #[test]
    fn test_serialize_deserialize_vote() {
        let vote = base_vote();

        assert_eq!(vote, round_trip(&vote));
    }

    #[test]
    fn test_serialize_deserialize_vote_with_provenance() {
        let vote = Vote {
            source_url: Some(ARTICLE_URL.to_string()),
            observed_context: Some(OBSERVED_CONTEXT.to_vec()),
            ..base_vote()
        };

        let recovered = round_trip(&vote);

        assert_eq!(vote, recovered);
        assert_eq!(recovered.source_url, Some(ARTICLE_URL.to_string()));
        assert_eq!(recovered.observed_context, Some(OBSERVED_CONTEXT.to_vec()));
    }

    #[test]
    fn test_serialize_deserialize_vote_with_url_only() {
        let vote = Vote { source_url: Some(ARTICLE_URL.to_string()), ..base_vote() };

        let recovered = round_trip(&vote);

        assert_eq!(vote, recovered);
        assert_eq!(recovered.source_url, Some(ARTICLE_URL.to_string()));
        assert!(recovered.observed_context.is_none());
    }

    #[test]
    fn test_serialize_deserialize_vote_with_context_only() {
        let vote = Vote { observed_context: Some(OBSERVED_CONTEXT.to_vec()), ..base_vote() };

        let recovered = round_trip(&vote);

        assert_eq!(vote, recovered);
        assert!(recovered.source_url.is_none());
        assert_eq!(recovered.observed_context, Some(OBSERVED_CONTEXT.to_vec()));
    }

    #[test]
    fn test_serialize_deserialize_epoch() {
        let epoch = Epoch {
            id: [1u8; 32],
            name: "Test Epoch".to_string(),
            supersedes: None,
            supersession_type: None,
            start_timestamp: 1000,
            end_timestamp: None,
        };

        assert_eq!(epoch, round_trip(&epoch));
    }

    #[test]
    fn test_deserialize_invalid_data() {
        let garbage = vec![0u8, 1, 2, 3, 4, 5];
        let result: Result<Vote, _> = deserialize(&garbage);
        assert!(result.is_err());
    }

    #[test]
    fn test_serialize_empty_assertion() {
        let assertion = Assertion {
            subject: String::new(),
            predicate: String::new(),
            object: ObjectValue::Boolean(false),
            confidence: 0.0,
            ..base_assertion()
        };

        assert_eq!(assertion, round_trip(&assertion));
    }

    #[test]
    fn test_serialize_deserialize_assertion_with_metadata() {
        let metadata = r#"{"journal":"Nature","DOI":"10.1038/xyz","sample_size":1234}"#;
        let assertion = Assertion {
            subject: "Semaglutide".to_string(),
            predicate: "muscle_effect".to_string(),
            object: ObjectValue::Text("significant_loss".to_string()),
            source_hash: [1u8; 32],
            source_class: SourceClass::Clinical,
            source_metadata: Some(metadata.as_bytes().to_vec()),
            confidence: 0.85,
            timestamp: 1700000000,
            ..base_assertion()
        };

        let recovered = round_trip(&assertion);

        assert_eq!(assertion, recovered);
        assert_eq!(recovered.source_metadata, Some(metadata.as_bytes().to_vec()));
    }

    #[test]
    fn test_serialize_deserialize_assertion_without_metadata() {
        let assertion = base_assertion();

        let recovered = round_trip(&assertion);

        assert_eq!(assertion, recovered);
        assert!(recovered.source_metadata.is_none());
    }

    #[test]
    fn test_legacy_assertion_compat_deserialize() {
        // Simulate data serialized with the pre-narrative struct layout.
        let legacy = LegacyAssertion {
            subject: "Semaglutide".to_string(),
            predicate: "reduces_weight".to_string(),
            object: ObjectValue::Text("significant".to_string()),
            parent_hash: None,
            source_hash: [1u8; 32],
            source_class: SourceClass::Clinical,
            visual_hash: None,
            epoch: None,
            source_metadata: Some(b"{}".to_vec()),
            lifecycle: LifecycleStage::Approved,
            signatures: vec![signature_at(1000)],
            confidence: 0.95,
            timestamp: 1700000000,
            hlc_timestamp: HlcTimestamp::default(),
            vector: Some(vec![0.1, 0.2]),
        };
        let bytes = serialize(&legacy).expect("serialize legacy");

        // Current format should fail (different layout).
        assert!(deserialize::<Assertion>(&bytes).is_err());

        // Compat function should succeed.
        let recovered =
            deserialize_assertion_compat(&bytes).expect("compat deserialize should succeed");
        assert_eq!(recovered.subject, "Semaglutide");
        assert_eq!(recovered.predicate, "reduces_weight");
        assert_eq!(recovered.confidence, 0.95);
        assert_eq!(recovered.signatures.len(), 1);
        assert!(recovered.narrative.is_none()); // Wasn't in legacy
        assert!(recovered.source_metadata.is_some());
        assert_eq!(recovered.timestamp, 1700000000);

        // Every field, not just the spot checks above, must survive the upgrade.
        assert_eq!(recovered, Assertion::from(legacy));
    }

    #[test]
    fn test_current_assertion_also_works_via_compat() {
        // Current-format assertions should work via the compat path too.
        let assertion = Assertion {
            predicate: "works".to_string(),
            narrative: Some("This is a narrative.".to_string()),
            ..base_assertion()
        };
        let bytes = serialize(&assertion).expect("serialize");

        let recovered = deserialize_assertion_compat(&bytes)
            .expect("compat deserialize should succeed for current format");

        assert_eq!(recovered, assertion);
        assert_eq!(recovered.narrative, Some("This is a narrative.".to_string()));
    }

    #[test]
    fn test_legacy_source_record_compat_deserialize() {
        // Simulate data serialized with the pre-content struct layout.
        let legacy = LegacySourceRecord {
            hash: [42u8; 32],
            label: "RFC 7519".to_string(),
            url: Some("https://tools.ietf.org/html/rfc7519".to_string()),
            tier: 0,
            status: SourceStatus::Active,
            created_at: 1000,
            updated_at: 2000,
            notes: Some("JWT spec".to_string()),
        };
        let bytes = serialize(&legacy).expect("serialize legacy");

        // Current format should fail (different layout).
        assert!(deserialize::<SourceRecord>(&bytes).is_err());

        // Compat function should succeed.
        let recovered =
            deserialize_source_record_compat(&bytes).expect("compat deserialize should succeed");
        assert_eq!(recovered.hash, [42u8; 32]);
        assert_eq!(recovered.label, "RFC 7519");
        assert_eq!(recovered.tier, 0);
        assert_eq!(recovered.notes, Some("JWT spec".to_string()));
        assert!(recovered.content.is_none()); // Wasn't in legacy

        // Every field, not just the spot checks above, must survive the upgrade.
        assert_eq!(recovered, SourceRecord::from(legacy));
    }

    #[test]
    fn test_current_source_record_also_works_via_compat() {
        let record = SourceRecord::new([1u8; 32], "Test".to_string(), None, 2, 1000)
            .with_content(Some("Full text content".to_string()));
        let bytes = serialize(&record).expect("serialize");

        let recovered = deserialize_source_record_compat(&bytes)
            .expect("compat deserialize should succeed for current format");

        assert_eq!(recovered, record);
        assert_eq!(recovered.content, Some("Full text content".to_string()));
    }
}