//! Zero-copy serialization utilities for StemeDB types. //! //! This module provides the canonical serialization/deserialization functions //! for all rkyv-enabled types in the system. **All production code must use //! these helpers instead of raw `AllocSerializer` usage.** //! //! # Design Philosophy //! //! Following the "Deep Module" principle, these functions hide the complexity //! of rkyv's serialization machinery behind simple interfaces. //! //! # Example //! //! ``` //! use stemedb_core::serde::{serialize, deserialize}; //! use stemedb_core::types::Vote; //! //! let vote = Vote { //! assertion_hash: [0u8; 32], //! agent_id: [1u8; 32], //! weight: 0.8, //! signature: [2u8; 64], //! timestamp: 1000, //! source_url: None, //! observed_context: None, //! }; //! //! // Serialize //! let bytes = serialize(&vote).expect("serialization failed"); //! //! // Deserialize //! let recovered: Vote = deserialize(&bytes).expect("deserialization failed"); //! assert_eq!(vote, recovered); //! ``` //! //! # Performance //! //! Uses a 4096-byte scratch buffer which is sufficient for most assertions. //! Larger payloads will cause reallocation but still work correctly. use rkyv::ser::serializers::AllocSerializer; use rkyv::ser::Serializer; use rkyv::validation::validators::DefaultValidator; use rkyv::{Archive, CheckBytes, Deserialize, Serialize}; use thiserror::Error; use crate::types::{ Assertion, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass, SourceRecord, SourceStatus, }; /// Default scratch buffer size for serialization. /// /// 4KB is sufficient for most assertions. Larger payloads will trigger /// reallocation but the operation will still succeed. pub const DEFAULT_SCRATCH_SIZE: usize = 4096; /// Errors that can occur during serialization/deserialization. #[derive(Debug, Error)] pub enum SerdeError { /// Failed to serialize the value. #[error("Serialization error: {0}")] Serialization(String), /// Failed to validate or deserialize the archived data. #[error("Deserialization error: {0}")] Deserialization(String), } /// Serialize a value to bytes using rkyv zero-copy serialization. /// /// This is the canonical way to serialize StemeDB types. All production /// code should use this instead of raw `AllocSerializer`. /// /// # Type Requirements /// /// The type `T` must implement rkyv's `Serialize` trait, which all StemeDB /// core types do. /// /// # Example /// /// ``` /// use stemedb_core::serde::serialize; /// use stemedb_core::types::Assertion; /// # use stemedb_core::types::{ObjectValue, LifecycleStage, SourceClass}; /// /// let assertion = Assertion { /// subject: "test".to_string(), /// predicate: "is".to_string(), /// object: ObjectValue::Boolean(true), /// parent_hash: None, /// source_hash: [0u8; 32], /// source_class: SourceClass::Expert, /// visual_hash: None, /// epoch: None, /// source_metadata: None, /// narrative: None, /// lifecycle: LifecycleStage::Proposed, /// signatures: vec![], /// confidence: 1.0, /// timestamp: 0, /// hlc_timestamp: stemedb_core::types::HlcTimestamp::default(), /// vector: None, /// }; /// /// let bytes = serialize(&assertion).expect("serialize"); /// assert!(!bytes.is_empty()); /// ``` pub fn serialize(value: &T) -> Result, SerdeError> where T: Serialize>, { let mut serializer = AllocSerializer::::default(); serializer.serialize_value(value).map_err(|e| SerdeError::Serialization(e.to_string()))?; Ok(serializer.into_serializer().into_inner().to_vec()) } /// Deserialize bytes back to a value using rkyv zero-copy deserialization. /// /// This is the canonical way to deserialize StemeDB types. All production /// code should use this instead of raw `check_archived_root`. /// /// # Type Requirements /// /// The type `T` must implement rkyv's `Archive` and `Deserialize` traits, /// and its archived form must implement `CheckBytes` for validation. /// /// # Safety /// /// This function validates the archived data before deserialization, /// ensuring memory safety even with untrusted input. /// /// # Example /// /// ``` /// use stemedb_core::serde::{serialize, deserialize}; /// use stemedb_core::types::Vote; /// /// let vote = Vote { /// assertion_hash: [0u8; 32], /// agent_id: [1u8; 32], /// weight: 0.8, /// signature: [2u8; 64], /// timestamp: 1000, /// source_url: None, /// observed_context: None, /// }; /// /// let bytes = serialize(&vote).expect("serialize"); /// let recovered: Vote = deserialize(&bytes).expect("deserialize"); /// assert_eq!(vote, recovered); /// ``` pub fn deserialize(data: &[u8]) -> Result where T: Archive, T::Archived: for<'a> CheckBytes> + Deserialize, { let archived = rkyv::check_archived_root::(data) .map_err(|e| SerdeError::Deserialization(e.to_string()))?; archived .deserialize(&mut rkyv::Infallible) .map_err(|e| SerdeError::Deserialization(e.to_string())) } // ============================================================================ // Legacy Assertion (pre-narrative schema) // ============================================================================ /// Assertion struct matching the pre-narrative rkyv layout. /// /// The `narrative: Option` field was added between `source_metadata` /// and `lifecycle`. rkyv doesn't support schema evolution, so data serialized /// before that change needs this struct to deserialize correctly. #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)] #[archive(check_bytes)] struct LegacyAssertion { pub subject: String, pub predicate: String, pub object: ObjectValue, pub parent_hash: Option<[u8; 32]>, pub source_hash: [u8; 32], pub source_class: SourceClass, pub visual_hash: Option<[u8; 8]>, pub epoch: Option<[u8; 32]>, pub source_metadata: Option>, // narrative: Option did NOT exist in this version pub lifecycle: LifecycleStage, pub signatures: Vec, pub confidence: f32, pub timestamp: u64, pub hlc_timestamp: HlcTimestamp, pub vector: Option>, } impl From for Assertion { fn from(legacy: LegacyAssertion) -> Self { Self { subject: legacy.subject, predicate: legacy.predicate, object: legacy.object, parent_hash: legacy.parent_hash, source_hash: legacy.source_hash, source_class: legacy.source_class, visual_hash: legacy.visual_hash, epoch: legacy.epoch, source_metadata: legacy.source_metadata, narrative: None, lifecycle: legacy.lifecycle, signatures: legacy.signatures, confidence: legacy.confidence, timestamp: legacy.timestamp, hlc_timestamp: legacy.hlc_timestamp, vector: legacy.vector, } } } /// Deserialize an assertion with backward compatibility. /// /// Tries the current `Assertion` layout first. If that fails, tries the /// legacy layout (before `narrative` field was added) and converts. /// /// This allows the system to read assertions written before schema changes /// without requiring a data migration. pub fn deserialize_assertion_compat(data: &[u8]) -> Result { // Try current format first (fast path for new data) if let Ok(assertion) = deserialize::(data) { return Ok(assertion); } // Fallback: try legacy format (no narrative field) let legacy: LegacyAssertion = deserialize(data)?; Ok(legacy.into()) } // ============================================================================ // Legacy SourceRecord (pre-content schema) // ============================================================================ /// SourceRecord struct matching the pre-content rkyv layout. /// /// The `content: Option` field was added after `notes`. /// rkyv doesn't support schema evolution, so data serialized /// before that change needs this struct to deserialize correctly. #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)] #[archive(check_bytes)] struct LegacySourceRecord { pub hash: [u8; 32], pub label: String, pub url: Option, pub tier: u8, pub status: SourceStatus, pub created_at: u64, pub updated_at: u64, pub notes: Option, // content: Option did NOT exist in this version } impl From for SourceRecord { fn from(legacy: LegacySourceRecord) -> Self { Self { hash: legacy.hash, label: legacy.label, url: legacy.url, tier: legacy.tier, status: legacy.status, created_at: legacy.created_at, updated_at: legacy.updated_at, notes: legacy.notes, content: None, } } } /// Deserialize a source record with backward compatibility. /// /// Tries the current `SourceRecord` layout first. If that fails, tries the /// legacy layout (before `content` field was added) and converts. pub fn deserialize_source_record_compat(data: &[u8]) -> Result { // Try current format first (fast path for new data) if let Ok(record) = deserialize::(data) { return Ok(record); } // Fallback: try legacy format (no content field) let legacy: LegacySourceRecord = deserialize(data)?; Ok(legacy.into()) } #[cfg(test)] mod tests { use super::*; use crate::types::{ Assertion, Epoch, HlcTimestamp, LifecycleStage, ObjectValue, SignatureEntry, SourceClass, Vote, }; #[test] fn test_serialize_deserialize_assertion() { let assertion = Assertion { subject: "Tesla_Inc".to_string(), predicate: "has_revenue".to_string(), object: ObjectValue::Number(96.7), parent_hash: None, source_hash: [0u8; 32], source_class: SourceClass::Clinical, visual_hash: Some([1u8; 8]), epoch: Some([2u8; 32]), source_metadata: None, narrative: None, lifecycle: LifecycleStage::Approved, signatures: vec![SignatureEntry { agent_id: [2u8; 32], signature: [3u8; 64], timestamp: 123456789, version: 1, }], confidence: 0.95, timestamp: 123456789, hlc_timestamp: HlcTimestamp::default(), vector: Some(vec![0.1, 0.2, 0.3]), }; let bytes = serialize(&assertion).expect("serialize"); let recovered: Assertion = deserialize(&bytes).expect("deserialize"); assert_eq!(assertion, recovered); } #[test] fn test_serialize_deserialize_vote() { let vote = Vote { assertion_hash: [1u8; 32], agent_id: [2u8; 32], weight: 0.8, signature: [3u8; 64], timestamp: 123456789, source_url: None, observed_context: None, }; let bytes = serialize(&vote).expect("serialize"); let recovered: Vote = deserialize(&bytes).expect("deserialize"); assert_eq!(vote, recovered); } #[test] fn test_serialize_deserialize_vote_with_provenance() { let vote = Vote { assertion_hash: [1u8; 32], agent_id: [2u8; 32], weight: 0.8, signature: [3u8; 64], timestamp: 123456789, source_url: Some("https://example.com/article".to_string()), observed_context: Some(b"The study found that...".to_vec()), }; let bytes = serialize(&vote).expect("serialize"); let recovered: Vote = deserialize(&bytes).expect("deserialize"); assert_eq!(vote, recovered); assert_eq!(recovered.source_url, Some("https://example.com/article".to_string())); assert_eq!(recovered.observed_context, Some(b"The study found that...".to_vec())); } #[test] fn test_serialize_deserialize_vote_with_url_only() { let vote = Vote { assertion_hash: [1u8; 32], agent_id: [2u8; 32], weight: 0.8, signature: [3u8; 64], timestamp: 123456789, source_url: Some("https://example.com/article".to_string()), observed_context: None, }; let bytes = serialize(&vote).expect("serialize"); let recovered: Vote = deserialize(&bytes).expect("deserialize"); assert_eq!(vote, recovered); assert_eq!(recovered.source_url, Some("https://example.com/article".to_string())); assert!(recovered.observed_context.is_none()); } #[test] fn test_serialize_deserialize_vote_with_context_only() { let vote = Vote { assertion_hash: [1u8; 32], agent_id: [2u8; 32], weight: 0.8, signature: [3u8; 64], timestamp: 123456789, source_url: None, observed_context: Some(b"The study found that...".to_vec()), }; let bytes = serialize(&vote).expect("serialize"); let recovered: Vote = deserialize(&bytes).expect("deserialize"); assert_eq!(vote, recovered); assert!(recovered.source_url.is_none()); assert_eq!(recovered.observed_context, Some(b"The study found that...".to_vec())); } #[test] fn test_serialize_deserialize_epoch() { let epoch = Epoch { id: [1u8; 32], name: "Test Epoch".to_string(), supersedes: None, supersession_type: None, start_timestamp: 1000, end_timestamp: None, }; let bytes = serialize(&epoch).expect("serialize"); let recovered: Epoch = deserialize(&bytes).expect("deserialize"); assert_eq!(epoch, recovered); } #[test] fn test_deserialize_invalid_data() { let garbage = vec![0u8, 1, 2, 3, 4, 5]; let result: Result = deserialize(&garbage); assert!(result.is_err()); } #[test] fn test_serialize_empty_assertion() { let assertion = Assertion { subject: String::new(), predicate: String::new(), object: ObjectValue::Boolean(false), parent_hash: None, source_hash: [0u8; 32], source_class: SourceClass::Expert, visual_hash: None, epoch: None, source_metadata: None, narrative: None, lifecycle: LifecycleStage::Proposed, signatures: vec![], confidence: 0.0, timestamp: 0, hlc_timestamp: HlcTimestamp::default(), vector: None, }; let bytes = serialize(&assertion).expect("serialize"); let recovered: Assertion = deserialize(&bytes).expect("deserialize"); assert_eq!(assertion, recovered); } #[test] fn test_serialize_deserialize_assertion_with_metadata() { let metadata = r#"{"journal":"Nature","DOI":"10.1038/xyz","sample_size":1234}"#; let assertion = Assertion { subject: "Semaglutide".to_string(), predicate: "muscle_effect".to_string(), object: ObjectValue::Text("significant_loss".to_string()), parent_hash: None, source_hash: [1u8; 32], source_class: SourceClass::Clinical, visual_hash: None, epoch: None, source_metadata: Some(metadata.as_bytes().to_vec()), narrative: None, lifecycle: LifecycleStage::Proposed, signatures: vec![], confidence: 0.85, timestamp: 1700000000, hlc_timestamp: HlcTimestamp::default(), vector: None, }; let bytes = serialize(&assertion).expect("serialize"); let recovered: Assertion = deserialize(&bytes).expect("deserialize"); assert_eq!(assertion, recovered); assert_eq!(recovered.source_metadata, Some(metadata.as_bytes().to_vec())); } #[test] fn test_serialize_deserialize_assertion_without_metadata() { let assertion = Assertion { subject: "test".to_string(), predicate: "test".to_string(), object: ObjectValue::Boolean(true), parent_hash: None, source_hash: [0u8; 32], source_class: SourceClass::Expert, visual_hash: None, epoch: None, source_metadata: None, narrative: None, lifecycle: LifecycleStage::Proposed, signatures: vec![], confidence: 1.0, timestamp: 0, hlc_timestamp: HlcTimestamp::default(), vector: None, }; let bytes = serialize(&assertion).expect("serialize"); let recovered: Assertion = deserialize(&bytes).expect("deserialize"); assert_eq!(assertion, recovered); assert!(recovered.source_metadata.is_none()); } #[test] fn test_legacy_assertion_compat_deserialize() { // Simulate data serialized with the pre-narrative struct layout. let legacy = LegacyAssertion { subject: "Semaglutide".to_string(), predicate: "reduces_weight".to_string(), object: ObjectValue::Text("significant".to_string()), parent_hash: None, source_hash: [1u8; 32], source_class: SourceClass::Clinical, visual_hash: None, epoch: None, source_metadata: Some(b"{}".to_vec()), lifecycle: LifecycleStage::Approved, signatures: vec![SignatureEntry { agent_id: [2u8; 32], signature: [3u8; 64], timestamp: 1000, version: 1, }], confidence: 0.95, timestamp: 1700000000, hlc_timestamp: HlcTimestamp::default(), vector: Some(vec![0.1, 0.2]), }; let bytes = serialize(&legacy).expect("serialize legacy"); // Current format should fail (different layout) assert!(deserialize::(&bytes).is_err()); // Compat function should succeed let recovered = deserialize_assertion_compat(&bytes).expect("compat deserialize should succeed"); assert_eq!(recovered.subject, "Semaglutide"); assert_eq!(recovered.predicate, "reduces_weight"); assert_eq!(recovered.confidence, 0.95); assert_eq!(recovered.signatures.len(), 1); assert!(recovered.narrative.is_none()); // Wasn't in legacy assert!(recovered.source_metadata.is_some()); assert_eq!(recovered.timestamp, 1700000000); } #[test] fn test_current_assertion_also_works_via_compat() { // Current-format assertions should work via the compat path too. let assertion = Assertion { subject: "test".to_string(), predicate: "works".to_string(), object: ObjectValue::Boolean(true), parent_hash: None, source_hash: [0u8; 32], source_class: SourceClass::Expert, visual_hash: None, epoch: None, source_metadata: None, narrative: Some("This is a narrative.".to_string()), lifecycle: LifecycleStage::Proposed, signatures: vec![], confidence: 1.0, timestamp: 0, hlc_timestamp: HlcTimestamp::default(), vector: None, }; let bytes = serialize(&assertion).expect("serialize"); let recovered = deserialize_assertion_compat(&bytes) .expect("compat deserialize should succeed for current format"); assert_eq!(recovered, assertion); assert_eq!(recovered.narrative, Some("This is a narrative.".to_string())); } #[test] fn test_legacy_source_record_compat_deserialize() { // Simulate data serialized with the pre-content struct layout. let legacy = LegacySourceRecord { hash: [42u8; 32], label: "RFC 7519".to_string(), url: Some("https://tools.ietf.org/html/rfc7519".to_string()), tier: 0, status: SourceStatus::Active, created_at: 1000, updated_at: 2000, notes: Some("JWT spec".to_string()), }; let bytes = serialize(&legacy).expect("serialize legacy"); // Current format should fail (different layout) assert!(deserialize::(&bytes).is_err()); // Compat function should succeed let recovered = deserialize_source_record_compat(&bytes).expect("compat deserialize should succeed"); assert_eq!(recovered.hash, [42u8; 32]); assert_eq!(recovered.label, "RFC 7519"); assert_eq!(recovered.tier, 0); assert_eq!(recovered.notes, Some("JWT spec".to_string())); assert!(recovered.content.is_none()); // Wasn't in legacy } #[test] fn test_current_source_record_also_works_via_compat() { let record = SourceRecord::new([1u8; 32], "Test".to_string(), None, 2, 1000) .with_content(Some("Full text content".to_string())); let bytes = serialize(&record).expect("serialize"); let recovered = deserialize_source_record_compat(&bytes) .expect("compat deserialize should succeed for current format"); assert_eq!(recovered, record); assert_eq!(recovered.content, Some("Full text content".to_string())); } }