// stemedb/crates/stemedb-core/src/limits.rs

//! Shared configuration constants and limits for StemeDB.
//!
//! This module centralizes validation limits and default values used across
//! the codebase to prevent duplication and ensure consistency.

/// Upper bound, in bytes, on the length of a subject (entity ID): 1 KiB.
///
/// A subject is meant to be a compact identifier rather than free-form
/// descriptive text. Capping its length guards against unbounded memory
/// growth and keeps index performance reasonable.
///
/// # Examples
/// - Accepted: `"Tesla_Inc"` (9 bytes)
/// - Accepted: `"COMPOUND_CID_12345"` (18 bytes)
/// - Rejected: a 2KB entity description stuffed into the subject field
pub const MAX_SUBJECT_LEN: usize = 1 << 10;

/// Upper bound, in bytes, on the length of a predicate (relation ID).
///
/// A predicate names a relation and is expected to be short, not a full
/// sentence. The cap keeps indexing along the predicate dimension efficient.
///
/// # Examples
/// - Accepted: `"has_revenue"` (11 bytes)
/// - Accepted: `"treats_condition"` (16 bytes)
/// - Rejected: a 512-byte natural-language description of the relationship
pub const MAX_PREDICATE_LEN: usize = 256_usize;

/// Maximum allowed object (value) length in bytes.
///
/// Objects can contain numbers, strings, or structured data (JSON).
/// This limit prevents unbounded memory growth while allowing reasonable
/// values like long text descriptions or JSON payloads.
///
/// # Examples
/// - Valid: `"96.7"` (4 bytes)
/// - Valid: `"oblate_spheroid"` (15 bytes)
/// - Valid: JSON with nested structure (up to 4096 bytes)
/// - Invalid: A 10KB embedded document in the object field
pub const MAX_OBJECT_LEN: usize = 4096;

/// Largest accepted source document, in bytes (10 MB).
///
/// A source document is kept as an opaque byte blob indexed by its content
/// hash. Bounding its size protects storage from maliciously large uploads
/// while still accommodating typical PDFs, images, and text documents.
///
/// # Rationale
/// - An average research paper PDF is 1-3 MB
/// - A high-resolution screenshot is 2-5 MB
/// - 10 MB leaves room for reasonable documents while preventing abuse
///
/// # Note
/// When the source material is larger than this, consider recording a
/// pointer to it (URL, IPFS CID) in the source metadata rather than the
/// raw bytes.
pub const MAX_SOURCE_SIZE: usize = {
    // Named unit so the limit reads as "10 MiB" at a glance.
    const MIB: usize = 1024 * 1024;
    10 * MIB
};

/// Default limit for paginated query results.
///
/// Applied when no explicit limit is provided in the query parameters.
/// Prevents accidentally fetching unbounded result sets.
///
/// # Examples
///
/// The block below shows HTTP requests, not Rust code, so it is tagged
/// `text` rather than `ignore`.
///
/// ```text
/// GET /v1/query?subject=Tesla_Inc&limit=50  // explicit limit
/// GET /v1/query?subject=Tesla_Inc           // uses DEFAULT_QUERY_LIMIT
/// ```
pub const DEFAULT_QUERY_LIMIT: usize = 100;