stemedb/crates/stemedb-query/tests/battery/battery8_concept_path.rs
jordan 137a588ed0 feat: Concept hierarchy (Phase 5D) - ConceptPath, source schemes, AliasStore
Implements hierarchical subject identifiers with scheme-based source tier inference:

- ConceptPath type with parse/wire_format, leaf/parent, prefix matching
- SourceScheme registry mapping schemes to default SourceClass tiers:
  - rfc://, fda://, ietf:// → Regulatory (Tier 0)
  - peer://, pubmed:// → PeerReviewed (Tier 1)
  - code://, wiki:// → Expert (Tier 3)
  - blog://, anon:// → Anecdotal (Tier 5)
- AliasStore for cross-scheme entity resolution (bidirectional indexing)
- API endpoints for concept operations
- Battery tests 8, 9 & 10 for concepts, aliases, and advanced signatures
- Go SDK updates for concept types and signing

Completes Phase 5, advancing to Phase 6 (Distributed Writes).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:44:54 -07:00

231 lines
9.0 KiB
Rust

//! Battery 8: ConceptPath Parsing and Source Class Inference.
//!
//! Tests ConceptPath parsing, roundtrip, and scheme-based source class inference.
//!
//! # Test Coverage
//!
//! | Test | Feature | Validates |
//! |------|---------|-----------|
//! | `test_concept_path_parse_full` | Full wire format | scheme://seg1/seg2/seg3 |
//! | `test_concept_path_backward_compat` | Bare strings | Maps to custom:// scheme |
//! | `test_concept_path_roundtrip` | parse → to_wire_format → parse | Identity |
//! | `test_concept_path_prefix_matching` | is_prefix_of | Hierarchical matching |
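//! | `test_source_scheme_inference` | scheme → SourceClass | Tier mapping |
//! | `test_concept_path_default_source_class` | ConceptPath → SourceClass | Tier from parsed scheme |
//! | `test_concept_path_edge_cases` | Empty / single-segment / scheme-only input | Error or minimal path |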
#![allow(clippy::expect_used)] // Test code uses expect() for clear failure messages
use stemedb_core::types::{ConceptPath, SourceClass, SourceScheme};
/// Test 8.1: A fully qualified wire string decomposes into scheme and segments.
///
/// Input shape: `scheme://segment1/segment2/segment3`
///
/// Checks performed:
/// - the scheme in front of `://` is reported as `scheme`
/// - the remainder is split into `segments` on "/"
/// - `leaf()` yields the final segment
/// - `parent()` yields the same path minus its final segment, and can be
///   chained to walk further up the hierarchy
#[test]
fn test_concept_path_parse_full() {
    let wire = "code://rust/citadeldb/auth/jwt/aud_validation";
    let full = ConceptPath::parse(wire).expect("parse full path");

    // The path as parsed: five segments under the `code` scheme.
    assert_eq!(full.scheme, "code");
    assert_eq!(full.segments, vec!["rust", "citadeldb", "auth", "jwt", "aud_validation"]);
    assert_eq!(full.leaf(), "aud_validation");

    // One level up: scheme is retained, last segment is dropped.
    let one_up = full.parent().expect("has parent");
    assert_eq!(one_up.scheme, "code");
    assert_eq!(one_up.segments, vec!["rust", "citadeldb", "auth", "jwt"]);
    assert_eq!(one_up.leaf(), "jwt");

    // Two levels up: only the new leaf is inspected.
    let two_up = one_up.parent().expect("has grandparent");
    assert_eq!(two_up.leaf(), "auth");
}
/// Test 8.2: Bare strings still parse (backward compatibility).
///
/// An input with no `://` separator is expected to land in the `custom`
/// scheme as a single segment, so subjects written before ConceptPath
/// existed keep working.
#[test]
fn test_concept_path_backward_compat() {
    let legacy = ConceptPath::parse("Semaglutide").expect("parse bare string");

    // The whole input becomes the one and only segment under `custom`.
    assert_eq!(legacy.scheme, "custom");
    assert_eq!(legacy.segments, vec!["Semaglutide"]);
    assert_eq!(legacy.leaf(), "Semaglutide");

    // With a single segment there is nothing above it.
    assert!(legacy.parent().is_none());
}
/// Test 8.3: Roundtrip: parse → to_wire_format → parse.
///
/// For each sample wire string the path is parsed, serialized with
/// `to_wire_format()`, and parsed again. The second parse must agree with the
/// first on both `scheme` and `segments`.
///
/// Note: the serialized string itself is not compared against the input;
/// only the two parsed values are compared.
#[test]
fn test_concept_path_roundtrip() {
    // Fixed literal table: an array iterates by value exactly like a `Vec`
    // would, without the heap allocation.
    let test_cases = [
        "code://rust/citadeldb/auth/jwt/aud_validation",
        "rfc://7519/jwt/audience_validation",
        "fda://drug/12345/indication",
        "custom://Semaglutide",
    ];

    for original in test_cases {
        let parsed = ConceptPath::parse(original).expect("parse original");
        let wire = parsed.to_wire_format();
        let reparsed = ConceptPath::parse(&wire).expect("reparse wire format");

        assert_eq!(parsed.scheme, reparsed.scheme, "scheme mismatch for {}", original);
        assert_eq!(parsed.segments, reparsed.segments, "segments mismatch for {}", original);
    }
}
/// Test 8.4: Hierarchical prefix matching via `is_prefix_of`.
///
/// Expectations exercised here:
/// - `code://rust/citadeldb/auth` is a prefix of `code://rust/citadeldb/auth/jwt/aud`
/// - `code://rust/citadeldb` is a prefix of `code://rust/citadeldb/auth/jwt/aud`
/// - a longer path is never a prefix of a shorter one
/// - paths under different schemes are never prefixes of each other,
///   in either direction
#[test]
fn test_concept_path_prefix_matching() {
    let deep = ConceptPath::parse("code://rust/citadeldb/auth/jwt/aud").expect("parse jwt path");
    let auth = ConceptPath::parse("code://rust/citadeldb/auth").expect("parse auth prefix");
    let crate_root = ConceptPath::parse("code://rust/citadeldb").expect("parse citadeldb prefix");
    let other_scheme = ConceptPath::parse("rfc://7519/jwt/audience").expect("parse rfc path");

    // Ancestors within the same scheme match.
    assert!(auth.is_prefix_of(&deep), "auth should be prefix of jwt");
    assert!(crate_root.is_prefix_of(&deep), "citadeldb should be prefix of jwt");

    // A descendant does not match its ancestor.
    assert!(!deep.is_prefix_of(&auth), "jwt should NOT be prefix of auth");

    // Crossing schemes never matches, regardless of direction.
    assert!(
        !other_scheme.is_prefix_of(&deep),
        "rfc should NOT be prefix of code (different scheme)"
    );
    assert!(
        !deep.is_prefix_of(&other_scheme),
        "code should NOT be prefix of rfc (different scheme)"
    );
}
/// Test 8.5: A scheme name alone determines the default source class.
///
/// Mapping asserted below:
/// - Tier 0 (Regulatory): rfc, nist, fda, sec
/// - Tier 1 (Clinical): owasp, pubmed, doi
/// - Tier 2 (Observational): vendor, cve
/// - Tier 3 (Expert): internal, code, custom
/// - Tier 4 (Community): community, wiki
/// - Tier 5 (Anecdotal): blog, social
#[test]
fn test_source_scheme_inference() {
    // Shorthand for the lookup every assertion performs.
    let class_of = |scheme: &str| SourceScheme::parse(scheme).default_source_class();

    // Tier 0: Regulatory
    assert_eq!(class_of("rfc"), SourceClass::Regulatory);
    assert_eq!(class_of("nist"), SourceClass::Regulatory);
    assert_eq!(class_of("fda"), SourceClass::Regulatory);
    assert_eq!(class_of("sec"), SourceClass::Regulatory);

    // Tier 1: Clinical
    assert_eq!(class_of("owasp"), SourceClass::Clinical);
    assert_eq!(class_of("pubmed"), SourceClass::Clinical);
    assert_eq!(class_of("doi"), SourceClass::Clinical);

    // Tier 2: Observational
    assert_eq!(class_of("vendor"), SourceClass::Observational);
    assert_eq!(class_of("cve"), SourceClass::Observational);

    // Tier 3: Expert
    assert_eq!(class_of("internal"), SourceClass::Expert);
    assert_eq!(class_of("code"), SourceClass::Expert);
    assert_eq!(class_of("custom"), SourceClass::Expert);

    // Tier 4: Community
    assert_eq!(class_of("community"), SourceClass::Community);
    assert_eq!(class_of("wiki"), SourceClass::Community);

    // Tier 5: Anecdotal
    assert_eq!(class_of("blog"), SourceClass::Anecdotal);
    assert_eq!(class_of("social"), SourceClass::Anecdotal);
}
/// Test 8.6: ConceptPath default_source_class from scheme.
///
/// Each table row pairs a wire string with the source class that
/// `ConceptPath::default_source_class()` is expected to report after parsing.
/// The last row covers a bare string, which is expected to resolve the same
/// way as the `custom` scheme.
#[test]
fn test_concept_path_default_source_class() {
    // Fixed literal table: an array iterates by value exactly like a `Vec`
    // would, without the heap allocation.
    let test_cases = [
        ("rfc://7519/jwt/aud", SourceClass::Regulatory),
        ("fda://drug/12345", SourceClass::Regulatory),
        ("owasp://top10/a01", SourceClass::Clinical),
        ("pubmed://pmid/123456", SourceClass::Clinical),
        ("vendor://aws/s3/encryption", SourceClass::Observational),
        ("code://rust/citadeldb/auth", SourceClass::Expert),
        ("internal://policy/security", SourceClass::Expert),
        ("community://stackoverflow/q/123", SourceClass::Community),
        ("blog://medium/post/abc", SourceClass::Anecdotal),
        ("Semaglutide", SourceClass::Expert), // bare string → custom → Expert
    ];

    for (path_str, expected_class) in test_cases {
        let path = ConceptPath::parse(path_str).expect("parse path");
        assert_eq!(
            path.default_source_class(),
            expected_class,
            "path {} should have source class {:?}",
            path_str,
            expected_class
        );
    }
}
/// Test 8.7: Boundary inputs for ConceptPath parsing.
///
/// Cases exercised:
/// - Empty string → must be rejected
/// - Scheme plus a single segment → valid, and has no parent
/// - Scheme with nothing after `://` → either rejected, or accepted with no
///   non-empty segment
///
/// Trailing-slash inputs are not exercised by this test.
#[test]
fn test_concept_path_edge_cases() {
    // Nothing to parse at all.
    assert!(ConceptPath::parse("").is_err(), "empty string should fail parsing");

    // Shortest well-formed path that carries an explicit scheme.
    let lone = ConceptPath::parse("rfc://7519").expect("parse single segment");
    assert_eq!(lone.scheme, "rfc");
    assert_eq!(lone.segments, vec!["7519"]);
    assert_eq!(lone.leaf(), "7519");
    assert!(lone.parent().is_none(), "single segment has no parent");

    // A bare `scheme://` may be rejected outright. If it is accepted instead,
    // it must carry either no segments or exactly one empty segment.
    if let Ok(bare) = ConceptPath::parse("code://") {
        assert!(
            bare.segments.is_empty() || bare.segments == vec![""],
            "scheme only should have no meaningful segments"
        );
    }
}