stemedb/crates/stemedb-rpc/proto/sync.proto
jordan afed95fe26 feat: Multi-node cluster coordination (Phase 6C)
Add stemedb-cluster crate implementing horizontal scaling:

- SWIM-based membership protocol for node discovery and failure detection
- Consistent hashing (jump hash) for subject-to-shard routing
- Range management with dynamic split (>64MB) and merge (<20MB) operations
- Stateless HTTP gateway for client request routing via axum
- Meta-range gossip merge for cluster-wide metadata propagation

Includes restrictive CORS policy, proper error propagation from routing,
replica cache invalidation on node failure, and 84 tests (57 unit + 27
integration). Raft MV coordination deferred per design decision.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 20:57:54 -07:00

120 lines
3.3 KiB
Protocol Buffer

// This schema is written against the proto3 language version.
syntax = "proto3";
// Package namespace for every definition in this file (note the v1 suffix).
package stemedb.sync.v1;
// SyncService is the node-to-node replication API for StemeDB.
//
// Two sync patterns are supported:
//   1. Gossip: a node pushes each new assertion to its peers right after
//      ingesting it.
//   2. Anti-entropy: peers periodically exchange Merkle roots and sync
//      based on a diff.
service SyncService {
  // Pushes one new assertion to a peer. Meant to be called immediately
  // after local ingestion so that data propagates quickly.
  rpc Gossip(GossipRequest) returns (GossipResponse);

  // Compares Merkle roots with a peer to detect divergence. When the roots
  // differ, the caller should fetch the assertions it is missing.
  rpc ExchangeRoots(RootExchangeRequest) returns (RootExchangeResponse);

  // Retrieves assertion data by hash. Used after ExchangeRoots to pull the
  // missing assertions.
  rpc FetchAssertions(FetchRequest) returns (FetchResponse);

  // Checks whether a peer is alive and returns basic metadata about it.
  rpc Ping(PingRequest) returns (PingResponse);

  // Returns the Merkle tree's leaf hashes; the response flags when the
  // list was truncated. Used to compute the diff during anti-entropy sync.
  rpc GetLeaves(GetLeavesRequest) returns (GetLeavesResponse);
}
// GossipRequest carries a single assertion being pushed to a peer.
message GossipRequest {
  // BLAKE3 hash of the assertion; 32 bytes.
  bytes assertion_hash = 1;

  // The assertion itself, serialized in rkyv format.
  bytes assertion_data = 2;

  // HLC timestamp components, used for causal ordering.
  uint64 hlc_time = 3;
  uint32 hlc_counter = 4;
  // Node ID component of the HLC timestamp; 16 bytes.
  bytes hlc_node_id = 5;
}
// GossipResponse reports whether the peer accepted the pushed assertion.
message GossipResponse {
  // Set when the assertion was accepted, meaning it was either stored or
  // already existed on the peer.
  bool accepted = 1;

  // Error message explaining a rejection (for example, a validation
  // failure).
  string error = 2;
}
// RootExchangeRequest starts a Merkle root comparison by sending the
// caller's own tree summary.
message RootExchangeRequest {
  // Merkle root hash of the sender's local tree; 32 bytes.
  bytes merkle_root = 1;

  // How many assertions the sender's local tree contains.
  uint64 assertion_count = 2;
}
// RootExchangeResponse returns the responder's tree summary together with
// the outcome of the root comparison.
message RootExchangeResponse {
  // Merkle root hash of the remote (responding) tree; 32 bytes.
  bytes merkle_root = 1;

  // How many assertions the remote tree contains.
  uint64 assertion_count = 2;

  // Set when both roots match, i.e. the two trees are identical.
  bool roots_match = 3;
}
// FetchRequest names the assertions, by hash, whose data the caller wants.
message FetchRequest {
  // Hashes of the assertions to fetch; at most 1000 per request.
  repeated bytes hashes = 1;
}
// FetchResponse returns the assertion data that could be retrieved.
message FetchResponse {
  // The assertions that were found. May contain fewer entries than were
  // requested when some hashes are not found.
  repeated AssertionData assertions = 1;
}
// AssertionData couples an assertion's hash with its serialized bytes.
message AssertionData {
  // BLAKE3 hash of the assertion; 32 bytes.
  bytes hash = 1;

  // The assertion, serialized in rkyv format.
  bytes data = 2;
}
// PingRequest is a health check that also identifies the sending node.
message PingRequest {
  // Node ID of the sender; 16 bytes.
  bytes node_id = 1;
}
// PingResponse identifies the responding node and reports basic metadata.
message PingResponse {
  // Node ID of the responder; 16 bytes.
  bytes node_id = 1;

  // How many assertions the responding node holds.
  uint64 assertion_count = 2;
}
// GetLeavesRequest asks a peer for its Merkle tree leaf hashes.
message GetLeavesRequest {
  // Upper bound on the number of leaves to return. 0 means the caller sets
  // no limit of its own; the result is still capped at 10000 leaves.
  uint64 max_leaves = 1;
}
// GetLeavesResponse carries Merkle tree leaf hashes back to the caller.
message GetLeavesResponse {
  // Leaf hashes, 32 bytes each.
  repeated bytes leaves = 1;

  // Set when there are more leaves than max_leaves.
  bool truncated = 2;
}