Add stemedb-cluster crate implementing horizontal scaling: - SWIM-based membership protocol for node discovery and failure detection - Consistent hashing (jump hash) for subject-to-shard routing - Range management with dynamic split (>64MB) and merge (<20MB) operations - Stateless HTTP gateway for client request routing via axum - Meta-range gossip merge for cluster-wide metadata propagation Includes restrictive CORS policy, proper error propagation from routing, replica cache invalidation on node failure, and 84 tests (57 unit + 27 integration). Raft MV coordination deferred per design decision. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
120 lines
3.3 KiB
Protocol Buffer
120 lines
3.3 KiB
Protocol Buffer
// Schema for the StemeDB node-to-node sync API, version 1.
syntax = "proto3";

package stemedb.sync.v1;
|
|
|
|
// SyncService enables node-to-node replication for StemeDB.
//
// The service supports two sync patterns:
//   1. Gossip: push new assertions to peers immediately after ingestion.
//   2. Anti-Entropy: periodic Merkle root exchange and diff-based sync.
service SyncService {
  // Gossip pushes a new assertion to a peer.
  // Called immediately after local ingestion to propagate data quickly.
  rpc Gossip(GossipRequest) returns (GossipResponse);

  // ExchangeRoots compares Merkle roots to detect divergence.
  // If roots differ, the caller should fetch missing assertions.
  rpc ExchangeRoots(RootExchangeRequest) returns (RootExchangeResponse);

  // FetchAssertions retrieves assertion data by hash.
  // Used after ExchangeRoots to pull missing assertions.
  rpc FetchAssertions(FetchRequest) returns (FetchResponse);

  // Ping checks if a peer is alive and returns basic metadata.
  rpc Ping(PingRequest) returns (PingResponse);

  // GetLeaves returns Merkle tree leaf hashes.
  // Used for computing the diff during anti-entropy sync.
  // The result is bounded by GetLeavesRequest.max_leaves; when more leaves
  // exist than were returned, GetLeavesResponse.truncated is set.
  rpc GetLeaves(GetLeavesRequest) returns (GetLeavesResponse);
}
|
|
|
|
// GossipRequest pushes a single assertion to a peer.
message GossipRequest {
  // BLAKE3 hash of the assertion (32 bytes).
  bytes assertion_hash = 1;

  // Serialized assertion data (rkyv format).
  bytes assertion_data = 2;

  // HLC timestamp components for causal ordering. The three fields below
  // together form one HLC timestamp.

  // Time component of the HLC timestamp.
  uint64 hlc_time = 3;

  // Counter component of the HLC timestamp.
  uint32 hlc_counter = 4;

  // Node ID component of the HLC timestamp (16 bytes).
  bytes hlc_node_id = 5;
}
|
|
|
|
// GossipResponse reports whether the peer accepted a gossiped assertion.
message GossipResponse {
  // True if the assertion was accepted (stored or already existed).
  bool accepted = 1;

  // Error message if rejected (e.g., validation failure).
  string error = 2;
}
|
|
|
|
// RootExchangeRequest initiates Merkle root comparison by sending the
// caller's own root and tree size.
message RootExchangeRequest {
  // Local Merkle root hash (32 bytes).
  bytes merkle_root = 1;

  // Number of assertions in the local tree.
  uint64 assertion_count = 2;
}
|
|
|
|
// RootExchangeResponse carries the responder's Merkle root and tree size,
// plus the outcome of comparing it with the root sent in the request.
message RootExchangeResponse {
  // Remote Merkle root hash (32 bytes).
  bytes merkle_root = 1;

  // Number of assertions in the remote tree.
  uint64 assertion_count = 2;

  // True if roots match (trees are identical).
  bool roots_match = 3;
}
|
|
|
|
// FetchRequest asks for assertion data by hash.
message FetchRequest {
  // Hashes of the assertions to fetch (max 1000 per request).
  repeated bytes hashes = 1;
}
|
|
|
|
// FetchResponse returns the assertions that were found for a FetchRequest.
message FetchResponse {
  // Retrieved assertions (may be fewer than requested if not found).
  repeated AssertionData assertions = 1;
}
|
|
|
|
// AssertionData pairs a hash with its serialized data.
message AssertionData {
  // BLAKE3 hash of the assertion (32 bytes).
  bytes hash = 1;

  // Serialized assertion data (rkyv format).
  bytes data = 2;
}
|
|
|
|
// PingRequest is a health check with node identity.
message PingRequest {
  // Sender's node ID (16 bytes).
  bytes node_id = 1;
}
|
|
|
|
// PingResponse answers a PingRequest with the responder's identity and
// basic metadata.
message PingResponse {
  // Responder's node ID (16 bytes).
  bytes node_id = 1;

  // Number of assertions on this node.
  uint64 assertion_count = 2;
}
|
|
|
|
// GetLeavesRequest requests Merkle tree leaf hashes, up to a limit.
message GetLeavesRequest {
  // Maximum number of leaves to return (0 = no limit, but capped at 10000).
  uint64 max_leaves = 1;
}
|
|
|
|
// GetLeavesResponse returns Merkle tree leaf hashes.
message GetLeavesResponse {
  // Leaf hashes (each 32 bytes). This is only a subset of the tree's leaves
  // when `truncated` is true.
  repeated bytes leaves = 1;

  // True if there are more leaves than max_leaves.
  bool truncated = 2;
}
|