fix: prevent phantom members — advertise pod IP instead of 0.0.0.0
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Root cause: SyncStorageBridge returned bind addresses (0.0.0.0) in PingResponse, causing peers to overwrite each other's addresses with loopback. Subsequent probes then connected to the node itself, registering self as a member (node_count: 4 instead of 3).

Two fixes:
- alive_node() now skips self (node_id == local_id guard)
- Advertise POD_IP (k8s downward API) in PingResponse instead of the bind address
This commit is contained in:
parent
4360a17dd3
commit
476d8e19e4
@ -209,8 +209,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
.parse()
|
.parse()
|
||||||
.unwrap_or_else(|_| SocketAddr::from(([0, 0, 0, 0], 18180)));
|
.unwrap_or_else(|_| SocketAddr::from(([0, 0, 0, 0], 18180)));
|
||||||
|
|
||||||
|
// Advertise addresses: use POD_IP (k8s downward API) instead of bind address
|
||||||
|
// (0.0.0.0) which would cause peers to connect to themselves via loopback.
|
||||||
|
let advertise_ip = std::env::var("POD_IP")
|
||||||
|
.ok()
|
||||||
|
.and_then(|ip| ip.parse::<std::net::IpAddr>().ok())
|
||||||
|
.unwrap_or_else(|| rpc_addr.ip()); // Fallback: use bind IP (fine for local dev)
|
||||||
|
let advertise_rpc = SocketAddr::new(advertise_ip, rpc_addr.port());
|
||||||
|
let advertise_api = SocketAddr::new(advertise_ip, api_addr.port());
|
||||||
|
info!(
|
||||||
|
%advertise_rpc,
|
||||||
|
%advertise_api,
|
||||||
|
"Advertising cluster addresses"
|
||||||
|
);
|
||||||
|
|
||||||
// --- Membership ---
|
// --- Membership ---
|
||||||
let local_info = NodeInfo::new(node_id, rpc_addr, api_addr);
|
let local_info = NodeInfo::new(node_id, advertise_rpc, advertise_api);
|
||||||
let membership = Arc::new(SwimMembership::new(local_info, SwimConfig::default()));
|
let membership = Arc::new(SwimMembership::new(local_info, SwimConfig::default()));
|
||||||
|
|
||||||
// Resolve seeds via DNS (for k8s headless service names)
|
// Resolve seeds via DNS (for k8s headless service names)
|
||||||
@ -277,8 +291,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
Arc::clone(&store),
|
Arc::clone(&store),
|
||||||
Arc::clone(&merkle_manager),
|
Arc::clone(&merkle_manager),
|
||||||
*node_id.as_bytes(),
|
*node_id.as_bytes(),
|
||||||
rpc_addr.to_string(),
|
advertise_rpc.to_string(),
|
||||||
api_addr.to_string(),
|
advertise_api.to_string(),
|
||||||
));
|
));
|
||||||
|
|
||||||
let grpc_service = SyncServiceServer::new(SyncServiceHandler::new(bridge));
|
let grpc_service = SyncServiceServer::new(SyncServiceHandler::new(bridge));
|
||||||
|
|||||||
@ -410,6 +410,11 @@ impl SwimMembership {
|
|||||||
/// Marks a node as alive (responded to probe or refuted suspicion).
|
/// Marks a node as alive (responded to probe or refuted suspicion).
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
pub fn alive_node(&self, node_id: NodeId, info: NodeInfo) {
|
pub fn alive_node(&self, node_id: NodeId, info: NodeInfo) {
|
||||||
|
// Never add ourselves to the members map — self is tracked separately
|
||||||
|
if node_id == self.local_id() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let lamport = self.tick();
|
let lamport = self.tick();
|
||||||
|
|
||||||
// IMPORTANT: same deadlock hazard — drop RefMut from get_mut before update_node_gauges.
|
// IMPORTANT: same deadlock hazard — drop RefMut from get_mut before update_node_gauges.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user