From 83844a528365e657cd5f4d2cda51d72ced9900da Mon Sep 17 00:00:00 2001
From: DanConwayDev
Date: Thu, 11 Dec 2025 12:36:51 +0000
Subject: fix: wire up relay disconnection detection for metrics

- Add periodic health check in RelayConnection::run_event_loop that polls
  nostr-sdk's relay.is_connected() every second to detect dead connections
- When event channel closes without explicit Closed/Shutdown, send
  DisconnectNotification to SyncManager (fixes case where TCP drops silently)
- Enable test_relay_connected_status test which validates the
  ngit_sync_relay_connected metric correctly reflects connection state

The issue was that when a remote relay stops abruptly, nostr-sdk's
notification receiver blocks indefinitely waiting for data. TCP disconnect
detection without keepalive can take minutes. The health check polls
nostr-sdk's internal relay status which detects disconnection promptly.
---
 src/sync/mod.rs | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'src/sync/mod.rs')

diff --git a/src/sync/mod.rs b/src/sync/mod.rs index c62b478..15c89e3 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -1251,6 +1251,9 @@ impl SyncManager { let metrics_clone = self.metrics.clone(); // Clone metrics for the spawned task let is_bootstrap_clone = is_bootstrap; // Clone is_bootstrap for the spawned task tokio::spawn(async move { + // Track whether we've already sent a disconnect notification + let mut disconnect_sent = false; + while let Some(relay_event) = event_rx.recv().await { match relay_event { RelayEvent::Event(event) => { @@ -1297,6 +1300,7 @@ impl SyncManager { relay_url: relay_url_clone.clone(), }) .await; + disconnect_sent = true; break; } RelayEvent::Shutdown => { @@ -1307,10 +1311,25 @@ impl SyncManager { relay_url: relay_url_clone.clone(), }) .await; + disconnect_sent = true; break; } } } + + // If the event channel closed without a Closed/Shutdown event + // (e.g., connection dropped unexpectedly), still notify SyncManager + if !disconnect_sent {
+ tracing::info!( + relay = %relay_url_clone, + "Event channel closed, notifying SyncManager of disconnect" + ); + let _ = disconnect_tx + .send(DisconnectNotification { + relay_url: relay_url_clone.clone(), + }) + .await; + } }); tracing::info!( -- cgit v1.2.3