upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/src/sync/health.rs
diff options
context:
space:
mode:
author: DanConwayDev <DanConwayDev@protonmail.com> 2026-01-10 01:24:52 +0000
committer: DanConwayDev <DanConwayDev@protonmail.com> 2026-01-10 01:24:52 +0000
commit: c9b3b3bd8a04de139bcb0d0b83bf819c367ee8c8 (patch)
tree: 80cc2c1ec92d71637408a7588d8cb908f03fc4b6 /src/sync/health.rs
parent: 9369a2885f5a3f9e38c0a3f9fa3af6260513c8e4 (diff)
Implement relay naughty list feature
Add naughty list tracking for relays with persistent infrastructure issues (DNS failures, TLS certificate errors, protocol violations) to reduce log noise and provide better visibility via metrics.

Key features:
- Classify errors into naughty (persistent) vs transient (temporary)
- Track naughty relays with category, reason, and occurrence count
- Log WARN on first naughty occurrence, DEBUG on repeats
- Automatic expiration after 12 hours (configurable)
- Prometheus metrics for monitoring naughty relays by category
- Periodic cleanup task integrated with health checker

Components added:
- src/sync/naughty_list.rs: Core naughty list tracker with error classification
- NaughtyListTracker integration in RelayHealthTracker
- Connection error handling updates in sync manager
- Naughty list metrics (total by category, detailed info per relay)
- Config option for naughty_list_expiration_hours (default: 12)

Closes the tracking issues for DNS lookup failures and TLS certificate errors.
Diffstat (limited to 'src/sync/health.rs')
-rw-r--r-- src/sync/health.rs 15
1 file changed, 15 insertions, 0 deletions
diff --git a/src/sync/health.rs b/src/sync/health.rs
index 2948707..833918b 100644
--- a/src/sync/health.rs
+++ b/src/sync/health.rs
@@ -5,6 +5,7 @@
5//! - Exponential backoff with configurable max delay 5//! - Exponential backoff with configurable max delay
6//! - Dead relay detection after 24h of continuous failures 6//! - Dead relay detection after 24h of continuous failures
7//! - Rate limit detection and fixed cooldown period 7//! - Rate limit detection and fixed cooldown period
8//! - Naughty list for persistent infrastructure issues (DNS, TLS, protocol errors)
8//! 9//!
9//! ## Health States 10//! ## Health States
10//! 11//!
@@ -18,6 +19,7 @@ use std::time::{Duration, Instant};
18 19
19use dashmap::DashMap; 20use dashmap::DashMap;
20 21
22use super::naughty_list::NaughtyListTracker;
21use crate::config::Config; 23use crate::config::Config;
22 24
23/// Duration threshold before a relay is considered dead (24 hours) 25/// Duration threshold before a relay is considered dead (24 hours)
@@ -213,15 +215,21 @@ pub struct RelayHealthTracker {
213 health: DashMap<String, RelayHealth>, 215 health: DashMap<String, RelayHealth>,
214 max_backoff_secs: u64, 216 max_backoff_secs: u64,
215 base_backoff_secs: u64, 217 base_backoff_secs: u64,
218 naughty_list: Option<Arc<NaughtyListTracker>>,
216} 219}
217 220
218impl RelayHealthTracker { 221impl RelayHealthTracker {
219 /// Create a new RelayHealthTracker 222 /// Create a new RelayHealthTracker
220 pub fn new(config: &Config) -> Self { 223 pub fn new(config: &Config) -> Self {
224 let naughty_list = Some(Arc::new(NaughtyListTracker::new(
225 config.naughty_list_expiration_hours,
226 )));
227
221 Self { 228 Self {
222 health: DashMap::new(), 229 health: DashMap::new(),
223 max_backoff_secs: config.sync_max_backoff_secs, 230 max_backoff_secs: config.sync_max_backoff_secs,
224 base_backoff_secs: config.sync_base_backoff_secs, 231 base_backoff_secs: config.sync_base_backoff_secs,
232 naughty_list,
225 } 233 }
226 } 234 }
227 235
@@ -231,6 +239,7 @@ impl RelayHealthTracker {
231 health: DashMap::new(), 239 health: DashMap::new(),
232 max_backoff_secs: DEFAULT_MAX_BACKOFF_SECS, 240 max_backoff_secs: DEFAULT_MAX_BACKOFF_SECS,
233 base_backoff_secs: DEFAULT_BASE_BACKOFF_SECS, 241 base_backoff_secs: DEFAULT_BASE_BACKOFF_SECS,
242 naughty_list: Some(Arc::new(NaughtyListTracker::with_defaults())),
234 } 243 }
235 } 244 }
236 245
@@ -240,6 +249,7 @@ impl RelayHealthTracker {
240 health: DashMap::new(), 249 health: DashMap::new(),
241 max_backoff_secs, 250 max_backoff_secs,
242 base_backoff_secs: DEFAULT_BASE_BACKOFF_SECS, 251 base_backoff_secs: DEFAULT_BASE_BACKOFF_SECS,
252 naughty_list: Some(Arc::new(NaughtyListTracker::with_defaults())),
243 } 253 }
244 } 254 }
245 255
@@ -549,6 +559,11 @@ impl RelayHealthTracker {
549 .get(relay_url) 559 .get(relay_url)
550 .map(|entry| entry.value().clone()) 560 .map(|entry| entry.value().clone())
551 } 561 }
562
563 /// Get a reference to the naughty list tracker
564 pub fn naughty_list(&self) -> Option<Arc<NaughtyListTracker>> {
565 self.naughty_list.clone()
566 }
552} 567}
553 568
554/// Create a shared RelayHealthTracker wrapped in Arc 569/// Create a shared RelayHealthTracker wrapped in Arc