upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/src/sync/mod.rs
diff options
context:
space:
mode:
author: DanConwayDev <DanConwayDev@protonmail.com> 2026-01-10 01:24:52 +0000
committer: DanConwayDev <DanConwayDev@protonmail.com> 2026-01-10 01:24:52 +0000
commit: c9b3b3bd8a04de139bcb0d0b83bf819c367ee8c8 (patch)
tree: 80cc2c1ec92d71637408a7588d8cb908f03fc4b6 /src/sync/mod.rs
parent: 9369a2885f5a3f9e38c0a3f9fa3af6260513c8e4 (diff)
Implement relay naughty list feature
Add naughty list tracking for relays with persistent infrastructure issues (DNS failures, TLS certificate errors, protocol violations) to reduce log noise and provide better visibility via metrics.

Key features:
- Classify errors into naughty (persistent) vs transient (temporary)
- Track naughty relays with category, reason, and occurrence count
- Log WARN on first naughty occurrence, DEBUG on repeats
- Automatic expiration after 12 hours (configurable)
- Prometheus metrics for monitoring naughty relays by category
- Periodic cleanup task integrated with health checker

Components added:
- src/sync/naughty_list.rs: Core naughty list tracker with error classification
- NaughtyListTracker integration in RelayHealthTracker
- Connection error handling updates in sync manager
- Naughty list metrics (total by category, detailed info per relay)
- Config option for naughty_list_expiration_hours (default: 12)

Closes the tracking issues for DNS lookup failures and TLS certificate errors.
Diffstat (limited to 'src/sync/mod.rs')
-rw-r--r-- src/sync/mod.rs 53
1 files changed, 51 insertions, 2 deletions
diff --git a/src/sync/mod.rs b/src/sync/mod.rs
index 412cd16..8b51fac 100644
--- a/src/sync/mod.rs
+++ b/src/sync/mod.rs
@@ -16,6 +16,7 @@ pub mod algorithms;
16pub mod filters; 16pub mod filters;
17pub mod health; 17pub mod health;
18pub mod metrics; 18pub mod metrics;
19pub mod naughty_list;
19pub mod rejected_index; 20pub mod rejected_index;
20pub mod relay_connection; 21pub mod relay_connection;
21pub mod self_subscriber; 22pub mod self_subscriber;
@@ -483,7 +484,18 @@ async fn run_health_and_metrics_checker(
483 // 2. Check for rate limit recovery 484 // 2. Check for rate limit recovery
484 manager.check_rate_limit_recovery().await; 485 manager.check_rate_limit_recovery().await;
485 486
486 // 3. Update metrics with current health states 487 // 3. Check for naughty list expiration
488 if let Some(naughty_list) = manager.health_tracker.naughty_list() {
489 let recovered = naughty_list.expire_old_entries();
490 for url in recovered {
491 tracing::info!(
492 relay = %url,
493 "Relay removed from naughty list after expiration, will retry"
494 );
495 }
496 }
497
498 // 4. Update metrics with current health states and naughty list
487 if let Some(ref metrics) = manager.metrics { 499 if let Some(ref metrics) = manager.metrics {
488 // Get all tracked relay URLs 500 // Get all tracked relay URLs
489 let relay_urls: Vec<String> = { 501 let relay_urls: Vec<String> = {
@@ -496,6 +508,12 @@ async fn run_health_and_metrics_checker(
496 let state = manager.health_tracker.get_state(&relay_url); 508 let state = manager.health_tracker.get_state(&relay_url);
497 metrics.record_health_state(&relay_url, state); 509 metrics.record_health_state(&relay_url, state);
498 } 510 }
511
512 // Update naughty list metrics
513 if let Some(naughty_list) = manager.health_tracker.naughty_list() {
514 let entries = naughty_list.get_all();
515 metrics.update_naughty_list(entries);
516 }
499 } 517 }
500 } 518 }
501 _ = shutdown_rx.recv() => { 519 _ = shutdown_rx.recv() => {
@@ -2018,7 +2036,38 @@ impl SyncManager {
2018 } 2036 }
2019 } 2037 }
2020 Err(e) => { 2038 Err(e) => {
2021 tracing::error!(relay = %relay_url, error = %e, "Connection failed"); 2039 // Classify error to determine if it's a naughty relay or transient issue
2040 let error_str = e.to_string();
2041
2042 if let Some(category) = naughty_list::NaughtyListTracker::classify_error(&error_str)
2043 {
2044 // Persistent infrastructure issue - use naughty list
2045 if let Some(ref naughty_list) = self.health_tracker.naughty_list() {
2046 let is_new = naughty_list.record(relay_url, category, error_str.clone());
2047
2048 if is_new {
2049 tracing::warn!(
2050 relay = %relay_url,
2051 category = ?category,
2052 error = %e,
2053 "Relay has persistent configuration issue, added to naughty list"
2054 );
2055 } else {
2056 tracing::debug!(
2057 relay = %relay_url,
2058 category = ?category,
2059 "Naughty relay failure (already tracked)"
2060 );
2061 }
2062 }
2063 } else {
2064 // Transient network issue - use existing backoff flow
2065 tracing::debug!(
2066 relay = %relay_url,
2067 error = %e,
2068 "Connection failed (transient issue, backoff active)"
2069 );
2070 }
2022 2071
2023 // 4. Update state back to Disconnected on failure 2072 // 4. Update state back to Disconnected on failure
2024 { 2073 {