From 1b6b669b9b82d1f81b887a32055f19c53d3bb8bf Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Sat, 10 Jan 2026 02:14:01 +0000 Subject: Add naughty list for git remotes with persistent SSL/DNS errors Implement domain-level naughty list tracking for git remotes, reusing the existing NaughtyListTracker from relay sync. This prevents repeated attempts to fetch from git domains with persistent infrastructure issues (SSL/TLS certificate errors, DNS failures). Changes: - Updated NaughtyListTracker to track both relay URLs and git domains - Added git_naughty_list field to RealSyncContext for error classification - Modified fetch_oids() to classify git fetch errors and record naughty domains - Updated sync_identifier_next_url() to skip naughty domains during URL selection when no specific domain is requested - Added git_naughty_list field (set once via set_git_naughty_list()) to ThrottleManager for domain queue processing - Threaded naughty list through start_sync_loop and all sync functions - Updated all tests to pass naughty list parameter The naughty list uses 12-hour expiration (configurable) to allow domains to recover from infrastructure issues. First occurrence logs WARN, repeats log DEBUG. 
--- src/main.rs | 14 +++- src/purgatory/sync/context.rs | 38 +++++++++ src/purgatory/sync/functions.rs | 177 +++++++++++++++++++++++++++++++--------- src/purgatory/sync/loop.rs | 15 +++- src/purgatory/sync/throttle.rs | 35 +++++++- src/sync/naughty_list.rs | 74 +++++++++-------- 6 files changed, 275 insertions(+), 78 deletions(-) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index 5e9e2d0..44545b5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,7 @@ use ngit_grasp::{ metrics::Metrics, nostr, purgatory::{sync::RealSyncContext, sync::ThrottleManager, Purgatory}, - sync::SyncManager, + sync::{naughty_list::NaughtyListTracker, SyncManager}, }; #[tokio::main] @@ -132,23 +132,29 @@ async fn main() -> Result<()> { info!("Expired event cleanup task started (24h interval, keeps 7 days)"); // Start purgatory sync loop for background git data fetching + // Create naughty list tracker for git remote domains with persistent errors (12h expiration) + let git_naughty_list = Arc::new(NaughtyListTracker::with_defaults()); + let sync_ctx = Arc::new(RealSyncContext::new( purgatory.clone(), relay_with_db.database.clone(), PathBuf::from(config.effective_git_data_path()), Some(config.domain.clone()), Some(relay_with_db.relay.clone()), + git_naughty_list.clone(), )); // Create throttle manager for rate limiting remote git servers // Default: 5 concurrent requests per domain, 30 requests per minute per domain let throttle_manager = Arc::new(ThrottleManager::new(5, 30)); throttle_manager.set_context(sync_ctx.clone()); + throttle_manager.set_git_naughty_list(git_naughty_list.clone()); // Start the sync loop - let _sync_loop_handle = purgatory - .clone() - .start_sync_loop(sync_ctx, throttle_manager); + let _sync_loop_handle = + purgatory + .clone() + .start_sync_loop(sync_ctx, throttle_manager, git_naughty_list.clone()); info!("Purgatory sync loop started (1s interval)"); // Setup shutdown handler for purgatory cleanup diff --git a/src/purgatory/sync/context.rs 
b/src/purgatory/sync/context.rs index e61de01..3c2c683 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs @@ -187,6 +187,9 @@ use tracing::debug; use crate::nostr::builder::SharedDatabase; use crate::nostr::events::RepositoryState; use crate::purgatory::Purgatory; +use crate::sync::naughty_list::NaughtyListTracker; + +use super::functions::extract_domain; /// Real implementation of `SyncContext` that connects to actual systems. /// @@ -210,6 +213,9 @@ pub struct RealSyncContext { /// Local relay for notifying WebSocket subscribers local_relay: Option, + + /// Naughty list tracker for git remote domains with persistent errors + git_naughty_list: Arc, } impl RealSyncContext { @@ -221,12 +227,14 @@ impl RealSyncContext { /// * `git_data_path` - Base path for git repositories /// * `our_domain` - Our domain to exclude from clone URLs /// * `local_relay` - Local relay for WebSocket notifications + /// * `git_naughty_list` - Naughty list tracker for git remote domains pub fn new( purgatory: Arc, database: SharedDatabase, git_data_path: PathBuf, our_domain: Option, local_relay: Option, + git_naughty_list: Arc, ) -> Self { Self { purgatory, @@ -234,8 +242,14 @@ impl RealSyncContext { git_data_path, our_domain_value: our_domain, local_relay, + git_naughty_list, } } + + /// Get reference to the git naughty list tracker + pub fn git_naughty_list(&self) -> &Arc { + &self.git_naughty_list + } } #[async_trait] @@ -344,6 +358,7 @@ impl SyncContext for RealSyncContext { let repo_path = repo_path.to_path_buf(); let url = url.to_string(); let missing_oids: Vec = missing.into_iter().cloned().collect(); + let naughty_list = self.git_naughty_list.clone(); tokio::task::spawn_blocking(move || -> Result> { // git fetch ... 
- fetch all OIDs in one command @@ -370,6 +385,29 @@ impl SyncContext for RealSyncContext { } Ok(result) => { let stderr = String::from_utf8_lossy(&result.stderr); + + // Extract domain and classify error for naughty list + if let Some(domain) = extract_domain(&url) { + if let Some(category) = NaughtyListTracker::classify_error(&stderr) { + let is_new = naughty_list.record(&domain, category, stderr.to_string()); + + if is_new { + tracing::warn!( + domain = %domain, + category = %category, + error = %stderr, + "Git remote domain added to naughty list" + ); + } else { + debug!( + domain = %domain, + category = %category, + "Git remote domain still on naughty list" + ); + } + } + } + Err(anyhow::anyhow!("git fetch failed: {}", stderr)) } Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)), diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 0139ac5..65d29af 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs @@ -17,6 +17,7 @@ use tracing::debug; use super::context::SyncContext; use super::throttle::ThrottleManager; +use crate::sync::naughty_list::NaughtyListTracker; /// Extract domain from a URL. /// @@ -29,7 +30,7 @@ use super::throttle::ThrottleManager; /// assert_eq!(extract_domain("http://example.com:8080/repo.git"), Some("example.com".to_string())); /// assert_eq!(extract_domain("git@github.com:foo/bar.git"), None); // SSH URLs not supported /// ``` -fn extract_domain(url: &str) -> Option { +pub(crate) fn extract_domain(url: &str) -> Option { // Simple URL parsing for HTTP(S) URLs // Format: scheme://[user@]host[:port]/path let url = url @@ -57,7 +58,8 @@ fn extract_domain(url: &str) -> Option { /// 2. Checks if there are OIDs still needed /// 3. Gets repository data and extracts clone URLs /// 4. Filters out our own domain and already-tried URLs -/// 5. Returns the first non-throttled URL (when `domain` is None) +/// 5. 
Filters out naughty domains (with persistent SSL/DNS errors) +/// 6. Returns the first non-throttled URL (when `domain` is None) /// or a URL from the specified domain (when `domain` is Some) /// /// # Arguments @@ -68,6 +70,7 @@ fn extract_domain(url: &str) -> Option { /// If None, return any non-throttled URL. /// * `tried_urls` - URLs that have already been tried (will be skipped) /// * `throttle_manager` - Used to check if domains are throttled (when domain is None) +/// * `git_naughty_list` - Used to filter out domains with persistent errors /// /// # Returns /// @@ -79,6 +82,7 @@ pub async fn sync_identifier_next_url( domain: Option<&str>, tried_urls: &HashSet, throttle_manager: &ThrottleManager, + git_naughty_list: &NaughtyListTracker, ) -> Option { // 1. Check if we still have pending events if !ctx.has_pending_events(identifier) { @@ -158,7 +162,7 @@ pub async fn sync_identifier_next_url( .and_then(|urls| urls.iter().find(|url| !tried_urls.contains(*url)).cloned()) } None => { - // Try any non-throttled domain + // Try any non-throttled, non-naughty domain for (d, domain_urls) in &urls_by_domain { if throttle_manager.is_throttled(d) { debug!( @@ -168,6 +172,17 @@ pub async fn sync_identifier_next_url( ); continue; } + + // NEW: Skip naughty domains + if git_naughty_list.is_naughty(d) { + debug!( + identifier = %identifier, + domain = %d, + "Domain is on git naughty list - skipping" + ); + continue; + } + if let Some(url) = domain_urls.iter().find(|url| !tried_urls.contains(*url)) { return Some(url.clone()); } @@ -200,6 +215,7 @@ pub struct ThrottledDomainInfo { /// * `identifier` - The repository identifier /// * `tried_urls` - All URLs that have been tried (across all domains) /// * `throttle_manager` - Used to check which domains are throttled +/// * `git_naughty_list` - Used to filter out domains with persistent errors /// /// # Returns /// @@ -210,6 +226,7 @@ pub async fn get_throttled_domains_with_untried_urls( identifier: &str, tried_urls: &HashSet, 
throttle_manager: &ThrottleManager, + git_naughty_list: &NaughtyListTracker, ) -> Vec { let repo_data = match ctx.fetch_repository_data(identifier).await { Ok(data) => data, @@ -250,6 +267,11 @@ pub async fn get_throttled_domains_with_untried_urls( return None; // Not throttled, skip } + // Skip naughty domains + if git_naughty_list.is_naughty(&domain) { + return None; // On naughty list, skip + } + let untried: Vec<_> = domain_urls .iter() .filter(|url| !tried_urls.contains(*url)) @@ -388,7 +410,7 @@ pub async fn sync_identifier_from_url( /// Sync git data for an identifier. /// /// This is the main orchestration function called by the sync loop. It: -/// 1. Tries all non-throttled URLs in sequence +/// 1. Tries all non-throttled, non-naughty URLs in sequence /// 2. After each fetch, checks if sync is complete (no pending events or no needed OIDs) /// 3. When no non-throttled URLs remain, enqueues with throttled domains for later processing /// 4. Returns without waiting for throttled domains to complete @@ -398,6 +420,7 @@ pub async fn sync_identifier_from_url( /// * `ctx` - The sync context providing repository data and OID information /// * `identifier` - The repository identifier (d-tag value) /// * `throttle_manager` - Used for rate limiting and domain queue management +/// * `git_naughty_list` - Used to filter out domains with persistent errors /// /// # Returns /// @@ -408,6 +431,7 @@ pub async fn sync_identifier( ctx: &C, identifier: &str, throttle_manager: &Arc, + git_naughty_list: &NaughtyListTracker, ) -> bool { let mut tried_urls: HashSet = HashSet::new(); @@ -416,9 +440,18 @@ pub async fn sync_identifier( "Starting sync for identifier" ); - // Try all non-throttled URLs + // Try all non-throttled, non-naughty URLs loop { - match sync_identifier_next_url(ctx, identifier, None, &tried_urls, throttle_manager).await { + match sync_identifier_next_url( + ctx, + identifier, + None, + &tried_urls, + throttle_manager, + git_naughty_list, + ) + .await + { 
Some(url) => { debug!( identifier = %identifier, @@ -481,9 +514,14 @@ pub async fn sync_identifier( } // Enqueue with any throttled domains that have untried URLs - let throttled_domains = - get_throttled_domains_with_untried_urls(ctx, identifier, &tried_urls, throttle_manager) - .await; + let throttled_domains = get_throttled_domains_with_untried_urls( + ctx, + identifier, + &tried_urls, + throttle_manager, + git_naughty_list, + ) + .await; for info in throttled_domains { debug!( @@ -525,15 +563,22 @@ mod tests { // Create throttle manager and throttle github.com let throttle_manager = ThrottleManager::new(1, 100); + let naughty_list = NaughtyListTracker::with_defaults(); // Saturate github.com by starting a request throttle_manager.start_request("github.com"); // Should return gitlab.com URL since github.com is throttled let tried_urls = HashSet::new(); - let result = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) - .await; + let result = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await; assert!(result.is_some()); let url = result.unwrap(); @@ -556,15 +601,22 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = NaughtyListTracker::with_defaults(); // Mark first URL as tried let mut tried_urls = HashSet::new(); tried_urls.insert("https://github.com/foo/bar.git".to_string()); // Should return the second URL - let result = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) - .await; + let result = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await; assert!(result.is_some()); let url = result.unwrap(); @@ -579,11 +631,18 @@ mod tests { .with_pending_events(false); // No pending events let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = 
NaughtyListTracker::with_defaults(); let tried_urls = HashSet::new(); - let result = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) - .await; + let result = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await; assert!(result.is_none()); } @@ -596,11 +655,18 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = NaughtyListTracker::with_defaults(); let tried_urls = HashSet::new(); - let result = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) - .await; + let result = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await; assert!(result.is_none()); } @@ -617,11 +683,18 @@ mod tests { .with_our_domain("our-relay.com"); let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = NaughtyListTracker::with_defaults(); let tried_urls = HashSet::new(); - let result = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) - .await; + let result = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await; assert!(result.is_some()); let url = result.unwrap(); @@ -643,6 +716,7 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = NaughtyListTracker::with_defaults(); let tried_urls = HashSet::new(); // Request specific domain @@ -652,6 +726,7 @@ mod tests { Some("gitlab.com"), &tried_urls, &throttle_manager, + &naughty_list, ) .await; @@ -757,6 +832,7 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(1, 100); + let naughty_list = NaughtyListTracker::with_defaults(); // Throttle github.com and gitlab.com throttle_manager.start_request("github.com"); @@ -771,6 +847,7 @@ mod tests { "test-repo", 
&tried_urls, &throttle_manager, + &naughty_list, ) .await; @@ -803,9 +880,10 @@ mod tests { .url_provides("https://server3.com/repo.git", &["ghi789"]); let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); + let naughty_list = NaughtyListTracker::with_defaults(); // Run sync_identifier - let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; + let complete = sync_identifier(&mock, "test-repo", &throttle_manager, &naughty_list).await; // Should return true (sync complete) assert!(complete, "Expected sync to complete after trying all URLs"); @@ -841,12 +919,13 @@ mod tests { // Note: gitlab.com doesn't provide any OIDs let throttle_manager = Arc::new(ThrottleManager::new(1, 100)); + let naughty_list = NaughtyListTracker::with_defaults(); // Throttle github.com by starting a request throttle_manager.start_request("github.com"); // Run sync_identifier - let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; + let complete = sync_identifier(&mock, "test-repo", &throttle_manager, &naughty_list).await; // Should return false (sync incomplete - github.com is throttled) assert!( @@ -911,23 +990,36 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = NaughtyListTracker::with_defaults(); let tried_urls = HashSet::new(); // Get first URL - let first_url = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) - .await - .expect("Should return a URL"); + let first_url = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await + .expect("Should return a URL"); // Try the first URL let mut tried = HashSet::new(); tried.insert(first_url.clone()); // Get second URL - let second_url = - sync_identifier_next_url(&mock, "test-repo", None, &tried, &throttle_manager) - .await - .expect("Should return a second URL"); + let second_url = sync_identifier_next_url( + &mock, + 
"test-repo", + None, + &tried, + &throttle_manager, + &naughty_list, + ) + .await + .expect("Should return a second URL"); // Both URLs should be available (one from announcement, one from PR) let both_urls = [first_url, second_url]; @@ -955,12 +1047,20 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(5, 100); + let naughty_list = NaughtyListTracker::with_defaults(); let mut tried_urls = HashSet::new(); // Collect all available URLs let mut available_urls = Vec::new(); - while let Some(url) = - sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager).await + while let Some(url) = sync_identifier_next_url( + &mock, + "test-repo", + None, + &tried_urls, + &throttle_manager, + &naughty_list, + ) + .await { available_urls.push(url.clone()); tried_urls.insert(url); @@ -1001,6 +1101,7 @@ mod tests { .with_pending_events(true); let throttle_manager = ThrottleManager::new(1, 100); + let naughty_list = NaughtyListTracker::with_defaults(); // Throttle both domains throttle_manager.start_request("github.com"); @@ -1013,6 +1114,7 @@ mod tests { "test-repo", &tried_urls, &throttle_manager, + &naughty_list, ) .await; @@ -1037,9 +1139,10 @@ mod tests { // Note: github.com doesn't provide any OIDs let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); + let naughty_list = NaughtyListTracker::with_defaults(); // Run sync_identifier - let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; + let complete = sync_identifier(&mock, "test-repo", &throttle_manager, &naughty_list).await; // Should complete successfully using PR clone URL assert!(complete, "Sync should complete using PR clone URL"); diff --git a/src/purgatory/sync/loop.rs b/src/purgatory/sync/loop.rs index 92e0594..1ab229d 100644 --- a/src/purgatory/sync/loop.rs +++ b/src/purgatory/sync/loop.rs @@ -15,6 +15,7 @@ use tokio::task::JoinHandle; use tracing::{debug, info}; use crate::purgatory::Purgatory; +use 
crate::sync::naughty_list::NaughtyListTracker; use super::context::SyncContext; use super::functions::sync_identifier; @@ -37,6 +38,7 @@ impl Purgatory { /// # Arguments /// * `ctx` - The sync context providing repository data and fetch capabilities /// * `throttle_manager` - Used for rate limiting and domain queue management + /// * `git_naughty_list` - Tracker for git remote domains with persistent errors /// /// # Returns /// A `JoinHandle` for the background task (can be used to cancel the loop) @@ -47,12 +49,13 @@ impl Purgatory { /// let purgatory = Arc::new(Purgatory::new("/data/git")); /// let ctx = Arc::new(RealSyncContext::new(...)); /// let throttle_manager = Arc::new(ThrottleManager::new(5, 30)); + /// let git_naughty_list = Arc::new(NaughtyListTracker::with_defaults()); /// /// // Set context on throttle manager for queue processing /// throttle_manager.set_context(ctx.clone()); /// /// // Start the sync loop - /// let handle = purgatory.start_sync_loop(ctx, throttle_manager); + /// let handle = purgatory.start_sync_loop(ctx, throttle_manager, git_naughty_list); /// /// // Later, to stop the loop: /// handle.abort(); @@ -61,6 +64,7 @@ impl Purgatory { self: Arc, ctx: Arc, throttle_manager: Arc, + git_naughty_list: Arc, ) -> JoinHandle<()> { info!( "Starting purgatory sync loop (interval: {:?})", @@ -121,6 +125,7 @@ impl Purgatory { let purgatory = self.clone(); let ctx = ctx.clone(); let throttle_manager = throttle_manager.clone(); + let git_naughty_list = git_naughty_list.clone(); let id = identifier.clone(); tokio::spawn(async move { @@ -129,7 +134,13 @@ impl Purgatory { "Starting sync task for identifier" ); - let complete = sync_identifier(ctx.as_ref(), &id, &throttle_manager).await; + let complete = sync_identifier( + ctx.as_ref(), + &id, + &throttle_manager, + git_naughty_list.as_ref(), + ) + .await; // Check final state and update queue if complete || !purgatory.has_pending_events(&id) { diff --git a/src/purgatory/sync/throttle.rs 
b/src/purgatory/sync/throttle.rs index ad6e8ea..7f8f636 100644 --- a/src/purgatory/sync/throttle.rs +++ b/src/purgatory/sync/throttle.rs @@ -24,6 +24,7 @@ use tracing::debug; use super::context::SyncContext; use super::functions::{sync_identifier_from_url, sync_identifier_next_url}; +use crate::sync::naughty_list::NaughtyListTracker; /// State for an identifier waiting in a domain's queue. /// @@ -265,6 +266,10 @@ pub struct ThrottleManager { /// Sync context for processing queued identifiers. /// Set once at startup via `set_context()`. ctx: OnceLock>, + + /// Naughty list tracker for git remote domains with persistent errors. + /// Set once at startup via `set_git_naughty_list()`. + git_naughty_list: OnceLock>, } impl ThrottleManager { @@ -279,6 +284,7 @@ impl ThrottleManager { max_concurrent_per_domain: max_concurrent, max_per_minute_per_domain: max_per_minute, ctx: OnceLock::new(), + git_naughty_list: OnceLock::new(), } } @@ -294,6 +300,17 @@ impl ThrottleManager { let _ = self.ctx.set(ctx); } + /// Set the git naughty list tracker (called once at startup). + /// + /// The naughty list is used to filter out domains with persistent errors + /// during URL selection. + /// + /// # Arguments + /// * `git_naughty_list` - The naughty list tracker + pub fn set_git_naughty_list(&self, git_naughty_list: Arc) { + let _ = self.git_naughty_list.set(git_naughty_list); + } + /// Check if a domain is currently throttled (at capacity). 
/// /// Returns true if the domain has no capacity for another request, @@ -479,10 +496,22 @@ impl ThrottleManager { .unwrap_or_default() }; + // Get naughty list (should be set at startup) + let naughty_list = self + .git_naughty_list + .get() + .expect("git_naughty_list not set"); + // Get next URL for this identifier on this specific domain - let url = - sync_identifier_next_url(ctx.as_ref(), identifier, Some(domain), &tried_urls, self) - .await; + let url = sync_identifier_next_url( + ctx.as_ref(), + identifier, + Some(domain), + &tried_urls, + self, + naughty_list.as_ref(), + ) + .await; match url { Some(url) => { diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 311b9bb..35fcc0f 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -1,8 +1,9 @@ -//! Naughty List Tracker for Relays with Persistent Infrastructure Issues +//! Naughty List Tracker for Remote Servers with Persistent Infrastructure Issues //! -//! This module tracks relays with persistent configuration/infrastructure problems -//! (DNS failures, TLS certificate errors, protocol violations) separately from -//! transient network issues (timeouts, connection refused). +//! This module tracks remote servers (Nostr relays and git remote domains) with +//! persistent configuration/infrastructure problems (DNS failures, TLS certificate +//! errors, protocol violations) separately from transient network issues (timeouts, +//! connection refused). //! //! ## Failure Classification //! 
@@ -23,14 +24,14 @@ use dashmap::DashMap; use std::time::Instant; -/// Category of persistent relay failure that qualifies for the naughty list +/// Category of persistent remote server failure that qualifies for the naughty list #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NaughtyCategory { /// DNS lookup failures (domain doesn't resolve) DnsLookupFailed, /// TLS certificate errors (expired, invalid, mismatch) TlsCertificateInvalid, - /// WebSocket or Nostr protocol violations + /// WebSocket or Nostr protocol violations (relay-specific, won't trigger for git) ProtocolError, } @@ -51,7 +52,7 @@ impl std::fmt::Display for NaughtyCategory { } } -/// Naughty list entry for a relay with persistent issues +/// Naughty list entry for a remote server (relay URL or git domain) with persistent issues #[derive(Debug, Clone)] pub struct NaughtyEntry { /// Category of the persistent failure @@ -66,15 +67,19 @@ pub struct NaughtyEntry { pub occurrence_count: u32, } -/// Tracks relays with persistent infrastructure/configuration issues +/// Tracks remote servers with persistent infrastructure/configuration issues +/// +/// Used for both: +/// - Nostr relay URLs (e.g., "wss://relay.example.com") +/// - Git remote domains (e.g., "git.example.com") /// /// Separate from HealthTracker's backoff logic - this is specifically for -/// relays with configuration problems that are unlikely to be fixed quickly. +/// servers with configuration problems that are unlikely to be fixed quickly. 
#[derive(Debug)] pub struct NaughtyListTracker { - /// Map of relay URL to naughty entry + /// Map of relay URL or git domain to naughty entry entries: DashMap, - /// How many hours before removing a relay from the naughty list + /// How many hours before removing a server from the naughty list expiration_hours: u64, } @@ -147,21 +152,26 @@ impl NaughtyListTracker { None } - /// Record a naughty relay (adds new entry or updates existing) + /// Record a naughty server (adds new entry or updates existing) /// /// # Arguments /// - /// * `relay_url` - The relay URL + /// * `server_url_or_domain` - The relay URL or git domain /// * `category` - The naughty category /// * `reason` - The full error message /// /// # Returns /// /// `true` if this is a new naughty entry (first occurrence), `false` if updating existing - pub fn record(&self, relay_url: &str, category: NaughtyCategory, reason: String) -> bool { + pub fn record( + &self, + server_url_or_domain: &str, + category: NaughtyCategory, + reason: String, + ) -> bool { let now = Instant::now(); - if let Some(mut entry) = self.entries.get_mut(relay_url) { + if let Some(mut entry) = self.entries.get_mut(server_url_or_domain) { // Update existing entry entry.last_seen = now; entry.occurrence_count = entry.occurrence_count.saturating_add(1); @@ -170,7 +180,7 @@ impl NaughtyListTracker { } else { // Create new entry self.entries.insert( - relay_url.to_string(), + server_url_or_domain.to_string(), NaughtyEntry { category, reason, @@ -183,17 +193,17 @@ impl NaughtyListTracker { } } - /// Check if a relay is on the naughty list (not expired) + /// Check if a server is on the naughty list (not expired) /// /// # Arguments /// - /// * `relay_url` - The relay URL to check + /// * `server_url_or_domain` - The relay URL or git domain to check /// /// # Returns /// - /// `true` if the relay is currently on the naughty list - pub fn is_naughty(&self, relay_url: &str) -> bool { - if let Some(entry) = self.entries.get(relay_url) { + 
/// `true` if the server is currently on the naughty list + pub fn is_naughty(&self, server_url_or_domain: &str) -> bool { + if let Some(entry) = self.entries.get(server_url_or_domain) { let age = Instant::now().duration_since(entry.first_seen); let expiration = std::time::Duration::from_secs(self.expiration_hours * 3600); age < expiration @@ -206,23 +216,23 @@ impl NaughtyListTracker { /// /// # Arguments /// - /// * `relay_url` - The relay URL to look up + /// * `server_url_or_domain` - The relay URL or git domain to look up /// /// # Returns /// - /// A cloned `NaughtyEntry` if the relay is on the naughty list and not expired - pub fn get_entry(&self, relay_url: &str) -> Option { - self.entries.get(relay_url).map(|e| e.clone()) + /// A cloned `NaughtyEntry` if one is stored for the server (expiry is not checked here) + pub fn get_entry(&self, server_url_or_domain: &str) -> Option { + self.entries.get(server_url_or_domain).map(|e| e.clone()) } /// Remove expired entries from the naughty list /// - /// Entries older than `expiration_hours` are removed to allow relays + /// Entries older than `expiration_hours` are removed to allow servers /// to be retried after infrastructure issues are potentially fixed. 
/// /// # Returns /// - /// Vector of relay URLs that were removed from the naughty list + /// Vector of server URLs/domains that were removed from the naughty list pub fn expire_old_entries(&self) -> Vec { let now = Instant::now(); let expiration = std::time::Duration::from_secs(self.expiration_hours * 3600); @@ -242,11 +252,11 @@ impl NaughtyListTracker { expired } - /// Get all naughty relays (for metrics and monitoring) + /// Get all naughty servers (for metrics and monitoring) /// /// # Returns /// - /// Vector of (relay_url, entry) tuples for all relays currently on the naughty list + /// Vector of (server_url_or_domain, entry) tuples for all servers currently on the naughty list pub fn get_all(&self) -> Vec<(String, NaughtyEntry)> { self.entries .iter() @@ -254,7 +264,7 @@ impl NaughtyListTracker { .collect() } - /// Get the count of relays in a specific category + /// Get the count of servers in a specific category /// /// # Arguments /// @@ -262,7 +272,7 @@ impl NaughtyListTracker { /// /// # Returns /// - /// Number of relays in the specified category + /// Number of servers in the specified category pub fn count_by_category(&self, category: NaughtyCategory) -> usize { self.entries .iter() @@ -270,7 +280,7 @@ impl NaughtyListTracker { .count() } - /// Get total number of relays on the naughty list + /// Get total number of servers on the naughty list pub fn total_count(&self) -> usize { self.entries.len() } -- cgit v1.2.3