diff options
Diffstat (limited to 'src/purgatory/sync/throttle.rs')
| -rw-r--r-- | src/purgatory/sync/throttle.rs | 35 |
1 file changed, 32 insertions, 3 deletions
diff --git a/src/purgatory/sync/throttle.rs b/src/purgatory/sync/throttle.rs
index ad6e8ea..7f8f636 100644
--- a/src/purgatory/sync/throttle.rs
+++ b/src/purgatory/sync/throttle.rs | |||
| @@ -24,6 +24,7 @@ use tracing::debug; | |||
| 24 | 24 | ||
| 25 | use super::context::SyncContext; | 25 | use super::context::SyncContext; |
| 26 | use super::functions::{sync_identifier_from_url, sync_identifier_next_url}; | 26 | use super::functions::{sync_identifier_from_url, sync_identifier_next_url}; |
| 27 | use crate::sync::naughty_list::NaughtyListTracker; | ||
| 27 | 28 | ||
| 28 | /// State for an identifier waiting in a domain's queue. | 29 | /// State for an identifier waiting in a domain's queue. |
| 29 | /// | 30 | /// |
| @@ -265,6 +266,10 @@ pub struct ThrottleManager { | |||
| 265 | /// Sync context for processing queued identifiers. | 266 | /// Sync context for processing queued identifiers. |
| 266 | /// Set once at startup via `set_context()`. | 267 | /// Set once at startup via `set_context()`. |
| 267 | ctx: OnceLock<Arc<dyn SyncContext>>, | 268 | ctx: OnceLock<Arc<dyn SyncContext>>, |
| 269 | |||
| 270 | /// Naughty list tracker for git remote domains with persistent errors. | ||
| 271 | /// Set once at startup via `set_git_naughty_list()`. | ||
| 272 | git_naughty_list: OnceLock<Arc<NaughtyListTracker>>, | ||
| 268 | } | 273 | } |
| 269 | 274 | ||
| 270 | impl ThrottleManager { | 275 | impl ThrottleManager { |
| @@ -279,6 +284,7 @@ impl ThrottleManager { | |||
| 279 | max_concurrent_per_domain: max_concurrent, | 284 | max_concurrent_per_domain: max_concurrent, |
| 280 | max_per_minute_per_domain: max_per_minute, | 285 | max_per_minute_per_domain: max_per_minute, |
| 281 | ctx: OnceLock::new(), | 286 | ctx: OnceLock::new(), |
| 287 | git_naughty_list: OnceLock::new(), | ||
| 282 | } | 288 | } |
| 283 | } | 289 | } |
| 284 | 290 | ||
| @@ -294,6 +300,17 @@ impl ThrottleManager { | |||
| 294 | let _ = self.ctx.set(ctx); | 300 | let _ = self.ctx.set(ctx); |
| 295 | } | 301 | } |
| 296 | 302 | ||
| 303 | /// Set the git naughty list tracker (called once at startup). | ||
| 304 | /// | ||
| 305 | /// The naughty list is used to filter out domains with persistent errors during URL selection. | ||
| 306 | /// It is stored in a `OnceLock`: only the first call takes effect; later calls are silently ignored. | ||
| 307 | /// | ||
| 308 | /// # Arguments | ||
| 309 | /// * `git_naughty_list` - The naughty list tracker for git remote domains | ||
| 310 | pub fn set_git_naughty_list(&self, git_naughty_list: Arc<NaughtyListTracker>) { | ||
| 311 | let _ = self.git_naughty_list.set(git_naughty_list); | ||
| 312 | } | ||
| 313 | |||
| 297 | /// Check if a domain is currently throttled (at capacity). | 314 | /// Check if a domain is currently throttled (at capacity). |
| 298 | /// | 315 | /// |
| 299 | /// Returns true if the domain has no capacity for another request, | 316 | /// Returns true if the domain has no capacity for another request, |
| @@ -479,10 +496,22 @@ impl ThrottleManager { | |||
| 479 | .unwrap_or_default() | 496 | .unwrap_or_default() |
| 480 | }; | 497 | }; |
| 481 | 498 | ||
| 499 | // Get naughty list (should be set at startup) | ||
| 500 | let naughty_list = self | ||
| 501 | .git_naughty_list | ||
| 502 | .get() | ||
| 503 | .expect("git_naughty_list not set"); | ||
| 504 | |||
| 482 | // Get next URL for this identifier on this specific domain | 505 | // Get next URL for this identifier on this specific domain |
| 483 | let url = | 506 | let url = sync_identifier_next_url( |
| 484 | sync_identifier_next_url(ctx.as_ref(), identifier, Some(domain), &tried_urls, self) | 507 | ctx.as_ref(), |
| 485 | .await; | 508 | identifier, |
| 509 | Some(domain), | ||
| 510 | &tried_urls, | ||
| 511 | self, | ||
| 512 | naughty_list.as_ref(), | ||
| 513 | ) | ||
| 514 | .await; | ||
| 486 | 515 | ||
| 487 | match url { | 516 | match url { |
| 488 | Some(url) => { | 517 | Some(url) => { |