diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-10 02:14:01 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-10 02:15:22 +0000 |
| commit | 1b6b669b9b82d1f81b887a32055f19c53d3bb8bf (patch) | |
| tree | 3ce1785757cb3f16dfa30d74557042973d3bf53f /src/purgatory/sync/functions.rs | |
| parent | 730f430c906c6c2d43ea8f2e5fc3b408a3de128b (diff) | |
Add naughty list for git remotes with persistent SSL/DNS errors
Implement domain-level naughty list tracking for git remotes, reusing the
existing NaughtyListTracker from relay sync. This prevents repeated attempts
to fetch from git domains with persistent infrastructure issues (SSL/TLS
certificate errors, DNS failures).
Changes:
- Updated NaughtyListTracker to track both relay URLs and git domains
- Added git_naughty_list field to RealSyncContext for error classification
- Modified fetch_oids() to classify git fetch errors and record naughty domains
- Updated sync_identifier_next_url() to filter out naughty domains during URL selection
- Added git_naughty_list parameter to ThrottleManager for domain queue processing
- Threaded naughty list through start_sync_loop and all sync functions
- Updated all tests to pass naughty list parameter
The naughty list uses a 12-hour expiration (configurable) to allow domains to
recover from infrastructure issues. The first occurrence is logged at WARN; repeats are logged at DEBUG.
Diffstat (limited to 'src/purgatory/sync/functions.rs')
| -rw-r--r-- | src/purgatory/sync/functions.rs | 177 |
1 file changed, 140 insertions, 37 deletions
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 0139ac5..65d29af 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs | |||
| @@ -17,6 +17,7 @@ use tracing::debug; | |||
| 17 | 17 | ||
| 18 | use super::context::SyncContext; | 18 | use super::context::SyncContext; |
| 19 | use super::throttle::ThrottleManager; | 19 | use super::throttle::ThrottleManager; |
| 20 | use crate::sync::naughty_list::NaughtyListTracker; | ||
| 20 | 21 | ||
| 21 | /// Extract domain from a URL. | 22 | /// Extract domain from a URL. |
| 22 | /// | 23 | /// |
| @@ -29,7 +30,7 @@ use super::throttle::ThrottleManager; | |||
| 29 | /// assert_eq!(extract_domain("http://example.com:8080/repo.git"), Some("example.com".to_string())); | 30 | /// assert_eq!(extract_domain("http://example.com:8080/repo.git"), Some("example.com".to_string())); |
| 30 | /// assert_eq!(extract_domain("git@github.com:foo/bar.git"), None); // SSH URLs not supported | 31 | /// assert_eq!(extract_domain("git@github.com:foo/bar.git"), None); // SSH URLs not supported |
| 31 | /// ``` | 32 | /// ``` |
| 32 | fn extract_domain(url: &str) -> Option<String> { | 33 | pub(crate) fn extract_domain(url: &str) -> Option<String> { |
| 33 | // Simple URL parsing for HTTP(S) URLs | 34 | // Simple URL parsing for HTTP(S) URLs |
| 34 | // Format: scheme://[user@]host[:port]/path | 35 | // Format: scheme://[user@]host[:port]/path |
| 35 | let url = url | 36 | let url = url |
| @@ -57,7 +58,8 @@ fn extract_domain(url: &str) -> Option<String> { | |||
| 57 | /// 2. Checks if there are OIDs still needed | 58 | /// 2. Checks if there are OIDs still needed |
| 58 | /// 3. Gets repository data and extracts clone URLs | 59 | /// 3. Gets repository data and extracts clone URLs |
| 59 | /// 4. Filters out our own domain and already-tried URLs | 60 | /// 4. Filters out our own domain and already-tried URLs |
| 60 | /// 5. Returns the first non-throttled URL (when `domain` is None) | 61 | /// 5. Filters out naughty domains (with persistent SSL/DNS errors) |
| 62 | /// 6. Returns the first non-throttled URL (when `domain` is None) | ||
| 61 | /// or a URL from the specified domain (when `domain` is Some) | 63 | /// or a URL from the specified domain (when `domain` is Some) |
| 62 | /// | 64 | /// |
| 63 | /// # Arguments | 65 | /// # Arguments |
| @@ -68,6 +70,7 @@ fn extract_domain(url: &str) -> Option<String> { | |||
| 68 | /// If None, return any non-throttled URL. | 70 | /// If None, return any non-throttled URL. |
| 69 | /// * `tried_urls` - URLs that have already been tried (will be skipped) | 71 | /// * `tried_urls` - URLs that have already been tried (will be skipped) |
| 70 | /// * `throttle_manager` - Used to check if domains are throttled (when domain is None) | 72 | /// * `throttle_manager` - Used to check if domains are throttled (when domain is None) |
| 73 | /// * `git_naughty_list` - Used to filter out domains with persistent errors | ||
| 71 | /// | 74 | /// |
| 72 | /// # Returns | 75 | /// # Returns |
| 73 | /// | 76 | /// |
| @@ -79,6 +82,7 @@ pub async fn sync_identifier_next_url<C: SyncContext + ?Sized>( | |||
| 79 | domain: Option<&str>, | 82 | domain: Option<&str>, |
| 80 | tried_urls: &HashSet<String>, | 83 | tried_urls: &HashSet<String>, |
| 81 | throttle_manager: &ThrottleManager, | 84 | throttle_manager: &ThrottleManager, |
| 85 | git_naughty_list: &NaughtyListTracker, | ||
| 82 | ) -> Option<String> { | 86 | ) -> Option<String> { |
| 83 | // 1. Check if we still have pending events | 87 | // 1. Check if we still have pending events |
| 84 | if !ctx.has_pending_events(identifier) { | 88 | if !ctx.has_pending_events(identifier) { |
| @@ -158,7 +162,7 @@ pub async fn sync_identifier_next_url<C: SyncContext + ?Sized>( | |||
| 158 | .and_then(|urls| urls.iter().find(|url| !tried_urls.contains(*url)).cloned()) | 162 | .and_then(|urls| urls.iter().find(|url| !tried_urls.contains(*url)).cloned()) |
| 159 | } | 163 | } |
| 160 | None => { | 164 | None => { |
| 161 | // Try any non-throttled domain | 165 | // Try any non-throttled, non-naughty domain |
| 162 | for (d, domain_urls) in &urls_by_domain { | 166 | for (d, domain_urls) in &urls_by_domain { |
| 163 | if throttle_manager.is_throttled(d) { | 167 | if throttle_manager.is_throttled(d) { |
| 164 | debug!( | 168 | debug!( |
| @@ -168,6 +172,17 @@ pub async fn sync_identifier_next_url<C: SyncContext + ?Sized>( | |||
| 168 | ); | 172 | ); |
| 169 | continue; | 173 | continue; |
| 170 | } | 174 | } |
| 175 | |||
| 176 | // NEW: Skip naughty domains | ||
| 177 | if git_naughty_list.is_naughty(d) { | ||
| 178 | debug!( | ||
| 179 | identifier = %identifier, | ||
| 180 | domain = %d, | ||
| 181 | "Domain is on git naughty list - skipping" | ||
| 182 | ); | ||
| 183 | continue; | ||
| 184 | } | ||
| 185 | |||
| 171 | if let Some(url) = domain_urls.iter().find(|url| !tried_urls.contains(*url)) { | 186 | if let Some(url) = domain_urls.iter().find(|url| !tried_urls.contains(*url)) { |
| 172 | return Some(url.clone()); | 187 | return Some(url.clone()); |
| 173 | } | 188 | } |
| @@ -200,6 +215,7 @@ pub struct ThrottledDomainInfo { | |||
| 200 | /// * `identifier` - The repository identifier | 215 | /// * `identifier` - The repository identifier |
| 201 | /// * `tried_urls` - All URLs that have been tried (across all domains) | 216 | /// * `tried_urls` - All URLs that have been tried (across all domains) |
| 202 | /// * `throttle_manager` - Used to check which domains are throttled | 217 | /// * `throttle_manager` - Used to check which domains are throttled |
| 218 | /// * `git_naughty_list` - Used to filter out domains with persistent errors | ||
| 203 | /// | 219 | /// |
| 204 | /// # Returns | 220 | /// # Returns |
| 205 | /// | 221 | /// |
| @@ -210,6 +226,7 @@ pub async fn get_throttled_domains_with_untried_urls<C: SyncContext + ?Sized>( | |||
| 210 | identifier: &str, | 226 | identifier: &str, |
| 211 | tried_urls: &HashSet<String>, | 227 | tried_urls: &HashSet<String>, |
| 212 | throttle_manager: &ThrottleManager, | 228 | throttle_manager: &ThrottleManager, |
| 229 | git_naughty_list: &NaughtyListTracker, | ||
| 213 | ) -> Vec<ThrottledDomainInfo> { | 230 | ) -> Vec<ThrottledDomainInfo> { |
| 214 | let repo_data = match ctx.fetch_repository_data(identifier).await { | 231 | let repo_data = match ctx.fetch_repository_data(identifier).await { |
| 215 | Ok(data) => data, | 232 | Ok(data) => data, |
| @@ -250,6 +267,11 @@ pub async fn get_throttled_domains_with_untried_urls<C: SyncContext + ?Sized>( | |||
| 250 | return None; // Not throttled, skip | 267 | return None; // Not throttled, skip |
| 251 | } | 268 | } |
| 252 | 269 | ||
| 270 | // Skip naughty domains | ||
| 271 | if git_naughty_list.is_naughty(&domain) { | ||
| 272 | return None; // On naughty list, skip | ||
| 273 | } | ||
| 274 | |||
| 253 | let untried: Vec<_> = domain_urls | 275 | let untried: Vec<_> = domain_urls |
| 254 | .iter() | 276 | .iter() |
| 255 | .filter(|url| !tried_urls.contains(*url)) | 277 | .filter(|url| !tried_urls.contains(*url)) |
| @@ -388,7 +410,7 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 388 | /// Sync git data for an identifier. | 410 | /// Sync git data for an identifier. |
| 389 | /// | 411 | /// |
| 390 | /// This is the main orchestration function called by the sync loop. It: | 412 | /// This is the main orchestration function called by the sync loop. It: |
| 391 | /// 1. Tries all non-throttled URLs in sequence | 413 | /// 1. Tries all non-throttled, non-naughty URLs in sequence |
| 392 | /// 2. After each fetch, checks if sync is complete (no pending events or no needed OIDs) | 414 | /// 2. After each fetch, checks if sync is complete (no pending events or no needed OIDs) |
| 393 | /// 3. When no non-throttled URLs remain, enqueues with throttled domains for later processing | 415 | /// 3. When no non-throttled URLs remain, enqueues with throttled domains for later processing |
| 394 | /// 4. Returns without waiting for throttled domains to complete | 416 | /// 4. Returns without waiting for throttled domains to complete |
| @@ -398,6 +420,7 @@ pub async fn sync_identifier_from_url<C: SyncContext + ?Sized>( | |||
| 398 | /// * `ctx` - The sync context providing repository data and OID information | 420 | /// * `ctx` - The sync context providing repository data and OID information |
| 399 | /// * `identifier` - The repository identifier (d-tag value) | 421 | /// * `identifier` - The repository identifier (d-tag value) |
| 400 | /// * `throttle_manager` - Used for rate limiting and domain queue management | 422 | /// * `throttle_manager` - Used for rate limiting and domain queue management |
| 423 | /// * `git_naughty_list` - Used to filter out domains with persistent errors | ||
| 401 | /// | 424 | /// |
| 402 | /// # Returns | 425 | /// # Returns |
| 403 | /// | 426 | /// |
| @@ -408,6 +431,7 @@ pub async fn sync_identifier<C: SyncContext + ?Sized>( | |||
| 408 | ctx: &C, | 431 | ctx: &C, |
| 409 | identifier: &str, | 432 | identifier: &str, |
| 410 | throttle_manager: &Arc<ThrottleManager>, | 433 | throttle_manager: &Arc<ThrottleManager>, |
| 434 | git_naughty_list: &NaughtyListTracker, | ||
| 411 | ) -> bool { | 435 | ) -> bool { |
| 412 | let mut tried_urls: HashSet<String> = HashSet::new(); | 436 | let mut tried_urls: HashSet<String> = HashSet::new(); |
| 413 | 437 | ||
| @@ -416,9 +440,18 @@ pub async fn sync_identifier<C: SyncContext + ?Sized>( | |||
| 416 | "Starting sync for identifier" | 440 | "Starting sync for identifier" |
| 417 | ); | 441 | ); |
| 418 | 442 | ||
| 419 | // Try all non-throttled URLs | 443 | // Try all non-throttled, non-naughty URLs |
| 420 | loop { | 444 | loop { |
| 421 | match sync_identifier_next_url(ctx, identifier, None, &tried_urls, throttle_manager).await { | 445 | match sync_identifier_next_url( |
| 446 | ctx, | ||
| 447 | identifier, | ||
| 448 | None, | ||
| 449 | &tried_urls, | ||
| 450 | throttle_manager, | ||
| 451 | git_naughty_list, | ||
| 452 | ) | ||
| 453 | .await | ||
| 454 | { | ||
| 422 | Some(url) => { | 455 | Some(url) => { |
| 423 | debug!( | 456 | debug!( |
| 424 | identifier = %identifier, | 457 | identifier = %identifier, |
| @@ -481,9 +514,14 @@ pub async fn sync_identifier<C: SyncContext + ?Sized>( | |||
| 481 | } | 514 | } |
| 482 | 515 | ||
| 483 | // Enqueue with any throttled domains that have untried URLs | 516 | // Enqueue with any throttled domains that have untried URLs |
| 484 | let throttled_domains = | 517 | let throttled_domains = get_throttled_domains_with_untried_urls( |
| 485 | get_throttled_domains_with_untried_urls(ctx, identifier, &tried_urls, throttle_manager) | 518 | ctx, |
| 486 | .await; | 519 | identifier, |
| 520 | &tried_urls, | ||
| 521 | throttle_manager, | ||
| 522 | git_naughty_list, | ||
| 523 | ) | ||
| 524 | .await; | ||
| 487 | 525 | ||
| 488 | for info in throttled_domains { | 526 | for info in throttled_domains { |
| 489 | debug!( | 527 | debug!( |
| @@ -525,15 +563,22 @@ mod tests { | |||
| 525 | 563 | ||
| 526 | // Create throttle manager and throttle github.com | 564 | // Create throttle manager and throttle github.com |
| 527 | let throttle_manager = ThrottleManager::new(1, 100); | 565 | let throttle_manager = ThrottleManager::new(1, 100); |
| 566 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 528 | 567 | ||
| 529 | // Saturate github.com by starting a request | 568 | // Saturate github.com by starting a request |
| 530 | throttle_manager.start_request("github.com"); | 569 | throttle_manager.start_request("github.com"); |
| 531 | 570 | ||
| 532 | // Should return gitlab.com URL since github.com is throttled | 571 | // Should return gitlab.com URL since github.com is throttled |
| 533 | let tried_urls = HashSet::new(); | 572 | let tried_urls = HashSet::new(); |
| 534 | let result = | 573 | let result = sync_identifier_next_url( |
| 535 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | 574 | &mock, |
| 536 | .await; | 575 | "test-repo", |
| 576 | None, | ||
| 577 | &tried_urls, | ||
| 578 | &throttle_manager, | ||
| 579 | &naughty_list, | ||
| 580 | ) | ||
| 581 | .await; | ||
| 537 | 582 | ||
| 538 | assert!(result.is_some()); | 583 | assert!(result.is_some()); |
| 539 | let url = result.unwrap(); | 584 | let url = result.unwrap(); |
| @@ -556,15 +601,22 @@ mod tests { | |||
| 556 | .with_pending_events(true); | 601 | .with_pending_events(true); |
| 557 | 602 | ||
| 558 | let throttle_manager = ThrottleManager::new(5, 100); | 603 | let throttle_manager = ThrottleManager::new(5, 100); |
| 604 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 559 | 605 | ||
| 560 | // Mark first URL as tried | 606 | // Mark first URL as tried |
| 561 | let mut tried_urls = HashSet::new(); | 607 | let mut tried_urls = HashSet::new(); |
| 562 | tried_urls.insert("https://github.com/foo/bar.git".to_string()); | 608 | tried_urls.insert("https://github.com/foo/bar.git".to_string()); |
| 563 | 609 | ||
| 564 | // Should return the second URL | 610 | // Should return the second URL |
| 565 | let result = | 611 | let result = sync_identifier_next_url( |
| 566 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | 612 | &mock, |
| 567 | .await; | 613 | "test-repo", |
| 614 | None, | ||
| 615 | &tried_urls, | ||
| 616 | &throttle_manager, | ||
| 617 | &naughty_list, | ||
| 618 | ) | ||
| 619 | .await; | ||
| 568 | 620 | ||
| 569 | assert!(result.is_some()); | 621 | assert!(result.is_some()); |
| 570 | let url = result.unwrap(); | 622 | let url = result.unwrap(); |
| @@ -579,11 +631,18 @@ mod tests { | |||
| 579 | .with_pending_events(false); // No pending events | 631 | .with_pending_events(false); // No pending events |
| 580 | 632 | ||
| 581 | let throttle_manager = ThrottleManager::new(5, 100); | 633 | let throttle_manager = ThrottleManager::new(5, 100); |
| 634 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 582 | let tried_urls = HashSet::new(); | 635 | let tried_urls = HashSet::new(); |
| 583 | 636 | ||
| 584 | let result = | 637 | let result = sync_identifier_next_url( |
| 585 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | 638 | &mock, |
| 586 | .await; | 639 | "test-repo", |
| 640 | None, | ||
| 641 | &tried_urls, | ||
| 642 | &throttle_manager, | ||
| 643 | &naughty_list, | ||
| 644 | ) | ||
| 645 | .await; | ||
| 587 | 646 | ||
| 588 | assert!(result.is_none()); | 647 | assert!(result.is_none()); |
| 589 | } | 648 | } |
| @@ -596,11 +655,18 @@ mod tests { | |||
| 596 | .with_pending_events(true); | 655 | .with_pending_events(true); |
| 597 | 656 | ||
| 598 | let throttle_manager = ThrottleManager::new(5, 100); | 657 | let throttle_manager = ThrottleManager::new(5, 100); |
| 658 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 599 | let tried_urls = HashSet::new(); | 659 | let tried_urls = HashSet::new(); |
| 600 | 660 | ||
| 601 | let result = | 661 | let result = sync_identifier_next_url( |
| 602 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | 662 | &mock, |
| 603 | .await; | 663 | "test-repo", |
| 664 | None, | ||
| 665 | &tried_urls, | ||
| 666 | &throttle_manager, | ||
| 667 | &naughty_list, | ||
| 668 | ) | ||
| 669 | .await; | ||
| 604 | 670 | ||
| 605 | assert!(result.is_none()); | 671 | assert!(result.is_none()); |
| 606 | } | 672 | } |
| @@ -617,11 +683,18 @@ mod tests { | |||
| 617 | .with_our_domain("our-relay.com"); | 683 | .with_our_domain("our-relay.com"); |
| 618 | 684 | ||
| 619 | let throttle_manager = ThrottleManager::new(5, 100); | 685 | let throttle_manager = ThrottleManager::new(5, 100); |
| 686 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 620 | let tried_urls = HashSet::new(); | 687 | let tried_urls = HashSet::new(); |
| 621 | 688 | ||
| 622 | let result = | 689 | let result = sync_identifier_next_url( |
| 623 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | 690 | &mock, |
| 624 | .await; | 691 | "test-repo", |
| 692 | None, | ||
| 693 | &tried_urls, | ||
| 694 | &throttle_manager, | ||
| 695 | &naughty_list, | ||
| 696 | ) | ||
| 697 | .await; | ||
| 625 | 698 | ||
| 626 | assert!(result.is_some()); | 699 | assert!(result.is_some()); |
| 627 | let url = result.unwrap(); | 700 | let url = result.unwrap(); |
| @@ -643,6 +716,7 @@ mod tests { | |||
| 643 | .with_pending_events(true); | 716 | .with_pending_events(true); |
| 644 | 717 | ||
| 645 | let throttle_manager = ThrottleManager::new(5, 100); | 718 | let throttle_manager = ThrottleManager::new(5, 100); |
| 719 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 646 | let tried_urls = HashSet::new(); | 720 | let tried_urls = HashSet::new(); |
| 647 | 721 | ||
| 648 | // Request specific domain | 722 | // Request specific domain |
| @@ -652,6 +726,7 @@ mod tests { | |||
| 652 | Some("gitlab.com"), | 726 | Some("gitlab.com"), |
| 653 | &tried_urls, | 727 | &tried_urls, |
| 654 | &throttle_manager, | 728 | &throttle_manager, |
| 729 | &naughty_list, | ||
| 655 | ) | 730 | ) |
| 656 | .await; | 731 | .await; |
| 657 | 732 | ||
| @@ -757,6 +832,7 @@ mod tests { | |||
| 757 | .with_pending_events(true); | 832 | .with_pending_events(true); |
| 758 | 833 | ||
| 759 | let throttle_manager = ThrottleManager::new(1, 100); | 834 | let throttle_manager = ThrottleManager::new(1, 100); |
| 835 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 760 | 836 | ||
| 761 | // Throttle github.com and gitlab.com | 837 | // Throttle github.com and gitlab.com |
| 762 | throttle_manager.start_request("github.com"); | 838 | throttle_manager.start_request("github.com"); |
| @@ -771,6 +847,7 @@ mod tests { | |||
| 771 | "test-repo", | 847 | "test-repo", |
| 772 | &tried_urls, | 848 | &tried_urls, |
| 773 | &throttle_manager, | 849 | &throttle_manager, |
| 850 | &naughty_list, | ||
| 774 | ) | 851 | ) |
| 775 | .await; | 852 | .await; |
| 776 | 853 | ||
| @@ -803,9 +880,10 @@ mod tests { | |||
| 803 | .url_provides("https://server3.com/repo.git", &["ghi789"]); | 880 | .url_provides("https://server3.com/repo.git", &["ghi789"]); |
| 804 | 881 | ||
| 805 | let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); | 882 | let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); |
| 883 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 806 | 884 | ||
| 807 | // Run sync_identifier | 885 | // Run sync_identifier |
| 808 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; | 886 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager, &naughty_list).await; |
| 809 | 887 | ||
| 810 | // Should return true (sync complete) | 888 | // Should return true (sync complete) |
| 811 | assert!(complete, "Expected sync to complete after trying all URLs"); | 889 | assert!(complete, "Expected sync to complete after trying all URLs"); |
| @@ -841,12 +919,13 @@ mod tests { | |||
| 841 | // Note: gitlab.com doesn't provide any OIDs | 919 | // Note: gitlab.com doesn't provide any OIDs |
| 842 | 920 | ||
| 843 | let throttle_manager = Arc::new(ThrottleManager::new(1, 100)); | 921 | let throttle_manager = Arc::new(ThrottleManager::new(1, 100)); |
| 922 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 844 | 923 | ||
| 845 | // Throttle github.com by starting a request | 924 | // Throttle github.com by starting a request |
| 846 | throttle_manager.start_request("github.com"); | 925 | throttle_manager.start_request("github.com"); |
| 847 | 926 | ||
| 848 | // Run sync_identifier | 927 | // Run sync_identifier |
| 849 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; | 928 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager, &naughty_list).await; |
| 850 | 929 | ||
| 851 | // Should return false (sync incomplete - github.com is throttled) | 930 | // Should return false (sync incomplete - github.com is throttled) |
| 852 | assert!( | 931 | assert!( |
| @@ -911,23 +990,36 @@ mod tests { | |||
| 911 | .with_pending_events(true); | 990 | .with_pending_events(true); |
| 912 | 991 | ||
| 913 | let throttle_manager = ThrottleManager::new(5, 100); | 992 | let throttle_manager = ThrottleManager::new(5, 100); |
| 993 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 914 | let tried_urls = HashSet::new(); | 994 | let tried_urls = HashSet::new(); |
| 915 | 995 | ||
| 916 | // Get first URL | 996 | // Get first URL |
| 917 | let first_url = | 997 | let first_url = sync_identifier_next_url( |
| 918 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | 998 | &mock, |
| 919 | .await | 999 | "test-repo", |
| 920 | .expect("Should return a URL"); | 1000 | None, |
| 1001 | &tried_urls, | ||
| 1002 | &throttle_manager, | ||
| 1003 | &naughty_list, | ||
| 1004 | ) | ||
| 1005 | .await | ||
| 1006 | .expect("Should return a URL"); | ||
| 921 | 1007 | ||
| 922 | // Try the first URL | 1008 | // Try the first URL |
| 923 | let mut tried = HashSet::new(); | 1009 | let mut tried = HashSet::new(); |
| 924 | tried.insert(first_url.clone()); | 1010 | tried.insert(first_url.clone()); |
| 925 | 1011 | ||
| 926 | // Get second URL | 1012 | // Get second URL |
| 927 | let second_url = | 1013 | let second_url = sync_identifier_next_url( |
| 928 | sync_identifier_next_url(&mock, "test-repo", None, &tried, &throttle_manager) | 1014 | &mock, |
| 929 | .await | 1015 | "test-repo", |
| 930 | .expect("Should return a second URL"); | 1016 | None, |
| 1017 | &tried, | ||
| 1018 | &throttle_manager, | ||
| 1019 | &naughty_list, | ||
| 1020 | ) | ||
| 1021 | .await | ||
| 1022 | .expect("Should return a second URL"); | ||
| 931 | 1023 | ||
| 932 | // Both URLs should be available (one from announcement, one from PR) | 1024 | // Both URLs should be available (one from announcement, one from PR) |
| 933 | let both_urls = [first_url, second_url]; | 1025 | let both_urls = [first_url, second_url]; |
| @@ -955,12 +1047,20 @@ mod tests { | |||
| 955 | .with_pending_events(true); | 1047 | .with_pending_events(true); |
| 956 | 1048 | ||
| 957 | let throttle_manager = ThrottleManager::new(5, 100); | 1049 | let throttle_manager = ThrottleManager::new(5, 100); |
| 1050 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 958 | let mut tried_urls = HashSet::new(); | 1051 | let mut tried_urls = HashSet::new(); |
| 959 | 1052 | ||
| 960 | // Collect all available URLs | 1053 | // Collect all available URLs |
| 961 | let mut available_urls = Vec::new(); | 1054 | let mut available_urls = Vec::new(); |
| 962 | while let Some(url) = | 1055 | while let Some(url) = sync_identifier_next_url( |
| 963 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager).await | 1056 | &mock, |
| 1057 | "test-repo", | ||
| 1058 | None, | ||
| 1059 | &tried_urls, | ||
| 1060 | &throttle_manager, | ||
| 1061 | &naughty_list, | ||
| 1062 | ) | ||
| 1063 | .await | ||
| 964 | { | 1064 | { |
| 965 | available_urls.push(url.clone()); | 1065 | available_urls.push(url.clone()); |
| 966 | tried_urls.insert(url); | 1066 | tried_urls.insert(url); |
| @@ -1001,6 +1101,7 @@ mod tests { | |||
| 1001 | .with_pending_events(true); | 1101 | .with_pending_events(true); |
| 1002 | 1102 | ||
| 1003 | let throttle_manager = ThrottleManager::new(1, 100); | 1103 | let throttle_manager = ThrottleManager::new(1, 100); |
| 1104 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 1004 | 1105 | ||
| 1005 | // Throttle both domains | 1106 | // Throttle both domains |
| 1006 | throttle_manager.start_request("github.com"); | 1107 | throttle_manager.start_request("github.com"); |
| @@ -1013,6 +1114,7 @@ mod tests { | |||
| 1013 | "test-repo", | 1114 | "test-repo", |
| 1014 | &tried_urls, | 1115 | &tried_urls, |
| 1015 | &throttle_manager, | 1116 | &throttle_manager, |
| 1117 | &naughty_list, | ||
| 1016 | ) | 1118 | ) |
| 1017 | .await; | 1119 | .await; |
| 1018 | 1120 | ||
| @@ -1037,9 +1139,10 @@ mod tests { | |||
| 1037 | // Note: github.com doesn't provide any OIDs | 1139 | // Note: github.com doesn't provide any OIDs |
| 1038 | 1140 | ||
| 1039 | let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); | 1141 | let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); |
| 1142 | let naughty_list = NaughtyListTracker::with_defaults(); | ||
| 1040 | 1143 | ||
| 1041 | // Run sync_identifier | 1144 | // Run sync_identifier |
| 1042 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; | 1145 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager, &naughty_list).await; |
| 1043 | 1146 | ||
| 1044 | // Should complete successfully using PR clone URL | 1147 | // Should complete successfully using PR clone URL |
| 1045 | assert!(complete, "Sync should complete using PR clone URL"); | 1148 | assert!(complete, "Sync should complete using PR clone URL"); |