diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-07 17:13:17 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-07 17:13:17 +0000 |
| commit | b71111cc25b99acab786ece4607cb60e9cbebae4 (patch) | |
| tree | 71ef2669be50ccbdf61c34dff93a1c819867f83c /src/purgatory/sync/functions.rs | |
| parent | e557d07ddbb1ea1a8ca6604f9ba945f359f54ce7 (diff) | |
feat(sync): extract clone URLs from PR events in purgatory
Add support for extracting clone URLs from PR/PR-Update events (kind 1618/1619)
during purgatory sync, per NIP-34 specification. This enables fetching PR commits
from URLs specified in the PR event itself, not just from repository announcement
clone URLs.
Changes:
- Add collect_pr_clone_urls() to SyncContext trait
- Implement in RealSyncContext: extract clone tags from PR events in purgatory
- Implement in MockSyncContext: configurable PR clone URLs for testing
- Update sync_identifier_next_url to merge PR clone URLs with announcement URLs
- Update get_throttled_domains_with_untried_urls with same merge logic
- Add unit tests for PR clone URL extraction and filtering
Diffstat (limited to 'src/purgatory/sync/functions.rs')
| -rw-r--r-- | src/purgatory/sync/functions.rs | 206 |
1 files changed, 203 insertions, 3 deletions
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 13b2e47..1b9518d 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs | |||
| @@ -110,13 +110,24 @@ pub async fn sync_identifier_next_url<C: SyncContext + ?Sized>( | |||
| 110 | } | 110 | } |
| 111 | }; | 111 | }; |
| 112 | 112 | ||
| 113 | // 4. Collect clone URLs, excluding our domain | 113 | // 4. Collect clone URLs from announcements AND PR events in purgatory |
| 114 | let our_domain = ctx.our_domain(); | 114 | let our_domain = ctx.our_domain(); |
| 115 | let all_urls: HashSet<String> = repo_data | 115 | |
| 116 | // Get clone URLs from repository announcements | ||
| 117 | let announcement_urls: HashSet<String> = repo_data | ||
| 116 | .announcements | 118 | .announcements |
| 117 | .iter() | 119 | .iter() |
| 118 | .flat_map(|a| a.clone_urls.iter().cloned()) | 120 | .flat_map(|a| a.clone_urls.iter().cloned()) |
| 121 | .collect(); | ||
| 122 | |||
| 123 | // Get clone URLs from PR events in purgatory | ||
| 124 | let pr_urls = ctx.collect_pr_clone_urls(identifier); | ||
| 125 | |||
| 126 | // Merge and filter out our domain | ||
| 127 | let all_urls: HashSet<String> = announcement_urls | ||
| 128 | .union(&pr_urls) | ||
| 119 | .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) | 129 | .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) |
| 130 | .cloned() | ||
| 120 | .collect(); | 131 | .collect(); |
| 121 | 132 | ||
| 122 | if all_urls.is_empty() { | 133 | if all_urls.is_empty() { |
| @@ -206,11 +217,22 @@ pub async fn get_throttled_domains_with_untried_urls<C: SyncContext + ?Sized>( | |||
| 206 | }; | 217 | }; |
| 207 | 218 | ||
| 208 | let our_domain = ctx.our_domain(); | 219 | let our_domain = ctx.our_domain(); |
| 209 | let all_urls: HashSet<String> = repo_data | 220 | |
| 221 | // Get clone URLs from repository announcements | ||
| 222 | let announcement_urls: HashSet<String> = repo_data | ||
| 210 | .announcements | 223 | .announcements |
| 211 | .iter() | 224 | .iter() |
| 212 | .flat_map(|a| a.clone_urls.iter().cloned()) | 225 | .flat_map(|a| a.clone_urls.iter().cloned()) |
| 226 | .collect(); | ||
| 227 | |||
| 228 | // Get clone URLs from PR events in purgatory | ||
| 229 | let pr_urls = ctx.collect_pr_clone_urls(identifier); | ||
| 230 | |||
| 231 | // Merge and filter out our domain | ||
| 232 | let all_urls: HashSet<String> = announcement_urls | ||
| 233 | .union(&pr_urls) | ||
| 213 | .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) | 234 | .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) |
| 235 | .cloned() | ||
| 214 | .collect(); | 236 | .collect(); |
| 215 | 237 | ||
| 216 | let urls_by_domain: HashMap<String, Vec<String>> = | 238 | let urls_by_domain: HashMap<String, Vec<String>> = |
| @@ -855,4 +877,182 @@ mod tests { | |||
| 855 | "Expected github.com to still be throttled" | 877 | "Expected github.com to still be throttled" |
| 856 | ); | 878 | ); |
| 857 | } | 879 | } |
| 880 | |||
| 881 | // ========================================================================= | ||
| 882 | // PR Clone URL Tests | ||
| 883 | // ========================================================================= | ||
| 884 | |||
| 885 | #[tokio::test] | ||
| 886 | async fn test_collect_pr_clone_urls_returns_configured_urls() { | ||
| 887 | // Test that MockSyncContext returns configured PR clone URLs | ||
| 888 | let mock = MockSyncContext::new() | ||
| 889 | .with_pr_clone_urls(&[ | ||
| 890 | "https://pr-server.com/fork.git", | ||
| 891 | "https://another-server.com/fork.git", | ||
| 892 | ]); | ||
| 893 | |||
| 894 | let pr_urls = mock.collect_pr_clone_urls("test-repo"); | ||
| 895 | |||
| 896 | assert_eq!(pr_urls.len(), 2); | ||
| 897 | assert!(pr_urls.contains("https://pr-server.com/fork.git")); | ||
| 898 | assert!(pr_urls.contains("https://another-server.com/fork.git")); | ||
| 899 | } | ||
| 900 | |||
| 901 | #[tokio::test] | ||
| 902 | async fn test_sync_identifier_next_url_includes_pr_clone_urls() { | ||
| 903 | // Set up mock with announcement URLs and PR clone URLs | ||
| 904 | let mock = MockSyncContext::new() | ||
| 905 | .with_urls(&["https://github.com/owner/repo.git"]) // From announcement | ||
| 906 | .with_pr_clone_urls(&["https://pr-author.com/fork.git"]) // From PR event | ||
| 907 | .with_needed_oids(&["abc123"]) | ||
| 908 | .with_pending_events(true); | ||
| 909 | |||
| 910 | let throttle_manager = ThrottleManager::new(5, 100); | ||
| 911 | let tried_urls = HashSet::new(); | ||
| 912 | |||
| 913 | // Get first URL | ||
| 914 | let first_url = | ||
| 915 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | ||
| 916 | .await | ||
| 917 | .expect("Should return a URL"); | ||
| 918 | |||
| 919 | // Try the first URL | ||
| 920 | let mut tried = HashSet::new(); | ||
| 921 | tried.insert(first_url.clone()); | ||
| 922 | |||
| 923 | // Get second URL | ||
| 924 | let second_url = | ||
| 925 | sync_identifier_next_url(&mock, "test-repo", None, &tried, &throttle_manager) | ||
| 926 | .await | ||
| 927 | .expect("Should return a second URL"); | ||
| 928 | |||
| 929 | // Both URLs should be available (one from announcement, one from PR) | ||
| 930 | let both_urls = vec![first_url, second_url]; | ||
| 931 | assert!( | ||
| 932 | both_urls.iter().any(|u| u.contains("github.com")), | ||
| 933 | "Should include announcement URL" | ||
| 934 | ); | ||
| 935 | assert!( | ||
| 936 | both_urls.iter().any(|u| u.contains("pr-author.com")), | ||
| 937 | "Should include PR clone URL" | ||
| 938 | ); | ||
| 939 | } | ||
| 940 | |||
| 941 | #[tokio::test] | ||
| 942 | async fn test_pr_clone_urls_filtered_by_our_domain() { | ||
| 943 | // Set up mock with PR clone URL pointing to our domain | ||
| 944 | let mock = MockSyncContext::new() | ||
| 945 | .with_urls(&["https://github.com/owner/repo.git"]) | ||
| 946 | .with_pr_clone_urls(&[ | ||
| 947 | "https://our-relay.com/fork.git", // Should be filtered | ||
| 948 | "https://external.com/fork.git", // Should be included | ||
| 949 | ]) | ||
| 950 | .with_our_domain("our-relay.com") | ||
| 951 | .with_needed_oids(&["abc123"]) | ||
| 952 | .with_pending_events(true); | ||
| 953 | |||
| 954 | let throttle_manager = ThrottleManager::new(5, 100); | ||
| 955 | let mut tried_urls = HashSet::new(); | ||
| 956 | |||
| 957 | // Collect all available URLs | ||
| 958 | let mut available_urls = Vec::new(); | ||
| 959 | while let Some(url) = | ||
| 960 | sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager) | ||
| 961 | .await | ||
| 962 | { | ||
| 963 | available_urls.push(url.clone()); | ||
| 964 | tried_urls.insert(url); | ||
| 965 | } | ||
| 966 | |||
| 967 | // Should have 2 URLs (github.com and external.com), not 3 | ||
| 968 | assert_eq!( | ||
| 969 | available_urls.len(), | ||
| 970 | 2, | ||
| 971 | "Expected 2 URLs after filtering our domain, got: {:?}", | ||
| 972 | available_urls | ||
| 973 | ); | ||
| 974 | |||
| 975 | // our-relay.com should be filtered out | ||
| 976 | assert!( | ||
| 977 | !available_urls.iter().any(|u| u.contains("our-relay.com")), | ||
| 978 | "Our domain should be filtered out" | ||
| 979 | ); | ||
| 980 | |||
| 981 | // github.com and external.com should be present | ||
| 982 | assert!( | ||
| 983 | available_urls.iter().any(|u| u.contains("github.com")), | ||
| 984 | "github.com should be present" | ||
| 985 | ); | ||
| 986 | assert!( | ||
| 987 | available_urls.iter().any(|u| u.contains("external.com")), | ||
| 988 | "external.com should be present" | ||
| 989 | ); | ||
| 990 | } | ||
| 991 | |||
| 992 | #[tokio::test] | ||
| 993 | async fn test_get_throttled_domains_includes_pr_clone_urls() { | ||
| 994 | // Set up mock with throttled PR clone URL domain | ||
| 995 | let mock = MockSyncContext::new() | ||
| 996 | .with_urls(&["https://github.com/owner/repo.git"]) | ||
| 997 | .with_pr_clone_urls(&["https://pr-server.com/fork.git"]) | ||
| 998 | .with_needed_oids(&["abc123"]) | ||
| 999 | .with_pending_events(true); | ||
| 1000 | |||
| 1001 | let throttle_manager = ThrottleManager::new(1, 100); | ||
| 1002 | |||
| 1003 | // Throttle both domains | ||
| 1004 | throttle_manager.start_request("github.com"); | ||
| 1005 | throttle_manager.start_request("pr-server.com"); | ||
| 1006 | |||
| 1007 | let tried_urls = HashSet::new(); | ||
| 1008 | |||
| 1009 | let throttled = | ||
| 1010 | get_throttled_domains_with_untried_urls(&mock, "test-repo", &tried_urls, &throttle_manager) | ||
| 1011 | .await; | ||
| 1012 | |||
| 1013 | // Should include both throttled domains | ||
| 1014 | let domains: Vec<&str> = throttled.iter().map(|t| t.domain.as_str()).collect(); | ||
| 1015 | assert!( | ||
| 1016 | domains.contains(&"github.com"), | ||
| 1017 | "Should include github.com" | ||
| 1018 | ); | ||
| 1019 | assert!( | ||
| 1020 | domains.contains(&"pr-server.com"), | ||
| 1021 | "Should include pr-server.com from PR clone URLs" | ||
| 1022 | ); | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | #[tokio::test] | ||
| 1026 | async fn test_sync_identifier_uses_pr_clone_urls_when_announcement_urls_fail() { | ||
| 1027 | // Set up mock where only PR clone URL can provide the needed OID | ||
| 1028 | let mock = MockSyncContext::new() | ||
| 1029 | .with_urls(&["https://github.com/owner/repo.git"]) // Doesn't have the OID | ||
| 1030 | .with_pr_clone_urls(&["https://pr-author.com/fork.git"]) // Has the OID | ||
| 1031 | .with_needed_oids(&["pr-commit-123"]) | ||
| 1032 | .with_pending_events(true) | ||
| 1033 | .url_provides("https://pr-author.com/fork.git", &["pr-commit-123"]); | ||
| 1034 | // Note: github.com doesn't provide any OIDs | ||
| 1035 | |||
| 1036 | let throttle_manager = Arc::new(ThrottleManager::new(5, 100)); | ||
| 1037 | |||
| 1038 | // Run sync_identifier | ||
| 1039 | let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await; | ||
| 1040 | |||
| 1041 | // Should complete successfully using PR clone URL | ||
| 1042 | assert!(complete, "Sync should complete using PR clone URL"); | ||
| 1043 | |||
| 1044 | // Verify PR clone URL was tried | ||
| 1045 | let fetch_log = mock.fetch_log(); | ||
| 1046 | assert!( | ||
| 1047 | fetch_log.iter().any(|u| u.contains("pr-author.com")), | ||
| 1048 | "PR clone URL should have been tried: {:?}", | ||
| 1049 | fetch_log | ||
| 1050 | ); | ||
| 1051 | |||
| 1052 | // OID should be fetched | ||
| 1053 | assert!( | ||
| 1054 | mock.current_needed_oids().is_empty(), | ||
| 1055 | "OID should be fetched from PR clone URL" | ||
| 1056 | ); | ||
| 1057 | } | ||
| 858 | } | 1058 | } |