upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src/purgatory/sync/functions.rs
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-01-07 17:13:17 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-01-07 17:13:17 +0000
commitb71111cc25b99acab786ece4607cb60e9cbebae4 (patch)
tree71ef2669be50ccbdf61c34dff93a1c819867f83c /src/purgatory/sync/functions.rs
parente557d07ddbb1ea1a8ca6604f9ba945f359f54ce7 (diff)
feat(sync): extract clone URLs from PR events in purgatory
Add support for extracting clone URLs from PR/PR-Update events (kind 1618/1619) during purgatory sync, per NIP-34 specification. This enables fetching PR commits from URLs specified in the PR event itself, not just from repository announcement clone URLs. Changes: - Add collect_pr_clone_urls() to SyncContext trait - Implement in RealSyncContext: extract clone tags from PR events in purgatory - Implement in MockSyncContext: configurable PR clone URLs for testing - Update sync_identifier_next_url to merge PR clone URLs with announcement URLs - Update get_throttled_domains_with_untried_urls with same merge logic - Add unit tests for PR clone URL extraction and filtering
Diffstat (limited to 'src/purgatory/sync/functions.rs')
-rw-r--r--src/purgatory/sync/functions.rs206
1 files changed, 203 insertions, 3 deletions
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs
index 13b2e47..1b9518d 100644
--- a/src/purgatory/sync/functions.rs
+++ b/src/purgatory/sync/functions.rs
@@ -110,13 +110,24 @@ pub async fn sync_identifier_next_url<C: SyncContext + ?Sized>(
110 } 110 }
111 }; 111 };
112 112
113 // 4. Collect clone URLs, excluding our domain 113 // 4. Collect clone URLs from announcements AND PR events in purgatory
114 let our_domain = ctx.our_domain(); 114 let our_domain = ctx.our_domain();
115 let all_urls: HashSet<String> = repo_data 115
116 // Get clone URLs from repository announcements
117 let announcement_urls: HashSet<String> = repo_data
116 .announcements 118 .announcements
117 .iter() 119 .iter()
118 .flat_map(|a| a.clone_urls.iter().cloned()) 120 .flat_map(|a| a.clone_urls.iter().cloned())
121 .collect();
122
123 // Get clone URLs from PR events in purgatory
124 let pr_urls = ctx.collect_pr_clone_urls(identifier);
125
126 // Merge and filter out our domain
127 let all_urls: HashSet<String> = announcement_urls
128 .union(&pr_urls)
119 .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) 129 .filter(|url| our_domain.map_or(true, |d| !url.contains(d)))
130 .cloned()
120 .collect(); 131 .collect();
121 132
122 if all_urls.is_empty() { 133 if all_urls.is_empty() {
@@ -206,11 +217,22 @@ pub async fn get_throttled_domains_with_untried_urls<C: SyncContext + ?Sized>(
206 }; 217 };
207 218
208 let our_domain = ctx.our_domain(); 219 let our_domain = ctx.our_domain();
209 let all_urls: HashSet<String> = repo_data 220
221 // Get clone URLs from repository announcements
222 let announcement_urls: HashSet<String> = repo_data
210 .announcements 223 .announcements
211 .iter() 224 .iter()
212 .flat_map(|a| a.clone_urls.iter().cloned()) 225 .flat_map(|a| a.clone_urls.iter().cloned())
226 .collect();
227
228 // Get clone URLs from PR events in purgatory
229 let pr_urls = ctx.collect_pr_clone_urls(identifier);
230
231 // Merge and filter out our domain
232 let all_urls: HashSet<String> = announcement_urls
233 .union(&pr_urls)
213 .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) 234 .filter(|url| our_domain.map_or(true, |d| !url.contains(d)))
235 .cloned()
214 .collect(); 236 .collect();
215 237
216 let urls_by_domain: HashMap<String, Vec<String>> = 238 let urls_by_domain: HashMap<String, Vec<String>> =
@@ -855,4 +877,182 @@ mod tests {
855 "Expected github.com to still be throttled" 877 "Expected github.com to still be throttled"
856 ); 878 );
857 } 879 }
880
881 // =========================================================================
882 // PR Clone URL Tests
883 // =========================================================================
884
885 #[tokio::test]
886 async fn test_collect_pr_clone_urls_returns_configured_urls() {
887 // Test that MockSyncContext returns configured PR clone URLs
888 let mock = MockSyncContext::new()
889 .with_pr_clone_urls(&[
890 "https://pr-server.com/fork.git",
891 "https://another-server.com/fork.git",
892 ]);
893
894 let pr_urls = mock.collect_pr_clone_urls("test-repo");
895
896 assert_eq!(pr_urls.len(), 2);
897 assert!(pr_urls.contains("https://pr-server.com/fork.git"));
898 assert!(pr_urls.contains("https://another-server.com/fork.git"));
899 }
900
901 #[tokio::test]
902 async fn test_sync_identifier_next_url_includes_pr_clone_urls() {
903 // Set up mock with announcement URLs and PR clone URLs
904 let mock = MockSyncContext::new()
905 .with_urls(&["https://github.com/owner/repo.git"]) // From announcement
906 .with_pr_clone_urls(&["https://pr-author.com/fork.git"]) // From PR event
907 .with_needed_oids(&["abc123"])
908 .with_pending_events(true);
909
910 let throttle_manager = ThrottleManager::new(5, 100);
911 let tried_urls = HashSet::new();
912
913 // Get first URL
914 let first_url =
915 sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager)
916 .await
917 .expect("Should return a URL");
918
919 // Try the first URL
920 let mut tried = HashSet::new();
921 tried.insert(first_url.clone());
922
923 // Get second URL
924 let second_url =
925 sync_identifier_next_url(&mock, "test-repo", None, &tried, &throttle_manager)
926 .await
927 .expect("Should return a second URL");
928
929 // Both URLs should be available (one from announcement, one from PR)
930 let both_urls = vec![first_url, second_url];
931 assert!(
932 both_urls.iter().any(|u| u.contains("github.com")),
933 "Should include announcement URL"
934 );
935 assert!(
936 both_urls.iter().any(|u| u.contains("pr-author.com")),
937 "Should include PR clone URL"
938 );
939 }
940
941 #[tokio::test]
942 async fn test_pr_clone_urls_filtered_by_our_domain() {
943 // Set up mock with PR clone URL pointing to our domain
944 let mock = MockSyncContext::new()
945 .with_urls(&["https://github.com/owner/repo.git"])
946 .with_pr_clone_urls(&[
947 "https://our-relay.com/fork.git", // Should be filtered
948 "https://external.com/fork.git", // Should be included
949 ])
950 .with_our_domain("our-relay.com")
951 .with_needed_oids(&["abc123"])
952 .with_pending_events(true);
953
954 let throttle_manager = ThrottleManager::new(5, 100);
955 let mut tried_urls = HashSet::new();
956
957 // Collect all available URLs
958 let mut available_urls = Vec::new();
959 while let Some(url) =
960 sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager)
961 .await
962 {
963 available_urls.push(url.clone());
964 tried_urls.insert(url);
965 }
966
967 // Should have 2 URLs (github.com and external.com), not 3
968 assert_eq!(
969 available_urls.len(),
970 2,
971 "Expected 2 URLs after filtering our domain, got: {:?}",
972 available_urls
973 );
974
975 // our-relay.com should be filtered out
976 assert!(
977 !available_urls.iter().any(|u| u.contains("our-relay.com")),
978 "Our domain should be filtered out"
979 );
980
981 // github.com and external.com should be present
982 assert!(
983 available_urls.iter().any(|u| u.contains("github.com")),
984 "github.com should be present"
985 );
986 assert!(
987 available_urls.iter().any(|u| u.contains("external.com")),
988 "external.com should be present"
989 );
990 }
991
992 #[tokio::test]
993 async fn test_get_throttled_domains_includes_pr_clone_urls() {
994 // Set up mock with throttled PR clone URL domain
995 let mock = MockSyncContext::new()
996 .with_urls(&["https://github.com/owner/repo.git"])
997 .with_pr_clone_urls(&["https://pr-server.com/fork.git"])
998 .with_needed_oids(&["abc123"])
999 .with_pending_events(true);
1000
1001 let throttle_manager = ThrottleManager::new(1, 100);
1002
1003 // Throttle both domains
1004 throttle_manager.start_request("github.com");
1005 throttle_manager.start_request("pr-server.com");
1006
1007 let tried_urls = HashSet::new();
1008
1009 let throttled =
1010 get_throttled_domains_with_untried_urls(&mock, "test-repo", &tried_urls, &throttle_manager)
1011 .await;
1012
1013 // Should include both throttled domains
1014 let domains: Vec<&str> = throttled.iter().map(|t| t.domain.as_str()).collect();
1015 assert!(
1016 domains.contains(&"github.com"),
1017 "Should include github.com"
1018 );
1019 assert!(
1020 domains.contains(&"pr-server.com"),
1021 "Should include pr-server.com from PR clone URLs"
1022 );
1023 }
1024
1025 #[tokio::test]
1026 async fn test_sync_identifier_uses_pr_clone_urls_when_announcement_urls_fail() {
1027 // Set up mock where only PR clone URL can provide the needed OID
1028 let mock = MockSyncContext::new()
1029 .with_urls(&["https://github.com/owner/repo.git"]) // Doesn't have the OID
1030 .with_pr_clone_urls(&["https://pr-author.com/fork.git"]) // Has the OID
1031 .with_needed_oids(&["pr-commit-123"])
1032 .with_pending_events(true)
1033 .url_provides("https://pr-author.com/fork.git", &["pr-commit-123"]);
1034 // Note: github.com doesn't provide any OIDs
1035
1036 let throttle_manager = Arc::new(ThrottleManager::new(5, 100));
1037
1038 // Run sync_identifier
1039 let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await;
1040
1041 // Should complete successfully using PR clone URL
1042 assert!(complete, "Sync should complete using PR clone URL");
1043
1044 // Verify PR clone URL was tried
1045 let fetch_log = mock.fetch_log();
1046 assert!(
1047 fetch_log.iter().any(|u| u.contains("pr-author.com")),
1048 "PR clone URL should have been tried: {:?}",
1049 fetch_log
1050 );
1051
1052 // OID should be fetched
1053 assert!(
1054 mock.current_needed_oids().is_empty(),
1055 "OID should be fetched from PR clone URL"
1056 );
1057 }
858} 1058}