upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/purgatory/sync/context.rs77
-rw-r--r--src/purgatory/sync/functions.rs206
2 files changed, 278 insertions, 5 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs
index e97b708..2922f10 100644
--- a/src/purgatory/sync/context.rs
+++ b/src/purgatory/sync/context.rs
@@ -63,6 +63,18 @@ impl ProcessResult {
63/// with mocks. 63/// with mocks.
64#[async_trait] 64#[async_trait]
65pub trait SyncContext: Send + Sync { 65pub trait SyncContext: Send + Sync {
66 /// Collect clone URLs from PR events in purgatory for a given identifier.
67 ///
68 /// PR events (kind 1618) and PR Update events (kind 1619) can include `clone` tags
69 /// specifying where the PR commits can be fetched from. This method extracts those
70 /// URLs to supplement the clone URLs from repository announcements.
71 ///
72 /// # Arguments
73 /// * `identifier` - The repository identifier
74 ///
75 /// # Returns
76 /// Set of clone URLs from PR events in purgatory for this identifier
77 fn collect_pr_clone_urls(&self, identifier: &str) -> HashSet<String>;
66 /// Get repository data (announcements, clone URLs, etc.) from the database. 78 /// Get repository data (announcements, clone URLs, etc.) from the database.
67 /// 79 ///
68 /// # Arguments 80 /// # Arguments
@@ -232,6 +244,30 @@ impl RealSyncContext {
232 244
233#[async_trait] 245#[async_trait]
234impl SyncContext for RealSyncContext { 246impl SyncContext for RealSyncContext {
247 fn collect_pr_clone_urls(&self, identifier: &str) -> HashSet<String> {
248 let mut urls = HashSet::new();
249
250 for entry in self.purgatory.find_prs_for_identifier(identifier) {
251 if let Some(ref event) = entry.event {
252 for tag in event.tags.iter() {
253 let tag_vec = tag.clone().to_vec();
254 if tag_vec.len() >= 2 && tag_vec[0] == "clone" {
255 // Clone tags can have multiple URLs: ["clone", "url1", "url2", ...]
256 urls.extend(tag_vec[1..].iter().cloned());
257 }
258 }
259 }
260 }
261
262 debug!(
263 identifier = %identifier,
264 pr_clone_urls_count = urls.len(),
265 "Collected clone URLs from PR events in purgatory"
266 );
267
268 urls
269 }
270
235 async fn fetch_repository_data(&self, identifier: &str) -> Result<RepositoryData> { 271 async fn fetch_repository_data(&self, identifier: &str) -> Result<RepositoryData> {
236 crate::git::authorization::fetch_repository_data(&self.database, identifier).await 272 crate::git::authorization::fetch_repository_data(&self.database, identifier).await
237 } 273 }
@@ -450,9 +486,12 @@ pub mod mock {
450 /// Repository data to return from fetch_repository_data 486 /// Repository data to return from fetch_repository_data
451 repo_data: RwLock<Option<RepositoryData>>, 487 repo_data: RwLock<Option<RepositoryData>>,
452 488
453 /// Clone URLs available for the repository 489 /// Clone URLs available for the repository (from announcements)
454 clone_urls: Vec<String>, 490 clone_urls: Vec<String>,
455 491
492 /// Clone URLs from PR events in purgatory
493 pr_clone_urls: HashSet<String>,
494
456 /// OIDs still needed (decremented when "fetched") 495 /// OIDs still needed (decremented when "fetched")
457 needed_oids: RwLock<HashSet<String>>, 496 needed_oids: RwLock<HashSet<String>>,
458 497
@@ -490,6 +529,7 @@ pub mod mock {
490 Self { 529 Self {
491 repo_data: RwLock::new(None), 530 repo_data: RwLock::new(None),
492 clone_urls: Vec::new(), 531 clone_urls: Vec::new(),
532 pr_clone_urls: HashSet::new(),
493 needed_oids: RwLock::new(HashSet::new()), 533 needed_oids: RwLock::new(HashSet::new()),
494 url_provides_oids: HashMap::new(), 534 url_provides_oids: HashMap::new(),
495 fetch_log: RwLock::new(Vec::new()), 535 fetch_log: RwLock::new(Vec::new()),
@@ -501,12 +541,18 @@ pub mod mock {
501 } 541 }
502 } 542 }
503 543
504 /// Configure clone URLs for the repository. 544 /// Configure clone URLs for the repository (from announcements).
505 pub fn with_urls(mut self, urls: &[&str]) -> Self { 545 pub fn with_urls(mut self, urls: &[&str]) -> Self {
506 self.clone_urls = urls.iter().map(|s| s.to_string()).collect(); 546 self.clone_urls = urls.iter().map(|s| s.to_string()).collect();
507 self 547 self
508 } 548 }
509 549
550 /// Configure clone URLs from PR events in purgatory.
551 pub fn with_pr_clone_urls(mut self, urls: &[&str]) -> Self {
552 self.pr_clone_urls = urls.iter().map(|s| s.to_string()).collect();
553 self
554 }
555
510 /// Configure OIDs that are still needed. 556 /// Configure OIDs that are still needed.
511 pub fn with_needed_oids(self, oids: &[&str]) -> Self { 557 pub fn with_needed_oids(self, oids: &[&str]) -> Self {
512 *self.needed_oids.write().unwrap() = oids.iter().map(|s| s.to_string()).collect(); 558 *self.needed_oids.write().unwrap() = oids.iter().map(|s| s.to_string()).collect();
@@ -580,6 +626,10 @@ pub mod mock {
580 626
581 #[async_trait] 627 #[async_trait]
582 impl SyncContext for MockSyncContext { 628 impl SyncContext for MockSyncContext {
629 fn collect_pr_clone_urls(&self, _identifier: &str) -> HashSet<String> {
630 self.pr_clone_urls.clone()
631 }
632
583 async fn fetch_repository_data(&self, _identifier: &str) -> Result<RepositoryData> { 633 async fn fetch_repository_data(&self, _identifier: &str) -> Result<RepositoryData> {
584 // Return stored repo_data or create a minimal one with clone URLs 634 // Return stored repo_data or create a minimal one with clone URLs
585 if let Some(data) = self.repo_data.read().unwrap().as_ref() { 635 if let Some(data) = self.repo_data.read().unwrap().as_ref() {
@@ -791,5 +841,28 @@ pub mod mock {
791 mock.set_pending_events(false); 841 mock.set_pending_events(false);
792 assert!(!mock.has_pending_events("test-repo")); 842 assert!(!mock.has_pending_events("test-repo"));
793 } 843 }
844
845 #[test]
846 fn mock_collect_pr_clone_urls_returns_configured_urls() {
847 let mock = MockSyncContext::new().with_pr_clone_urls(&[
848 "https://fork-server.com/repo.git",
849 "https://another-fork.com/repo.git",
850 ]);
851
852 let urls = mock.collect_pr_clone_urls("any-identifier");
853
854 assert_eq!(urls.len(), 2);
855 assert!(urls.contains("https://fork-server.com/repo.git"));
856 assert!(urls.contains("https://another-fork.com/repo.git"));
857 }
858
859 #[test]
860 fn mock_collect_pr_clone_urls_empty_by_default() {
861 let mock = MockSyncContext::new();
862
863 let urls = mock.collect_pr_clone_urls("any-identifier");
864
865 assert!(urls.is_empty());
866 }
794 } 867 }
795} 868}
diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs
index 13b2e47..1b9518d 100644
--- a/src/purgatory/sync/functions.rs
+++ b/src/purgatory/sync/functions.rs
@@ -110,13 +110,24 @@ pub async fn sync_identifier_next_url<C: SyncContext + ?Sized>(
110 } 110 }
111 }; 111 };
112 112
113 // 4. Collect clone URLs, excluding our domain 113 // 4. Collect clone URLs from announcements AND PR events in purgatory
114 let our_domain = ctx.our_domain(); 114 let our_domain = ctx.our_domain();
115 let all_urls: HashSet<String> = repo_data 115
116 // Get clone URLs from repository announcements
117 let announcement_urls: HashSet<String> = repo_data
116 .announcements 118 .announcements
117 .iter() 119 .iter()
118 .flat_map(|a| a.clone_urls.iter().cloned()) 120 .flat_map(|a| a.clone_urls.iter().cloned())
121 .collect();
122
123 // Get clone URLs from PR events in purgatory
124 let pr_urls = ctx.collect_pr_clone_urls(identifier);
125
126 // Merge and filter out our domain
127 let all_urls: HashSet<String> = announcement_urls
128 .union(&pr_urls)
119 .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) 129 .filter(|url| our_domain.map_or(true, |d| !url.contains(d)))
130 .cloned()
120 .collect(); 131 .collect();
121 132
122 if all_urls.is_empty() { 133 if all_urls.is_empty() {
@@ -206,11 +217,22 @@ pub async fn get_throttled_domains_with_untried_urls<C: SyncContext + ?Sized>(
206 }; 217 };
207 218
208 let our_domain = ctx.our_domain(); 219 let our_domain = ctx.our_domain();
209 let all_urls: HashSet<String> = repo_data 220
221 // Get clone URLs from repository announcements
222 let announcement_urls: HashSet<String> = repo_data
210 .announcements 223 .announcements
211 .iter() 224 .iter()
212 .flat_map(|a| a.clone_urls.iter().cloned()) 225 .flat_map(|a| a.clone_urls.iter().cloned())
226 .collect();
227
228 // Get clone URLs from PR events in purgatory
229 let pr_urls = ctx.collect_pr_clone_urls(identifier);
230
231 // Merge and filter out our domain
232 let all_urls: HashSet<String> = announcement_urls
233 .union(&pr_urls)
213 .filter(|url| our_domain.map_or(true, |d| !url.contains(d))) 234 .filter(|url| our_domain.map_or(true, |d| !url.contains(d)))
235 .cloned()
214 .collect(); 236 .collect();
215 237
216 let urls_by_domain: HashMap<String, Vec<String>> = 238 let urls_by_domain: HashMap<String, Vec<String>> =
@@ -855,4 +877,182 @@ mod tests {
855 "Expected github.com to still be throttled" 877 "Expected github.com to still be throttled"
856 ); 878 );
857 } 879 }
880
881 // =========================================================================
882 // PR Clone URL Tests
883 // =========================================================================
884
885 #[tokio::test]
886 async fn test_collect_pr_clone_urls_returns_configured_urls() {
887 // Test that MockSyncContext returns configured PR clone URLs
888 let mock = MockSyncContext::new()
889 .with_pr_clone_urls(&[
890 "https://pr-server.com/fork.git",
891 "https://another-server.com/fork.git",
892 ]);
893
894 let pr_urls = mock.collect_pr_clone_urls("test-repo");
895
896 assert_eq!(pr_urls.len(), 2);
897 assert!(pr_urls.contains("https://pr-server.com/fork.git"));
898 assert!(pr_urls.contains("https://another-server.com/fork.git"));
899 }
900
901 #[tokio::test]
902 async fn test_sync_identifier_next_url_includes_pr_clone_urls() {
903 // Set up mock with announcement URLs and PR clone URLs
904 let mock = MockSyncContext::new()
905 .with_urls(&["https://github.com/owner/repo.git"]) // From announcement
906 .with_pr_clone_urls(&["https://pr-author.com/fork.git"]) // From PR event
907 .with_needed_oids(&["abc123"])
908 .with_pending_events(true);
909
910 let throttle_manager = ThrottleManager::new(5, 100);
911 let tried_urls = HashSet::new();
912
913 // Get first URL
914 let first_url =
915 sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager)
916 .await
917 .expect("Should return a URL");
918
919 // Try the first URL
920 let mut tried = HashSet::new();
921 tried.insert(first_url.clone());
922
923 // Get second URL
924 let second_url =
925 sync_identifier_next_url(&mock, "test-repo", None, &tried, &throttle_manager)
926 .await
927 .expect("Should return a second URL");
928
929 // Both URLs should be available (one from announcement, one from PR)
930 let both_urls = vec![first_url, second_url];
931 assert!(
932 both_urls.iter().any(|u| u.contains("github.com")),
933 "Should include announcement URL"
934 );
935 assert!(
936 both_urls.iter().any(|u| u.contains("pr-author.com")),
937 "Should include PR clone URL"
938 );
939 }
940
941 #[tokio::test]
942 async fn test_pr_clone_urls_filtered_by_our_domain() {
943 // Set up mock with PR clone URL pointing to our domain
944 let mock = MockSyncContext::new()
945 .with_urls(&["https://github.com/owner/repo.git"])
946 .with_pr_clone_urls(&[
947 "https://our-relay.com/fork.git", // Should be filtered
948 "https://external.com/fork.git", // Should be included
949 ])
950 .with_our_domain("our-relay.com")
951 .with_needed_oids(&["abc123"])
952 .with_pending_events(true);
953
954 let throttle_manager = ThrottleManager::new(5, 100);
955 let mut tried_urls = HashSet::new();
956
957 // Collect all available URLs
958 let mut available_urls = Vec::new();
959 while let Some(url) =
960 sync_identifier_next_url(&mock, "test-repo", None, &tried_urls, &throttle_manager)
961 .await
962 {
963 available_urls.push(url.clone());
964 tried_urls.insert(url);
965 }
966
967 // Should have 2 URLs (github.com and external.com), not 3
968 assert_eq!(
969 available_urls.len(),
970 2,
971 "Expected 2 URLs after filtering our domain, got: {:?}",
972 available_urls
973 );
974
975 // our-relay.com should be filtered out
976 assert!(
977 !available_urls.iter().any(|u| u.contains("our-relay.com")),
978 "Our domain should be filtered out"
979 );
980
981 // github.com and external.com should be present
982 assert!(
983 available_urls.iter().any(|u| u.contains("github.com")),
984 "github.com should be present"
985 );
986 assert!(
987 available_urls.iter().any(|u| u.contains("external.com")),
988 "external.com should be present"
989 );
990 }
991
992 #[tokio::test]
993 async fn test_get_throttled_domains_includes_pr_clone_urls() {
994 // Set up mock with throttled PR clone URL domain
995 let mock = MockSyncContext::new()
996 .with_urls(&["https://github.com/owner/repo.git"])
997 .with_pr_clone_urls(&["https://pr-server.com/fork.git"])
998 .with_needed_oids(&["abc123"])
999 .with_pending_events(true);
1000
1001 let throttle_manager = ThrottleManager::new(1, 100);
1002
1003 // Throttle both domains
1004 throttle_manager.start_request("github.com");
1005 throttle_manager.start_request("pr-server.com");
1006
1007 let tried_urls = HashSet::new();
1008
1009 let throttled =
1010 get_throttled_domains_with_untried_urls(&mock, "test-repo", &tried_urls, &throttle_manager)
1011 .await;
1012
1013 // Should include both throttled domains
1014 let domains: Vec<&str> = throttled.iter().map(|t| t.domain.as_str()).collect();
1015 assert!(
1016 domains.contains(&"github.com"),
1017 "Should include github.com"
1018 );
1019 assert!(
1020 domains.contains(&"pr-server.com"),
1021 "Should include pr-server.com from PR clone URLs"
1022 );
1023 }
1024
1025 #[tokio::test]
1026 async fn test_sync_identifier_uses_pr_clone_urls_when_announcement_urls_fail() {
1027 // Set up mock where only PR clone URL can provide the needed OID
1028 let mock = MockSyncContext::new()
1029 .with_urls(&["https://github.com/owner/repo.git"]) // Doesn't have the OID
1030 .with_pr_clone_urls(&["https://pr-author.com/fork.git"]) // Has the OID
1031 .with_needed_oids(&["pr-commit-123"])
1032 .with_pending_events(true)
1033 .url_provides("https://pr-author.com/fork.git", &["pr-commit-123"]);
1034 // Note: github.com doesn't provide any OIDs
1035
1036 let throttle_manager = Arc::new(ThrottleManager::new(5, 100));
1037
1038 // Run sync_identifier
1039 let complete = sync_identifier(&mock, "test-repo", &throttle_manager).await;
1040
1041 // Should complete successfully using PR clone URL
1042 assert!(complete, "Sync should complete using PR clone URL");
1043
1044 // Verify PR clone URL was tried
1045 let fetch_log = mock.fetch_log();
1046 assert!(
1047 fetch_log.iter().any(|u| u.contains("pr-author.com")),
1048 "PR clone URL should have been tried: {:?}",
1049 fetch_log
1050 );
1051
1052 // OID should be fetched
1053 assert!(
1054 mock.current_needed_oids().is_empty(),
1055 "OID should be fetched from PR clone URL"
1056 );
1057 }
858} 1058}