upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src/purgatory/sync
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-01-07 11:40:41 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-01-07 11:40:41 +0000
commit5bd6b9b93cd52da2075bc00a08cf7feca4b85d5c (patch)
treebc36818ed88458f33f3f94c33685c9f02c0969f4 /src/purgatory/sync
parent09d9771acaffdc6e798cc32d2a68e6d46a323d3a (diff)
Add SyncContext trait and MockSyncContext for purgatory sync
Implement the abstraction layer for purgatory sync operations: - SyncContext trait: defines interface for repository data fetching, OID existence checks, git fetch operations, and event processing - ProcessResult: captures outcomes when releasing events from purgatory - MockSyncContext: test mock with builder pattern for configuring: - Clone URLs and which OIDs each URL provides - Needed OIDs (simulates purgatory state) - URL failure simulation - Fetch logging for assertions The trait uses async_trait for async method support and requires Send + Sync for use in concurrent sync operations. This abstraction enables unit testing of sync logic without I/O, while the real implementation (to be added later) will connect to actual database, git, and relay systems.
Diffstat (limited to 'src/purgatory/sync')
-rw-r--r--src/purgatory/sync/context.rs546
-rw-r--r--src/purgatory/sync/mod.rs5
2 files changed, 551 insertions, 0 deletions
diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs
new file mode 100644
index 0000000..dea97ef
--- /dev/null
+++ b/src/purgatory/sync/context.rs
@@ -0,0 +1,546 @@
1//! Sync context abstraction for testability.
2//!
3//! This module provides the `SyncContext` trait which abstracts external dependencies
4//! for sync operations. This allows unit testing of sync logic by mocking:
5//! - Repository data fetching
6//! - OID existence checks
7//! - Git fetch operations
8//! - Event processing
9//!
10//! The real implementation (`RealSyncContext`) connects to actual database, git,
11//! and relay systems. The mock implementation (`MockSyncContext`) is used in tests.
12
13use anyhow::Result;
14use async_trait::async_trait;
15use std::collections::HashSet;
16use std::path::{Path, PathBuf};
17
18use crate::git::authorization::RepositoryData;
19
20/// Result of processing newly available git data.
21///
22/// This struct captures what happened when we tried to release events from
23/// purgatory after new git data became available.
24#[derive(Debug, Default, Clone)]
25pub struct ProcessResult {
26 /// Number of state events released from purgatory
27 pub states_released: usize,
28 /// Number of PR events released from purgatory
29 pub prs_released: usize,
30 /// Number of repositories synced (OIDs copied + refs aligned)
31 pub repos_synced: usize,
32 /// Number of refs created across all repos
33 pub refs_created: usize,
34 /// Number of refs updated across all repos
35 pub refs_updated: usize,
36 /// Number of refs deleted across all repos
37 pub refs_deleted: usize,
38 /// Errors encountered (non-fatal)
39 pub errors: Vec<String>,
40}
41
42impl ProcessResult {
43 /// Check if any events were released
44 pub fn released_any(&self) -> bool {
45 self.states_released > 0 || self.prs_released > 0
46 }
47}
48
49/// Abstraction over external dependencies for sync operations.
50///
51/// This trait allows unit testing of sync logic by mocking:
52/// - Repository data fetching
53/// - OID existence checks
54/// - Git fetch operations
55/// - Event processing
56///
57/// # Implementation Notes
58///
59/// The real implementation (`RealSyncContext`) holds references to purgatory,
60/// database, etc., and the `process_newly_available_git_data` method delegates
61/// to the unified function. This keeps the sync logic functions
62/// (`sync_identifier_next_url`, `sync_identifier_from_url`) clean and testable
63/// with mocks.
64#[async_trait]
65pub trait SyncContext: Send + Sync {
66 /// Get repository data (announcements, clone URLs, etc.) from the database.
67 ///
68 /// # Arguments
69 /// * `identifier` - The repository identifier (d-tag value)
70 ///
71 /// # Returns
72 /// Repository data including announcements and state events
73 async fn fetch_repository_data(&self, identifier: &str) -> Result<RepositoryData>;
74
75 /// Get all OIDs needed for purgatory events with this identifier.
76 ///
77 /// This collects commit hashes from:
78 /// - State events in purgatory (branch/tag commits)
79 /// - PR events in purgatory (commit hash from c-tag)
80 ///
81 /// # Arguments
82 /// * `identifier` - The repository identifier
83 ///
84 /// # Returns
85 /// Set of OID strings (commit hashes) that are still needed
86 fn collect_needed_oids(&self, identifier: &str) -> HashSet<String>;
87
88 /// Check if an OID exists locally in a repository.
89 ///
90 /// # Arguments
91 /// * `repo_path` - Path to the git repository
92 /// * `oid` - The object ID (commit hash) to check
93 ///
94 /// # Returns
95 /// true if the OID exists in the repository
96 fn oid_exists(&self, repo_path: &Path, oid: &str) -> bool;
97
98 /// Fetch OIDs from a remote server.
99 ///
100 /// Attempts to fetch the specified OIDs from the given URL into the
101 /// local repository.
102 ///
103 /// # Arguments
104 /// * `repo_path` - Path to the local git repository
105 /// * `url` - Remote URL to fetch from
106 /// * `oids` - List of OIDs to fetch
107 ///
108 /// # Returns
109 /// List of OIDs that were successfully fetched
110 async fn fetch_oids(
111 &self,
112 repo_path: &Path,
113 url: &str,
114 oids: &[String],
115 ) -> Result<Vec<String>>;
116
117 /// Process newly available git data.
118 ///
119 /// This is called after each successful OID fetch to check if any purgatory
120 /// events can now be satisfied with the available git data.
121 ///
122 /// The function:
123 /// 1. Discovers satisfiable events from purgatory
124 /// 2. Syncs OIDs to authorized owner repos
125 /// 3. Aligns refs (+ sets HEAD)
126 /// 4. Saves events to database
127 /// 5. Notifies WebSocket subscribers
128 /// 6. Removes from purgatory
129 ///
130 /// # Arguments
131 /// * `source_repo_path` - Path to the repository that has the new git data
132 /// * `new_oids` - Set of OIDs that were just fetched
133 ///
134 /// # Returns
135 /// Result describing what was processed
136 async fn process_newly_available_git_data(
137 &self,
138 source_repo_path: &Path,
139 new_oids: &HashSet<String>,
140 ) -> Result<ProcessResult>;
141
142 /// Check if there are still pending events for this identifier.
143 ///
144 /// Returns true if purgatory has state events or PR events for this identifier.
145 ///
146 /// # Arguments
147 /// * `identifier` - The repository identifier
148 fn has_pending_events(&self, identifier: &str) -> bool;
149
150 /// Find the best local repository to fetch into.
151 ///
152 /// Given repository data from the database, finds an existing local repository
153 /// that can be used as the fetch target. Typically returns the first owner's
154 /// repository that exists on disk.
155 ///
156 /// # Arguments
157 /// * `db_repo_data` - Repository data from the database
158 ///
159 /// # Returns
160 /// Path to the target repository, or None if no suitable repo exists
161 fn find_target_repo(&self, db_repo_data: &RepositoryData) -> Option<PathBuf>;
162
163 /// Get our domain (to exclude from clone URLs).
164 ///
165 /// When syncing, we don't want to fetch from ourselves. This returns our
166 /// domain so it can be filtered out of clone URL lists.
167 fn our_domain(&self) -> Option<&str>;
168}
169
170// =============================================================================
171// Mock Implementation for Testing
172// =============================================================================
173
174#[cfg(test)]
175pub mod mock {
176 use super::*;
177 use std::collections::HashMap;
178 use std::sync::RwLock;
179
180 /// Mock context for testing sync logic without I/O.
181 ///
182 /// This mock allows tests to:
183 /// - Configure repository data (URLs, announcements)
184 /// - Specify which OIDs are needed
185 /// - Configure which URLs provide which OIDs
186 /// - Track fetch attempts for assertions
187 /// - Control whether events are "pending"
188 ///
189 /// # Example
190 ///
191 /// ```ignore
192 /// let mock = MockSyncContext::new()
193 /// .with_urls(&["https://github.com/foo/bar.git", "https://gitlab.com/foo/bar.git"])
194 /// .with_needed_oids(&["abc123", "def456"])
195 /// .url_provides("https://github.com/foo/bar.git", &["abc123"]);
196 ///
197 /// // Use mock in tests...
198 /// assert_eq!(mock.fetch_log(), vec!["https://github.com/foo/bar.git"]);
199 /// ```
200 pub struct MockSyncContext {
201 /// Repository data to return from fetch_repository_data
202 repo_data: RwLock<Option<RepositoryData>>,
203
204 /// Clone URLs available for the repository
205 clone_urls: Vec<String>,
206
207 /// OIDs still needed (decremented when "fetched")
208 needed_oids: RwLock<HashSet<String>>,
209
210 /// Which OIDs each URL can provide
211 url_provides_oids: HashMap<String, HashSet<String>>,
212
213 /// Track fetch attempts for assertions
214 fetch_log: RwLock<Vec<String>>,
215
216 /// Whether there are pending events
217 has_pending: RwLock<bool>,
218
219 /// Our domain (to exclude from clone URLs)
220 our_domain: Option<String>,
221
222 /// Path to return from find_target_repo
223 target_repo_path: Option<PathBuf>,
224
225 /// Whether fetch_oids should fail
226 fetch_should_fail: RwLock<HashSet<String>>,
227
228 /// Results from process_newly_available_git_data calls
229 process_results: RwLock<Vec<ProcessResult>>,
230 }
231
232 impl Default for MockSyncContext {
233 fn default() -> Self {
234 Self::new()
235 }
236 }
237
238 impl MockSyncContext {
239 /// Create a new mock context with default settings.
240 pub fn new() -> Self {
241 Self {
242 repo_data: RwLock::new(None),
243 clone_urls: Vec::new(),
244 needed_oids: RwLock::new(HashSet::new()),
245 url_provides_oids: HashMap::new(),
246 fetch_log: RwLock::new(Vec::new()),
247 has_pending: RwLock::new(true),
248 our_domain: None,
249 target_repo_path: Some(PathBuf::from("/tmp/test-repo")),
250 fetch_should_fail: RwLock::new(HashSet::new()),
251 process_results: RwLock::new(Vec::new()),
252 }
253 }
254
255 /// Configure clone URLs for the repository.
256 pub fn with_urls(mut self, urls: &[&str]) -> Self {
257 self.clone_urls = urls.iter().map(|s| s.to_string()).collect();
258 self
259 }
260
261 /// Configure OIDs that are still needed.
262 pub fn with_needed_oids(self, oids: &[&str]) -> Self {
263 *self.needed_oids.write().unwrap() = oids.iter().map(|s| s.to_string()).collect();
264 self
265 }
266
267 /// Configure which OIDs a specific URL can provide.
268 pub fn url_provides(mut self, url: &str, oids: &[&str]) -> Self {
269 self.url_provides_oids.insert(
270 url.to_string(),
271 oids.iter().map(|s| s.to_string()).collect(),
272 );
273 self
274 }
275
276 /// Configure our domain (to be excluded from clone URLs).
277 pub fn with_our_domain(mut self, domain: &str) -> Self {
278 self.our_domain = Some(domain.to_string());
279 self
280 }
281
282 /// Configure the target repo path.
283 pub fn with_target_repo(mut self, path: &str) -> Self {
284 self.target_repo_path = Some(PathBuf::from(path));
285 self
286 }
287
288 /// Configure whether there are pending events.
289 pub fn with_pending_events(self, has_pending: bool) -> Self {
290 *self.has_pending.write().unwrap() = has_pending;
291 self
292 }
293
294 /// Configure a URL to fail when fetched.
295 pub fn url_should_fail(self, url: &str) -> Self {
296 self.fetch_should_fail
297 .write()
298 .unwrap()
299 .insert(url.to_string());
300 self
301 }
302
303 /// Get the log of fetch attempts (URLs that were fetched from).
304 pub fn fetch_log(&self) -> Vec<String> {
305 self.fetch_log.read().unwrap().clone()
306 }
307
308 /// Clear the fetch log.
309 pub fn clear_fetch_log(&self) {
310 self.fetch_log.write().unwrap().clear();
311 }
312
313 /// Get the current set of needed OIDs.
314 pub fn current_needed_oids(&self) -> HashSet<String> {
315 self.needed_oids.read().unwrap().clone()
316 }
317
318 /// Set whether there are pending events (can be called during test).
319 pub fn set_pending_events(&self, has_pending: bool) {
320 *self.has_pending.write().unwrap() = has_pending;
321 }
322
323 /// Mark specific OIDs as no longer needed (simulates successful fetch).
324 pub fn mark_oids_fetched(&self, oids: &[&str]) {
325 let mut needed = self.needed_oids.write().unwrap();
326 for oid in oids {
327 needed.remove(*oid);
328 }
329 }
330 }
331
332 #[async_trait]
333 impl SyncContext for MockSyncContext {
334 async fn fetch_repository_data(&self, _identifier: &str) -> Result<RepositoryData> {
335 // Return stored repo_data or create a minimal one with clone URLs
336 if let Some(data) = self.repo_data.read().unwrap().as_ref() {
337 // Clone the data - this is a test mock so efficiency isn't critical
338 Ok(RepositoryData {
339 announcements: data.announcements.clone(),
340 states: data.states.clone(),
341 })
342 } else {
343 // Create minimal repo data with just clone URLs
344 // In real tests, you'd set up proper announcements
345 use crate::nostr::events::RepositoryAnnouncement;
346 use nostr_sdk::{EventBuilder, Keys, Kind};
347
348 let keys = Keys::generate();
349 let mut announcements = Vec::new();
350
351 if !self.clone_urls.is_empty() {
352 // Create a minimal announcement with the clone URLs
353 let mut tags = vec![nostr_sdk::Tag::custom(
354 nostr_sdk::TagKind::Custom("d".into()),
355 vec!["test-repo".to_string()],
356 )];
357
358 for url in &self.clone_urls {
359 tags.push(nostr_sdk::Tag::custom(
360 nostr_sdk::TagKind::Custom("clone".into()),
361 vec![url.clone()],
362 ));
363 }
364
365 let event = EventBuilder::new(Kind::from(30617), "")
366 .tags(tags)
367 .sign_with_keys(&keys)
368 .unwrap();
369
370 if let Ok(ann) = RepositoryAnnouncement::from_event(event) {
371 announcements.push(ann);
372 }
373 }
374
375 Ok(RepositoryData {
376 announcements,
377 states: Vec::new(),
378 })
379 }
380 }
381
382 fn collect_needed_oids(&self, _identifier: &str) -> HashSet<String> {
383 self.needed_oids.read().unwrap().clone()
384 }
385
386 fn oid_exists(&self, _repo_path: &Path, oid: &str) -> bool {
387 // OID exists if it's NOT in the needed set
388 !self.needed_oids.read().unwrap().contains(oid)
389 }
390
391 async fn fetch_oids(
392 &self,
393 _repo_path: &Path,
394 url: &str,
395 oids: &[String],
396 ) -> Result<Vec<String>> {
397 // Log the fetch attempt
398 self.fetch_log.write().unwrap().push(url.to_string());
399
400 // Check if this URL should fail
401 if self.fetch_should_fail.read().unwrap().contains(url) {
402 return Err(anyhow::anyhow!("Simulated fetch failure for {}", url));
403 }
404
405 // Get OIDs this URL can provide
406 let provides = self
407 .url_provides_oids
408 .get(url)
409 .cloned()
410 .unwrap_or_default();
411
412 // Find which requested OIDs this URL can provide
413 let fetched: Vec<String> = oids
414 .iter()
415 .filter(|oid| provides.contains(*oid))
416 .cloned()
417 .collect();
418
419 // Remove fetched OIDs from needed set
420 {
421 let mut needed = self.needed_oids.write().unwrap();
422 for oid in &fetched {
423 needed.remove(oid);
424 }
425 }
426
427 Ok(fetched)
428 }
429
430 async fn process_newly_available_git_data(
431 &self,
432 _source_repo_path: &Path,
433 _new_oids: &HashSet<String>,
434 ) -> Result<ProcessResult> {
435 // Return a default result - tests can check if this was called
436 let result = ProcessResult::default();
437 self.process_results.write().unwrap().push(result.clone());
438 Ok(result)
439 }
440
441 fn has_pending_events(&self, _identifier: &str) -> bool {
442 *self.has_pending.read().unwrap()
443 }
444
445 fn find_target_repo(&self, _db_repo_data: &RepositoryData) -> Option<PathBuf> {
446 self.target_repo_path.clone()
447 }
448
449 fn our_domain(&self) -> Option<&str> {
450 self.our_domain.as_deref()
451 }
452 }
453
454 #[cfg(test)]
455 mod tests {
456 use super::*;
457
458 #[tokio::test]
459 async fn mock_tracks_fetch_attempts() {
460 let mock = MockSyncContext::new()
461 .with_urls(&["https://github.com/foo/bar.git"])
462 .with_needed_oids(&["abc123"]);
463
464 // Fetch should log the URL
465 let _ = mock
466 .fetch_oids(
467 Path::new("/tmp"),
468 "https://github.com/foo/bar.git",
469 &["abc123".to_string()],
470 )
471 .await;
472
473 assert_eq!(
474 mock.fetch_log(),
475 vec!["https://github.com/foo/bar.git".to_string()]
476 );
477 }
478
479 #[tokio::test]
480 async fn mock_provides_configured_oids() {
481 let mock = MockSyncContext::new()
482 .with_needed_oids(&["abc123", "def456"])
483 .url_provides("https://github.com/foo/bar.git", &["abc123"]);
484
485 let fetched = mock
486 .fetch_oids(
487 Path::new("/tmp"),
488 "https://github.com/foo/bar.git",
489 &["abc123".to_string(), "def456".to_string()],
490 )
491 .await
492 .unwrap();
493
494 // Only abc123 should be fetched (it's what the URL provides)
495 assert_eq!(fetched, vec!["abc123".to_string()]);
496
497 // abc123 should no longer be needed
498 let needed = mock.current_needed_oids();
499 assert!(!needed.contains("abc123"));
500 assert!(needed.contains("def456"));
501 }
502
503 #[tokio::test]
504 async fn mock_url_failure() {
505 let mock = MockSyncContext::new()
506 .with_needed_oids(&["abc123"])
507 .url_should_fail("https://bad-server.com/repo.git");
508
509 let result = mock
510 .fetch_oids(
511 Path::new("/tmp"),
512 "https://bad-server.com/repo.git",
513 &["abc123".to_string()],
514 )
515 .await;
516
517 assert!(result.is_err());
518 }
519
520 #[test]
521 fn mock_oid_exists_reflects_needed_state() {
522 let mock = MockSyncContext::new().with_needed_oids(&["abc123"]);
523
524 // abc123 is needed, so it doesn't exist
525 assert!(!mock.oid_exists(Path::new("/tmp"), "abc123"));
526
527 // def456 is not needed, so it "exists"
528 assert!(mock.oid_exists(Path::new("/tmp"), "def456"));
529
530 // Mark abc123 as fetched
531 mock.mark_oids_fetched(&["abc123"]);
532
533 // Now it exists
534 assert!(mock.oid_exists(Path::new("/tmp"), "abc123"));
535 }
536
537 #[test]
538 fn mock_pending_events_controllable() {
539 let mock = MockSyncContext::new().with_pending_events(true);
540 assert!(mock.has_pending_events("test-repo"));
541
542 mock.set_pending_events(false);
543 assert!(!mock.has_pending_events("test-repo"));
544 }
545 }
546}
diff --git a/src/purgatory/sync/mod.rs b/src/purgatory/sync/mod.rs
index b29f10e..1ac0cb1 100644
--- a/src/purgatory/sync/mod.rs
+++ b/src/purgatory/sync/mod.rs
@@ -6,8 +6,13 @@
6//! - Exponential backoff per identifier (20s → 2m, then 2m intervals) 6//! - Exponential backoff per identifier (20s → 2m, then 2m intervals)
7//! - Debouncing for burst event arrivals 7//! - Debouncing for burst event arrivals
8 8
9mod context;
9mod queue; 10mod queue;
10mod throttle; 11mod throttle;
11 12
13pub use context::{ProcessResult, SyncContext};
12pub use queue::SyncQueueEntry; 14pub use queue::SyncQueueEntry;
13pub use throttle::{DomainThrottle, ThrottleManager}; 15pub use throttle::{DomainThrottle, ThrottleManager};
16
17#[cfg(test)]
18pub use context::mock::MockSyncContext;