diff options
Diffstat (limited to 'docs')
| -rw-r--r-- | docs/archive/2025-12-18-sync-test-refactor.md | 481 | ||||
| -rw-r--r-- | docs/explanation/README.md | 71 | ||||
| -rw-r--r-- | docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md | 675 | ||||
| -rw-r--r-- | docs/explanation/grasp-02-proactive-sync.md | 3 | ||||
| -rw-r--r-- | docs/explanation/inline-authorization.md | 270 | ||||
| -rw-r--r-- | docs/explanation/purgatory-design.md | 1230 | ||||
| -rw-r--r-- | docs/explanation/purgatory-sync-redesign.md | 1959 | ||||
| -rw-r--r-- | docs/explanation/unify-git-data-sync.md | 481 |
8 files changed, 1435 insertions, 3735 deletions
diff --git a/docs/archive/2025-12-18-sync-test-refactor.md b/docs/archive/2025-12-18-sync-test-refactor.md deleted file mode 100644 index cf22956..0000000 --- a/docs/archive/2025-12-18-sync-test-refactor.md +++ /dev/null | |||
| @@ -1,481 +0,0 @@ | |||
| 1 | # Sync Test Refactor Options | ||
| 2 | |||
| 3 | ## Summary of Requirements | ||
| 4 | |||
| 5 | From your feedback: | ||
| 6 | |||
| 7 | - **Tag variations**: Test with ONE sync mode (live OR historic), but make it clear which | ||
| 8 | - **Discovery**: Needs both live AND historic examples (mechanism differs) | ||
| 9 | - **Duplication**: Compare approaches before deciding | ||
| 10 | |||
| 11 | --- | ||
| 12 | |||
| 13 | ## Chosen Approach: Unified Helper with Event Slices | ||
| 14 | |||
| 15 | A single helper function that handles both sync modes based on which event slices have content: | ||
| 16 | |||
| 17 | ````rust | ||
| 18 | /// Result from sync test scenario | ||
| 19 | pub struct SyncTestResult { | ||
| 20 | pub source: TestRelay, | ||
| 21 | pub syncing: TestRelay, | ||
| 22 | pub keys: Keys, | ||
| 23 | pub repo_coord: String, | ||
| 24 | } | ||
| 25 | |||
| 26 | /// Run a sync test scenario with historic and/or live events. | ||
| 27 | /// | ||
| 28 | /// # Arguments | ||
| 29 | /// - `historic_events` - Events loaded on source BEFORE syncing relay connects | ||
| 30 | /// - `live_events` - Events fed to source AFTER syncing relay connects | ||
| 31 | /// | ||
| 32 | /// # Mode Detection | ||
| 33 | /// - If only `historic_events` has content → Historic sync test | ||
| 34 | /// - If only `live_events` has content → Live sync test | ||
| 35 | /// - Both can have content for mixed scenarios | ||
| 36 | /// | ||
| 37 | /// # Example - Historic Sync | ||
| 38 | /// ```rust | ||
| 39 | /// let (result, events) = run_sync_test(&[announcement, issue], &[]).await; | ||
| 40 | /// // Source had events before syncing relay started | ||
| 41 | /// // Verify events synced to result.syncing | ||
| 42 | /// ``` | ||
| 43 | /// | ||
| 44 | /// # Example - Live Sync | ||
| 45 | /// ```rust | ||
| 46 | /// let (result, events) = run_sync_test(&[], &[issue, patch]).await; | ||
| 47 | /// // Events were added after connection established | ||
| 48 | /// // Verify events synced to result.syncing | ||
| 49 | /// ``` | ||
| 50 | pub async fn run_sync_test( | ||
| 51 | historic_events: &[Event], | ||
| 52 | live_events: &[Event], | ||
| 53 | ) -> SyncTestResult { | ||
| 54 | // 1. Pre-allocate syncing relay port for announcement tags | ||
| 55 | let syncing_port = TestRelay::find_free_port(); | ||
| 56 | let syncing_domain = format!("127.0.0.1:{}", syncing_port); | ||
| 57 | |||
| 58 | // 2. Start source relay | ||
| 59 | let source = TestRelay::start().await; | ||
| 60 | |||
| 61 | // 3. Create keys and announcement listing both relays | ||
| 62 | let keys = Keys::generate(); | ||
| 63 | let announcement = create_repo_announcement( | ||
| 64 | &keys, | ||
| 65 | &[&source.domain(), &syncing_domain], | ||
| 66 | "test-repo", | ||
| 67 | ); | ||
| 68 | |||
| 69 | // 4. Send announcement + historic events to source BEFORE syncing relay starts | ||
| 70 | send_to_relay(&source, &announcement).await; | ||
| 71 | for event in historic_events { | ||
| 72 | send_to_relay(&source, event).await; | ||
| 73 | } | ||
| 74 | |||
| 75 | // 5. Start syncing relay (connects to source) | ||
| 76 | let syncing = TestRelay::start_on_port_with_options( | ||
| 77 | syncing_port, | ||
| 78 | Some(source.url().into()), | ||
| 79 | false, | ||
| 80 | ).await; | ||
| 81 | |||
| 82 | // 6. Wait for sync connection to establish | ||
| 83 | wait_for_sync_connection(syncing.url(), 1, Duration::from_secs(5)).await.ok(); | ||
| 84 | |||
| 85 | // 7. Send live events AFTER connection established | ||
| 86 | for event in live_events { | ||
| 87 | send_to_relay(&source, event).await; | ||
| 88 | } | ||
| 89 | |||
| 90 | // 8. Allow sync to complete | ||
| 91 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 92 | |||
| 93 | SyncTestResult { | ||
| 94 | source, | ||
| 95 | syncing, | ||
| 96 | keys, | ||
| 97 | repo_coord: repo_coord(&keys, "test-repo"), | ||
| 98 | } | ||
| 99 | } | ||
| 100 | ```` | ||
| 101 | |||
| 102 | ### Test Usage Examples | ||
| 103 | |||
| 104 | ```rust | ||
| 105 | // Historic sync - events existed before connection | ||
| 106 | #[tokio::test] | ||
| 107 | async fn test_historic_layer2_issue_syncs() { | ||
| 108 | let keys = Keys::generate(); | ||
| 109 | let repo_coord = repo_coord(&keys, "test-repo"); | ||
| 110 | let issue = build_layer2_issue_event(&keys, &repo_coord, "Historic Issue")?; | ||
| 111 | |||
| 112 | let result = run_sync_test(&[issue.clone()], &[]).await; | ||
| 113 | |||
| 114 | assert!( | ||
| 115 | wait_for_event_on_relay(result.syncing.url(), issue.id).await, | ||
| 116 | "Historic issue should sync" | ||
| 117 | ); | ||
| 118 | } | ||
| 119 | |||
| 120 | // Live sync - events arrive after connection | ||
| 121 | #[tokio::test] | ||
| 122 | async fn test_live_layer2_issue_syncs() { | ||
| 123 | let keys = Keys::generate(); | ||
| 124 | let repo_coord = repo_coord(&keys, "test-repo"); | ||
| 125 | let issue = build_layer2_issue_event(&keys, &repo_coord, "Live Issue")?; | ||
| 126 | |||
| 127 | let result = run_sync_test(&[], &[issue.clone()]).await; | ||
| 128 | |||
| 129 | assert!( | ||
| 130 | wait_for_event_on_relay(result.syncing.url(), issue.id).await, | ||
| 131 | "Live issue should sync" | ||
| 132 | ); | ||
| 133 | } | ||
| 134 | |||
| 135 | // Discovery test - historic | ||
| 136 | #[tokio::test] | ||
| 137 | async fn test_discovery_historic_syncs_layer2() { | ||
| 138 | let keys = Keys::generate(); | ||
| 139 | let repo_coord = repo_coord(&keys, "test-repo"); | ||
| 140 | let issue = build_layer2_issue_event(&keys, &repo_coord, "Discovered Issue")?; | ||
| 141 | |||
| 142 | // Source has the issue before discovery | ||
| 143 | let result = run_sync_test(&[issue.clone()], &[]).await; | ||
| 144 | |||
| 145 | assert!(wait_for_event_on_relay(result.syncing.url(), issue.id).await); | ||
| 146 | } | ||
| 147 | |||
| 148 | // Discovery test - live | ||
| 149 | #[tokio::test] | ||
| 150 | async fn test_discovery_live_syncs_layer2() { | ||
| 151 | // ... setup similar, events in second slice | ||
| 152 | } | ||
| 153 | ``` | ||
| 154 | |||
| 155 | ### Why This Approach | ||
| 156 | |||
| 157 | 1. **Single function, both modes** - No duplication of setup logic | ||
| 158 | 2. **Clear distinction** - Function signature makes it obvious which is historic vs live | ||
| 159 | 3. **No new dependencies** - Plain Rust | ||
| 160 | 4. **Readable tests** - Test body just creates events and calls helper | ||
| 161 | 5. **Flexible** - Can test mixed scenarios if needed | ||
| 162 | |||
| 163 | --- | ||
| 164 | |||
| 165 | ## Proposed Test File Structure | ||
| 166 | |||
| 167 | ``` | ||
| 168 | tests/sync/ | ||
| 169 | ├── mod.rs # Module declarations + overview doc | ||
| 170 | ├── historic_sync.rs # NEW: Historic sync tests (events exist before connection) | ||
| 171 | ├── live_sync.rs # REFACTORED: Live sync tests (events arrive after connection) | ||
| 172 | ├── discovery.rs # REFACTORED: Relay discovery (both modes) | ||
| 173 | ├── tag_variations.rs # REFACTORED: Tag type coverage (live sync only) | ||
| 174 | ├── metrics.rs # UNCHANGED: Prometheus metrics tests | ||
| 175 | └── catchup.rs # UNCHANGED: Documentation only | ||
| 176 | ``` | ||
| 177 | |||
| 178 | ### `tests/sync/historic_sync.rs` (NEW - renamed from bootstrap.rs) | ||
| 179 | |||
| 180 | ```rust | ||
| 181 | //! Historic Sync Tests | ||
| 182 | //! | ||
| 183 | //! Tests for syncing events that exist on source relay BEFORE the syncing relay connects. | ||
| 184 | //! This is the bootstrap/startup sync path. | ||
| 185 | |||
| 186 | /// Historic sync: Layer 1 announcements sync on startup | ||
| 187 | /// run_sync_test with historic_events: [announcement] | ||
| 188 | #[tokio::test] | ||
| 189 | async fn test_historic_layer1_announcement_syncs() { ... } | ||
| 190 | |||
| 191 | /// Historic sync: Layer 2 issues sync on startup | ||
| 192 | /// run_sync_test with historic_events: [issue] | ||
| 193 | #[tokio::test] | ||
| 194 | async fn test_historic_layer2_issue_syncs() { ... } | ||
| 195 | |||
| 196 | /// Historic sync: Layer 3 comments sync after Layer 2 syncs | ||
| 197 | /// run_sync_test with historic_events: [issue, comment] | ||
| 198 | #[tokio::test] | ||
| 199 | async fn test_historic_layer3_comment_syncs() { ... } | ||
| 200 | |||
| 201 | /// Historic sync: Events not listing relay domain are rejected | ||
| 202 | /// run_sync_test with historic_events: [announcement_missing_domain] | ||
| 203 | /// Verify NOT synced | ||
| 204 | #[tokio::test] | ||
| 205 | async fn test_historic_rejects_events_not_listing_relay() { ... } | ||
| 206 | |||
| 207 | /// Historic sync works without NIP-77 negentropy (REQ+EOSE fallback) | ||
| 208 | /// run_sync_test with historic_events, negentropy disabled | ||
| 209 | #[tokio::test] | ||
| 210 | async fn test_historic_sync_without_negentropy() { ... } | ||
| 211 | ``` | ||
| 212 | |||
| 213 | ### `tests/sync/live_sync.rs` (REFACTORED) | ||
| 214 | |||
| 215 | ```rust | ||
| 216 | //! Live Sync Tests | ||
| 217 | //! | ||
| 218 | //! Tests for syncing events that arrive on source relay AFTER the syncing relay connects. | ||
| 219 | //! This is the real-time/subscription-based sync path. | ||
| 220 | |||
| 221 | /// Live sync: Layer 2 issues sync in real-time | ||
| 222 | /// run_sync_test with live_events: [issue] | ||
| 223 | #[tokio::test] | ||
| 224 | async fn test_live_layer2_issue_syncs() { ... } | ||
| 225 | |||
| 226 | /// Live sync: Layer 3 comments sync after Layer 2 syncs | ||
| 227 | /// run_sync_test with live_events: [issue, comment] | ||
| 228 | #[tokio::test] | ||
| 229 | async fn test_live_layer3_comment_syncs() { ... } | ||
| 230 | |||
| 231 | /// Live sync: Events arrive in chronological order | ||
| 232 | /// run_sync_test with live_events: [issue1, issue2, issue3] | ||
| 233 | #[tokio::test] | ||
| 234 | async fn test_live_sync_event_ordering() { ... } | ||
| 235 | ``` | ||
| 236 | |||
| 237 | ### `tests/sync/discovery.rs` (REFACTORED) | ||
| 238 | |||
| 239 | ```rust | ||
| 240 | //! Relay Discovery Tests | ||
| 241 | //! | ||
| 242 | //! Tests for discovering other relays from announcement events. | ||
| 243 | //! Discovery can happen via historic sync or live sync paths. | ||
| 244 | |||
| 245 | // === HISTORIC DISCOVERY === | ||
| 246 | // Relay discovers another relay from an announcement that existed before connection | ||
| 247 | |||
| 248 | /// Historic discovery: Discovers relay from announcement, syncs Layer 2 | ||
| 249 | /// 1. relay_a has announcement + issue | ||
| 250 | /// 2. relay_b starts with sync from relay_a | ||
| 251 | /// 3. relay_b syncs announcement, discovers other relays listed, syncs issue | ||
| 252 | #[tokio::test] | ||
| 253 | async fn test_historic_discovery_syncs_layer2() { ... } | ||
| 254 | |||
| 255 | /// Historic discovery: 3-relay recursive discovery chain | ||
| 256 | /// 1. relay_b has announcement listing relay_c | ||
| 257 | /// 2. relay_c has separate announcement | ||
| 258 | /// 3. relay_a starts syncing from relay_b | ||
| 259 | /// 4. relay_a gets announcement, discovers relay_c, syncs from relay_c | ||
| 260 | #[tokio::test] | ||
| 261 | async fn test_historic_recursive_discovery() { ... } | ||
| 262 | |||
| 263 | // === LIVE DISCOVERY === | ||
| 264 | // Relay discovers another relay from an announcement that arrives after connection | ||
| 265 | |||
| 266 | /// Live discovery: Discovers relay from new announcement, syncs Layer 2 | ||
| 267 | /// 1. Both relays running | ||
| 268 | /// 2. Announcement submitted to relay_a | ||
| 269 | /// 3. relay_b discovers relay_a from announcement, syncs Layer 2 events | ||
| 270 | #[tokio::test] | ||
| 271 | async fn test_live_discovery_syncs_layer2() { ... } | ||
| 272 | |||
| 273 | /// Live discovery: Multi-hop discovery with layer chain | ||
| 274 | /// Similar to recursive but with live submission of announcement | ||
| 275 | #[tokio::test] | ||
| 276 | async fn test_live_recursive_discovery() { ... } | ||
| 277 | ``` | ||
| 278 | |||
| 279 | ### `tests/sync/tag_variations.rs` (REFACTORED - Live sync only) | ||
| 280 | |||
| 281 | ```rust | ||
| 282 | //! Tag Variation Tests (Live Sync Mode) | ||
| 283 | //! | ||
| 284 | //! Tests that all valid tag types are correctly processed during sync. | ||
| 285 | //! Uses LIVE sync mode - tag parsing is mode-independent, so testing one mode is sufficient. | ||
| 286 | |||
| 287 | // === LAYER 2 TAG VARIATIONS === | ||
| 288 | |||
| 289 | /// Layer 2 with lowercase 'a' tag (standard NIP-01) | ||
| 290 | #[tokio::test] | ||
| 291 | async fn test_layer2_lowercase_a_tag() { ... } | ||
| 292 | |||
| 293 | /// Layer 2 with uppercase 'A' tag (NIP-33 style) | ||
| 294 | #[tokio::test] | ||
| 295 | async fn test_layer2_uppercase_a_tag() { ... } | ||
| 296 | |||
| 297 | /// Layer 2 with 'q' quote tag (NIP-18 style) | ||
| 298 | #[tokio::test] | ||
| 299 | async fn test_layer2_q_tag() { ... } | ||
| 300 | |||
| 301 | // === LAYER 3 TAG VARIATIONS === | ||
| 302 | |||
| 303 | /// Layer 3 with lowercase 'e' tag (standard NIP-01) | ||
| 304 | #[tokio::test] | ||
| 305 | async fn test_layer3_lowercase_e_tag() { ... } | ||
| 306 | |||
| 307 | /// Layer 3 with uppercase 'E' tag (NIP-22 comment) | ||
| 308 | #[tokio::test] | ||
| 309 | async fn test_layer3_uppercase_e_tag() { ... } | ||
| 310 | |||
| 311 | /// Layer 3 with 'q' quote tag (NIP-18 style) | ||
| 312 | #[tokio::test] | ||
| 313 | async fn test_layer3_q_tag() { ... } | ||
| 314 | ``` | ||
| 315 | |||
| 316 | ### `tests/sync/mod.rs` (UPDATED) | ||
| 317 | |||
| 318 | ```rust | ||
| 319 | //! Sync Integration Tests | ||
| 320 | //! | ||
| 321 | //! Tests for ngit-grasp's proactive sync functionality, organized by sync mode: | ||
| 322 | //! | ||
| 323 | //! ## Sync Modes | ||
| 324 | //! | ||
| 325 | //! - **Historic Sync** (`historic_sync.rs`): Events exist BEFORE syncing relay connects | ||
| 326 | //! - Also called bootstrap/startup sync | ||
| 327 | //! - Tests the REQ+EOSE or negentropy-based initial sync | ||
| 328 | //! | ||
| 329 | //! - **Live Sync** (`live_sync.rs`): Events arrive AFTER syncing relay connects | ||
| 330 | //! - Also called real-time/subscription-based sync | ||
| 331 | //! - Tests the event forwarding via active subscriptions | ||
| 332 | //! | ||
| 333 | //! ## Other Test Categories | ||
| 334 | //! | ||
| 335 | //! - **Discovery** (`discovery.rs`): Relay discovers other relays from announcements | ||
| 336 | //! - Has both historic and live variants | ||
| 337 | //! | ||
| 338 | //! - **Tag Variations** (`tag_variations.rs`): All valid tag types work correctly | ||
| 339 | //! - Uses live sync (tag parsing is mode-independent) | ||
| 340 | //! | ||
| 341 | //! - **Metrics** (`metrics.rs`): Prometheus metrics for sync operations | ||
| 342 | //! | ||
| 343 | //! - **Catchup** (`catchup.rs`): Documentation only (not integration-testable) | ||
| 344 | |||
| 345 | pub mod catchup; | ||
| 346 | pub mod discovery; | ||
| 347 | pub mod historic_sync; // Renamed from bootstrap | ||
| 348 | pub mod live_sync; | ||
| 349 | pub mod metrics; | ||
| 350 | pub mod tag_variations; | ||
| 351 | ``` | ||
| 352 | |||
| 353 | --- | ||
| 354 | |||
| 355 | ## Test Count Summary | ||
| 356 | |||
| 357 | | File | Before | After | Notes | | ||
| 358 | | --------------------------------- | ------ | ------ | ------------------------ | | ||
| 359 | | `historic_sync.rs` (bootstrap.rs) | 4 | 5 | Renamed, minor additions | | ||
| 360 | | `live_sync.rs` | 3 | 3 | Simplified using helper | | ||
| 361 | | `discovery.rs` | 3 | 4 | Split into historic/live | | ||
| 362 | | `tag_variations.rs` | 6 | 6 | Simplified using helper | | ||
| 363 | | `metrics.rs` | 9 | 9 | Unchanged | | ||
| 364 | | `catchup.rs` | 0 | 0 | Documentation only | | ||
| 365 | | **Total** | **25** | **27** | +2 discovery tests | | ||
| 366 | |||
| 367 | --- | ||
| 368 | |||
| 369 | ## Implementation Plan | ||
| 370 | |||
| 371 | ### Context | ||
| 372 | |||
| 373 | - Two metrics tests are currently failing: `test_live_sync_event_count` and `test_multi_source_aggregate_counts` | ||
| 374 | - This may indicate implementation bugs in sync functionality | ||
| 375 | - Plan must account for distinguishing test bugs from implementation bugs | ||
| 376 | |||
| 377 | ### Phase 1: Establish Baseline | ||
| 378 | |||
| 379 | **Goal:** Understand current state before making changes | ||
| 380 | |||
| 381 | 1. Run `cargo test --test sync` and capture full output | ||
| 382 | 2. Identify all passing vs failing tests | ||
| 383 | 3. For each failing test, investigate whether: | ||
| 384 | - Test logic is incorrect (test bug) | ||
| 385 | - Implementation is broken (impl bug) | ||
| 386 | - Test was never working (aspirational test) | ||
| 387 | 4. Document findings in Known Issues section below | ||
| 388 | |||
| 389 | ### Phase 2: Add Test Infrastructure | ||
| 390 | |||
| 391 | **Goal:** Add new helpers without breaking existing tests | ||
| 392 | |||
| 393 | 1. Add `SyncTestResult` struct to sync_helpers.rs: | ||
| 394 | |||
| 395 | ```rust | ||
| 396 | pub struct SyncTestResult { | ||
| 397 | pub source: TestRelay, | ||
| 398 | pub syncing: TestRelay, | ||
| 399 | pub keys: Keys, | ||
| 400 | pub repo_coord: String, | ||
| 401 | } | ||
| 402 | ``` | ||
| 403 | |||
| 404 | 2. Add `run_sync_test(historic_events, live_events)` helper function | ||
| 405 | |||
| 406 | 3. Add unit tests for the helper itself: | ||
| 407 | |||
| 408 | - `test_run_sync_test_historic_mode` - verify events sent before connection | ||
| 409 | - `test_run_sync_test_live_mode` - verify events sent after connection | ||
| 410 | |||
| 411 | 4. Run `cargo test` to confirm no regressions | ||
| 412 | |||
| 413 | ### Phase 3: Refactor Historic Sync Tests | ||
| 414 | |||
| 415 | **Goal:** Migrate bootstrap.rs → historic_sync.rs incrementally | ||
| 416 | |||
| 417 | 1. Rename file: `bootstrap.rs` → `historic_sync.rs` | ||
| 418 | 2. Update `mod.rs` to reference new module name | ||
| 419 | 3. Refactor tests one at a time: | ||
| 420 | - `test_bootstrap_syncs_existing_layer2_events` → `test_historic_layer2_issue_syncs` | ||
| 421 | - `test_relay_replays_events_after_restart` → keep or remove (tests restart, not sync mode) | ||
| 422 | - `test_announcement_not_listing_relay_is_not_synced` → `test_historic_rejects_unlisted_relay` | ||
| 423 | - `test_history_sync_without_negentropy` → `test_historic_sync_without_negentropy` | ||
| 424 | 4. Test after each refactor: `cargo test --test sync historic_sync` | ||
| 425 | |||
| 426 | ### Phase 4: Refactor Live Sync Tests | ||
| 427 | |||
| 428 | **Goal:** Simplify live_sync.rs using run_sync_test helper | ||
| 429 | |||
| 430 | 1. Refactor tests one at a time: | ||
| 431 | - `test_live_sync_layer2_events` → use `run_sync_test(&[], &[issue])` | ||
| 432 | - `test_live_sync_layer3_events` → use `run_sync_test(&[], &[issue, comment])` | ||
| 433 | - `test_live_sync_event_ordering` → may need custom setup for ordering test | ||
| 434 | 2. Test after each refactor: `cargo test --test sync live_sync` | ||
| 435 | |||
| 436 | ### Phase 5: Refactor Discovery Tests | ||
| 437 | |||
| 438 | **Goal:** Split discovery.rs into historic and live sections | ||
| 439 | |||
| 440 | 1. Add section comment: `// === HISTORIC DISCOVERY ===` | ||
| 441 | 2. Refactor existing tests to use run_sync_test | ||
| 442 | 3. Add new tests: | ||
| 443 | - `test_historic_discovery_syncs_layer2` | ||
| 444 | - `test_live_discovery_syncs_layer2` | ||
| 445 | 4. Test after each change: `cargo test --test sync discovery` | ||
| 446 | |||
| 447 | ### Phase 6: Refactor Tag Variations Tests | ||
| 448 | |||
| 449 | **Goal:** Simplify tag_variations.rs using run_sync_test (live mode) | ||
| 450 | |||
| 451 | 1. Add header doc comment explaining live sync mode choice | ||
| 452 | 2. Refactor each test to use `run_sync_test(&[], &[event])` | ||
| 453 | 3. Test after all changes: `cargo test --test sync tag_variations` | ||
| 454 | |||
| 455 | ### Phase 7: Final Verification and Cleanup | ||
| 456 | |||
| 457 | 1. Update `mod.rs` documentation | ||
| 458 | 2. Run full test suite: `cargo test --test sync` | ||
| 459 | 3. Compare results to Phase 1 baseline: | ||
| 460 | - New failures = regressions from refactor (must fix) | ||
| 461 | - Same failures as baseline = pre-existing issues (document) | ||
| 462 | 4. Delete `work/sync-test-refactor-options.md` | ||
| 463 | |||
| 464 | --- | ||
| 465 | |||
| 466 | ## Known Issues | ||
| 467 | |||
| 468 | _To be filled in during Phase 1_ | ||
| 469 | |||
| 470 | ### Failing Tests Before Refactor | ||
| 471 | |||
| 472 | | Test | Status | Root Cause | Action | | ||
| 473 | | --------------------------------------------- | ---------- | ---------- | ------ | | ||
| 474 | | `metrics::test_live_sync_event_count` | ❌ Failing | TBD | TBD | | ||
| 475 | | `metrics::test_multi_source_aggregate_counts` | ❌ Failing | TBD | TBD | | ||
| 476 | |||
| 477 | ### Implementation Notes | ||
| 478 | |||
| 479 | _Any discoveries about how sync actually works vs how tests expect it to work_ | ||
| 480 | |||
| 481 | --- | ||
diff --git a/docs/explanation/README.md b/docs/explanation/README.md index cc3ec49..f477b73 100644 --- a/docs/explanation/README.md +++ b/docs/explanation/README.md | |||
| @@ -80,6 +80,77 @@ Explanation documentation helps you **understand concepts** and design decisions | |||
| 80 | 80 | ||
| 81 | --- | 81 | --- |
| 82 | 82 | ||
| 83 | ### [Purgatory Design](purgatory-design.md) | ||
| 84 | **In-memory holding area for events awaiting git data** | ||
| 85 | |||
| 86 | **Topics:** | ||
| 87 | - The "which arrives first?" problem | ||
| 88 | - Separate storage for state vs PR events | ||
| 89 | - Late binding for state events | ||
| 90 | - Bidirectional waiting for PR events | ||
| 91 | - Authorization during push | ||
| 92 | |||
| 93 | **Read when:** You want to understand how ngit-grasp handles out-of-order event/git data arrival | ||
| 94 | |||
| 95 | --- | ||
| 96 | |||
| 97 | ### [GRASP-02 Proactive Sync](grasp-02-proactive-sync.md) | ||
| 98 | **Relay-to-relay synchronization for repository discovery** | ||
| 99 | |||
| 100 | **Topics:** | ||
| 101 | - Negentropy-based event sync | ||
| 102 | - Repository announcement discovery | ||
| 103 | - Relay management and reconnection | ||
| 104 | - Layer 2 filtering | ||
| 105 | - Bootstrap and dynamic relay discovery | ||
| 106 | |||
| 107 | **Read when:** You want to understand how ngit-grasp discovers and syncs repositories across relays | ||
| 108 | |||
| 109 | --- | ||
| 110 | |||
| 111 | ### [GRASP-02 Purgatory Git Data Fetching](grasp-02-proactive-sync-purgatory-git-data.md) | ||
| 112 | **Proactive git data fetching from remote servers** | ||
| 113 | |||
| 114 | **Topics:** | ||
| 115 | - Identifier-based batching | ||
| 116 | - Exponential backoff with fresh start | ||
| 117 | - Domain throttling (5 concurrent, 30/min) | ||
| 118 | - Debounced delays (3min user, 500ms sync) | ||
| 119 | - 30-minute expiry | ||
| 120 | - Mock-based testability | ||
| 121 | |||
| 122 | **Read when:** You want to understand how purgatory automatically fetches missing git data | ||
| 123 | |||
| 124 | --- | ||
| 125 | |||
| 126 | ### [Unified Git Data Sync](unify-git-data-sync.md) | ||
| 127 | **Shared processing for git push and purgatory sync paths** | ||
| 128 | |||
| 129 | **Topics:** | ||
| 130 | - Why unify push and sync processing | ||
| 131 | - OID syncing to owner repos | ||
| 132 | - Ref alignment logic | ||
| 133 | - Event release from purgatory | ||
| 134 | - WebSocket notification | ||
| 135 | |||
| 136 | **Read when:** You want to understand how git data is processed consistently regardless of arrival method | ||
| 137 | |||
| 138 | --- | ||
| 139 | |||
| 140 | ### [Monitoring Overview](monitoring.md) | ||
| 141 | **Prometheus metrics and observability** | ||
| 142 | |||
| 143 | **Topics:** | ||
| 144 | - Metrics philosophy | ||
| 145 | - Connection tracking | ||
| 146 | - Git operation metrics | ||
| 147 | - Nostr event metrics | ||
| 148 | - Privacy considerations | ||
| 149 | |||
| 150 | **Read when:** You want to understand how to monitor ngit-grasp in production | ||
| 151 | |||
| 152 | --- | ||
| 153 | |||
| 83 | ## Planned Explanation Documentation | 154 | ## Planned Explanation Documentation |
| 84 | 155 | ||
| 85 | ### GRASP Protocol Design | 156 | ### GRASP Protocol Design |
diff --git a/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md b/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md new file mode 100644 index 0000000..31c3e46 --- /dev/null +++ b/docs/explanation/grasp-02-proactive-sync-purgatory-git-data.md | |||
| @@ -0,0 +1,675 @@ | |||
| 1 | # GRASP-02 Proactive Sync: Purgatory Git Data Fetching | ||
| 2 | |||
| 3 | **Status**: ✅ Implemented | ||
| 4 | **Implementation**: [`src/purgatory/sync/`](../../src/purgatory/sync/) | ||
| 5 | **Related**: | ||
| 6 | |||
| 7 | - [Purgatory Design](purgatory-design.md) - Core purgatory concepts | ||
| 8 | - [GRASP-02 Proactive Sync](grasp-02-proactive-sync.md) - Full GRASP-02 implementation | ||
| 9 | - [Unified Git Data Sync](unify-git-data-sync.md) - Shared processing logic | ||
| 10 | |||
| 11 | --- | ||
| 12 | |||
| 13 | ## Overview | ||
| 14 | |||
| 15 | When Nostr events arrive before their git data, they enter **purgatory** waiting to be served. But they don't wait passively—ngit-grasp **actively hunts** for the missing git data across all git servers assoicated with the repo until it finds what it needs. | ||
| 16 | |||
| 17 | ### How It Works | ||
| 18 | |||
| 19 | **If the data exists, we'll find it.** | ||
| 20 | |||
| 21 | The system scours git servers listed in repository announcements and PR events, checking every **2 minutes** for **30 minutes**. If we find the data, events are released immediately. If not, they expire from purgatory after 30 minutes. | ||
| 22 | |||
| 23 | **Smart timing based on how events arrive:** | ||
| 24 | |||
| 25 | - **User-submitted events**: Wait **3 minutes** before hunting—we expect a `git push` to follow shortly | ||
| 26 | - **Sync-received events**: Start hunting after just **500ms**—batch burst arrivals, then get to work | ||
| 27 | |||
| 28 | **Playing nicely with other servers:** | ||
| 29 | |||
| 30 | We respect remote server capacity with: | ||
| 31 | |||
| 32 | - **Throttling**: Max 5 concurrent requests per domain, 30 requests/minute | ||
| 33 | - **Backoff**: Start at 20 seconds, double each attempt, cap at 2 minutes | ||
| 34 | - **Round-robin**: Fair distribution across repositories waiting for the same domain | ||
| 35 | - **Fresh start**: New events reset retry count—recent updates often mean fresh data | ||
| 36 | |||
| 37 | **The result**: If git data is available anywhere in the clone URL list, we'll find it within minutes. If it's not available within 30 minutes, the events expire cleanly. | ||
| 38 | |||
| 39 | ### Key Features | ||
| 40 | |||
| 41 | ✅ **Proactive hunting** - Scours git servers every 2 min (backoff), finds data automatically | ||
| 42 | ✅ **Respectful throttling** - 5 concurrent + 30/min per domain, plays nice with other implementations | ||
| 43 | ✅ **Smart timing** - 3min delay for user pushes, 500ms for synced events | ||
| 44 | ✅ **30min expiry** - Auto-cleanup of events when data never arrives | ||
| 45 | ✅ **Fully testable** - Mock-based architecture for reliable unit tests | ||
| 46 | |||
| 47 | --- | ||
| 48 | |||
| 49 | ## The Problem: Out-of-Order Arrival | ||
| 50 | |||
| 51 | In a distributed system, git data and Nostr events can arrive in any order: | ||
| 52 | |||
| 53 | ``` | ||
| 54 | Timeline A: Event arrives first (user push expected) | ||
| 55 | t=0s: State event received → enters purgatory | ||
| 56 | t=180s: (3min wait - expecting git push) | ||
| 57 | t=30s: Git push arrives → event released ✅ | ||
| 58 | |||
| 59 | Timeline B: Git arrives first | ||
| 60 | t=0s: Git push received → data available | ||
| 61 | t=30s: State event received → immediately served ✅ | ||
| 62 | |||
| 63 | Timeline C: Sync scenario (hunt for data) | ||
| 64 | t=0s: State event received from relay X → enters purgatory | ||
| 65 | t=0.5s: (500ms delay to batch bursts) | ||
| 66 | t=0.5s: Start hunting git servers → check server1, server2, server3... | ||
| 67 | t=45s: Git data found on server2 → event released ✅ | ||
| 68 | |||
| 69 | Timeline D: Data never arrives | ||
| 70 | t=0s: State event received → enters purgatory | ||
| 71 | t=0.5s: Start hunting → server1 (not found), server2 (timeout), server3 (not found) | ||
| 72 | t=20s: Retry → server1 (not found), server2 (not found), server3 (not found) | ||
| 73 | t=60s: Retry → all servers checked, no data | ||
| 74 | ... | ||
| 75 | t=1800s: 30 minutes expired → event discarded, purgatory cleaned up 🗑️ | ||
| 76 | ``` | ||
| 77 | |||
| 78 | **Without proactive sync**: Events in Timeline C would wait indefinitely (or until manual git push). | ||
| 79 | **With proactive sync**: System automatically hunts for data across all known servers, releasing events as soon as the data is found. | ||
| 80 | |||
| 81 | --- | ||
| 82 | |||
| 83 | ## Architecture: Two-Path Sync Design | ||
| 84 | |||
| 85 | The system uses **two independent execution paths** that work together: | ||
| 86 | |||
| 87 | ### Path 1: Main Sync Loop (Non-Throttled URLs) | ||
| 88 | |||
| 89 | Runs every **1 second**, processes identifiers ready for sync: | ||
| 90 | |||
| 91 | 1. Find ready identifiers (where `!in_progress && next_attempt <= now`) | ||
| 92 | 2. Spawn parallel tasks for each identifier | ||
| 93 | 3. Each task tries non-throttled URLs until: | ||
| 94 | - ✅ All OIDs fetched (complete) → remove from queue | ||
| 95 | - ⏸️ Only throttled URLs remain → enqueue with throttled domains, apply backoff | ||
| 96 | - ❌ No URLs left (all tried/throttled) → apply backoff, retry later | ||
| 97 | |||
| 98 | **Key insight**: Main loop doesn't wait for throttled domains. It quickly tries available servers, then hands off to domain queues for rate-limited processing. | ||
| 99 | |||
| 100 | ### Path 2: Domain Throttle Queues (Throttled URLs) | ||
| 101 | |||
| 102 | **Trigger-based** (no polling), processes when capacity frees: | ||
| 103 | |||
| 104 | 1. Identifier enqueued with throttled domain (from main loop) | ||
| 105 | 2. When domain has capacity (slot frees or rate limit window passes): | ||
| 106 | - Pick next identifier (round-robin for fairness) | ||
| 107 | - Try one URL from that domain | ||
| 108 | - Mark URL as tried, release slot | ||
| 109 | 3. Trigger repeats until queue empty or capacity exhausted | ||
| 110 | |||
| 111 | **Key insight**: Each domain independently manages its queue, ensuring we respect rate limits while maximizing throughput. | ||
| 112 | |||
| 113 | --- | ||
| 114 | |||
| 115 | ## Data Flow: From Event to Release | ||
| 116 | |||
| 117 | ```mermaid | ||
| 118 | graph TB | ||
| 119 | A[Event Arrives] --> B{Git Data<br/>Available?} | ||
| 120 | B -->|Yes| C[Serve Immediately] | ||
| 121 | B -->|No| D[Enter Purgatory] | ||
| 122 | |||
| 123 | D --> E[Enqueue for Sync] | ||
| 124 | E --> F{Event Source?} | ||
| 125 | F -->|User Submit| G[3min Delay<br/>expect push] | ||
| 126 | F -->|Relay Sync| H[500ms Delay<br/>batch burst] | ||
| 127 | |||
| 128 | G --> I[Main Sync Loop<br/>1s interval] | ||
| 129 | H --> I | ||
| 130 | |||
| 131 | I --> J{Ready?} | ||
| 132 | J -->|Not Yet| I | ||
| 133 | J -->|Yes| K[Spawn Sync Task] | ||
| 134 | |||
| 135 | K --> L[Try Non-Throttled URLs] | ||
| 136 | L --> M{Got All OIDs?} | ||
| 137 | M -->|Yes| N[Process & Release] | ||
| 138 | M -->|Partial| O[Enqueue Throttled Domains] | ||
| 139 | M -->|None| P[Apply Backoff] | ||
| 140 | |||
| 141 | O --> Q[Domain Queue] | ||
| 142 | Q --> R{Has Capacity?} | ||
| 143 | R -->|No| Q | ||
| 144 | R -->|Yes| S[Try Domain URL] | ||
| 145 | S --> T{Got OIDs?} | ||
| 146 | T -->|Yes| N | ||
| 147 | T -->|No| U[Try Next in Queue] | ||
| 148 | |||
| 149 | P --> I | ||
| 150 | N --> V[Event Served] | ||
| 151 | |||
| 152 | style D fill:#fff3cd | ||
| 153 | style N fill:#d4edda | ||
| 154 | style V fill:#d1ecf1 | ||
| 155 | ``` | ||
| 156 | |||
| 157 | --- | ||
| 158 | |||
| 159 | ## Retry Strategy: Exponential Backoff with Fresh Start | ||
| 160 | |||
| 161 | ### Backoff Schedule | ||
| 162 | |||
| 163 | When sync attempts don't complete (OIDs still needed), backoff increases: | ||
| 164 | |||
| 165 | | Attempt | Delay | Formula | | ||
| 166 | | ------- | ------------- | ---------------------- | | ||
| 167 | | 1 | 20s | `20s * 2^0` | | ||
| 168 | | 2 | 40s | `20s * 2^1` | | ||
| 169 | | 3 | 80s | `20s * 2^2` | | ||
| 170 | | 4+ | 120s (capped) | `min(20s * 2^n, 120s)` | | ||
| 171 | |||
| 172 | **Implementation**: [`src/purgatory/sync/queue.rs:SyncQueueEntry::backoff()`](../../src/purgatory/sync/queue.rs) | ||
| 173 | |||
| 174 | ### Fresh Start on New Events | ||
| 175 | |||
| 176 | **Critical feature**: When a new event arrives for an identifier already in the sync queue, the `attempt_count` resets to 0. | ||
| 177 | |||
| 178 | **Why?** New events often mean: | ||
| 179 | |||
| 180 | - A maintainer just updated the repository | ||
| 181 | - Fresh git data might be available at new clone URLs | ||
| 182 | - Previous failures might have been temporary | ||
| 183 | |||
| 184 | **Example**: | ||
| 185 | |||
| 186 | ``` | ||
| 187 | t=0s: State A arrives → queue with 3min delay, attempt_count=0 | ||
| 188 | t=180s: First sync attempt fails → backoff 20s, attempt_count=1 | ||
| 189 | t=200s: Second attempt fails → backoff 40s, attempt_count=2 | ||
| 190 | t=210s: State B arrives (same identifier) → attempt_count=0 ✨ | ||
| 191 | t=210s: Immediate retry (new event delay) → success! | ||
| 192 | ``` | ||
| 193 | |||
| 194 | --- | ||
| 195 | |||
| 196 | ## Debounced Delays: Smart Timing | ||
| 197 | |||
| 198 | ### User-Submitted Events: 3 Minutes | ||
| 199 | |||
| 200 | When a user submits an event via `EVENT` message, we expect a `git push` to follow shortly: | ||
| 201 | |||
| 202 | ``` | ||
| 203 | t=0s: User submits state event → purgatory + 3min delay | ||
| 204 | t=30s: User runs `git push` → data arrives → event released ✅ | ||
| 205 | ``` | ||
| 206 | |||
| 207 | **Why 3 minutes?** Gives users time to: | ||
| 208 | |||
| 209 | - Finish composing their commit message | ||
| 210 | - Run `git push` command | ||
| 211 | - Handle network delays | ||
| 212 | |||
| 213 | **Configuration**: Hardcoded in [`src/purgatory/mod.rs:DEFAULT_SYNC_DELAY`](../../src/purgatory/mod.rs) | ||
| 214 | |||
| 215 | ### Sync-Triggered Events: 500ms | ||
| 216 | |||
| 217 | When events arrive during relay sync (e.g., negentropy catchup), they often come in bursts: | ||
| 218 | |||
| 219 | ``` | ||
| 220 | t=0s: State A arrives → purgatory + 500ms delay | ||
| 221 | t=0.1s: State B arrives → purgatory + 500ms delay (same repo) | ||
| 222 | t=0.2s: State C arrives → purgatory + 500ms delay (same repo) | ||
| 223 | t=0.5s: Single sync attempt fetches data for all three ✅ | ||
| 224 | ``` | ||
| 225 | |||
| 226 | **Why 500ms?** Batches burst arrivals without excessive delay. | ||
| 227 | |||
| 228 | **Configuration**: Hardcoded in [`src/purgatory/mod.rs:IMMEDIATE_SYNC_DELAY`](../../src/purgatory/mod.rs) | ||
| 229 | |||
| 230 | ### Debouncing Mechanism | ||
| 231 | |||
| 232 | Multiple events for the same identifier **don't create multiple sync tasks**. The `enqueue_sync` method: | ||
| 233 | |||
| 234 | 1. If identifier not in queue → create new entry with delay | ||
| 235 | 2. If identifier already queued → reset `attempt_count`, update `next_attempt` if sooner | ||
| 236 | |||
| 237 | **Result**: Rapid event arrivals → single sync attempt after debounce window. | ||
| 238 | |||
| 239 | **Implementation**: [`src/purgatory/mod.rs:Purgatory::enqueue_sync()`](../../src/purgatory/mod.rs) | ||
| 240 | |||
| 241 | --- | ||
| 242 | |||
| 243 | ## Domain Throttling: Respectful Rate Limiting | ||
| 244 | |||
| 245 | ### Why Throttle? | ||
| 246 | |||
| 247 | Git servers have finite resources. Without throttling: | ||
| 248 | |||
| 249 | - ❌ We could overwhelm small servers with concurrent requests | ||
| 250 | - ❌ Servers might rate-limit or ban us | ||
| 251 | - ❌ Other clients sharing the server suffer degraded performance | ||
| 252 | |||
| 253 | With throttling: | ||
| 254 | |||
| 255 | - ✅ Respect server capacity (5 concurrent max per domain) | ||
| 256 | - ✅ Stay under rate limits (30 requests/min per domain) | ||
| 257 | - ✅ Fair access for all clients | ||
| 258 | |||
| 259 | ### Two-Level Limits | ||
| 260 | |||
| 261 | Each domain has **two independent limits**: | ||
| 262 | |||
| 263 | #### 1. Concurrent Request Limit (Default: 5) | ||
| 264 | |||
| 265 | Maximum in-flight requests to a domain at any moment. | ||
| 266 | |||
| 267 | **Example**: | ||
| 268 | |||
| 269 | ``` | ||
| 270 | Domain: github.com | ||
| 271 | In-flight: [fetch-1, fetch-2, fetch-3, fetch-4, fetch-5] | ||
| 272 | Status: AT CAPACITY (throttled) | ||
| 273 | |||
| 274 | fetch-3 completes → in-flight: 4 | ||
| 275 | Status: HAS CAPACITY (process next queued identifier) | ||
| 276 | ``` | ||
| 277 | |||
| 278 | #### 2. Rate Limit (Default: 30/min) | ||
| 279 | |||
| 280 | Maximum requests in any 60-second sliding window. | ||
| 281 | |||
| 282 | **Example**: | ||
| 283 | |||
| 284 | ``` | ||
| 285 | t=0s: Request 1 → request_times: [0s] | ||
| 286 | t=1s: Request 2 → request_times: [0s, 1s] | ||
| 287 | ... | ||
| 288 | t=30s: Request 30 → request_times: [0s, 1s, ..., 30s] | ||
| 289 | t=31s: Request 31? → THROTTLED (30 requests in last 60s) | ||
| 290 | t=61s: Request at t=0s aged out → request_times: [1s, ..., 30s] | ||
| 291 | t=61s: Request 31 → ALLOWED (only 29 in last 60s) | ||
| 292 | ``` | ||
| 293 | |||
| 294 | **Implementation**: [`src/purgatory/sync/throttle.rs:DomainThrottle::has_capacity()`](../../src/purgatory/sync/throttle.rs) | ||
| 295 | |||
| 296 | ### Round-Robin Fairness | ||
| 297 | |||
| 298 | When multiple identifiers are queued for a throttled domain, we use **round-robin** to ensure fairness: | ||
| 299 | |||
| 300 | ``` | ||
| 301 | Queue: [repo-A, repo-B, repo-C] | ||
| 302 | Round-robin index: 0 | ||
| 303 | |||
| 304 | Attempt 1: Try repo-A (index=0) → fetch → index=1 | ||
| 305 | Attempt 2: Try repo-B (index=1) → fetch → index=2 | ||
| 306 | Attempt 3: Try repo-C (index=2) → fetch → index=0 | ||
| 307 | Attempt 4: Try repo-A (index=0) → ... | ||
| 308 | ``` | ||
| 309 | |||
| 310 | **Why round-robin?** Prevents head-of-line blocking. Without it, repo-A might consume all slots while repo-B and repo-C wait indefinitely. | ||
| 311 | |||
| 312 | **Implementation**: [`src/purgatory/sync/throttle.rs:DomainThrottle::next_ready_identifier()`](../../src/purgatory/sync/throttle.rs) | ||
| 313 | |||
| 314 | ### Trigger-Based Processing (Not Polling) | ||
| 315 | |||
| 316 | Domain queues **don't poll** for capacity. Instead, processing is triggered by two events: | ||
| 317 | |||
| 318 | 1. **`complete_request()`** - A request finishes, slot frees | ||
| 319 | 2. **`enqueue_identifier()`** - New identifier added to queue | ||
| 320 | |||
| 321 | Both methods check `has_capacity()` and trigger `try_process_next()` if true. | ||
| 322 | |||
| 323 | **Why trigger-based?** | ||
| 324 | |||
| 325 | - ✅ Lower CPU usage (no busy-waiting) | ||
| 326 | - ✅ Instant response when capacity frees | ||
| 327 | - ✅ Simpler reasoning (event-driven) | ||
| 328 | |||
| 329 | **Implementation**: [`src/purgatory/sync/throttle.rs:ThrottleManager`](../../src/purgatory/sync/throttle.rs) | ||
| 330 | |||
| 331 | --- | ||
| 332 | |||
| 333 | ## 30-Minute Purgatory Expiry | ||
| 334 | |||
| 335 | Purgatory entries **automatically expire** after 30 minutes to prevent unbounded memory growth. | ||
| 336 | |||
| 337 | ### Why 30 Minutes? | ||
| 338 | |||
| 339 | From the [GRASP-01 spec](https://github.com/DanConwayDev/grasp/blob/main/01.md#purgatory): | ||
| 340 | |||
| 341 | > Events should be kept in purgatory and otherwise discarded after 30 minutes. | ||
| 342 | |||
| 343 | This balances: | ||
| 344 | |||
| 345 | - ⏰ **Long enough** for typical sync scenarios (git data usually arrives within minutes) | ||
| 346 | - 🧹 **Short enough** to prevent memory leaks from abandoned events | ||
| 347 | - 🔄 **Recoverable** events are still on other relays and can be re-submitted | ||
| 348 | |||
| 349 | ### Implementation | ||
| 350 | |||
| 351 | Each purgatory entry tracks: | ||
| 352 | |||
| 353 | - `created_at: Instant` - When added to purgatory | ||
| 354 | - `expires_at: Instant` - When to discard (created_at + 30min) | ||
| 355 | |||
| 356 | The main sync loop checks expiry before processing: | ||
| 357 | |||
| 358 | ```rust | ||
| 359 | if !self.has_pending_events(&identifier) { | ||
| 360 | // No events remain (expired or released) → remove from sync queue | ||
| 361 | self.sync_queue.remove(&identifier); | ||
| 362 | } | ||
| 363 | ``` | ||
| 364 | |||
| 365 | **Note**: Expiry is checked implicitly via `has_pending_events()`. If all events for an identifier have expired, the identifier is removed from the sync queue. | ||
| 366 | |||
| 367 | **Implementation**: [`src/purgatory/mod.rs:DEFAULT_EXPIRY`](../../src/purgatory/mod.rs) | ||
| 368 | |||
| 369 | --- | ||
| 370 | |||
| 371 | ## Testability: Mock-Based Architecture | ||
| 372 | |||
| 373 | A key design goal was **100% unit test coverage** without requiring real git servers or databases. | ||
| 374 | |||
| 375 | ### SyncContext Trait | ||
| 376 | |||
| 377 | All external dependencies are abstracted behind the `SyncContext` trait: | ||
| 378 | |||
| 379 | ```rust | ||
| 380 | #[async_trait] | ||
| 381 | pub trait SyncContext: Send + Sync { | ||
| 382 | async fn fetch_repository_data(&self, identifier: &str) -> Result<RepositoryData>; | ||
| 383 | fn collect_needed_oids(&self, identifier: &str) -> HashSet<String>; | ||
| 384 | async fn oid_exists(&self, repo_path: &Path, oid: &str) -> bool; | ||
| 385 | async fn fetch_oids(&self, repo_path: &Path, url: &str, oids: &[String]) -> Result<Vec<String>>; | ||
| 386 | async fn process_newly_available_git_data(&self, ...) -> Result<ProcessResult>; | ||
| 387 | fn has_pending_events(&self, identifier: &str) -> bool; | ||
| 388 | fn find_target_repo(&self, data: &RepositoryData) -> Option<PathBuf>; | ||
| 389 | fn our_domain(&self) -> Option<&str>; | ||
| 390 | } | ||
| 391 | ``` | ||
| 392 | |||
| 393 | **Two Implementations**: | ||
| 394 | |||
| 395 | 1. **`RealSyncContext`** - Production implementation connecting to real systems | ||
| 396 | 2. **`MockSyncContext`** - Test implementation with configurable behavior | ||
| 397 | |||
| 398 | ### MockSyncContext Features | ||
| 399 | |||
| 400 | The mock supports builder-pattern configuration: | ||
| 401 | |||
| 402 | ```rust | ||
| 403 | let mock = MockSyncContext::new() | ||
| 404 | .with_repository_data("test-repo", RepositoryData { | ||
| 405 | announcements: vec![...], | ||
| 406 | clone_urls: vec!["https://server1.com/repo.git".to_string()], | ||
| 407 | }) | ||
| 408 | .with_needed_oids("test-repo", hashset!["abc123", "def456"]) | ||
| 409 | .with_fetch_result("https://server1.com/repo.git", Ok(vec!["abc123"])) | ||
| 410 | .with_fetch_result("https://server2.com/repo.git", Ok(vec!["def456"])); | ||
| 411 | ``` | ||
| 412 | |||
| 413 | **Test Example** (from [`src/purgatory/sync/functions.rs`](../../src/purgatory/sync/functions.rs)): | ||
| 414 | |||
| 415 | ```rust | ||
| 416 | #[tokio::test] | ||
| 417 | async fn test_sync_identifier_partial_success() { | ||
| 418 | let mock = MockSyncContext::new() | ||
| 419 | .with_repository_data("repo", RepositoryData { | ||
| 420 | clone_urls: vec![ | ||
| 421 | "https://server1.com/repo.git".to_string(), | ||
| 422 | "https://server2.com/repo.git".to_string(), | ||
| 423 | ], | ||
| 424 | ..Default::default() | ||
| 425 | }) | ||
| 426 | .with_needed_oids("repo", hashset!["oid1", "oid2"]) | ||
| 427 | .with_fetch_result("https://server1.com/repo.git", Ok(vec!["oid1"])) | ||
| 428 | .with_fetch_result("https://server2.com/repo.git", Ok(vec!["oid2"])); | ||
| 429 | |||
| 430 | let throttle = Arc::new(ThrottleManager::new(5, 30)); | ||
| 431 | let complete = sync_identifier(&mock, "repo", &throttle).await; | ||
| 432 | |||
| 433 | assert!(complete); // Both OIDs fetched | ||
| 434 | } | ||
| 435 | ``` | ||
| 436 | |||
| 437 | **Why this matters**: | ||
| 438 | |||
| 439 | - ✅ Tests run **instantly** (no network I/O) | ||
| 440 | - ✅ Tests are **deterministic** (no flaky failures) | ||
| 441 | - ✅ Tests cover **edge cases** easily (network errors, partial success, etc.) | ||
| 442 | - ✅ Tests are **isolated** (no shared state between tests) | ||
| 443 | |||
| 444 | **Implementation**: [`src/purgatory/sync/context.rs:MockSyncContext`](../../src/purgatory/sync/context.rs) | ||
| 445 | |||
| 446 | --- | ||
| 447 | |||
| 448 | ## Configuration | ||
| 449 | |||
| 450 | Purgatory sync behavior is configurable via CLI flags or environment variables: | ||
| 451 | |||
| 452 | | Setting | CLI Flag | Environment Variable | Default | Description | | ||
| 453 | | ----------------------- | -------- | -------------------- | ------- | ---------------------------------------------------- | | ||
| 454 | | Domain concurrent limit | (future) | (future) | `5` | Max concurrent requests per domain | | ||
| 455 | | Domain rate limit | (future) | (future) | `30` | Max requests per minute per domain | | ||
| 456 | | Sync loop interval | N/A | N/A | `1s` | How often to check for ready identifiers (hardcoded) | | ||
| 457 | | Default sync delay | N/A | N/A | `180s` | Delay for user-submitted events (hardcoded) | | ||
| 458 | | Immediate sync delay | N/A | N/A | `500ms` | Delay for sync-triggered events (hardcoded) | | ||
| 459 | | Purgatory expiry | N/A | N/A | `30min` | How long events wait before expiring (hardcoded) | | ||
| 460 | |||
| 461 | **Note**: Currently, throttle limits and delays are hardcoded constants. Future work may expose these as configuration options if needed. | ||
| 462 | |||
| 463 | --- | ||
| 464 | |||
| 465 | ## Key Design Decisions | ||
| 466 | |||
| 467 | ### 1. Identifier-Based, Not Event-Based | ||
| 468 | |||
| 469 | **Decision**: Sync by repository identifier, not individual events. | ||
| 470 | |||
| 471 | **Rationale**: Multiple events for the same repository should trigger a single fetch operation, not N separate fetches. | ||
| 472 | |||
| 473 | **Impact**: Batches events efficiently, reduces server load. | ||
| 474 | |||
| 475 | ### 2. Two Separate `tried_urls` Tracking | ||
| 476 | |||
| 477 | **Decision**: Main sync loop and domain queues track tried URLs independently. | ||
| 478 | |||
| 479 | **Main sync**: Local `HashSet<String>` for current attempt (all domains) | ||
| 480 | **Domain queue**: Per-identifier `HashSet<String>` for this domain only | ||
| 481 | |||
| 482 | **Rationale**: | ||
| 483 | |||
| 484 | - Main sync skips throttled domains entirely (doesn't need their tried URLs) | ||
| 485 | - Domain queue only cares about URLs from its own domain | ||
| 486 | - No coordination needed → simpler code | ||
| 487 | |||
| 488 | **Impact**: Clean separation of concerns, easier to reason about. | ||
| 489 | |||
| 490 | ### 3. Trigger-Based Domain Processing | ||
| 491 | |||
| 492 | **Decision**: Domain queues process on triggers (capacity freed, new enqueue), not polling. | ||
| 493 | |||
| 494 | **Rationale**: | ||
| 495 | |||
| 496 | - Polling wastes CPU cycles checking capacity every interval | ||
| 497 | - Triggers provide instant response when capacity frees | ||
| 498 | - Event-driven design is easier to test and debug | ||
| 499 | |||
| 500 | **Impact**: Lower CPU usage, faster response times. | ||
| 501 | |||
| 502 | ### 4. Fresh Start on New Events | ||
| 503 | |||
| 504 | **Decision**: Reset `attempt_count` to 0 when new events arrive for an identifier. | ||
| 505 | |||
| 506 | **Rationale**: | ||
| 507 | |||
| 508 | - New events often mean fresh git data is available | ||
| 509 | - Previous failures might have been temporary | ||
| 510 | - Gives repositories a "second chance" without waiting for full backoff | ||
| 511 | |||
| 512 | **Impact**: Faster recovery from transient failures, better UX. | ||
| 513 | |||
| 514 | ### 5. OID Copying in `process_newly_available_git_data` | ||
| 515 | |||
| 516 | **Decision**: Copy OIDs and release events **per successful fetch**, not at end of sync. | ||
| 517 | |||
| 518 | **Rationale**: | ||
| 519 | |||
| 520 | - Events can be released as soon as their specific OIDs are available | ||
| 521 | - Partial success scenarios work correctly (some events release, others stay) | ||
| 522 | - Handles multiple state events for same identifier independently | ||
| 523 | |||
| 524 | **Impact**: Events release faster, better handling of partial success. | ||
| 525 | |||
| 526 | --- | ||
| 527 | |||
| 528 | ## Observability | ||
| 529 | |||
| 530 | ### Logging | ||
| 531 | |||
| 532 | Sync operations produce structured logs at different levels: | ||
| 533 | |||
| 534 | **INFO**: Major events | ||
| 535 | |||
| 536 | ``` | ||
| 537 | Starting purgatory sync loop (interval: 1s) | ||
| 538 | Sync complete - removed from sync queue (identifier=test-repo, complete=true) | ||
| 539 | ``` | ||
| 540 | |||
| 541 | **DEBUG**: Detailed progress | ||
| 542 | |||
| 543 | ``` | ||
| 544 | Added new sync queue entry (identifier=test-repo, delay_secs=180) | ||
| 545 | Starting sync task for identifier (identifier=test-repo) | ||
| 546 | Sync incomplete - applying backoff (identifier=test-repo, attempt_count=2, next_backoff_secs=40) | ||
| 547 | ``` | ||
| 548 | |||
| 549 | **WARN**: Errors and failures | ||
| 550 | |||
| 551 | ``` | ||
| 552 | Failed to fetch OIDs (url=https://server.com/repo.git, error=connection timeout) | ||
| 553 | ``` | ||
| 554 | |||
| 555 | ### Metrics (Future) | ||
| 556 | |||
| 557 | Planned Prometheus metrics for observability: | ||
| 558 | |||
| 559 | - `purgatory_sync_queue_size` - Number of identifiers pending sync | ||
| 560 | - `purgatory_sync_attempts_total{identifier}` - Total sync attempts per identifier | ||
| 561 | - `purgatory_sync_oids_fetched_total{identifier}` - OIDs successfully fetched | ||
| 562 | - `purgatory_domain_in_flight{domain}` - Current in-flight requests per domain | ||
| 563 | - `purgatory_domain_requests_total{domain}` - Total requests per domain | ||
| 564 | |||
| 565 | --- | ||
| 566 | |||
| 567 | ## Testing Strategy | ||
| 568 | |||
| 569 | ### Unit Tests | ||
| 570 | |||
| 571 | Core sync functions have comprehensive unit tests using `MockSyncContext`: | ||
| 572 | |||
| 573 | **`sync_identifier_next_url`** (3 tests): | ||
| 574 | |||
| 575 | - Skips throttled domains | ||
| 576 | - Skips tried URLs | ||
| 577 | - Returns None when all URLs exhausted | ||
| 578 | |||
| 579 | **`sync_identifier_from_url`** (2 tests): | ||
| 580 | |||
| 581 | - Successful fetch triggers processing | ||
| 582 | - Failed fetch doesn't trigger processing | ||
| 583 | |||
| 584 | **`sync_identifier`** (3 tests): | ||
| 585 | |||
| 586 | - Tries multiple URLs until complete | ||
| 587 | - Enqueues throttled domains when incomplete | ||
| 588 | - Handles partial success correctly | ||
| 589 | |||
| 590 | **`SyncQueueEntry`** (3 tests): | ||
| 591 | |||
| 592 | - Backoff calculation correct | ||
| 593 | - Fresh start on new events | ||
| 594 | - Ready state logic correct | ||
| 595 | |||
| 596 | **`DomainThrottle`** (4 tests): | ||
| 597 | |||
| 598 | - Concurrent limit enforced | ||
| 599 | - Rate limit enforced | ||
| 600 | - Round-robin fairness | ||
| 601 | - Queue management correct | ||
| 602 | |||
| 603 | **Total**: 15+ unit tests covering all core logic | ||
| 604 | |||
| 605 | **Location**: [`src/purgatory/sync/`](../../src/purgatory/sync/) (various `#[cfg(test)]` modules) | ||
| 606 | |||
| 607 | ### Integration Tests | ||
| 608 | |||
| 609 | End-to-end tests verify sync behavior with real relay instances: | ||
| 610 | |||
| 611 | **Planned tests**: | ||
| 612 | |||
| 613 | - State event syncs from remote server | ||
| 614 | - PR event syncs from remote server | ||
| 615 | - Partial OID aggregation across multiple servers | ||
| 616 | - Throttling prevents overwhelming servers | ||
| 617 | - Backoff retry after failures | ||
| 618 | |||
| 619 | **Location**: [`tests/purgatory_sync.rs`](../../tests/purgatory_sync.rs) (planned) | ||
| 620 | |||
| 621 | --- | ||
| 622 | |||
| 623 | ## Future Enhancements | ||
| 624 | |||
| 625 | ### 1. Configurable Throttle Limits | ||
| 626 | |||
| 627 | **Current**: Hardcoded to 5 concurrent, 30/min per domain | ||
| 628 | **Future**: CLI flags `--sync-domain-concurrent` and `--sync-domain-rate-limit` | ||
| 629 | |||
| 630 | **Use case**: Operators might want stricter limits for public servers or looser limits for trusted servers. | ||
| 631 | |||
| 632 | ### 2. Per-Domain Throttle Configuration | ||
| 633 | |||
| 634 | **Current**: Same limits for all domains | ||
| 635 | **Future**: Domain-specific overrides (e.g., `github.com:10,60` for higher limits) | ||
| 636 | |||
| 637 | **Use case**: Popular forges like GitHub/GitLab can handle more load than small personal servers. | ||
| 638 | |||
| 639 | ### 3. Prometheus Metrics | ||
| 640 | |||
| 641 | **Current**: Structured logging only | ||
| 642 | **Future**: Export metrics for monitoring dashboards | ||
| 643 | |||
| 644 | **Use case**: Operators want visibility into sync performance, throttle effectiveness, success rates. | ||
| 645 | |||
| 646 | ### 4. Negentropy Integration | ||
| 647 | |||
| 648 | **Current**: Sync triggered by event arrival | ||
| 649 | **Future**: Proactive sync discovers missing events via negentropy | ||
| 650 | |||
| 651 | **Use case**: Catch up with repositories after downtime without waiting for event re-submission. | ||
| 652 | |||
| 653 | --- | ||
| 654 | |||
| 655 | ## Related Documentation | ||
| 656 | |||
| 657 | - **[Purgatory Design](purgatory-design.md)** - Core purgatory concepts and event flows | ||
| 658 | - **[GRASP-02 Proactive Sync](grasp-02-proactive-sync.md)** - Full GRASP-02 implementation (relay sync) | ||
| 659 | - **[Unified Git Data Sync](unify-git-data-sync.md)** - Shared processing for push and sync paths | ||
| 660 | - **[Architecture Overview](architecture.md)** - System-wide architecture | ||
| 661 | |||
| 662 | --- | ||
| 663 | |||
| 664 | ## Summary | ||
| 665 | |||
| 666 | The purgatory sync system is a sophisticated, production-ready implementation that: | ||
| 667 | |||
| 668 | ✅ **Batches intelligently** - Groups events by identifier for efficient fetching | ||
| 669 | ✅ **Retries smartly** - Exponential backoff with fresh start on new events | ||
| 670 | ✅ **Throttles respectfully** - 5 concurrent + 30/min per domain, round-robin fairness | ||
| 671 | ✅ **Times strategically** - 3min for user events, 500ms for synced events | ||
| 672 | ✅ **Expires responsibly** - 30min auto-cleanup prevents memory leaks | ||
| 673 | ✅ **Tests thoroughly** - Mock-based architecture enables comprehensive unit tests | ||
| 674 | |||
| 675 | This design ensures ngit-grasp can serve repositories reliably even when git data and Nostr events arrive out-of-order or from different sources, while respecting remote server capacity and providing excellent observability. | ||
diff --git a/docs/explanation/grasp-02-proactive-sync.md b/docs/explanation/grasp-02-proactive-sync.md index f13050e..461bde7 100644 --- a/docs/explanation/grasp-02-proactive-sync.md +++ b/docs/explanation/grasp-02-proactive-sync.md | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | Proactively Sync Nostr Events from other relays listed in accepted repository announcements. | 5 | Proactively Sync Nostr Events from other relays listed in accepted repository announcements. |
| 6 | 6 | ||
| 7 | **Note**: This document covers **relay-to-relay event sync**. For automatic git data fetching when events arrive without their data, see [GRASP-02 Purgatory Git Data Fetching](grasp-02-proactive-sync-purgatory-git-data.md). | ||
| 8 | |||
| 7 | Features: | 9 | Features: |
| 8 | 10 | ||
| 9 | - Fetches all repository announcements from connected relays to discover new repos listing our service | 11 | - Fetches all repository announcements from connected relays to discover new repos listing our service |
| @@ -13,6 +15,7 @@ Features: | |||
| 13 | - Plays nicely with other relays - connection backoff and rate-limiting detection with cooldown | 15 | - Plays nicely with other relays - connection backoff and rate-limiting detection with cooldown |
| 14 | - Does a full reconciliation daily | 16 | - Does a full reconciliation daily |
| 15 | - Prometheus metrics | 17 | - Prometheus metrics |
| 18 | - **Triggers purgatory git data sync**: When events arrive via sync, they're enqueued for immediate git data fetching (500ms delay to batch bursts) | ||
| 16 | 19 | ||
| 17 | Key Architectural Points: | 20 | Key Architectural Points: |
| 18 | 21 | ||
diff --git a/docs/explanation/inline-authorization.md b/docs/explanation/inline-authorization.md index 4538602..a71a217 100644 --- a/docs/explanation/inline-authorization.md +++ b/docs/explanation/inline-authorization.md | |||
| @@ -37,16 +37,18 @@ Client Server | |||
| 37 | ``` | 37 | ``` |
| 38 | 38 | ||
| 39 | **Pros:** | 39 | **Pros:** |
| 40 | |||
| 40 | - Standard Git mechanism | 41 | - Standard Git mechanism |
| 41 | - Language-agnostic (hook can be any executable) | 42 | - Language-agnostic (hook can be any executable) |
| 42 | - Well-documented | 43 | - Well-documented |
| 43 | 44 | ||
| 44 | **Cons:** | 45 | **Cons:** |
| 46 | |||
| 45 | - Hook output goes to stderr (client sees as `remote:` messages) | 47 | - Hook output goes to stderr (client sees as `remote:` messages) |
| 46 | - Hard to provide structured error messages | 48 | - Hard to provide structured error messages |
| 47 | - Requires hook installation and management | 49 | - Requires hook installation and management |
| 48 | - Difficult to test (needs Git repository setup) | 50 | - Difficult to test (needs Git repository setup) |
| 49 | - Hook runs *after* Git has started processing | 51 | - Hook runs _after_ Git has started processing |
| 50 | 52 | ||
| 51 | --- | 53 | --- |
| 52 | 54 | ||
| @@ -60,7 +62,7 @@ Client Server (ngit-grasp) | |||
| 60 | |--- git push ----->|--- HTTP handler receives request | 62 | |--- git push ----->|--- HTTP handler receives request |
| 61 | | | | 63 | | | |
| 62 | | |--- Parse ref updates from request | 64 | | |--- Parse ref updates from request |
| 63 | | |--- Query Nostr relay for state | 65 | | |--- Query database + purgatory for state |
| 64 | | |--- Validate push against state | 66 | | |--- Validate push against state |
| 65 | | | | 67 | | | |
| 66 | | |--- If invalid: return HTTP error | 68 | | |--- If invalid: return HTTP error |
| @@ -71,13 +73,16 @@ Client Server (ngit-grasp) | |||
| 71 | ``` | 73 | ``` |
| 72 | 74 | ||
| 73 | **Pros:** | 75 | **Pros:** |
| 76 | |||
| 74 | - Full control over error messages (HTTP response) | 77 | - Full control over error messages (HTTP response) |
| 75 | - Can skip spawning Git entirely for invalid pushes | 78 | - Can skip spawning Git entirely for invalid pushes |
| 76 | - Easier testing (pure Rust, no Git setup needed) | 79 | - Easier testing (pure Rust, no Git setup needed) |
| 77 | - Shared state between Git and Nostr components | 80 | - Shared state between Git and Nostr components |
| 78 | - Better performance (early rejection) | 81 | - Better performance (early rejection) |
| 82 | - Can check both database and purgatory for authorization | ||
| 79 | 83 | ||
| 80 | **Cons:** | 84 | **Cons:** |
| 85 | |||
| 81 | - Requires parsing Git protocol ourselves | 86 | - Requires parsing Git protocol ourselves |
| 82 | - Less standard than hooks | 87 | - Less standard than hooks |
| 83 | - Tighter coupling to Git HTTP protocol | 88 | - Tighter coupling to Git HTTP protocol |
| @@ -86,9 +91,41 @@ Client Server (ngit-grasp) | |||
| 86 | 91 | ||
| 87 | ## Why Inline Authorization Is Better for GRASP | 92 | ## Why Inline Authorization Is Better for GRASP |
| 88 | 93 | ||
| 89 | ### 1. Better Error Messages | 94 | ### 1. Purgatory Integration |
| 95 | |||
| 96 | **Critical advantage:** Inline authorization allows checking **both database and purgatory** during authorization: | ||
| 97 | |||
| 98 | ```rust | ||
| 99 | // From src/git/authorization.rs | ||
| 100 | pub async fn authorize_push( | ||
| 101 | database: &SharedDatabase, | ||
| 102 | identifier: &str, | ||
| 103 | owner_pubkey: &str, | ||
| 104 | request_body: &Bytes, | ||
| 105 | purgatory: &Arc<Purgatory>, // Can check purgatory! | ||
| 106 | repo_path: &std::path::Path, | ||
| 107 | ) -> anyhow::Result<AuthorizationResult> | ||
| 108 | ``` | ||
| 109 | |||
| 110 | **Why this matters:** State events go to purgatory when git data doesn't exist yet. Without inline authorization checking purgatory, we'd have a deadlock: | ||
| 111 | |||
| 112 | 1. State event arrives → No git data → Goes to **purgatory** (not database) | ||
| 113 | 2. Git push arrives → Hook checks **database only** → No state found → **REJECTED** ❌ | ||
| 114 | |||
| 115 | With inline authorization: | ||
| 116 | |||
| 117 | 1. State event arrives → No git data → Goes to purgatory | ||
| 118 | 2. Git push arrives → Checks **database + purgatory** → State found → **AUTHORIZED** ✅ | ||
| 119 | 3. After push succeeds → Save event to database → Remove from purgatory | ||
| 120 | |||
| 121 | See [`src/git/authorization.rs:342-400`](../../src/git/authorization.rs) for implementation. | ||
| 122 | |||
| 123 | otherwise we'd need another way of storing purgatory events. | ||
| 124 | |||
| 125 | ### 2. Better Error Messages | ||
| 90 | 126 | ||
| 91 | **With hooks:** | 127 | **With hooks:** |
| 128 | |||
| 92 | ``` | 129 | ``` |
| 93 | $ git push | 130 | $ git push |
| 94 | remote: error: Push rejected - not authorized for ref refs/heads/main | 131 | remote: error: Push rejected - not authorized for ref refs/heads/main |
| @@ -98,39 +135,37 @@ To https://gitnostr.com/alice/myrepo.git | |||
| 98 | ``` | 135 | ``` |
| 99 | 136 | ||
| 100 | **With inline authorization:** | 137 | **With inline authorization:** |
| 138 | |||
| 101 | ``` | 139 | ``` |
| 102 | $ git push | 140 | $ git push |
| 103 | error: RPC failed; HTTP 403 Forbidden | 141 | error: RPC failed; HTTP 403 Forbidden |
| 104 | error: { | 142 | error: Push rejected: No state event found in purgatory from authorized publishers |
| 105 | "error": "unauthorized", | ||
| 106 | "ref": "refs/heads/main", | ||
| 107 | "required_state": "event_id_abc123", | ||
| 108 | "your_pubkey": "npub1alice...", | ||
| 109 | "docs": "https://docs.gitnostr.com/errors/unauthorized" | ||
| 110 | } | ||
| 111 | ``` | 143 | ``` |
| 112 | 144 | ||
| 113 | The inline approach can return **structured JSON** with actionable information. | 145 | The inline approach provides clear, actionable error messages directly in the HTTP response. |
| 114 | 146 | ||
| 115 | ### 2. Performance Benefits | 147 | ### 3. Performance Benefits |
| 116 | 148 | ||
| 117 | **With hooks:** | 149 | **With hooks:** |
| 150 | |||
| 118 | - Git process spawns | 151 | - Git process spawns |
| 119 | - Git starts receiving pack data | 152 | - Git starts receiving pack data |
| 120 | - Hook runs (might query Nostr relay) | 153 | - Hook runs (might query Nostr relay) |
| 121 | - If rejected, Git throws away received data | 154 | - If rejected, Git throws away received data |
| 122 | 155 | ||
| 123 | **With inline authorization:** | 156 | **With inline authorization:** |
| 124 | - Parse ref updates from HTTP request | 157 | |
| 125 | - Validate against Nostr state (cached) | 158 | - Parse ref updates from HTTP request (pkt-line format) |
| 126 | - If rejected, return HTTP 403 immediately | 159 | - Validate against database + purgatory state |
| 160 | - If rejected, return HTTP error immediately | ||
| 127 | - Never spawn Git for invalid pushes | 161 | - Never spawn Git for invalid pushes |
| 128 | 162 | ||
| 129 | **Result:** Faster rejection, less resource usage. | 163 | **Result:** Faster rejection, less resource usage, no wasted pack data transfer. |
| 130 | 164 | ||
| 131 | ### 3. Easier Testing | 165 | ### 4. Easier Testing |
| 132 | 166 | ||
| 133 | **With hooks:** | 167 | **With hooks:** |
| 168 | |||
| 134 | ```bash | 169 | ```bash |
| 135 | # Test setup | 170 | # Test setup |
| 136 | mkdir -p /tmp/test-repo | 171 | mkdir -p /tmp/test-repo |
| @@ -147,6 +182,7 @@ rm -rf /tmp/test-repo | |||
| 147 | ``` | 182 | ``` |
| 148 | 183 | ||
| 149 | **With inline authorization:** | 184 | **With inline authorization:** |
| 185 | |||
| 150 | ```rust | 186 | ```rust |
| 151 | #[tokio::test] | 187 | #[tokio::test] |
| 152 | async fn test_unauthorized_push() { | 188 | async fn test_unauthorized_push() { |
| @@ -161,43 +197,55 @@ async fn test_unauthorized_push() { | |||
| 161 | 197 | ||
| 162 | See [`tests/push_authorization.rs`](tests/push_authorization.rs) for actual test examples. | 198 | See [`tests/push_authorization.rs`](tests/push_authorization.rs) for actual test examples. |
| 163 | 199 | ||
| 164 | ### 4. Shared State and Types | 200 | ### 5. Shared State and Types |
| 165 | 201 | ||
| 166 | **With hooks:** | 202 | **With hooks:** |
| 203 | |||
| 167 | - Hook is separate process | 204 | - Hook is separate process |
| 168 | - Must query Nostr relay over WebSocket | 205 | - Must query Nostr relay over WebSocket |
| 169 | - Can't share in-memory cache | 206 | - Can't share in-memory cache |
| 207 | - Can't access purgatory | ||
| 170 | - Separate error types | 208 | - Separate error types |
| 171 | 209 | ||
| 172 | **With inline authorization:** | 210 | **With inline authorization:** |
| 211 | |||
| 173 | ```rust | 212 | ```rust |
| 174 | // From src/git/handlers.rs | 213 | // From src/git/handlers.rs |
| 175 | pub async fn handle_receive_pack( | 214 | pub async fn handle_receive_pack( |
| 176 | repo_path: PathBuf, | 215 | repo_path: PathBuf, |
| 177 | body: Bytes, | 216 | body: Bytes, |
| 178 | database: SharedDatabase, // Shared with Nostr relay! | 217 | database: Option<SharedDatabase>, // Shared with Nostr relay! |
| 218 | purgatory: Option<Arc<Purgatory>>, // Shared purgatory access! | ||
| 179 | npub: &str, | 219 | npub: &str, |
| 180 | identifier: &str, | 220 | identifier: &str, |
| 181 | ) -> Result<Response<Full<Bytes>>, GitError> { | 221 | ) -> Result<Response<Full<Bytes>>, GitError> { |
| 182 | // Direct database access for authorization | 222 | // Direct database + purgatory access for authorization |
| 183 | let auth = get_authorization_for_owner(&database, pubkey, identifier).await?; | 223 | let auth = authorize_push( |
| 224 | &database, | ||
| 225 | identifier, | ||
| 226 | owner_pubkey, | ||
| 227 | &body, | ||
| 228 | &purgatory, // Can check purgatory! | ||
| 229 | &repo_path | ||
| 230 | ).await?; | ||
| 184 | // ... | 231 | // ... |
| 185 | } | 232 | } |
| 186 | ``` | 233 | ``` |
| 187 | 234 | ||
| 188 | **Result:** Better performance, type safety, simpler architecture. | 235 | **Result:** Better performance, type safety, simpler architecture, purgatory integration. |
| 189 | 236 | ||
| 190 | ### 5. Simpler Deployment | 237 | ### 6. Simpler Deployment |
| 191 | 238 | ||
| 192 | **With hooks (ngit-relay):** | 239 | **With hooks (ngit-relay):** |
| 240 | |||
| 193 | ``` | 241 | ``` |
| 194 | Docker container: | 242 | Docker container: |
| 195 | - nginx (HTTP frontend) | 243 | - nginx (HTTP frontend) |
| 196 | - git-http-backend (C binary) | 244 | - git-http-backend (C binary) |
| 197 | - pre-receive hook (Go binary) | 245 | - pre-receive hook (Go binary) |
| 198 | - Khatru relay (Go binary) | 246 | - Khatru relay (Go binary) |
| 199 | - supervisord (process manager) | 247 | - supervisord (process manager) |
| 200 | 248 | ||
| 201 | Setup steps: | 249 | Setup steps: |
| 202 | 1. Install all components | 250 | 1. Install all components |
| 203 | 2. Configure nginx | 251 | 2. Configure nginx |
| @@ -207,13 +255,14 @@ Setup steps: | |||
| 207 | ``` | 255 | ``` |
| 208 | 256 | ||
| 209 | **With inline authorization (ngit-grasp):** | 257 | **With inline authorization (ngit-grasp):** |
| 258 | |||
| 210 | ``` | 259 | ``` |
| 211 | Single Rust binary: | 260 | Single Rust binary: |
| 212 | - HTTP server (Hyper) | 261 | - HTTP server (Hyper) |
| 213 | - Git protocol handler | 262 | - Git protocol handler |
| 214 | - Nostr relay (nostr-relay-builder) | 263 | - Nostr relay (nostr-relay-builder) |
| 215 | - Authorization logic | 264 | - Authorization logic |
| 216 | 265 | ||
| 217 | Setup steps: | 266 | Setup steps: |
| 218 | 1. Run binary | 267 | 1. Run binary |
| 219 | 2. Configure environment variables | 268 | 2. Configure environment variables |
| @@ -227,66 +276,95 @@ Setup steps: | |||
| 227 | 276 | ||
| 228 | ### How We Parse Ref Updates | 277 | ### How We Parse Ref Updates |
| 229 | 278 | ||
| 230 | The Git HTTP protocol sends ref updates in the request body: | 279 | The Git HTTP protocol sends ref updates in pkt-line format: |
| 231 | 280 | ||
| 232 | ``` | 281 | ``` |
| 233 | POST /alice/myrepo.git/git-receive-pack HTTP/1.1 | 282 | POST /alice/myrepo.git/git-receive-pack HTTP/1.1 |
| 234 | Content-Type: application/x-git-receive-pack-request | 283 | Content-Type: application/x-git-receive-pack-request |
| 235 | 284 | ||
| 236 | 0000000000000000000000000000000000000000 abc123... refs/heads/main\0 report-status | 285 | 00a5 0000...0000 abc123...def456 refs/heads/main\0 report-status\n |
| 286 | 0000 | ||
| 287 | PACK... | ||
| 237 | ``` | 288 | ``` |
| 238 | 289 | ||
| 239 | We parse this **before** spawning Git. See [`src/git/authorization.rs`](src/git/authorization.rs) for the implementation: | 290 | We parse this **before** spawning Git. See [`src/git/authorization.rs:695-778`](../../src/git/authorization.rs) for the implementation: |
| 240 | 291 | ||
| 241 | ```rust | 292 | ```rust |
| 242 | /// Parse ref updates from git-receive-pack request body | 293 | /// Parse the refs being updated from a Git pack |
| 243 | pub fn parse_pushed_refs(body: &[u8]) -> Result<Vec<PushedRef>, AuthorizationError> { | 294 | /// |
| 244 | // Parse pkt-line format | 295 | /// The receive-pack protocol sends ref updates in pkt-line format: |
| 245 | // Extract ref updates | 296 | /// - 4-byte hex length prefix (e.g., "00a5") |
| 246 | // Return structured data | 297 | /// - Payload: `<old-oid> <new-oid> <ref-name>\0<capabilities>\n` |
| 298 | /// - Flush packet "0000" terminates the list | ||
| 299 | pub fn parse_pushed_refs(data: &[u8]) -> Vec<(String, String, String)> { | ||
| 300 | // Handles both pkt-line format (real Git clients) | ||
| 301 | // and simple text format (for unit tests) | ||
| 247 | } | 302 | } |
| 248 | ``` | 303 | ``` |
| 249 | 304 | ||
| 250 | ### How We Validate | 305 | ### How We Validate |
| 251 | 306 | ||
| 252 | Validation checks (from [`src/git/authorization.rs`](src/git/authorization.rs)): | 307 | The authorization flow (from [`src/git/authorization.rs:51-162`](../../src/git/authorization.rs)): |
| 253 | |||
| 254 | 1. Does pusher's pubkey have write access? | ||
| 255 | 2. Are they listed as a maintainer in the latest state event? | ||
| 256 | 3. Do the refs match the state event? | ||
| 257 | 308 | ||
| 258 | ```rust | 309 | ```rust |
| 259 | /// Validate that pushed refs match the authorized state | 310 | pub async fn authorize_push( |
| 260 | pub fn validate_push_refs( | 311 | database: &SharedDatabase, |
| 261 | pushed_refs: &[PushedRef], | 312 | identifier: &str, |
| 262 | state: &RepositoryState, | 313 | owner_pubkey: &str, |
| 263 | ) -> Result<(), AuthorizationError> { | 314 | request_body: &Bytes, |
| 264 | for pushed_ref in pushed_refs { | 315 | purgatory: &Arc<Purgatory>, |
| 265 | if pushed_ref.ref_name.starts_with("refs/heads/") { | 316 | repo_path: &std::path::Path, |
| 266 | // Validate branch against state | 317 | ) -> anyhow::Result<AuthorizationResult> { |
| 267 | } else if pushed_ref.ref_name.starts_with("refs/tags/") { | 318 | // 1. Parse refs from push request |
| 268 | // Validate tag against state | 319 | let pushed_refs = parse_pushed_refs(request_body); |
| 269 | } else if pushed_ref.ref_name.starts_with("refs/nostr/") { | 320 | |
| 270 | // Allow refs/nostr/<event-id> for PRs | 321 | // 2. Separate refs/nostr/ refs from state refs |
| 271 | } | 322 | let (nostr_refs, state_refs) = partition_refs(&pushed_refs); |
| 272 | } | 323 | |
| 273 | Ok(()) | 324 | // 3. Handle refs/nostr/ refs (PR events) |
| 325 | // - Validate event ID format | ||
| 326 | // - Check purgatory for PR event | ||
| 327 | // - Create placeholder if git-data-first scenario | ||
| 328 | |||
| 329 | // 4. Handle normal refs (state events) | ||
| 330 | // - Check database + purgatory for state events | ||
| 331 | // - Collect authorized maintainers | ||
| 332 | // - Find latest authorized state | ||
| 333 | // - Validate refs match state | ||
| 334 | |||
| 335 | // 5. Return authorization result with purgatory events | ||
| 274 | } | 336 | } |
| 275 | ``` | 337 | ``` |
| 276 | 338 | ||
| 339 | **Key validation checks:** | ||
| 340 | |||
| 341 | 1. **For state refs** (`refs/heads/*`, `refs/tags/*`): | ||
| 342 | |||
| 343 | - Query database for announcements → collect authorized maintainers | ||
| 344 | - Check **purgatory** for matching state events (critical for purgatory flow!) | ||
| 345 | - Filter to events from authorized maintainers | ||
| 346 | - Find latest state event | ||
| 347 | - Validate pushed refs match state event refs | ||
| 348 | |||
| 349 | 2. **For PR refs** (`refs/nostr/<event-id>`): | ||
| 350 | - Validate event ID format | ||
| 351 | - Check purgatory for PR event with matching commit | ||
| 352 | - If no event found, create placeholder (git-data-first scenario) | ||
| 353 | - Collect PR events from purgatory for post-push processing | ||
| 354 | |||
| 277 | --- | 355 | --- |
| 278 | 356 | ||
| 279 | ## Comparison with Reference Implementation | 357 | ## Comparison with Reference Implementation |
| 280 | 358 | ||
| 281 | | Aspect | ngit-relay (hooks) | ngit-grasp (inline) | | 359 | | Aspect | ngit-relay (hooks) | ngit-grasp (inline) | |
| 282 | |--------|-------------------|---------------------| | 360 | | ------------------ | ---------------------------------------- | ---------------------- | |
| 283 | | **Components** | nginx + git-http-backend + hook + Khatru | Single Rust binary | | 361 | | **Components** | nginx + git-http-backend + hook + Khatru | Single Rust binary | |
| 284 | | **Validation** | Pre-receive hook (separate process) | Inline HTTP handler | | 362 | | **Validation** | Pre-receive hook (separate process) | Inline HTTP handler | |
| 285 | | **Error messages** | Hook stderr → `remote:` | HTTP response JSON | | 363 | | **Error messages** | Hook stderr → `remote:` | HTTP response JSON | |
| 286 | | **Performance** | Spawns Git first | Validates first | | 364 | | **Performance** | Spawns Git first | Validates first | |
| 287 | | **Testing** | Shell scripts + Go tests | Pure Rust tests | | 365 | | **Testing** | Shell scripts + Go tests | Pure Rust tests | |
| 288 | | **Deployment** | Docker + supervisord | Single binary | | 366 | | **Deployment** | Docker + supervisord | Single binary | |
| 289 | | **State sharing** | WebSocket query | Direct database access | | 367 | | **State sharing** | WebSocket query | Direct database access | |
| 290 | 368 | ||
| 291 | Both are GRASP-compliant, but inline authorization is simpler and more efficient. | 369 | Both are GRASP-compliant, but inline authorization is simpler and more efficient. |
| 292 | 370 | ||
| @@ -295,24 +373,30 @@ Both are GRASP-compliant, but inline authorization is simpler and more efficient | |||
| 295 | ## Trade-offs and Limitations | 373 | ## Trade-offs and Limitations |
| 296 | 374 | ||
| 297 | ### What We Gain | 375 | ### What We Gain |
| 376 | |||
| 377 | - ✅ **Purgatory integration** - Can check database + purgatory during authorization | ||
| 378 | - ✅ **Prevents deadlock** - State events in purgatory can authorize pushes | ||
| 298 | - ✅ Better error messages | 379 | - ✅ Better error messages |
| 299 | - ✅ Better performance | 380 | - ✅ Better performance (early rejection) |
| 300 | - ✅ Easier testing | 381 | - ✅ Easier testing (pure Rust) |
| 301 | - ✅ Simpler deployment | 382 | - ✅ Simpler deployment (single binary) |
| 302 | - ✅ Tighter integration | 383 | - ✅ Tighter integration (shared state) |
| 303 | 384 | ||
| 304 | ### What We Lose | 385 | ### What We Lose |
| 386 | |||
| 305 | - ❌ Non-standard approach (not using Git's hook system) | 387 | - ❌ Non-standard approach (not using Git's hook system) |
| 306 | - ❌ Tighter coupling to Git HTTP protocol | 388 | - ❌ Tighter coupling to Git HTTP protocol |
| 307 | - ❌ Must parse protocol ourselves | 389 | - ❌ Must parse pkt-line protocol ourselves |
| 308 | 390 | ||
| 309 | ### Is It Worth It? | 391 | ### Is It Worth It? |
| 310 | 392 | ||
| 311 | **Yes**, because: | 393 | **Absolutely**, because: |
| 312 | 1. We handle protocol parsing in [`src/git/protocol.rs`](src/git/protocol.rs) | 394 | |
| 313 | 2. GRASP is already non-standard (Nostr authorization) | 395 | 1. **Purgatory integration is essential** - Without it, we'd have a deadlock where state events in purgatory can't authorize pushes |
| 314 | 3. Benefits far outweigh the coupling cost | 396 | 2. Protocol parsing is isolated in [`src/git/authorization.rs`](../../src/git/authorization.rs) |
| 315 | 4. We can still add hook support later if needed | 397 | 3. GRASP is already non-standard (Nostr authorization) |
| 398 | 4. Benefits far outweigh the coupling cost | ||
| 399 | 5. We can still add hook support later if needed (but purgatory checking would still need inline access) | ||
| 316 | 400 | ||
| 317 | --- | 401 | --- |
| 318 | 402 | ||
| @@ -320,14 +404,15 @@ Both are GRASP-compliant, but inline authorization is simpler and more efficient | |||
| 320 | 404 | ||
| 321 | Key files in the ngit-grasp implementation: | 405 | Key files in the ngit-grasp implementation: |
| 322 | 406 | ||
| 323 | | Component | Location | | 407 | | Component | Location | |
| 324 | |-----------|----------| | 408 | | ----------------------- | ------------------------------------------------------------------------- | |
| 325 | | HTTP routing | [`src/http/mod.rs`](src/http/mod.rs) | | 409 | | HTTP routing | [`src/http/mod.rs`](../../src/http/mod.rs) | |
| 326 | | Git handlers | [`src/git/handlers.rs`](src/git/handlers.rs) | | 410 | | Git handlers | [`src/git/handlers.rs`](../../src/git/handlers.rs) | |
| 327 | | Push authorization | [`src/git/authorization.rs`](src/git/authorization.rs) | | 411 | | Push authorization | [`src/git/authorization.rs`](../../src/git/authorization.rs) | |
| 328 | | Git protocol parsing | [`src/git/protocol.rs`](src/git/protocol.rs) | | 412 | | Pkt-line parsing | [`src/git/authorization.rs:695-778`](../../src/git/authorization.rs) | |
| 329 | | Subprocess management | [`src/git/subprocess.rs`](src/git/subprocess.rs) | | 413 | | Subprocess management | [`src/git/subprocess.rs`](../../src/git/subprocess.rs) | |
| 330 | | Event acceptance policy | [`src/nostr/builder.rs:51`](src/nostr/builder.rs:51) - `Nip34WritePolicy` | | 414 | | Purgatory integration | [`src/purgatory/mod.rs`](../../src/purgatory/mod.rs) | |
| 415 | | Event acceptance policy | [`src/nostr/builder.rs`](../../src/nostr/builder.rs) - `Nip34WritePolicy` | | ||
| 331 | 416 | ||
| 332 | --- | 417 | --- |
| 333 | 418 | ||
| @@ -345,6 +430,7 @@ pub struct GitConfig { | |||
| 345 | ``` | 430 | ``` |
| 346 | 431 | ||
| 347 | This would allow: | 432 | This would allow: |
| 433 | |||
| 348 | - Migration path for hook-based systems | 434 | - Migration path for hook-based systems |
| 349 | - Extra validation for paranoid deployments | 435 | - Extra validation for paranoid deployments |
| 350 | - Compatibility with other Git tools | 436 | - Compatibility with other Git tools |
| @@ -352,6 +438,7 @@ This would allow: | |||
| 352 | ### If Git Protocol Changes | 438 | ### If Git Protocol Changes |
| 353 | 439 | ||
| 354 | The protocol parsing is isolated in [`src/git/protocol.rs`](src/git/protocol.rs). If the Git protocol changes: | 440 | The protocol parsing is isolated in [`src/git/protocol.rs`](src/git/protocol.rs). If the Git protocol changes: |
| 441 | |||
| 355 | - Update the protocol module | 442 | - Update the protocol module |
| 356 | - Tests will catch any breakage | 443 | - Tests will catch any breakage |
| 357 | 444 | ||
| @@ -361,18 +448,21 @@ The protocol parsing is isolated in [`src/git/protocol.rs`](src/git/protocol.rs) | |||
| 361 | 448 | ||
| 362 | **Inline authorization is the right choice for ngit-grasp** because: | 449 | **Inline authorization is the right choice for ngit-grasp** because: |
| 363 | 450 | ||
| 364 | 1. It provides better error messages for users | 451 | 1. **Purgatory integration** - Without inline authorization, state events in purgatory couldn't authorize pushes, creating a deadlock |
| 365 | 2. It's more performant (early rejection) | 452 | 2. **Better error messages** - Direct HTTP responses with clear rejection reasons |
| 366 | 3. It's easier to test (pure Rust) | 453 | 3. **Better performance** - Early rejection before spawning Git |
| 367 | 4. It's simpler to deploy (single binary) | 454 | 4. **Easier testing** - Pure Rust unit tests, no Git setup needed |
| 368 | 5. It enables better integration (shared database) | 455 | 5. **Simpler deployment** - Single binary with shared state |
| 456 | 6. **Shared database + purgatory** - Both authorization sources accessible during validation | ||
| 369 | 457 | ||
| 370 | The trade-off (coupling to Git HTTP protocol) is acceptable because: | 458 | The trade-off (coupling to Git HTTP protocol) is acceptable because: |
| 371 | - The protocol is stable and well-specified | 459 | |
| 372 | - Protocol handling is isolated in one module | 460 | - The pkt-line protocol is stable and well-specified |
| 461 | - Protocol parsing is isolated in [`src/git/authorization.rs`](../../src/git/authorization.rs) | ||
| 462 | - Purgatory integration requires inline access anyway | ||
| 373 | - Benefits far outweigh the cost | 463 | - Benefits far outweigh the cost |
| 374 | 464 | ||
| 375 | This decision aligns with our goal of creating a **developer-friendly, production-ready GRASP implementation**. | 465 | This decision aligns with our goal of creating a **developer-friendly, production-ready GRASP implementation** that properly handles the event-git-data ordering problem via purgatory. |
| 376 | 466 | ||
| 377 | --- | 467 | --- |
| 378 | 468 | ||
| @@ -386,4 +476,4 @@ This decision aligns with our goal of creating a **developer-friendly, productio | |||
| 386 | 476 | ||
| 387 | --- | 477 | --- |
| 388 | 478 | ||
| 389 | *Part of the [ngit-grasp explanation docs](./)* \ No newline at end of file | 479 | _Part of the [ngit-grasp explanation docs](./)_ |
diff --git a/docs/explanation/purgatory-design.md b/docs/explanation/purgatory-design.md index 5ee4e06..b984745 100644 --- a/docs/explanation/purgatory-design.md +++ b/docs/explanation/purgatory-design.md | |||
| @@ -1,1013 +1,795 @@ | |||
| 1 | # Purgatory Implementation Design | 1 | # Purgatory: In-Memory Holding Area for Events Awaiting Git Data |
| 2 | 2 | ||
| 3 | **Status**: ✅ Implemented (2025-12-23) | 3 | **Status**: ✅ Implemented |
| 4 | **Implementation**: Phases 1-7 complete | 4 | **Implementation**: [`src/purgatory/`](../../src/purgatory/) |
| 5 | **Source Code**: [`src/purgatory/`](../../src/purgatory/) | ||
| 6 | **Related**: [`docs/explanation/architecture.md`](architecture.md) - System architecture overview | 5 | **Related**: [`docs/explanation/architecture.md`](architecture.md) - System architecture overview |
| 7 | 6 | ||
| 7 | --- | ||
| 8 | |||
| 8 | ## Overview | 9 | ## Overview |
| 9 | 10 | ||
| 10 | Purgatory is an in-memory holding area for nostr events that depend on git data that hasn't arrived yet, **and** for git data that arrived before its corresponding nostr event. Events/placeholders are held until the other half arrives, at which point they are processed and saved to the database. | 11 | Purgatory is an in-memory holding area that solves the **"which arrives first?"** problem in GRASP. Either nostr events or git pushes can arrive in any order: |
| 12 | |||
| 13 | - **Event first**: Event waits in purgatory until git data arrives | ||
| 14 | - **Git first**: Placeholder waits in purgatory until event arrives | ||
| 15 | |||
| 16 | When both halves arrive, they are processed together and saved to the database. | ||
| 11 | 17 | ||
| 12 | **Spec Reference**: [GRASP-01 Purgatory Section](../grasp/01.md:20-22) | 18 | **Spec Reference**: [GRASP-01 Purgatory Section](https://github.com/DanConwayDev/grasp/blob/main/01.md#purgatory) |
| 13 | 19 | ||
| 14 | > Accepted repo state announcements, PRs and PR Updates SHOULD be accepted with message "purgatory: won't be served until git data arrives" and kept in purgatory (not served) until the related git data arrives and otherwise discarded after 30 minutes. | 20 | > Accepted repo state announcements, PRs and PR Updates SHOULD be accepted with message "purgatory: won't be served until git data arrives" and kept in purgatory (not served) until the related git data arrives and otherwise discarded after 30 minutes. |
| 15 | 21 | ||
| 22 | --- | ||
| 23 | |||
| 16 | ## Key Design Principles | 24 | ## Key Design Principles |
| 17 | 25 | ||
| 18 | ### 1. Separate Storage for State vs PR Events | 26 | ### 1. In-Memory Only |
| 27 | |||
| 28 | Purgatory data is **not persisted** to disk. On restart, all purgatory entries are lost. This is acceptable because: | ||
| 29 | |||
| 30 | - Events are still on other relays (can be re-submitted) | ||
| 31 | - Git data can be re-pushed | ||
| 32 | - 30-minute expiry means data is transient anyway | ||
| 19 | 33 | ||
| 20 | State events (kind 30618) and PR events (kind 1617/1618) have fundamentally different matching and authorization patterns. They are stored in **separate purgatory stores** with different indices: | 34 | ### 2. Separate Storage for State vs PR Events |
| 21 | 35 | ||
| 22 | - **State Events**: Indexed by `identifier` (d tag), matched via ref comparison | 36 | State events (kind 30618) and PR events (kind 1617/1618) have fundamentally different matching patterns: |
| 23 | - **PR Events**: Indexed by `event_id`, matched directly via `refs/nostr/<event-id>` | ||
| 24 | 37 | ||
| 25 | ### 2. Late Binding of Refs (State Events) | 38 | | Event Type | Index | Matching Strategy | |
| 39 | |------------|-------|-------------------| | ||
| 40 | | **State Events** | `identifier` (d tag) | Compare refs at push time | | ||
| 41 | | **PR Events** | `event_id` (hex string) | Direct match via `refs/nostr/<event-id>` | | ||
| 26 | 42 | ||
| 27 | **Do NOT extract refs at event arrival time.** Extract and match refs at git push time. | 43 | They use **separate DashMap stores** for efficient concurrent access. |
| 28 | 44 | ||
| 29 | **Why?** Multiple state events might be in purgatory with different target states. An older state event's git data might arrive after a newer one is received. By waiting until push time, we: | 45 | ### 3. Late Binding for State Events |
| 30 | 46 | ||
| 31 | - Compare the pushed refs against each purgatory state event's expected state | 47 | **Critical:** Do NOT extract refs from state events at arrival time. Extract and match refs **at git push time**. |
| 48 | |||
| 49 | **Why?** Multiple state events might be in purgatory with different target states. An older state event's git data might arrive after a newer one is received. By waiting until push time: | ||
| 50 | |||
| 51 | - Compare pushed refs against each purgatory state event's expected state | ||
| 32 | - Handle out-of-order git data arrival correctly | 52 | - Handle out-of-order git data arrival correctly |
| 33 | - Only release events when their specific target state is achieved | 53 | - Only release events when their specific target state is achieved |
| 34 | 54 | ||
| 35 | ### 3. Bidirectional Waiting (PR Events) | 55 | See [`src/purgatory/helpers.rs:can_satisfy_state`](../../src/purgatory/helpers.rs) for implementation. |
| 56 | |||
| 57 | ### 4. Bidirectional Waiting for PR Events | ||
| 36 | 58 | ||
| 37 | For PR events, **either side can arrive first**: | 59 | For PR events, **either side can arrive first**: |
| 38 | 60 | ||
| 39 | - **Event first**: PR event waits in purgatory for git push to `refs/nostr/<event-id>` | 61 | | Scenario | What Happens | |
| 40 | - **Git first**: Push creates placeholder, waits for PR event to arrive | 62 | |----------|--------------| |
| 63 | | **Event first** | PR event waits in purgatory for git push to `refs/nostr/<event-id>` | | ||
| 64 | | **Git first** | Push creates placeholder entry waiting for PR event | | ||
| 41 | 65 | ||
| 42 | ### 4. Ref Pairs, Not Just Commits | 66 | Placeholders are identified by `PrPurgatoryEntry.event == None`. |
| 43 | 67 | ||
| 44 | State events define a target state: specific refs pointing to specific objects (commits or annotated tags). We match **ref name + object SHA** pairs, not just raw commits. | 68 | ### 5. Authorization During Push (Not After) |
| 45 | 69 | ||
| 46 | ### 5. No Separate PurgatoryState Tracking | 70 | **Critical for avoiding deadlock:** Authorization checks **both database and purgatory** during push validation. |
| 47 | 71 | ||
| 48 | All entries use a single 30-minute expiry timer. When git push activity begins, we simply ensure at least 15 minutes remain on the timer (extend if needed). This eliminates complexity of tracking Secured vs Pending states. | 72 | Without this, we'd have a deadlock: |
| 73 | 1. State event arrives → No git data → Goes to **purgatory** (not database) | ||
| 74 | 2. Git push arrives → Authorization checks **database only** → No state found → **REJECTED** ❌ | ||
| 49 | 75 | ||
| 50 | ## Event Lifecycle | 76 | With purgatory checking during authorization: |
| 77 | 1. State event arrives → No git data → Goes to purgatory | ||
| 78 | 2. Git push arrives → Checks **database + purgatory** → State found → **AUTHORIZED** ✅ | ||
| 79 | 3. After push succeeds → Save event to database → Remove from purgatory | ||
| 51 | 80 | ||
| 52 | ```mermaid | 81 | See [`src/git/authorization.rs:51-162`](../../src/git/authorization.rs) for implementation. |
| 53 | stateDiagram-v2 | 82 | |
| 54 | [*] --> Waiting: Event or git data arrives | 83 | --- |
| 55 | Waiting --> Processing: Other half arrives | ||
| 56 | Waiting --> Discarded: 30 min expiry | ||
| 57 | Processing --> Released: Match verified + authorized | ||
| 58 | Processing --> Rejected: Mismatch or unauthorized | ||
| 59 | Released --> [*]: Event saved to database | ||
| 60 | Rejected --> Waiting: Return to waiting - different match expected | ||
| 61 | Discarded --> [*]: Entry dropped | ||
| 62 | ``` | ||
| 63 | 84 | ||
| 64 | ## Data Structures | 85 | ## Data Structures |
| 65 | 86 | ||
| 66 | ### RefPair - A single ref target | 87 | ### Core Types |
| 67 | 88 | ||
| 68 | ```rust | 89 | ```rust |
| 69 | /// A reference name and its target object | 90 | /// A reference name and its target object |
| 70 | #[derive(Debug, Clone, Hash, Eq, PartialEq)] | 91 | #[derive(Debug, Clone, Hash, Eq, PartialEq)] |
| 71 | pub struct RefPair { | 92 | pub struct RefPair { |
| 72 | /// Full ref name, e.g., "refs/heads/main" or "refs/tags/v1.0" | 93 | pub ref_name: String, // e.g., "refs/heads/main" |
| 94 | pub object_sha: String, // commit or annotated tag SHA | ||
| 95 | } | ||
| 96 | |||
| 97 | /// A ref update in a git push | ||
| 98 | #[derive(Debug, Clone)] | ||
| 99 | pub struct RefUpdate { | ||
| 100 | pub old_oid: String, | ||
| 101 | pub new_oid: String, | ||
| 73 | pub ref_name: String, | 102 | pub ref_name: String, |
| 74 | /// Target object SHA (commit or annotated tag) | ||
| 75 | pub object_sha: String, | ||
| 76 | } | 103 | } |
| 77 | ``` | 104 | ``` |
| 78 | 105 | ||
| 79 | ### StatePurgatoryEntry | 106 | ### State Purgatory Entry |
| 80 | 107 | ||
| 81 | ```rust | 108 | ```rust |
| 82 | pub struct StatePurgatoryEntry { | 109 | pub struct StatePurgatoryEntry { |
| 83 | /// The nostr state event (kind 30618) awaiting git data | 110 | /// The nostr state event (kind 30618) awaiting git data |
| 84 | pub event: Event, | 111 | pub event: Event, |
| 85 | 112 | ||
| 86 | /// The repository identifier from the event's 'd' tag | 113 | /// Repository identifier from 'd' tag |
| 87 | pub identifier: String, | 114 | pub identifier: String, |
| 88 | 115 | ||
| 89 | /// Event author pubkey | 116 | /// Event author pubkey |
| 90 | pub author: PublicKey, | 117 | pub author: PublicKey, |
| 91 | 118 | ||
| 92 | /// When this entry was added to purgatory | 119 | /// When added to purgatory |
| 93 | pub created_at: Instant, | 120 | pub created_at: Instant, |
| 94 | 121 | ||
| 95 | /// Expiry deadline (30 min from creation, may be extended) | 122 | /// Expiry deadline (30 min from creation, may be extended) |
| 96 | pub expires_at: Instant, | 123 | pub expires_at: Instant, |
| 97 | } | 124 | } |
| 98 | ``` | 125 | ``` |
| 99 | 126 | ||
| 100 | ### PrPurgatoryEntry | 127 | **Note:** Refs are NOT extracted at creation time. They're extracted at push time for late binding. |
| 128 | |||
| 129 | ### PR Purgatory Entry | ||
| 101 | 130 | ||
| 102 | ```rust | 131 | ```rust |
| 103 | pub struct PrPurgatoryEntry { | 132 | pub struct PrPurgatoryEntry { |
| 104 | /// The nostr PR event, if received (None = git data arrived first) | 133 | /// The nostr PR event, if received (None = git data arrived first) |
| 105 | pub event: Option<Event>, | 134 | pub event: Option<Event>, |
| 106 | 135 | ||
| 107 | /// The expected commit SHA from 'c' tag (if event exists) | 136 | /// Expected commit SHA from 'c' tag (if event exists) |
| 108 | /// or the actual commit pushed (if git arrived first) | 137 | /// or actual commit pushed (if git arrived first) |
| 109 | pub commit: String, | 138 | pub commit: String, |
| 110 | 139 | ||
| 111 | /// When this entry was added to purgatory | 140 | /// When added to purgatory |
| 112 | pub created_at: Instant, | 141 | pub created_at: Instant, |
| 113 | 142 | ||
| 114 | /// Expiry deadline (30 min from creation, may be extended) | 143 | /// Expiry deadline (30 min from creation) |
| 115 | pub expires_at: Instant, | 144 | pub expires_at: Instant, |
| 116 | } | 145 | } |
| 117 | ``` | 146 | ``` |
| 118 | 147 | ||
| 119 | **Note:** `PrPurgatoryEntry.event` being `None` indicates the "git data first" scenario - we have a placeholder waiting for the PR event. | 148 | **Key:** `event: None` indicates a placeholder (git-data-first scenario). |
| 120 | 149 | ||
| 121 | ### Purgatory Stores | 150 | ### Purgatory Stores |
| 122 | 151 | ||
| 123 | ```rust | 152 | ```rust |
| 124 | pub struct Purgatory { | 153 | pub struct Purgatory { |
| 125 | /// State events indexed by identifier | 154 | /// State events indexed by identifier (d tag) |
| 126 | state_events: DashMap<String, Vec<StatePurgatoryEntry>>, | 155 | /// Multiple state events per identifier allowed (different authors) |
| 127 | 156 | state_events: Arc<DashMap<String, Vec<StatePurgatoryEntry>>>, | |
| 157 | |||
| 128 | /// PR events indexed by event_id (hex string) | 158 | /// PR events indexed by event_id (hex string) |
| 129 | pr_events: DashMap<String, PrPurgatoryEntry>, | 159 | /// Single entry per event ID |
| 160 | pr_events: Arc<DashMap<String, PrPurgatoryEntry>>, | ||
| 161 | |||
| 162 | /// Sync queue for background git data fetching | ||
| 163 | sync_queue: Arc<DashMap<String, SyncQueueEntry>>, | ||
| 164 | |||
| 165 | _git_data_path: PathBuf, | ||
| 130 | } | 166 | } |
| 131 | ``` | 167 | ``` |
| 132 | 168 | ||
| 169 | --- | ||
| 170 | |||
| 133 | ## Event Flows | 171 | ## Event Flows |
| 134 | 172 | ||
| 135 | ### State Event (Kind 30618) Arrival | 173 | ### State Event Arrival (Kind 30618) |
| 136 | 174 | ||
| 137 | ```mermaid | 175 | ```mermaid |
| 138 | sequenceDiagram | 176 | sequenceDiagram |
| 139 | participant Client | 177 | participant Client |
| 140 | participant WritePolicy | 178 | participant WritePolicy |
| 141 | participant Purgatory | 179 | participant Purgatory |
| 180 | participant Database | ||
| 142 | participant GitRepos | 181 | participant GitRepos |
| 143 | 182 | ||
| 144 | Client->>WritePolicy: EVENT kind:30618 | 183 | Client->>WritePolicy: EVENT kind:30618 |
| 145 | WritePolicy->>WritePolicy: Validate structure | 184 | WritePolicy->>WritePolicy: Validate structure |
| 146 | WritePolicy->>WritePolicy: Parse identifier and author | 185 | WritePolicy->>WritePolicy: Parse identifier and author |
| 147 | 186 | ||
| 148 | Note right of WritePolicy: Check if we already have git data | 187 | Note right of WritePolicy: Check if git data exists |
| 149 | WritePolicy->>GitRepos: Check if any authorized repo has matching refs | 188 | WritePolicy->>GitRepos: Check if any authorized repo has matching refs |
| 150 | 189 | ||
| 151 | alt Git data already exists | 190 | alt Git data exists |
| 152 | GitRepos-->>WritePolicy: Refs match in repo X | 191 | GitRepos-->>WritePolicy: Refs match in repo X |
| 153 | WritePolicy->>WritePolicy: Process immediately | 192 | WritePolicy->>Database: Save event |
| 154 | WritePolicy->>Client: OK true - event saved | 193 | WritePolicy->>Client: OK true - event saved |
| 155 | else Git data not available yet | 194 | else Git data not available yet |
| 156 | WritePolicy->>Purgatory: add_state - event, identifier | 195 | WritePolicy->>Purgatory: add_state(event, identifier, author) |
| 157 | Purgatory->>Purgatory: Add entry indexed by identifier | 196 | Purgatory->>Purgatory: Store in state_events[identifier] |
| 158 | WritePolicy->>Client: OK true purgatory: will not be served until git data arrives | 197 | Purgatory->>Purgatory: Enqueue for sync (3min delay) |
| 198 | WritePolicy->>Client: OK true "purgatory: awaiting git data" | ||
| 159 | end | 199 | end |
| 160 | ``` | 200 | ``` |
| 161 | 201 | ||
| 162 | ### PR Event (Kind 1617/1618) Arrival | 202 | ### PR Event Arrival (Kind 1617/1618) |
| 163 | 203 | ||
| 164 | ```mermaid | 204 | ```mermaid |
| 165 | sequenceDiagram | 205 | sequenceDiagram |
| 166 | participant Client | 206 | participant Client |
| 167 | participant WritePolicy | 207 | participant WritePolicy |
| 168 | participant Purgatory | 208 | participant Purgatory |
| 209 | participant Database | ||
| 169 | participant GitRepos | 210 | participant GitRepos |
| 170 | 211 | ||
| 171 | Client->>WritePolicy: EVENT kind:1617 | 212 | Client->>WritePolicy: EVENT kind:1617/1618 |
| 172 | WritePolicy->>WritePolicy: Validate structure | 213 | WritePolicy->>WritePolicy: Extract event_id and commit from 'c' tag |
| 173 | WritePolicy->>WritePolicy: Extract event_id and commit from c tag | ||
| 174 | 214 | ||
| 175 | Note right of WritePolicy: Check if git data already exists | 215 | Note right of WritePolicy: Check if git data exists |
| 176 | WritePolicy->>GitRepos: Check refs/nostr/event-id in authorized repos | 216 | WritePolicy->>GitRepos: Check refs/nostr/<event-id> in repos |
| 177 | 217 | ||
| 178 | alt Git data already exists | 218 | alt Git data exists in database |
| 179 | GitRepos-->>WritePolicy: Found matching commit | 219 | GitRepos-->>WritePolicy: Found with matching commit |
| 180 | WritePolicy->>WritePolicy: Process immediately | 220 | WritePolicy->>Database: Save event |
| 181 | WritePolicy->>Client: OK true - event saved | 221 | WritePolicy->>Client: OK true - event saved |
| 182 | else Git data arrived first - placeholder exists | 222 | else Placeholder exists in purgatory |
| 183 | WritePolicy->>Purgatory: find_pr_placeholder - event_id | 223 | WritePolicy->>Purgatory: find_pr_placeholder(event_id) |
| 184 | alt Placeholder found with matching commit | 224 | alt Placeholder has matching commit |
| 185 | Purgatory-->>WritePolicy: Placeholder entry | 225 | Purgatory-->>WritePolicy: Placeholder entry |
| 186 | WritePolicy->>WritePolicy: Process - save event | 226 | WritePolicy->>Database: Save event |
| 187 | WritePolicy->>Purgatory: remove - event_id | 227 | WritePolicy->>Purgatory: remove_pr(event_id) |
| 188 | WritePolicy->>Client: OK true - event saved | 228 | WritePolicy->>Client: OK true - event saved |
| 189 | else Placeholder found with different commit | 229 | else Placeholder has different commit |
| 190 | WritePolicy->>Client: OK false - commit mismatch | 230 | WritePolicy->>Client: OK false - commit mismatch |
| 191 | else No placeholder | ||
| 192 | WritePolicy->>Purgatory: add_pr - event, commit | ||
| 193 | WritePolicy->>Client: OK true purgatory: will not be served until git data arrives | ||
| 194 | end | 231 | end |
| 232 | else No git data yet | ||
| 233 | WritePolicy->>Purgatory: add_pr(event, event_id, commit) | ||
| 234 | Purgatory->>Purgatory: Store in pr_events[event_id] | ||
| 235 | Purgatory->>Purgatory: Enqueue for sync (3min delay) | ||
| 236 | WritePolicy->>Client: OK true "purgatory: awaiting git data" | ||
| 195 | end | 237 | end |
| 196 | ``` | 238 | ``` |
| 197 | 239 | ||
| 198 | ### Git Push - State Event Matching | 240 | ### Git Push - State Refs |
| 199 | 241 | ||
| 200 | When a push arrives to normal refs (branches/tags), we check for matching state events: | 242 | **Critical:** Authorization happens BEFORE git-receive-pack execution, checking both database and purgatory. |
| 201 | |||
| 202 | **Key Rule**: For a given `pubkey/identifier` repo, there can only be **one authoritative state event** - the one with the largest `created_at` among all authorized maintainers. State events in purgatory are only processed if they aren't superseded by existing state events on the relay. | ||
| 203 | 243 | ||
| 204 | ```mermaid | 244 | ```mermaid |
| 205 | sequenceDiagram | 245 | sequenceDiagram |
| 206 | participant GitClient | 246 | participant GitClient |
| 207 | participant GitHandler | 247 | participant GitHandler |
| 248 | participant Authorization | ||
| 208 | participant Purgatory | 249 | participant Purgatory |
| 209 | participant Database | 250 | participant Database |
| 210 | participant GitRepo | 251 | participant GitProcess |
| 211 | 252 | ||
| 212 | GitClient->>GitHandler: POST /npub/identifier/git-receive-pack | 253 | GitClient->>GitHandler: POST /npub/id/git-receive-pack |
| 213 | GitHandler->>GitHandler: Parse pushed refs into RefPairs | 254 | GitHandler->>Authorization: authorize_push(body, purgatory, database) |
| 214 | 255 | ||
| 215 | Note over GitHandler: Look up state events by identifier | 256 | Note over Authorization: Parse pushed refs from pkt-line format |
| 216 | GitHandler->>Purgatory: find_matching_states - identifier, pushed_refs | 257 | Authorization->>Authorization: parse_pushed_refs(body) |
| 217 | 258 | Authorization->>Authorization: Separate state refs from refs/nostr/* | |
| 218 | loop For each state event in purgatory[identifier] | 259 | |
| 219 | Purgatory->>Purgatory: Parse state event refs | 260 | Note over Authorization: Check database for state events |
| 220 | Purgatory->>Purgatory: Check: pushed_refs covers event refs that differ from local | 261 | Authorization->>Database: Query state events for identifier |
| 221 | Purgatory->>Purgatory: Check: event refs not in push already exist locally | 262 | |
| 222 | alt All event refs can be satisfied | 263 | alt State found in database |
| 223 | Purgatory->>Purgatory: Add to candidates | 264 | Database-->>Authorization: State event |
| 224 | end | 265 | Authorization->>Authorization: Validate refs match |
| 225 | end | 266 | Authorization-->>GitHandler: Authorized (from_purgatory=false) |
| 226 | 267 | else No state in database - check purgatory | |
| 227 | Purgatory-->>GitHandler: Vec of candidate state events | 268 | Authorization->>Purgatory: find_matching_states(identifier, pushed_refs, local_refs) |
| 228 | 269 | ||
| 229 | Note over GitHandler: Get all state events and announcements for identifier | 270 | alt Matching state in purgatory |
| 230 | GitHandler->>Database: Get announcements for identifier | 271 | Purgatory-->>Authorization: State event(s) |
| 231 | GitHandler->>Database: Get all state events for identifier from relay | 272 | Authorization->>Authorization: Filter to authorized authors |
| 232 | GitHandler->>GitHandler: collect_authorized_maintainers from announcements | 273 | Authorization->>Authorization: Find latest state |
| 233 | 274 | Authorization->>Authorization: Validate refs match | |
| 234 | Note over GitHandler: Find authoritative state event | 275 | Authorization->>Purgatory: extend_expiry(15min) |
| 235 | loop For each candidate from purgatory | 276 | Authorization-->>GitHandler: Authorized (from_purgatory=true) |
| 236 | GitHandler->>GitHandler: Check if event.author is authorized for target repo | 277 | else No matching state anywhere |
| 237 | alt Author authorized | 278 | Authorization-->>GitHandler: Rejected - no authorized state |
| 238 | GitHandler->>GitHandler: Check relay state events for this identifier | ||
| 239 | GitHandler->>GitHandler: Compare created_at with all authorized state events | ||
| 240 | alt This is the largest created_at among authorized | ||
| 241 | GitHandler->>GitHandler: Verify OIDs for unpushed refs exist locally | ||
| 242 | alt All OIDs available | ||
| 243 | GitHandler->>GitHandler: Set as approved state event | ||
| 244 | end | ||
| 245 | else Superseded by existing state on relay | ||
| 246 | GitHandler->>Purgatory: Remove superseded event | ||
| 247 | end | ||
| 248 | end | 279 | end |
| 249 | end | 280 | end |
| 250 | 281 | ||
| 251 | Note over GitHandler: Only ONE approved state event per repo | 282 | alt Authorized |
| 252 | alt Approved state event exists | 283 | GitHandler->>GitProcess: Execute git-receive-pack |
| 253 | GitHandler->>Purgatory: extend_expiry - event_id - ensure 15 min | 284 | |
| 254 | GitHandler->>GitRepo: Execute git-receive-pack | 285 | alt Push succeeds AND from_purgatory=true |
| 255 | |||
| 256 | alt Push successful | ||
| 257 | GitHandler->>Database: Save state event | 286 | GitHandler->>Database: Save state event |
| 258 | GitHandler->>GitRepo: Align ALL refs to state - including unpushed | 287 | GitHandler->>Purgatory: remove_state_event(identifier, event_id) |
| 259 | GitHandler->>GitHandler: Sync to other authorized maintainer repos | ||
| 260 | GitHandler->>Purgatory: remove - event_id | ||
| 261 | GitHandler->>GitClient: Push accepted | 288 | GitHandler->>GitClient: Push accepted |
| 262 | else Push failed | 289 | else Push succeeds AND from_purgatory=false |
| 263 | GitHandler->>GitClient: Push rejected | 290 | GitHandler->>GitClient: Push accepted |
| 291 | else Push fails | ||
| 292 | GitHandler->>GitClient: Push rejected - git error | ||
| 264 | end | 293 | end |
| 265 | else No approved state event | 294 | else Rejected |
| 266 | GitHandler->>GitClient: Push rejected - no authorized state | 295 | GitHandler->>GitClient: Push rejected - not authorized |
| 267 | end | 296 | end |
| 268 | ``` | 297 | ``` |
| 269 | 298 | ||
| 270 | ### Git Push - PR Event (refs/nostr/event-id) | 299 | ### Git Push - PR Refs (refs/nostr/event-id) |
| 271 | 300 | ||
| 272 | ```mermaid | 301 | ```mermaid |
| 273 | sequenceDiagram | 302 | sequenceDiagram |
| 274 | participant GitClient | 303 | participant GitClient |
| 275 | participant GitHandler | 304 | participant GitHandler |
| 276 | participant Database | ||
| 277 | participant Purgatory | 305 | participant Purgatory |
| 278 | participant GitRepo | 306 | participant Database |
| 279 | 307 | participant GitProcess | |
| 280 | GitClient->>GitHandler: POST /npub/identifier/git-receive-pack refs/nostr/abc123 | 308 | |
| 281 | GitHandler->>GitHandler: Extract event_id from ref | 309 | GitClient->>GitHandler: POST /npub/id/git-receive-pack refs/nostr/abc123 |
| 282 | GitHandler->>GitHandler: Extract pushed commit SHA | 310 | GitHandler->>GitHandler: Extract event_id and commit from push |
| 283 | 311 | ||
| 284 | Note over GitHandler: First check relay database | 312 | Note over GitHandler: Check database first |
| 285 | GitHandler->>Database: Query for PR event with event_id | 313 | GitHandler->>Database: Query PR event with event_id |
| 286 | 314 | ||
| 287 | alt Event exists in database | 315 | alt Event exists in database |
| 288 | Database-->>GitHandler: PR event found | 316 | Database-->>GitHandler: PR event |
| 289 | GitHandler->>GitHandler: Compare commit tags | 317 | GitHandler->>GitHandler: Compare commit tags |
| 290 | alt Commit matches | 318 | alt Commit matches |
| 291 | GitHandler->>GitRepo: Execute push | 319 | GitHandler->>GitProcess: Execute push |
| 292 | GitHandler->>GitClient: Push accepted | 320 | GitHandler->>GitClient: Push accepted |
| 293 | else Commit mismatch | 321 | else Commit mismatch |
| 294 | GitHandler->>GitClient: Push rejected - commit mismatch with existing event | 322 | GitHandler->>GitClient: Push rejected - commit mismatch |
| 295 | end | 323 | end |
| 296 | else Event not in database | 324 | else Event not in database - check purgatory |
| 297 | GitHandler->>Purgatory: find_pr - event_id | 325 | GitHandler->>Purgatory: find_pr(event_id) |
| 298 | 326 | ||
| 299 | alt PR event in purgatory | 327 | alt PR event in purgatory |
| 300 | Purgatory-->>GitHandler: PR entry with event | 328 | Purgatory-->>GitHandler: PR entry with event |
| 301 | GitHandler->>GitHandler: Compare commit tags | 329 | GitHandler->>GitHandler: Compare commit tags |
| 302 | alt Commit matches | 330 | alt Commit matches |
| 303 | GitHandler->>GitRepo: Execute push | 331 | GitHandler->>GitProcess: Execute push |
| 304 | GitHandler->>Database: Save PR event | 332 | GitHandler->>Database: Save PR event |
| 305 | GitHandler->>GitHandler: Sync to other authorized repos | 333 | GitHandler->>Purgatory: remove_pr(event_id) |
| 306 | GitHandler->>Purgatory: remove - event_id | ||
| 307 | GitHandler->>GitClient: Push accepted | 334 | GitHandler->>GitClient: Push accepted |
| 308 | else Commit mismatch | 335 | else Commit mismatch |
| 309 | GitHandler->>GitClient: Push rejected - commit mismatch | 336 | GitHandler->>GitClient: Push rejected - commit mismatch |
| 310 | end | 337 | end |
| 311 | else No PR event anywhere | 338 | else No PR event anywhere (git-data-first) |
| 312 | Note over GitHandler: Git data first scenario | 339 | GitHandler->>GitProcess: Execute push - accept any commit |
| 313 | GitHandler->>GitRepo: Execute push - accept any data | 340 | GitHandler->>Purgatory: add_pr_placeholder(event_id, commit) |
| 314 | GitHandler->>Purgatory: add_pr_placeholder - event_id, commit | ||
| 315 | GitHandler->>GitClient: Push accepted - awaiting PR event | 341 | GitHandler->>GitClient: Push accepted - awaiting PR event |
| 316 | end | 342 | end |
| 317 | end | 343 | end |
| 318 | ``` | 344 | ``` |
| 319 | 345 | ||
| 320 | ### Sync to Other Maintainer Repos | 346 | --- |
| 321 | 347 | ||
| 322 | After successfully processing a state event or PR, we sync to other authorized repos: | 348 | ## Background Sync |
| 323 | 349 | ||
| 324 | ```mermaid | 350 | Purgatory includes a background sync system that fetches git data from remote servers when events arrive before git data. |
| 325 | sequenceDiagram | ||
| 326 | participant Handler | ||
| 327 | participant Database | ||
| 328 | participant Authorization | ||
| 329 | participant GitRepos | ||
| 330 | 351 | ||
| 331 | Handler->>Database: Get all announcements for identifier | 352 | ### Sync Architecture |
| 332 | Handler->>Authorization: collect_authorized_maintainers - announcements | ||
| 333 | Authorization-->>Handler: Map of owner -> authorized maintainers | ||
| 334 | 353 | ||
| 335 | loop For each owner in map | 354 | ``` |
| 336 | alt Event author in owner's authorized set | 355 | ┌─────────────────────────────────────────────────────┐ |
| 337 | Note right of Handler: This owner's repo should have this state/PR | 356 | │ Sync Loop (1s) │ |
| 357 | │ - Checks sync_queue for ready identifiers │ | ||
| 358 | │ - Spawns tasks for each ready identifier │ | ||
| 359 | └─────────────────────────────────────────────────────┘ | ||
| 360 | │ | ||
| 361 | ▼ | ||
| 362 | ┌─────────────────────────────────────────────────────┐ | ||
| 363 | │ sync_identifier(identifier) │ | ||
| 364 | │ 1. Try all non-throttled URLs sequentially │ | ||
| 365 | │ 2. Check if complete after each fetch │ | ||
| 366 | │ 3. Enqueue with throttled domains if incomplete │ | ||
| 367 | └─────────────────────────────────────────────────────┘ | ||
| 368 | │ | ||
| 369 | ▼ | ||
| 370 | ┌─────────────────────────────────────────────────────┐ | ||
| 371 | │ sync_identifier_from_url(identifier, url) │ | ||
| 372 | │ 1. Collect needed OIDs from purgatory events │ | ||
| 373 | │ 2. Fetch OIDs from remote URL │ | ||
| 374 | │ 3. Process newly available git data │ | ||
| 375 | └─────────────────────────────────────────────────────┘ | ||
| 376 | │ | ||
| 377 | ▼ | ||
| 378 | ┌─────────────────────────────────────────────────────┐ | ||
| 379 | │ process_newly_available_git_data(repo, oids) │ | ||
| 380 | │ 1. Find satisfiable state events in purgatory │ | ||
| 381 | │ 2. Find satisfiable PR events in purgatory │ | ||
| 382 | │ 3. Save events to database │ | ||
| 383 | │ 4. Sync git data to other owner repos │ | ||
| 384 | │ 5. Remove from purgatory │ | ||
| 385 | └─────────────────────────────────────────────────────┘ | ||
| 386 | ``` | ||
| 338 | 387 | ||
| 339 | alt State event | 388 | ### Sync Queue Entry |
| 340 | Handler->>GitRepos: Align owner's repo to state event refs | 389 | |
| 341 | else PR event | 390 | ```rust |
| 342 | Handler->>GitRepos: Ensure refs/nostr/event-id exists | 391 | pub struct SyncQueueEntry { |
| 343 | end | 392 | /// When to attempt next sync |
| 344 | end | 393 | pub next_attempt: Instant, |
| 345 | end | 394 | |
| 395 | /// Number of sync attempts made | ||
| 396 | pub attempt_count: u32, | ||
| 397 | |||
| 398 | /// Whether a sync task is currently running | ||
| 399 | pub in_progress: bool, | ||
| 400 | } | ||
| 346 | ``` | 401 | ``` |
| 347 | 402 | ||
| 348 | ### Background Cleanup | 403 | **Backoff strategy:** |
| 404 | - First attempt: 20 seconds | ||
| 405 | - Second attempt: 2 minutes | ||
| 406 | - Subsequent attempts: 2 minutes | ||
| 349 | 407 | ||
| 350 | ```mermaid | 408 | ### Sync Delays |
| 351 | sequenceDiagram | ||
| 352 | participant Timer | ||
| 353 | participant Purgatory | ||
| 354 | 409 | ||
| 355 | loop Every 60 seconds | 410 | | Scenario | Delay | Reason | |
| 356 | Timer->>Purgatory: cleanup | 411 | |----------|-------|--------| |
| 412 | | User-submitted event | 3 minutes | Give time for git push to arrive | | ||
| 413 | | Sync-triggered event | 500ms | Batch burst arrivals from negentropy | | ||
| 357 | 414 | ||
| 358 | Note over Purgatory: Clean state events | 415 | ### Domain Throttling |
| 359 | loop For each identifier in state_events | ||
| 360 | loop For each entry | ||
| 361 | alt now > expires_at | ||
| 362 | Purgatory->>Purgatory: Remove entry | ||
| 363 | end | ||
| 364 | end | ||
| 365 | end | ||
| 366 | 416 | ||
| 367 | Note over Purgatory: Clean PR events | 417 | ```rust |
| 368 | loop For each event_id in pr_events | 418 | pub struct ThrottleManager { |
| 369 | alt now > entry.expires_at | 419 | /// Max requests per domain per minute |
| 370 | Purgatory->>Purgatory: Remove entry | 420 | max_requests_per_minute: usize, |
| 371 | end | 421 | |
| 372 | end | 422 | /// Tracking window duration |
| 373 | end | 423 | window_duration: Duration, |
| 424 | |||
| 425 | /// Per-domain throttle state | ||
| 426 | domains: DashMap<String, DomainThrottle>, | ||
| 427 | } | ||
| 374 | ``` | 428 | ``` |
| 375 | 429 | ||
| 376 | ## API Methods | 430 | **Rate limiting:** |
| 431 | - Default: 5 requests per domain per 30 seconds | ||
| 432 | - Tracks request timestamps in a sliding window | ||
| 433 | - Queues identifiers when domain is throttled | ||
| 434 | - Processes queue when capacity frees up | ||
| 435 | |||
| 436 | See [`src/purgatory/sync/throttle.rs`](../../src/purgatory/sync/throttle.rs) for implementation. | ||
| 437 | |||
| 438 | --- | ||
| 439 | |||
| 440 | ## Purgatory API | ||
| 377 | 441 | ||
| 378 | ### Purgatory | 442 | ### Adding Entries |
| 379 | 443 | ||
| 380 | ```rust | 444 | ```rust |
| 381 | impl Purgatory { | 445 | impl Purgatory { |
| 382 | /// Create a new empty purgatory | 446 | /// Add a state event to purgatory |
| 383 | pub fn new() -> Self; | 447 | /// Automatically enqueues for sync with 3min delay |
| 384 | 448 | pub fn add_state(&self, event: Event, identifier: String, author: PublicKey); | |
| 385 | // ==================== State Events ==================== | 449 | |
| 450 | /// Add a PR event to purgatory | ||
| 451 | /// Automatically enqueues for sync with 3min delay | ||
| 452 | pub fn add_pr(&self, event: Event, event_id: String, commit: String); | ||
| 453 | |||
| 454 | /// Add a PR placeholder (git-data-first scenario) | ||
| 455 | pub fn add_pr_placeholder(&self, event_id: String, commit: String); | ||
| 456 | } | ||
| 457 | ``` | ||
| 386 | 458 | ||
| 387 | /// Add a state event (kind 30618) to purgatory | 459 | ### Finding Entries |
| 388 | /// Returns purgatory message for client response | ||
| 389 | pub fn add_state(&self, event: Event, identifier: String) -> String; | ||
| 390 | 460 | ||
| 391 | /// Find state events that could be satisfied by pushed refs | 461 | ```rust |
| 392 | /// Returns events where: | 462 | impl Purgatory { |
| 393 | /// - All refs in event are either in pushed_refs OR already exist locally | 463 | /// Find state events waiting for an identifier |
| 394 | /// - At least one ref in event is in pushed_refs (something to update) | 464 | pub fn find_state(&self, identifier: &str) -> Vec<StatePurgatoryEntry>; |
| 465 | |||
| 466 | /// Find state events that match pushed refs (late binding) | ||
| 395 | pub fn find_matching_states( | 467 | pub fn find_matching_states( |
| 396 | &self, | 468 | &self, |
| 397 | identifier: &str, | 469 | identifier: &str, |
| 398 | pushed_refs: &[RefPair], | 470 | pushed_updates: &[RefUpdate], |
| 399 | local_refs: &HashMap<String, String>, | 471 | local_refs: &HashMap<String, String>, |
| 400 | ) -> Vec<Event>; | 472 | ) -> Vec<Event>; |
| 473 | |||
| 474 | /// Find a PR entry by event ID | ||
| 475 | pub fn find_pr(&self, event_id: &str) -> Option<PrPurgatoryEntry>; | ||
| 476 | |||
| 477 | /// Find a PR placeholder specifically (git-data-first) | ||
| 478 | pub fn find_pr_placeholder(&self, event_id: &str) -> Option<String>; | ||
| 479 | } | ||
| 480 | ``` | ||
| 401 | 481 | ||
| 402 | /// Extend expiry for entries about to be processed | 482 | ### Removing Entries |
| 403 | /// Ensures at least `duration` remaining on timer | ||
| 404 | pub fn extend_expiry(&self, event_ids: &[EventId], duration: Duration); | ||
| 405 | |||
| 406 | /// Remove state event after successful processing | ||
| 407 | pub fn remove_state(&self, event_id: &EventId); | ||
| 408 | |||
| 409 | // ==================== PR Events ==================== | ||
| 410 | |||
| 411 | /// Add a PR event (kind 1617/1618) to purgatory | ||
| 412 | /// Returns purgatory message for client response | ||
| 413 | pub fn add_pr(&self, event: Event, commit: String) -> String; | ||
| 414 | |||
| 415 | /// Add a placeholder for git-data-first scenario | ||
| 416 | /// Called when push to refs/nostr/<event-id> arrives before the PR event | ||
| 417 | pub fn add_pr_placeholder(&self, event_id: EventId, commit: String); | ||
| 418 | |||
| 419 | /// Find PR entry by event ID | ||
| 420 | /// Returns the entry if found (may or may not have event) | ||
| 421 | pub fn find_pr(&self, event_id: &EventId) -> Option<PrPurgatoryEntry>; | ||
| 422 | |||
| 423 | /// Find PR placeholder (git-data-first entry without event) | ||
| 424 | pub fn find_pr_placeholder(&self, event_id: &EventId) -> Option<PrPurgatoryEntry>; | ||
| 425 | 483 | ||
| 426 | /// Remove PR entry after successful processing | 484 | ```rust |
| 427 | pub fn remove_pr(&self, event_id: &EventId); | 485 | impl Purgatory { |
| 486 | /// Remove all state events for an identifier | ||
| 487 | pub fn remove_state(&self, identifier: &str); | ||
| 488 | |||
| 489 | /// Remove a specific state event by event ID | ||
| 490 | pub fn remove_state_event(&self, identifier: &str, event_id: &EventId); | ||
| 491 | |||
| 492 | /// Remove a PR entry | ||
| 493 | pub fn remove_pr(&self, event_id: &str); | ||
| 494 | } | ||
| 495 | ``` | ||
| 428 | 496 | ||
| 429 | // ==================== Maintenance ==================== | 497 | ### Maintenance |
| 430 | 498 | ||
| 431 | /// Remove expired entries (30 min) | 499 | ```rust |
| 432 | /// Returns count of removed entries | 500 | impl Purgatory { |
| 433 | pub fn cleanup(&self) -> usize; | 501 | /// Remove expired entries (called every 60 seconds) |
| 502 | /// Returns (state_removed, pr_removed) | ||
| 503 | pub fn cleanup(&self) -> (usize, usize); | ||
| 504 | |||
| 505 | /// Extend expiry for entries about to be processed | ||
| 506 | /// Ensures at least `duration` remaining | ||
| 507 | pub fn extend_expiry(&self, identifier: &str, event_ids: &[EventId], duration: Duration); | ||
| 508 | |||
| 509 | /// Get current counts for metrics | ||
| 510 | pub fn count(&self) -> (usize, usize); | ||
| 511 | } | ||
| 512 | ``` | ||
| 434 | 513 | ||
| 435 | /// Get counts for metrics/debugging | 514 | ### Sync Queue Management |
| 436 | pub fn state_event_count(&self) -> usize; | ||
| 437 | pub fn pr_event_count(&self) -> usize; | ||
| 438 | pub fn pr_placeholder_count(&self) -> usize; | ||
| 439 | 515 | ||
| 440 | /// Check if an event is in purgatory (either store) | 516 | ```rust |
| 441 | pub fn contains(&self, event_id: &EventId) -> bool; | 517 | impl Purgatory { |
| 518 | /// Enqueue identifier for sync with custom delay | ||
| 519 | pub fn enqueue_sync(&self, identifier: &str, delay: Duration); | ||
| 520 | |||
| 521 | /// Enqueue with default delay (3 minutes) | ||
| 522 | pub fn enqueue_sync_default(&self, identifier: &str); | ||
| 523 | |||
| 524 | /// Enqueue with immediate delay (500ms) | ||
| 525 | pub fn enqueue_sync_immediate(&self, identifier: &str); | ||
| 526 | |||
| 527 | /// Check if identifier has pending events | ||
| 528 | pub fn has_pending_events(&self, identifier: &str) -> bool; | ||
| 529 | |||
| 530 | /// Remove identifier from sync queue | ||
| 531 | pub fn remove_from_sync_queue(&self, identifier: &str); | ||
| 442 | } | 532 | } |
| 443 | ``` | 533 | ``` |
| 444 | 534 | ||
| 445 | ### Helper: Extract and Match Refs | 535 | --- |
| 536 | |||
| 537 | ## Helper Functions | ||
| 538 | |||
| 539 | ### State Event Matching | ||
| 446 | 540 | ||
| 447 | ```rust | 541 | ```rust |
| 448 | /// Extract ref pairs from a state event | 542 | /// Extract ref pairs from a state event |
| 449 | pub fn extract_refs_from_state(event: &Event) -> Vec<RefPair> { | 543 | pub fn extract_refs_from_state(event: &Event) -> Vec<RefPair>; |
| 450 | // Parse refs/heads/* and refs/tags/* tags | ||
| 451 | // Return vec of RefPair { ref_name, object_sha } | ||
| 452 | } | ||
| 453 | 544 | ||
| 454 | /// Check if a state event can be satisfied by a push | 545 | /// Check if a state event can be satisfied by a push |
| 455 | /// | ||
| 456 | /// Returns true if: | 546 | /// Returns true if: |
| 457 | /// - Every ref in state_refs is either in pushed_refs (matching SHA) OR in local_refs (matching SHA) | 547 | /// - Every ref in state is either in pushed_refs OR in local_refs |
| 458 | /// - At least one ref in state_refs is actually being changed by the push | 548 | /// - At least one ref in state is being changed by the push |
| 459 | pub fn can_satisfy_state( | 549 | pub fn can_satisfy_state( |
| 460 | state_refs: &[RefPair], | 550 | state_refs: &[RefPair], |
| 461 | pushed_refs: &[RefPair], | 551 | pushed_refs: &[RefPair], |
| 462 | local_refs: &HashMap<String, String>, | 552 | local_refs: &HashMap<String, String>, |
| 463 | ) -> bool; | 553 | ) -> bool; |
| 464 | 554 | ||
| 465 | /// Get refs from state event that aren't being pushed but need updating | 555 | /// Check if a state event can be applied to a repository |
| 556 | /// Returns true if all required OIDs exist in the repo | ||
| 557 | pub fn can_apply_state( | ||
| 558 | event: &Event, | ||
| 559 | repo_path: &Path, | ||
| 560 | ) -> Result<bool>; | ||
| 561 | |||
| 562 | /// Get refs from state that aren't being pushed | ||
| 466 | pub fn get_unpushed_refs( | 563 | pub fn get_unpushed_refs( |
| 467 | state_refs: &[RefPair], | 564 | state_refs: &[RefPair], |
| 468 | pushed_refs: &[RefPair], | 565 | pushed_refs: &[RefPair], |
| 469 | ) -> Vec<RefPair>; | 566 | ) -> Vec<RefPair>; |
| 470 | |||
| 471 | /// Verify all OIDs from refs exist in the local git repo | ||
| 472 | pub fn verify_oids_exist( | ||
| 473 | repo_path: &Path, | ||
| 474 | refs: &[RefPair], | ||
| 475 | ) -> Result<bool, git::Error>; | ||
| 476 | ``` | 567 | ``` |
| 477 | 568 | ||
| 478 | ## Integration Points | 569 | See [`src/purgatory/helpers.rs`](../../src/purgatory/helpers.rs) for implementation. |
| 479 | |||
| 480 | ### 1. Nip34WritePolicy Changes | ||
| 481 | 570 | ||
| 482 | ```rust | 571 | --- |
| 483 | pub struct Nip34WritePolicy { | ||
| 484 | ctx: PolicyContext, | ||
| 485 | purgatory: Arc<Purgatory>, // Shared with git handlers | ||
| 486 | // ... existing fields | ||
| 487 | } | ||
| 488 | ``` | ||
| 489 | |||
| 490 | ### 2. handle_state Changes | ||
| 491 | 572 | ||
| 492 | On state event arrival: | 573 | ## Integration Points |
| 493 | 574 | ||
| 494 | **Key Rules**: | 575 | ### 1. Event Policy (Nip34WritePolicy) |
| 495 | 576 | ||
| 496 | 1. Reject if we already have a state event from this author for this identifier with a larger `created_at` date (outdated event) | 577 | State and PR events are added to purgatory when git data doesn't exist: |
| 497 | 2. If accepted, check if we need to sync repos with the same identifier | ||
| 498 | 578 | ||
| 499 | ```rust | 579 | ```rust |
| 580 | // From src/nostr/policy/state.rs | ||
| 500 | async fn handle_state(&self, event: &Event) -> WritePolicyResult { | 581 | async fn handle_state(&self, event: &Event) -> WritePolicyResult { |
| 501 | let identifier = extract_identifier(&event)?; | 582 | let identifier = extract_identifier(event)?; |
| 502 | let author = event.pubkey; | 583 | |
| 503 | let state_refs = extract_refs_from_state(&event); | 584 | // Check if we have matching git data |
| 504 | 585 | if self.has_matching_git_data(&identifier, event).await? { | |
| 505 | // Check for existing state event from this author with larger created_at | 586 | return WritePolicyResult::Accept; |
| 506 | let existing_states = self.database.get_state_events_by_author_identifier( | ||
| 507 | &author, | ||
| 508 | &identifier | ||
| 509 | ).await?; | ||
| 510 | |||
| 511 | for existing in existing_states { | ||
| 512 | if existing.created_at > event.created_at { | ||
| 513 | // Reject - we have a newer state from this author | ||
| 514 | return WritePolicyResult::Reject { | ||
| 515 | status: false, | ||
| 516 | message: "rejected: newer state event exists for this author/identifier".into() | ||
| 517 | }; | ||
| 518 | } | ||
| 519 | } | 587 | } |
| 520 | 588 | ||
| 521 | // Check if we already have matching git data | ||
| 522 | let repos = self.find_repos_for_identifier(&identifier).await?; | ||
| 523 | for repo in repos { | ||
| 524 | if self.refs_match_state(&repo, &state_refs).await? { | ||
| 525 | // Git data exists - process immediately | ||
| 526 | // Also trigger sync check for other repos with same identifier | ||
| 527 | // Pass the repo that has the git data so it can be used as source | ||
| 528 | self.check_and_sync_repos_for_identifier(&identifier, &event, &repo).await?; | ||
| 529 | return WritePolicyResult::Accept; | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 533 | // Add to purgatory | 589 | // Add to purgatory |
| 534 | let msg = self.purgatory.add_state(event.clone(), identifier); | 590 | self.purgatory.add_state( |
| 591 | event.clone(), | ||
| 592 | identifier.clone(), | ||
| 593 | event.pubkey, | ||
| 594 | ); | ||
| 595 | |||
| 535 | WritePolicyResult::Reject { | 596 | WritePolicyResult::Reject { |
| 536 | status: true, // Client sees OK | 597 | status: true, // Client sees OK |
| 537 | message: msg.into() | 598 | message: "purgatory: awaiting git data".into() |
| 538 | } | ||
| 539 | } | ||
| 540 | ``` | ||
| 541 | |||
| 542 | ### 3. handle_pr_event Changes | ||
| 543 | |||
| 544 | On PR event arrival: | ||
| 545 | |||
| 546 | **Key Rule**: Incoming PR events supersede existing refs. If the existing `refs/nostr/<event-id>` ref has a different commit_id, the ref should be removed and the event stored in purgatory to await new git data. | ||
| 547 | |||
| 548 | ```rust | ||
| 549 | async fn handle_pr_event(&self, event: &Event) -> WritePolicyResult { | ||
| 550 | let commit = extract_c_tag_commit(&event)?; | ||
| 551 | let event_id = event.id.to_hex(); | ||
| 552 | |||
| 553 | // Check if placeholder exists (git-data-first) | ||
| 554 | if let Some(placeholder) = self.purgatory.find_pr_placeholder(&event.id) { | ||
| 555 | if placeholder.commit == commit { | ||
| 556 | self.purgatory.remove_pr(&event.id); // Note this shouldnt remove the git data | ||
| 557 | return WritePolicyResult::Accept; | ||
| 558 | } else { | ||
| 559 | // Placeholder has different commit - incoming event supersedes | ||
| 560 | // Update placeholder with new expected commit | ||
| 561 | self.purgatory.remove_pr(&event.id); | ||
| 562 | // TODO also remove git data | ||
| 563 | let msg = self.purgatory.add_pr(event.clone(), commit); | ||
| 564 | return WritePolicyResult::Reject { | ||
| 565 | status: true, // Client sees OK - in purgatory awaiting correct git data | ||
| 566 | message: msg.into() | ||
| 567 | }; | ||
| 568 | } | ||
| 569 | } | ||
| 570 | |||
| 571 | // Add to purgatory | ||
| 572 | let msg = self.purgatory.add_pr(event.clone(), commit); | ||
| 573 | WritePolicyResult::Reject { | ||
| 574 | status: true, | ||
| 575 | message: msg.into() | ||
| 576 | } | 599 | } |
| 577 | } | 600 | } |
| 578 | ``` | 601 | ``` |
| 579 | 602 | ||
| 580 | ### 4. Git Handler Changes | 603 | ### 2. Git Push Authorization |
| 581 | 604 | ||
| 582 | In `handle_receive_pack` for normal refs: | 605 | Authorization checks both database and purgatory: |
| 583 | |||
| 584 | **Key Rule**: Refs are sent to a specific repo (`pubkey/identifier`), and there can only be **one authorized state event** for that repo. Therefore, this function returns a single `Option<Event>` rather than `Vec<Event>`. | ||
| 585 | 606 | ||
| 586 | ```rust | 607 | ```rust |
| 587 | async fn handle_state_refs_push( | 608 | // From src/git/authorization.rs |
| 588 | &self, | 609 | pub async fn authorize_push( |
| 610 | database: &SharedDatabase, | ||
| 589 | identifier: &str, | 611 | identifier: &str, |
| 590 | repo_owner: &str, | 612 | owner_pubkey: &str, |
| 591 | pushed_refs: &[RefPair], | 613 | request_body: &Bytes, |
| 592 | ) -> Result<Option<Event>, GitError> { | 614 | purgatory: &Arc<Purgatory>, // Critical! |
| 593 | let local_refs = git::list_refs(&self.repo_path)?; | 615 | repo_path: &std::path::Path, |
| 594 | 616 | ) -> anyhow::Result<AuthorizationResult> { | |
| 595 | // Find matching state events | 617 | // Parse pushed refs |
| 596 | let candidates = self.purgatory.find_matching_states( | 618 | let pushed_refs = parse_pushed_refs(request_body); |
| 597 | identifier, | 619 | |
| 598 | pushed_refs, | 620 | // Check database for state events |
| 599 | &local_refs, | 621 | let db_result = get_authorization_from_db(database, identifier).await?; |
| 600 | ); | 622 | |
| 601 | 623 | if !db_result.authorized { | |
| 602 | // Get all state events from relay for this identifier | 624 | // No state in database - check purgatory |
| 603 | let relay_states = self.database.get_state_events_for_identifier(identifier).await?; | 625 | let purgatory_result = get_state_authorization_for_specific_owner_repo( |
| 604 | 626 | database, | |
| 605 | // Get announcements to determine authorization | 627 | identifier, |
| 606 | let announcements = self.database.get_announcements(identifier).await?; | 628 | owner_pubkey, |
| 607 | let auth_map = collect_authorized_maintainers(&announcements); | 629 | purgatory, |
| 608 | 630 | &pushed_refs, | |
| 609 | // Find the authoritative state event (largest created_at among all authorized) | 631 | repo_path, |
| 610 | let mut best_candidate: Option<(Event, Timestamp)> = None; | 632 | ).await?; |
| 611 | 633 | ||
| 612 | // Check purgatory candidates | 634 | return purgatory_result; |
| 613 | for event in candidates { | ||
| 614 | if let Some(maintainers) = auth_map.get(repo_owner) { | ||
| 615 | if maintainers.contains(&event.pubkey.to_hex()) { | ||
| 616 | // Check if this event is superseded by any relay state | ||
| 617 | let superseded = relay_states.iter().any(|relay_state| { | ||
| 618 | let relay_authorized = auth_map.values() | ||
| 619 | .any(|m| m.contains(&relay_state.pubkey.to_hex())); | ||
| 620 | relay_authorized && relay_state.created_at > event.created_at | ||
| 621 | }); | ||
| 622 | |||
| 623 | if superseded { | ||
| 624 | // Don't use this as best_candidate for THIS repo | ||
| 625 | // Note: Do NOT remove from purgatory - it may still be | ||
| 626 | // authoritative for a DIFFERENT repo with a different maintainer set | ||
| 627 | continue; | ||
| 628 | } | ||
| 629 | |||
| 630 | // Verify OIDs for unpushed refs exist | ||
| 631 | let state_refs = extract_refs_from_state(&event); | ||
| 632 | let unpushed = get_unpushed_refs(&state_refs, pushed_refs); | ||
| 633 | if verify_oids_exist(&self.repo_path, &unpushed)? { | ||
| 634 | // Track best candidate by created_at | ||
| 635 | if best_candidate.is_none() || event.created_at > best_candidate.as_ref().unwrap().1 { | ||
| 636 | best_candidate = Some((event, event.created_at)); | ||
| 637 | } | ||
| 638 | } | ||
| 639 | } | ||
| 640 | } | ||
| 641 | } | ||
| 642 | |||
| 643 | // If we found an approved event, extend its expiry | ||
| 644 | if let Some((ref event, _)) = best_candidate { | ||
| 645 | self.purgatory.extend_expiry(&[event.id], Duration::from_secs(900)); | ||
| 646 | } | 635 | } |
| 647 | 636 | ||
| 648 | Ok(best_candidate.map(|(e, _)| e)) | 637 | db_result |
| 649 | } | 638 | } |
| 650 | ``` | 639 | ``` |
| 651 | 640 | ||
| 652 | For `refs/nostr/<event-id>` pushes: | 641 | ### 3. Post-Push Processing |
| 653 | 642 | ||
| 654 | **Key Rule**: If there is no event (neither in database nor in purgatory), a push of a different commit_id should be **accepted**. This is the "git-data-first" scenario where we're waiting for the PR event to arrive. | 643 | After successful push, events from purgatory are saved to database: |
| 655 | 644 | ||
| 656 | ```rust | 645 | ```rust |
| 657 | async fn handle_nostr_ref_push( | 646 | // From src/git/handlers.rs |
| 658 | &self, | 647 | if from_purgatory { |
| 659 | event_id: &str, | 648 | if let (Some(db), Some(purg)) = (&database, &purgatory) { |
| 660 | pushed_commit: &str, | 649 | // Save state event to database |
| 661 | ) -> Result<PushDecision, GitError> { | 650 | db.save_event(&state.event).await?; |
| 662 | // Check database first | 651 | |
| 663 | if let Some(event) = self.database.get_event_by_id(event_id).await? { | 652 | // Remove from purgatory |
| 664 | let expected_commit = extract_c_tag_commit(&event)?; | 653 | purg.remove_state_event(identifier, &state.event.id); |
| 665 | if expected_commit == pushed_commit { | ||
| 666 | return Ok(PushDecision::Accept); | ||
| 667 | } else { | ||
| 668 | return Ok(PushDecision::Reject("commit mismatch with existing event")); | ||
| 669 | } | ||
| 670 | } | ||
| 671 | |||
| 672 | // Check purgatory for PR event | ||
| 673 | if let Some(entry) = self.purgatory.find_pr(&EventId::from_hex(event_id)?) { | ||
| 674 | if let Some(event) = entry.event { | ||
| 675 | // Event exists in purgatory - must match commit | ||
| 676 | let expected_commit = extract_c_tag_commit(&event)?; | ||
| 677 | if expected_commit == pushed_commit { | ||
| 678 | // Remove from purgatory before returning | ||
| 679 | self.purgatory.remove_pr(&EventId::from_hex(event_id)?); // note this shouldnt delete the git data | ||
| 680 | return Ok(PushDecision::AcceptAndRelease(event)); | ||
| 681 | } else { | ||
| 682 | return Ok(PushDecision::Reject("commit mismatch with purgatory event")); | ||
| 683 | } | ||
| 684 | } else { | ||
| 685 | // Placeholder exists (previous push, no event yet) | ||
| 686 | // Accept and update placeholder with new commit | ||
| 687 | // This allows re-pushing with corrected data before event arrives | ||
| 688 | return Ok(PushDecision::AcceptAndUpdatePlaceholder(pushed_commit.to_string())); | ||
| 689 | } | ||
| 690 | } | 654 | } |
| 691 | |||
| 692 | // No event anywhere - git-data-first scenario | ||
| 693 | // Accept ANY commit and create placeholder awaiting the PR event | ||
| 694 | Ok(PushDecision::AcceptAndCreatePlaceholder(pushed_commit.to_string())) | ||
| 695 | } | 655 | } |
| 696 | // TODO when AcceptAndUpdatePlaceholder gets called purgatory must get udpated with a new / updated entry either here of where AcceptAndCreatePlaceholder is handled | ||
| 697 | ``` | 656 | ``` |
| 698 | 657 | ||
| 699 | ### 5. Main.rs Changes | 658 | ### 4. Background Sync Loop |
| 700 | 659 | ||
| 701 | ```rust | 660 | Started during application initialization: |
| 702 | // During startup | ||
| 703 | let purgatory = Arc::new(Purgatory::new()); | ||
| 704 | 661 | ||
| 705 | // Pass to WritePolicy | 662 | ```rust |
| 706 | let write_policy = Nip34WritePolicy::new( | 663 | // From src/main.rs |
| 707 | &config.domain, | 664 | let purgatory = Arc::new(Purgatory::new(git_data_path)); |
| 665 | let ctx = Arc::new(RealSyncContext::new( | ||
| 708 | database.clone(), | 666 | database.clone(), |
| 709 | &git_data_path, | ||
| 710 | purgatory.clone(), | 667 | purgatory.clone(), |
| 711 | ); | 668 | config.domain.clone(), |
| 712 | 669 | git_data_path.clone(), | |
| 713 | // Pass to git handlers (via shared state) | 670 | )); |
| 714 | let git_state = GitState { | 671 | let throttle_manager = Arc::new(ThrottleManager::new(5, 30)); |
| 715 | purgatory: purgatory.clone(), | 672 | throttle_manager.set_context(ctx.clone()); |
| 716 | database: database.clone(), | 673 | |
| 717 | // ... | 674 | // Start sync loop |
| 718 | }; | 675 | let sync_handle = purgatory.clone().start_sync_loop(ctx, throttle_manager); |
| 719 | 676 | ||
| 720 | // Spawn cleanup task | 677 | // Start cleanup task |
| 721 | let purgatory_cleanup = purgatory.clone(); | 678 | let cleanup_handle = tokio::spawn(async move { |
| 722 | tokio::spawn(async move { | ||
| 723 | let mut interval = tokio::time::interval(Duration::from_secs(60)); | 679 | let mut interval = tokio::time::interval(Duration::from_secs(60)); |
| 724 | loop { | 680 | loop { |
| 725 | interval.tick().await; | 681 | interval.tick().await; |
| 726 | let removed = purgatory_cleanup.cleanup(); | 682 | let (state_removed, pr_removed) = purgatory.cleanup(); |
| 727 | if removed > 0 { | 683 | if state_removed + pr_removed > 0 { |
| 728 | tracing::debug!("Purgatory cleanup removed {} expired entries", removed); | 684 | tracing::debug!( |
| 685 | "Purgatory cleanup removed {} state, {} PR entries", | ||
| 686 | state_removed, pr_removed | ||
| 687 | ); | ||
| 729 | } | 688 | } |
| 730 | } | 689 | } |
| 731 | }); | 690 | }); |
| 732 | ``` | 691 | ``` |
| 733 | 692 | ||
| 734 | ## Post-Push Sync Flow | 693 | --- |
| 735 | |||
| 736 | After successfully processing a state or PR event, sync to other maintainer repos: | ||
| 737 | |||
| 738 | **Key Rules for State Events (30618)**: | ||
| 739 | |||
| 740 | 1. Fetch all state events matching the identifier from the database | ||
| 741 | 2. Check if another existing state event supersedes this one (the maintainer set may be different, so another maintainer may have a more recent state event) | ||
| 742 | 3. Only sync if this event is not superseded | ||
| 743 | |||
| 744 | ```rust | ||
| 745 | async fn sync_to_other_repos( | ||
| 746 | &self, | ||
| 747 | identifier: &str, | ||
| 748 | event: &Event, | ||
| 749 | processed_repo_owner: &str, | ||
| 750 | ) -> Result<usize, SyncError> { | ||
| 751 | let announcements = self.database.get_announcements(identifier).await?; | ||
| 752 | let auth_map = collect_authorized_maintainers(&announcements); | ||
| 753 | |||
| 754 | // Fetch all state events for this identifier to check for superseding | ||
| 755 | let all_state_events = self.database.get_state_events_for_identifier(identifier).await?; | ||
| 756 | |||
| 757 | let mut synced = 0; | ||
| 758 | for (owner, maintainers) in auth_map { | ||
| 759 | // Skip the repo we just processed | ||
| 760 | if owner == processed_repo_owner { | ||
| 761 | continue; | ||
| 762 | } | ||
| 763 | |||
| 764 | // Check if event author is authorized for this owner's repo | ||
| 765 | if maintainers.contains(&event.pubkey.to_hex()) { | ||
| 766 | let repo_path = self.repo_path_for_owner(&owner); | ||
| 767 | |||
| 768 | match event.kind.as_u64() { | ||
| 769 | 30618 => { | ||
| 770 | // State event - check if another state event supersedes this one | ||
| 771 | // for THIS owner's repo (maintainer sets may differ between owners) | ||
| 772 | let owner_maintainers = auth_map.get(&owner).cloned().unwrap_or_default(); | ||
| 773 | |||
| 774 | // Find the authoritative state for this owner's repo | ||
| 775 | let superseding_state = all_state_events.iter() | ||
| 776 | .filter(|s| owner_maintainers.contains(&s.pubkey.to_hex())) | ||
| 777 | .filter(|s| s.created_at > event.created_at) | ||
| 778 | .max_by_key(|s| s.created_at); | ||
| 779 | |||
| 780 | if let Some(newer_state) = superseding_state { | ||
| 781 | // This event is superseded by a more recent state | ||
| 782 | // for this owner's repo - skip syncing this event | ||
| 783 | tracing::debug!( | ||
| 784 | "Skipping sync to {}: event {} superseded by {}", | ||
| 785 | owner, | ||
| 786 | event.id, | ||
| 787 | newer_state.id | ||
| 788 | ); | ||
| 789 | continue; | ||
| 790 | } | ||
| 791 | |||
| 792 | // No superseding state - align refs | ||
| 793 | let state_refs = extract_refs_from_state(&event); | ||
| 794 | self.align_refs(&repo_path, &state_refs)?; | ||
| 795 | } | ||
| 796 | 1617 | 1618 => { | ||
| 797 | // PR event - ensure nostr ref exists | ||
| 798 | let commit = extract_c_tag_commit(&event)?; | ||
| 799 | self.ensure_nostr_ref(&repo_path, &event.id, &commit)?; | ||
| 800 | } | ||
| 801 | _ => {} | ||
| 802 | } | ||
| 803 | synced += 1; | ||
| 804 | } | ||
| 805 | } | ||
| 806 | |||
| 807 | Ok(synced) | ||
| 808 | } | ||
| 809 | ``` | ||
| 810 | 694 | ||
| 811 | ## Implementation File Structure | 695 | ## File Structure |
| 812 | 696 | ||
| 813 | ``` | 697 | ``` |
| 814 | src/ | 698 | src/ |
| 815 | ├── purgatory/ | 699 | ├── purgatory/ |
| 816 | │ ├── mod.rs # Purgatory struct, public API | 700 | │ ├── mod.rs # Main Purgatory struct and API |
| 817 | │ ├── types.rs # RefPair, StatePurgatoryEntry, PrPurgatoryEntry | 701 | │ ├── types.rs # RefPair, StatePurgatoryEntry, PrPurgatoryEntry |
| 818 | │ ├── state_events.rs # State event purgatory logic | 702 | │ ├── helpers.rs # Ref extraction and matching functions |
| 819 | │ ── pr_events.rs # PR event purgatory logic | 703 | │ ── sync/ |
| 820 | │ ── helpers.rs # extract_refs_from_state, can_satisfy_state, etc. | 704 | │ ── mod.rs # Sync module exports |
| 821 | ├── nostr/ | 705 | ── loop.rs # Background sync loop |
| 822 | │ ├── builder.rs # Modified: Nip34WritePolicy accepts Arc<Purgatory> | 706 | │ ├── functions.rs # sync_identifier, sync_identifier_from_url |
| 823 | │ ── policy/ | 707 | │ ── context.rs # SyncContext trait and RealSyncContext |
| 824 | │ ├── state.rs # Modified: handle_state uses purgatory | 708 | │ ├── queue.rs # SyncQueueEntry |
| 825 | │ └── pr_event.rs # Modified: handle_pr_event uses purgatory | 709 | │ └── throttle.rs # ThrottleManager, DomainThrottle |
| 826 | ├── git/ | 710 | ├── git/ |
| 827 | │ └── handlers.rs # Modified: handle_receive_pack integrates purgatory | 711 | │ ├── authorization.rs # authorize_push with purgatory checking |
| 828 | └── main.rs # Modified: creates Purgatory, spawns cleanup task | 712 | │ ├── handlers.rs # handle_receive_pack with post-push processing |
| 713 | │ └── sync.rs # process_newly_available_git_data | ||
| 714 | └── nostr/ | ||
| 715 | └── policy/ | ||
| 716 | ├── state.rs # State event policy with purgatory | ||
| 717 | └── pr_event.rs # PR event policy with purgatory | ||
| 829 | ``` | 718 | ``` |
| 830 | 719 | ||
| 831 | ## Additional Type Definitions | 720 | --- |
| 832 | |||
| 833 | ### PushDecision Enum | ||
| 834 | |||
| 835 | Used by `handle_nostr_ref_push` to communicate different outcomes to the caller: | ||
| 836 | |||
| 837 | ```rust | ||
| 838 | /// Result of evaluating a push to refs/nostr/<event-id> | ||
| 839 | pub enum PushDecision { | ||
| 840 | /// Push is valid - event exists in database and commit matches | ||
| 841 | Accept, | ||
| 842 | |||
| 843 | /// Push valid and event should be released from purgatory to database | ||
| 844 | AcceptAndRelease(Event), | ||
| 845 | |||
| 846 | /// Push valid - create new placeholder awaiting PR event | ||
| 847 | AcceptAndCreatePlaceholder(String), // commit SHA | ||
| 848 | |||
| 849 | /// Push valid - update existing placeholder with new commit | ||
| 850 | AcceptAndUpdatePlaceholder(String), // new commit SHA | ||
| 851 | |||
| 852 | /// Push rejected with reason | ||
| 853 | Reject(&'static str), | ||
| 854 | } | ||
| 855 | ``` | ||
| 856 | |||
| 857 | ### WritePolicyResult Clarification | ||
| 858 | |||
| 859 | The design uses a pattern where purgatory events return `status: true` but the event is NOT saved: | ||
| 860 | |||
| 861 | ```rust | ||
| 862 | // Event goes to purgatory - client sees OK but event not served until git data arrives | ||
| 863 | WritePolicyResult::Reject { | ||
| 864 | status: true, // Nostr OK message to client | ||
| 865 | message: "purgatory: won't be served until git data arrives".into() | ||
| 866 | } | ||
| 867 | |||
| 868 | // Event rejected - client sees error | ||
| 869 | WritePolicyResult::Reject { | ||
| 870 | status: false, // Nostr error message to client | ||
| 871 | message: "rejected: reason...".into() | ||
| 872 | } | ||
| 873 | |||
| 874 | // Event accepted and saved to database | ||
| 875 | WritePolicyResult::Accept | ||
| 876 | ``` | ||
| 877 | |||
| 878 | **Note:** This is a quirk of using the `WritePolicyResult::Reject` variant for purgatory - the `status: true` ensures the client receives an OK response, but since we're in the Reject variant, the event is not automatically saved to the database. The purgatory mechanism holds it until git data arrives. | ||
| 879 | |||
| 880 | ## Test Scenarios | ||
| 881 | 721 | ||
| 882 | ### State Event Tests | 722 | ## Testing |
| 883 | 723 | ||
| 884 | 1. **Event arrives, git data exists** - Event processed immediately, saved to DB | 724 | ### Unit Tests |
| 885 | 2. **Event arrives, git data doesn't exist** - Event goes to purgatory, client sees OK | ||
| 886 | 3. **Git push arrives, matching event in purgatory** - Event released from purgatory, saved to DB | ||
| 887 | 4. **Git push arrives, no matching event** - Push rejected (no authorized state) | ||
| 888 | 5. **Event expires in purgatory** - Entry removed after 30 minutes (dont implement test due to 30m wait) | ||
| 889 | 6. **Multiple state events for same identifier** - Late binding at push time selects correct one | ||
| 890 | 725 | ||
| 891 | ### PR Event Tests | 726 | Located in each module: |
| 892 | 727 | ||
| 893 | 1. **PR event arrives, git data exists** - Event processed immediately, saved to DB | 728 | - **[`src/purgatory/mod.rs`](../../src/purgatory/mod.rs)** - Core purgatory operations |
| 894 | 2. **PR event arrives, no git data** - Event goes to purgatory awaiting git push | 729 | - **[`src/purgatory/helpers.rs`](../../src/purgatory/helpers.rs)** - Ref matching logic |
| 895 | 3. **PR event arrives, placeholder exists with matching commit** - Event released, saved to DB | 730 | - **[`src/purgatory/sync/functions.rs`](../../src/purgatory/sync/functions.rs)** - Sync functions with MockSyncContext |
| 896 | 4. **PR event arrives, placeholder exists with different commit** - Ref deleted, event to purgatory | 731 | - **[`src/purgatory/sync/throttle.rs`](../../src/purgatory/sync/throttle.rs)** - Throttle manager |
| 897 | 5. **Git push to refs/nostr/ arrives, PR event exists in purgatory** - Event released, ref created | ||
| 898 | 6. **Git push to refs/nostr/ arrives, no PR event** - Placeholder created, awaiting event | ||
| 899 | 7. **Second git push updates placeholder** - Placeholder commit updated | ||
| 900 | 732 | ||
| 901 | ### Edge Cases (NOT TESTED) | 733 | ### Integration Tests |
| 902 | 734 | ||
| 903 | 1. **Relay restart** - All purgatory entries lost (acceptable per design) | 735 | Located in [`tests/`](../../tests/): |
| 904 | 2. **Same event submitted twice** - Deduplicated by event ID | ||
| 905 | 3. **Push timeout during processing** - Entry expiry extended to 15 min minimum | ||
| 906 | 4. **Race between event and git push** - Whichever completes the pair triggers release | ||
| 907 | 736 | ||
| 737 | - **State event purgatory flow** - Event arrives, git push releases it | ||
| 738 | - **PR event purgatory flow** - Event arrives, git push releases it | ||
| 739 | - **Git-data-first flow** - Git push creates placeholder, event completes it | ||
| 740 | - **Authorization with purgatory** - Push authorized by purgatory state | ||
| 741 | - **Background sync** - Sync fetches git data and releases events | ||
| 908 | 742 | ||
| 909 | ## Purgatory Authorization Fix (2025-12-24) | 743 | --- |
| 910 | 744 | ||
| 911 | **Critical Implementation Note**: The original purgatory design placed purgatory checking AFTER git push execution. This created a deadlock where pushes were rejected because the authorizing state event was in purgatory, not the database. | 745 | ## Key Learnings |
| 912 | 746 | ||
| 913 | ### The Deadlock Problem | 747 | ### 1. Purgatory Authorization is Critical |
| 914 | 748 | ||
| 915 | **Original broken flow:** | 749 | Without checking purgatory during authorization, we have a deadlock: |
| 916 | 1. State event arrives → No git data exists → Event stored in PURGATORY (not database) | 750 | - State event goes to purgatory (no git data) |
| 917 | 2. Git push arrives → Authorization checks DATABASE only → No state found → **PUSH REJECTED** ❌ | 751 | - Push is rejected (no state in database) |
| 918 | 3. Purgatory check runs → But push already failed, so this never helps | 752 | - Event never gets released |
| 919 | 753 | ||
| 920 | ### The Fix: Authorization-Time Purgatory Check | 754 | **Solution:** `authorize_push()` checks both database and purgatory. |
| 921 | 755 | ||
| 922 | **Correct flow (implemented):** | 756 | ### 2. Late Binding for State Events |
| 923 | 1. State event arrives → No git data exists → Event stored in purgatory | ||
| 924 | 2. Git push arrives → Authorization checks **DATABASE + PURGATORY** → State found in purgatory → **PUSH AUTHORIZED** ✅ | ||
| 925 | 3. After successful push → Save purgatory event to database → Remove from purgatory | ||
| 926 | 757 | ||
| 927 | ### Implementation Details | 758 | Extracting refs at event arrival time doesn't work when: |
| 759 | - Multiple state events arrive for same identifier | ||
| 760 | - Git data for older state arrives after newer state received | ||
| 928 | 761 | ||
| 929 | #### 1. Modified [`AuthorizationResult`](../../src/git/authorization.rs:397) | 762 | **Solution:** Extract and match refs at push time via `find_matching_states()`. |
| 930 | 763 | ||
| 931 | Added `from_purgatory: bool` field to track whether the authorizing state came from purgatory: | 764 | ### 3. Bidirectional Waiting for PR Events |
| 932 | 765 | ||
| 933 | ```rust | 766 | PR events can arrive before or after git data: |
| 934 | pub struct AuthorizationResult { | 767 | - Event first → Wait for git push |
| 935 | pub authorized: bool, | 768 | - Git first → Create placeholder, wait for event |
| 936 | pub reason: String, | ||
| 937 | pub state: Option<RepositoryState>, | ||
| 938 | pub maintainers: Vec<String>, | ||
| 939 | pub from_purgatory: bool, // NEW: Track event source | ||
| 940 | } | ||
| 941 | ``` | ||
| 942 | 769 | ||
| 943 | #### 2. Enhanced [`get_authorization_for_owner()`](../../src/git/authorization.rs:342) | 770 | **Solution:** `PrPurgatoryEntry.event: Option<Event>` with `None` = placeholder. |
| 944 | 771 | ||
| 945 | Added purgatory checking when no state found in database: | 772 | ### 4. Sync Queue Debouncing |
| 946 | 773 | ||
| 947 | ```rust | 774 | When events arrive in bursts (e.g., negentropy sync), we don't want to spawn a sync task for each event. |
| 948 | pub async fn get_authorization_for_owner( | ||
| 949 | database: &SharedDatabase, | ||
| 950 | identifier: &str, | ||
| 951 | owner_pubkey: &str, | ||
| 952 | purgatory: Option<&Arc<Purgatory>>, | ||
| 953 | pushed_refs: &[(String, String, String)], | ||
| 954 | repo_path: &Path, | ||
| 955 | ) -> Result<AuthorizationResult> | ||
| 956 | ``` | ||
| 957 | 775 | ||
| 958 | **Logic**: | 776 | **Solution:** `enqueue_sync()` resets `attempt_count` and updates `next_attempt` if already queued. |
| 959 | 1. Check database for state events (existing behavior) | ||
| 960 | 2. If no state in database AND purgatory available: | ||
| 961 | - Parse pushed refs to RefPairs | ||
| 962 | - Get local refs from repository | ||
| 963 | - Call [`find_matching_states()`](../../src/purgatory/mod.rs:203) | ||
| 964 | - Filter to latest event from authorized authors | ||
| 965 | - Return authorization with `from_purgatory: true` | ||
| 966 | 777 | ||
| 967 | #### 3. Post-Push Purgatory Event Save | 778 | ### 5. Domain Throttling with Queues |
| 968 | 779 | ||
| 969 | In [`handle_receive_pack()`](../../src/git/handlers.rs:187), after successful push: | 780 | When a domain is throttled, we still want to eventually sync from it. |
| 970 | 781 | ||
| 971 | ```rust | 782 | **Solution:** `ThrottleManager` maintains per-domain queues and processes them when capacity frees. |
| 972 | if from_purgatory { | ||
| 973 | if let (Some(db), Some(purg)) = (&database, &purgatory) { | ||
| 974 | // Save state event to database | ||
| 975 | db.save_event(&state.event).await?; | ||
| 976 | |||
| 977 | // Remove from purgatory | ||
| 978 | purg.remove_state_event(identifier, &state.event.id); | ||
| 979 | } | ||
| 980 | } | ||
| 981 | ``` | ||
| 982 | 783 | ||
| 983 | ### Files Modified | 784 | --- |
| 984 | |||
| 985 | 1. **[`src/git/authorization.rs`](../../src/git/authorization.rs)** | ||
| 986 | - Added `from_purgatory` field to `AuthorizationResult` | ||
| 987 | - Modified `get_authorization_for_owner()` signature and logic | ||
| 988 | - Added purgatory checking when database has no state | ||
| 989 | |||
| 990 | 2. **[`src/git/handlers.rs`](../../src/git/handlers.rs)** | ||
| 991 | - Modified `authorize_push()` to accept purgatory and repo_path parameters | ||
| 992 | - Added tracking of `from_purgatory` flag | ||
| 993 | - Added post-push database save for purgatory events | ||
| 994 | |||
| 995 | ### Why This Order Matters | ||
| 996 | |||
| 997 | Checking purgatory DURING authorization (before push execution) is critical: | ||
| 998 | |||
| 999 | - **Prevents deadlock**: Push is authorized by purgatory state before execution | ||
| 1000 | - **Maintains atomicity**: Only saves to database after successful push | ||
| 1001 | - **Race condition safe**: First successful push claims the purgatory event | ||
| 1002 | |||
| 1003 | The alternative (checking purgatory after push) creates an insurmountable deadlock where valid pushes are rejected because their authorizing state is in purgatory instead of the database. | ||
| 1004 | 785 | ||
| 1005 | ### Testing | 786 | ## Related Documentation |
| 1006 | 787 | ||
| 1007 | The fix enables the `test_push_authorized_by_owner_state` integration test scenario where: | 788 | - [Inline Authorization](inline-authorization.md) - Why purgatory checking during authorization is essential |
| 1008 | 1. State event is sent to relay (goes to purgatory - no git data yet) | 789 | - [Architecture Overview](architecture.md) - Full system design |
| 1009 | 2. Git push is sent (uses purgatory state for authorization) | 790 | - [Background Sync](../how-to/purgatory-sync.md) - How to configure and monitor sync |
| 1010 | 3. State event is released from purgatory to database | 791 | - [Test Strategy](../reference/test-strategy.md) - How we test purgatory |
| 1011 | 792 | ||
| 1012 | --- | 793 | --- |
| 1013 | 794 | ||
| 795 | *Part of the [ngit-grasp explanation docs](./)* | ||
diff --git a/docs/explanation/purgatory-sync-redesign.md b/docs/explanation/purgatory-sync-redesign.md deleted file mode 100644 index 629c2ff..0000000 --- a/docs/explanation/purgatory-sync-redesign.md +++ /dev/null | |||
| @@ -1,1959 +0,0 @@ | |||
| 1 | # Purgatory Sync Redesign | ||
| 2 | |||
| 3 | ## Status | ||
| 4 | |||
| 5 | **Proposed** - January 2026 | ||
| 6 | |||
| 7 | ## Context | ||
| 8 | |||
| 9 | The current purgatory sync implementation (`start_state_sync` at `src/purgatory/mod.rs:510`) has several limitations: | ||
| 10 | |||
| 11 | 1. **Per-event syncing**: Each state event triggers its own independent sync operation | ||
| 12 | 2. **No PR event syncing**: PR events enter purgatory but don't trigger git data fetching | ||
| 13 | 3. **No batching**: Multiple events for the same repository cause redundant fetch requests | ||
| 14 | 4. **No rate limiting**: Can overwhelm remote git servers or get rate-limited | ||
| 15 | 5. **No coordination**: Multiple concurrent syncs may fetch the same OIDs | ||
| 16 | |||
| 17 | When syncing a new repository, we often receive multiple state and PR events in a burst. The current approach creates unnecessary load on remote servers and doesn't handle this common case efficiently. | ||
| 18 | |||
| 19 | ## Decision | ||
| 20 | |||
| 21 | Redesign purgatory sync to be **identifier-based** rather than **event-based**, with: | ||
| 22 | |||
| 23 | 1. A background sync loop that processes identifiers, not individual events | ||
| 24 | 2. Batched OID fetching across all purgatory events for an identifier | ||
| 25 | 3. Domain-based throttling (30 requests/minute per domain) | ||
| 26 | 4. Exponential backoff per identifier (20s → 2m, then 2m intervals) | ||
| 27 | 5. Debouncing for burst event arrivals (500ms for sync-triggered, 3min default) | ||
| 28 | 6. **Clean separation of concerns**: Domain throttle handles rate limiting only; sync logic tracks its own tried URLs | ||
| 29 | |||
| 30 | ### Key Design Decision: Where Does OID Copying Happen? | ||
| 31 | |||
| 32 | **Answer: In `process_newly_available_git_data`, NOT after the entire sync completes.** | ||
| 33 | |||
| 34 | The current implementation (`sync_state_git_data`) fetches all OIDs first, then at the end: | ||
| 35 | 1. Copies OIDs to all authorized owner repos | ||
| 36 | 2. Aligns refs with state | ||
| 37 | 3. Saves to database | ||
| 38 | 4. Notifies subscribers | ||
| 39 | 5. Removes from purgatory | ||
| 40 | |||
| 41 | The redesign moves all of this into `process_newly_available_git_data`, which is called after **each successful URL fetch**. This enables: | ||
| 42 | |||
| 43 | | Aspect | Current (end-of-sync) | Redesign (per-fetch) | | ||
| 44 | |--------|----------------------|---------------------| | ||
| 45 | | **When events release** | Only after all URLs tried | As soon as OIDs available | | ||
| 46 | | **Partial success** | All or nothing per event | Events release independently | | ||
| 47 | | **Multiple state events** | All wait for slowest | Each releases when ready | | ||
| 48 | | **Authorization check** | Once at start | At release time (handles changes) | | ||
| 49 | |||
| 50 | **Why this matters:** | ||
| 51 | |||
| 52 | Consider syncing an identifier with 3 state events from different maintainers: | ||
| 53 | - State A needs OIDs from `server1.com` (fast) | ||
| 54 | - State B needs OIDs from `server2.com` (slow) | ||
| 55 | - State C needs OIDs from `server3.com` (down) | ||
| 56 | |||
| 57 | With the redesign: | ||
| 58 | 1. Fetch from `server1.com` succeeds → `process_newly_available_git_data` releases State A immediately | ||
| 59 | 2. Fetch from `server2.com` succeeds → `process_newly_available_git_data` releases State B | ||
| 60 | 3. Fetch from `server3.com` fails → State C stays in purgatory, retries with backoff | ||
| 61 | |||
| 62 | The current implementation would wait for all servers before releasing any events. | ||
| 63 | |||
| 64 | ### Unified Processing with Git Push Handler | ||
| 65 | |||
| 66 | **Key insight**: The post-git-data-available processing is identical whether data arrives via: | ||
| 67 | - A successful `git push` (handle_receive_pack) | ||
| 68 | - Purgatory sync fetching OIDs from remote servers | ||
| 69 | |||
| 70 | Both paths need to: | ||
| 71 | 1. Discover satisfiable events from purgatory | ||
| 72 | 2. Sync OIDs to authorized owner repos | ||
| 73 | 3. Align refs (+ set HEAD) | ||
| 74 | 4. Save events to database | ||
| 75 | 5. Notify WebSocket subscribers | ||
| 76 | 6. Remove from purgatory | ||
| 77 | |||
| 78 | Rather than duplicate this logic, we use a single unified function `process_newly_available_git_data` that handles all post-git-data-available processing. See [Unified Git Data Sync](unify-git-data-sync.md) for the complete design. | ||
| 79 | |||
| 80 | This means: | ||
| 81 | - **`handle_receive_pack`** calls `process_newly_available_git_data` after git push succeeds | ||
| 82 | - **`sync_identifier_from_url`** calls `process_newly_available_git_data` after OID fetch succeeds | ||
| 83 | - **Same behavior** regardless of how git data arrived | ||
| 84 | |||
| 85 | ## Architecture | ||
| 86 | |||
| 87 | ### Overview | ||
| 88 | |||
| 89 | ``` | ||
| 90 | ┌──────────────────────────────────────────────────────────────────────────────────┐ | ||
| 91 | │ Purgatory │ | ||
| 92 | │ │ | ||
| 93 | │ ┌─────────────────┐ ┌─────────────────┐ │ | ||
| 94 | │ │ State Events │ │ PR Events │ │ | ||
| 95 | │ │ (by identifier)│ │ (by event_id) │ │ | ||
| 96 | │ └────────┬────────┘ └────────┬────────┘ │ | ||
| 97 | │ │ │ │ | ||
| 98 | │ └──────────┬─────────┘ │ | ||
| 99 | │ │ add_state() / add_pr() / trigger_immediate_sync() │ | ||
| 100 | │ ▼ │ | ||
| 101 | │ ┌──────────────────────────┐ │ | ||
| 102 | │ │ Sync Queue │ │ | ||
| 103 | │ │ DashMap<id, Entry> │ │ | ||
| 104 | │ │ │ │ | ||
| 105 | │ │ Entry { │ │ | ||
| 106 | │ │ next_attempt, │ ← delay/backoff timer │ | ||
| 107 | │ │ attempt_count, │ ← for backoff calculation │ | ||
| 108 | │ │ in_progress, │ ← prevents concurrent runs │ | ||
| 109 | │ │ } │ │ | ||
| 110 | │ └────────────┬─────────────┘ │ | ||
| 111 | │ │ │ | ||
| 112 | │ ┌─────────────────────┼──────────────────────────────────────────────────────┐ │ | ||
| 113 | │ │ ▼ │ │ | ||
| 114 | │ │ ┌─────────────────────┐ │ │ | ||
| 115 | │ │ │ Main Sync Loop │ (every 1s) │ │ | ||
| 116 | │ │ │ │ │ │ | ||
| 117 | │ │ │ 1. Find ALL ready │ │ │ | ||
| 118 | │ │ │ identifiers │ │ │ | ||
| 119 | │ │ │ 2. Spawn parallel │───────┐ │ │ | ||
| 120 | │ │ │ tasks for each │ │ (parallel tasks) │ │ | ||
| 121 | │ │ │ 3. Apply backoff │ │ │ │ | ||
| 122 | │ │ │ when done │ │ │ │ | ||
| 123 | │ │ └─────────────────────┘ │ │ │ | ||
| 124 | │ │ ▼ │ │ | ||
| 125 | │ │ ┌──────────────────────────────────────────┐ │ │ | ||
| 126 | │ │ │ sync_identifier() │ │ │ | ||
| 127 | │ │ │ │ │ │ | ||
| 128 | │ │ │ Owns its own tried_urls: HashSet │ │ │ | ||
| 129 | │ │ │ │ │ │ | ||
| 130 | │ │ │ loop: │ │ │ | ||
| 131 | │ │ │ url = sync_identifier_next_url( │ │ │ | ||
| 132 | │ │ │ domain=None) │ │ │ | ||
| 133 | │ │ │ if url is Some: │ │ │ | ||
| 134 | │ │ │ sync_identifier_from_url(url) │ │ │ | ||
| 135 | │ │ │ tried_urls.insert(url) │ │ │ | ||
| 136 | │ │ │ else: │ │ │ | ||
| 137 | │ │ │ break (no non-throttled URLs left) │ │ │ | ||
| 138 | │ │ │ │ │ │ | ||
| 139 | │ │ │ Enqueue throttled domains then return │ │ │ | ||
| 140 | │ │ └──────────────────────────────────────────┘ │ │ | ||
| 141 | │ │ │ │ │ | ||
| 142 | │ │ │ enqueue_identifier() │ │ | ||
| 143 | │ │ ▼ │ │ | ||
| 144 | │ │ ┌─────────────────────────────────────────────────────────────────────┐ │ │ | ||
| 145 | │ │ │ ThrottleManager │ │ │ | ||
| 146 | │ │ │ │ │ │ | ||
| 147 | │ │ │ DashMap<domain, DomainThrottle> │ │ │ | ||
| 148 | │ │ │ │ │ │ | ||
| 149 | │ │ │ ┌─────────────────────────────────────────────────────────────┐ │ │ │ | ||
| 150 | │ │ │ │ DomainThrottle (per domain) │ │ │ │ | ||
| 151 | │ │ │ │ │ │ │ │ | ||
| 152 | │ │ │ │ Rate limiting: │ Queue (IndexMap for ordering): │ │ │ │ | ||
| 153 | │ │ │ │ - in_flight: u32 │ - queue: IndexMap<id, State> │ │ │ │ | ||
| 154 | │ │ │ │ - request_times │ - State: tried_urls, │ │ │ │ | ||
| 155 | │ │ │ │ - round_robin_index │ in_progress │ │ │ │ | ||
| 156 | │ │ │ └─────────────────────────────────────────────────────────────┘ │ │ │ | ||
| 157 | │ │ │ │ │ │ | ||
| 158 | │ │ │ Trigger-based processing (no polling loop): │ │ │ | ||
| 159 | │ │ │ - enqueue_identifier() triggers if capacity available │ │ │ | ||
| 160 | │ │ │ - complete_request() triggers next item if capacity available │ │ │ | ||
| 161 | │ │ │ │ │ │ | ||
| 162 | │ │ │ process_queued_identifier(): │ │ │ | ||
| 163 | │ │ │ 1. Pick next identifier (round-robin, not in_progress) │ │ │ | ||
| 164 | │ │ │ 2. url = sync_identifier_next_url(domain=Some(this_domain)) │ │ │ | ||
| 165 | │ │ │ 3. If url: sync_identifier_from_url(url), mark tried │ │ │ | ||
| 166 | │ │ │ Else: remove identifier from queue, try next │ │ │ | ||
| 167 | │ │ └─────────────────────────────────────────────────────────────────────┘ │ │ | ||
| 168 | │ │ │ │ | ||
| 169 | │ └────────────────────────────────────────────────────────────────────────────┘ │ | ||
| 170 | └───────────────────────────────────────────────────────────────────────────────────┘ | ||
| 171 | ``` | ||
| 172 | |||
| 173 | ### Key Design Principles | ||
| 174 | |||
| 175 | **1. Two Independent Execution Paths** | ||
| 176 | |||
| 177 | The main sync loop and DomainThrottle loops run independently: | ||
| 178 | - **Main sync**: Tries non-throttled URLs, completes quickly, applies backoff, retries later | ||
| 179 | - **DomainThrottle**: Processes queued identifiers when capacity frees, doesn't block main sync | ||
| 180 | |||
| 181 | **2. Two Separate tried_urls Tracking** | ||
| 182 | |||
| 183 | Each path tracks its own tried URLs: | ||
| 184 | - **sync_identifier**: Local `HashSet<String>` for current attempt (all domains) | ||
| 185 | - **DomainThrottle**: Per-identifier `HashSet<String>` for URLs tried via throttle (this domain only) | ||
| 186 | |||
| 187 | These don't need to merge because: | ||
| 188 | - Main sync skips throttled domains anyway | ||
| 189 | - DomainThrottle only processes its own domain's URLs | ||
| 190 | |||
| 191 | **3. Shared Functions** | ||
| 192 | |||
| 193 | Both paths use the same core functions: | ||
| 194 | - **`sync_identifier_next_url`**: Pure URL selection logic | ||
| 195 | - **`sync_identifier_from_url`**: Pure fetch logic | ||
| 196 | |||
| 197 | The `domain` parameter determines behavior: | ||
| 198 | - `None`: Return any non-throttled URL | ||
| 199 | - `Some(domain)`: Return URL from that specific domain only | ||
| 200 | |||
| 201 | ### Flow Summary | ||
| 202 | |||
| 203 | 1. **Event arrives** → added to state_events/pr_events + sync_queue with delay | ||
| 204 | - User-submitted: 3 minute delay (expect git push to follow) | ||
| 205 | - Sync-triggered: 500ms delay (batch burst arrivals) | ||
| 206 | - `enqueue_sync()` resets `attempt_count` to 0 and updates `next_attempt` if needed | ||
| 207 | |||
| 208 | 2. **Main sync loop** (every 1s): | ||
| 209 | - Finds ALL ready identifiers (where `!in_progress && next_attempt <= now`) | ||
| 210 | - Spawns parallel tasks for each (marks `in_progress = true`) | ||
| 211 | - Each `sync_identifier()` task: | ||
| 212 | - Creates fresh `tried_urls: HashSet<String>` | ||
| 213 | - Loops calling `sync_identifier_next_url(domain=None)` + `sync_identifier_from_url` | ||
| 214 | - When no non-throttled URLs remain: enqueue with throttled domains, return | ||
| 215 | - When task completes: apply backoff or remove from queue | ||
| 216 | |||
| 217 | 3. **ThrottleManager / DomainThrottle** (trigger-based, no polling): | ||
| 218 | - Processing triggered by `enqueue_identifier()` or `complete_request()` | ||
| 219 | - When triggered and capacity available: pick next queued identifier (round-robin, not in_progress) | ||
| 220 | - Call `sync_identifier_next_url(domain=Some(this_domain))` | ||
| 221 | - If URL returned: call `sync_identifier_from_url`, mark URL tried, mark not in_progress | ||
| 222 | - If no URL: remove identifier from queue, try next identifier | ||
| 223 | |||
| 224 | ## Data Structures | ||
| 225 | |||
| 226 | ### SyncQueueEntry | ||
| 227 | |||
| 228 | Tracks sync state for each identifier in the main sync queue: | ||
| 229 | |||
| 230 | ```rust | ||
| 231 | /// Entry in the sync queue tracking when/how to sync an identifier | ||
| 232 | #[derive(Debug, Clone)] | ||
| 233 | pub struct SyncQueueEntry { | ||
| 234 | /// Don't attempt sync before this time | ||
| 235 | pub next_attempt: Instant, | ||
| 236 | |||
| 237 | /// Number of sync attempts (for backoff calculation) | ||
| 238 | /// Reset to 0 when new event arrives for this identifier | ||
| 239 | pub attempt_count: u32, | ||
| 240 | |||
| 241 | /// Whether a sync is currently in progress for this identifier | ||
| 242 | pub in_progress: bool, | ||
| 243 | } | ||
| 244 | |||
| 245 | impl SyncQueueEntry { | ||
| 246 | pub fn new(delay: Duration) -> Self { | ||
| 247 | Self { | ||
| 248 | next_attempt: Instant::now() + delay, | ||
| 249 | attempt_count: 0, | ||
| 250 | in_progress: false, | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | /// Calculate backoff: 20s, 40s, 80s, 120s (capped at 2min) | ||
| 255 | pub fn backoff(&self) -> Duration { | ||
| 256 | let base = Duration::from_secs(20); | ||
| 257 | let multiplier = 2u32.saturating_pow(self.attempt_count.saturating_sub(1).min(3)); | ||
| 258 | (base * multiplier).min(Duration::from_secs(120)) | ||
| 259 | } | ||
| 260 | |||
| 261 | pub fn is_ready(&self) -> bool { | ||
| 262 | !self.in_progress && Instant::now() >= self.next_attempt | ||
| 263 | } | ||
| 264 | |||
| 265 | /// Called when new event arrives - resets attempt_count | ||
| 266 | pub fn on_new_event(&mut self, delay: Duration) { | ||
| 267 | self.attempt_count = 0; | ||
| 268 | let new_attempt = Instant::now() + delay; | ||
| 269 | if new_attempt < self.next_attempt { | ||
| 270 | self.next_attempt = new_attempt; | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | /// Called when sync attempt completes | ||
| 275 | pub fn on_sync_complete(&mut self) { | ||
| 276 | self.in_progress = false; | ||
| 277 | if self.next_attempt <= Instant::now() { | ||
| 278 | self.attempt_count += 1; | ||
| 279 | self.next_attempt = Instant::now() + self.backoff(); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | } | ||
| 283 | ``` | ||
| 284 | |||
| 285 | ### ThrottleManager | ||
| 286 | |||
| 287 | Manages all per-domain throttles and provides the interface for checking throttle status: | ||
| 288 | |||
| 289 | ```rust | ||
| 290 | /// Manages rate limiting across all domains. | ||
| 291 | /// | ||
| 292 | /// Owns a collection of DomainThrottle instances and provides: | ||
| 293 | /// - Throttle status checking for sync_identifier_next_url | ||
| 294 | /// - Identifier queue management | ||
| 295 | /// - Trigger-based processing when capacity frees up | ||
| 296 | pub struct ThrottleManager { | ||
| 297 | /// Per-domain throttle state | ||
| 298 | throttles: DashMap<String, DomainThrottle>, | ||
| 299 | |||
| 300 | /// Sync context for processing queued identifiers | ||
| 301 | /// Set once at startup via set_context() | ||
| 302 | ctx: OnceLock<Arc<dyn SyncContext>>, | ||
| 303 | |||
| 304 | /// Configuration | ||
| 305 | max_concurrent_per_domain: u32, | ||
| 306 | max_per_minute_per_domain: u32, | ||
| 307 | } | ||
| 308 | |||
| 309 | impl ThrottleManager { | ||
| 310 | pub fn new(max_concurrent: u32, max_per_minute: u32) -> Self { | ||
| 311 | Self { | ||
| 312 | throttles: DashMap::new(), | ||
| 313 | ctx: OnceLock::new(), | ||
| 314 | max_concurrent_per_domain: max_concurrent, | ||
| 315 | max_per_minute_per_domain: max_per_minute, | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | /// Set the sync context (called once at startup) | ||
| 320 | pub fn set_context(&self, ctx: Arc<dyn SyncContext>) { | ||
| 321 | let _ = self.ctx.set(ctx); | ||
| 322 | } | ||
| 323 | |||
| 324 | /// Check if a domain is currently throttled (at capacity) | ||
| 325 | pub fn is_throttled(&self, domain: &str) -> bool { | ||
| 326 | self.throttles | ||
| 327 | .get(domain) | ||
| 328 | .map_or(false, |t| !t.has_capacity()) | ||
| 329 | } | ||
| 330 | |||
| 331 | /// Get or create throttle for a domain | ||
| 332 | fn get_or_create(&self, domain: &str) -> dashmap::mapref::one::RefMut<String, DomainThrottle> { | ||
| 333 | self.throttles | ||
| 334 | .entry(domain.to_string()) | ||
| 335 | .or_insert_with(|| DomainThrottle::new( | ||
| 336 | domain.to_string(), | ||
| 337 | self.max_concurrent_per_domain, | ||
| 338 | self.max_per_minute_per_domain, | ||
| 339 | )) | ||
| 340 | } | ||
| 341 | |||
| 342 | /// Record that a request is starting for a domain | ||
| 343 | pub fn start_request(&self, domain: &str) { | ||
| 344 | self.get_or_create(domain).start_request(); | ||
| 345 | } | ||
| 346 | |||
| 347 | /// Record that a request completed for a domain. | ||
| 348 | /// Triggers processing of next queued identifier if capacity available. | ||
| 349 | pub fn complete_request(self: &Arc<Self>, domain: &str) { | ||
| 350 | let should_trigger = { | ||
| 351 | if let Some(mut throttle) = self.throttles.get_mut(domain) { | ||
| 352 | throttle.complete_request(); | ||
| 353 | throttle.has_capacity() && throttle.has_queued_work() | ||
| 354 | } else { | ||
| 355 | false | ||
| 356 | } | ||
| 357 | }; | ||
| 358 | |||
| 359 | if should_trigger { | ||
| 360 | self.try_process_next(domain); | ||
| 361 | } | ||
| 362 | } | ||
| 363 | |||
| 364 | /// Add an identifier to a domain's waiting queue. | ||
| 365 | /// Triggers processing if capacity is available. | ||
| 366 | pub fn enqueue_identifier( | ||
| 367 | self: &Arc<Self>, | ||
| 368 | domain: &str, | ||
| 369 | identifier: String, | ||
| 370 | tried_urls_for_domain: HashSet<String>, | ||
| 371 | ) { | ||
| 372 | let should_trigger = { | ||
| 373 | let mut throttle = self.get_or_create(domain); | ||
| 374 | throttle.enqueue_identifier(identifier, tried_urls_for_domain); | ||
| 375 | throttle.has_capacity() | ||
| 376 | }; | ||
| 377 | |||
| 378 | if should_trigger { | ||
| 379 | self.try_process_next(domain); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | /// Try to process the next queued identifier for a domain | ||
| 384 | fn try_process_next(self: &Arc<Self>, domain: &str) { | ||
| 385 | let identifier = { | ||
| 386 | if let Some(mut throttle) = self.throttles.get_mut(domain) { | ||
| 387 | throttle.next_ready_identifier() | ||
| 388 | } else { | ||
| 389 | None | ||
| 390 | } | ||
| 391 | }; | ||
| 392 | |||
| 393 | if let Some(identifier) = identifier { | ||
| 394 | let manager = self.clone(); | ||
| 395 | let domain = domain.to_string(); | ||
| 396 | |||
| 397 | tokio::spawn(async move { | ||
| 398 | manager.process_queued_identifier(&domain, &identifier).await; | ||
| 399 | }); | ||
| 400 | } | ||
| 401 | } | ||
| 402 | |||
| 403 | /// Process a single identifier from a domain's queue | ||
| 404 | async fn process_queued_identifier(self: &Arc<Self>, domain: &str, identifier: &str) { | ||
| 405 | let ctx = match self.ctx.get() { | ||
| 406 | Some(ctx) => ctx, | ||
| 407 | None => return, | ||
| 408 | }; | ||
| 409 | |||
| 410 | // Get next URL for this identifier on this domain | ||
| 411 | let url = { | ||
| 412 | let throttle = match self.throttles.get(domain) { | ||
| 413 | Some(t) => t, | ||
| 414 | None => return, | ||
| 415 | }; | ||
| 416 | let tried_urls = throttle.get_tried_urls(identifier); | ||
| 417 | |||
| 418 | sync_identifier_next_url( | ||
| 419 | ctx.as_ref(), | ||
| 420 | identifier, | ||
| 421 | Some(domain), | ||
| 422 | &tried_urls, | ||
| 423 | self, | ||
| 424 | ).await | ||
| 425 | }; | ||
| 426 | |||
| 427 | match url { | ||
| 428 | Some(url) => { | ||
| 429 | // Fetch from this URL (this calls start_request/complete_request internally) | ||
| 430 | sync_identifier_from_url(ctx.as_ref(), identifier, &url, self).await; | ||
| 431 | |||
| 432 | // Record URL as tried and mark not in_progress | ||
| 433 | // complete_request() will trigger next item if capacity available | ||
| 434 | if let Some(mut throttle) = self.throttles.get_mut(domain) { | ||
| 435 | throttle.mark_url_tried(identifier, url); | ||
| 436 | throttle.mark_identifier_not_in_progress(identifier); | ||
| 437 | } | ||
| 438 | } | ||
| 439 | None => { | ||
| 440 | // No more URLs for this identifier on this domain - remove from queue | ||
| 441 | if let Some(mut throttle) = self.throttles.get_mut(domain) { | ||
| 442 | throttle.remove_identifier(identifier); | ||
| 443 | } | ||
| 444 | // Try next identifier since we didn't use any capacity | ||
| 445 | self.try_process_next(domain); | ||
| 446 | } | ||
| 447 | } | ||
| 448 | } | ||
| 449 | } | ||
| 450 | ``` | ||
| 451 | |||
| 452 | ### DomainThrottle | ||
| 453 | |||
| 454 | Per-domain rate limiting and waiting queue: | ||
| 455 | |||
| 456 | ```rust | ||
| 457 | /// Per-domain rate limiting and identifier queue. | ||
| 458 | /// | ||
| 459 | /// Handles: | ||
| 460 | /// - Rate limiting (concurrent requests, requests per minute) | ||
| 461 | /// - Queue of identifiers waiting for capacity (using IndexMap for round-robin order) | ||
| 462 | /// - Tracking tried URLs per identifier (for this domain only) | ||
| 463 | /// - In-progress flag per identifier (prevents concurrent fetches for same identifier | ||
| 464 | /// on this domain, important when queue is small and we have multiple concurrent slots) | ||
| 465 | pub struct DomainThrottle { | ||
| 466 | /// Domain this throttle manages | ||
| 467 | domain: String, | ||
| 468 | |||
| 469 | /// Current in-flight request count | ||
| 470 | in_flight: u32, | ||
| 471 | |||
| 472 | /// Request timestamps (sliding window for rate limiting) | ||
| 473 | request_times: VecDeque<Instant>, | ||
| 474 | |||
| 475 | /// Queued identifiers with their state. | ||
| 476 | /// IndexMap preserves insertion order for round-robin processing. | ||
| 477 | queue: IndexMap<String, IdentifierQueueState>, | ||
| 478 | |||
| 479 | /// Round-robin index for fair processing across identifiers | ||
| 480 | round_robin_index: usize, | ||
| 481 | |||
| 482 | /// Configuration | ||
| 483 | max_concurrent: u32, | ||
| 484 | max_per_minute: u32, | ||
| 485 | } | ||
| 486 | |||
| 487 | /// State for an identifier waiting in a domain's queue | ||
| 488 | #[derive(Debug, Clone)] | ||
| 489 | struct IdentifierQueueState { | ||
| 490 | /// URLs from this domain that have been tried | ||
| 491 | tried_urls: HashSet<String>, | ||
| 492 | |||
| 493 | /// Whether a fetch is currently in progress for this identifier on this domain. | ||
| 494 | /// Prevents starting multiple concurrent fetches for the same identifier, | ||
| 495 | /// which is important when the queue is small (e.g., 2 identifiers with 5 | ||
| 496 | /// concurrent slots would otherwise try to process the same identifier multiple times). | ||
| 497 | in_progress: bool, | ||
| 498 | } | ||
| 499 | |||
| 500 | impl DomainThrottle { | ||
| 501 | pub fn new(domain: String, max_concurrent: u32, max_per_minute: u32) -> Self { | ||
| 502 | Self { | ||
| 503 | domain, | ||
| 504 | in_flight: 0, | ||
| 505 | request_times: VecDeque::new(), | ||
| 506 | queue: IndexMap::new(), | ||
| 507 | round_robin_index: 0, | ||
| 508 | max_concurrent, | ||
| 509 | max_per_minute, | ||
| 510 | } | ||
| 511 | } | ||
| 512 | |||
| 513 | /// Check if domain has capacity for another request | ||
| 514 | pub fn has_capacity(&self) -> bool { | ||
| 515 | if self.in_flight >= self.max_concurrent { | ||
| 516 | return false; | ||
| 517 | } | ||
| 518 | |||
| 519 | let now = Instant::now(); | ||
| 520 | let window = Duration::from_secs(60); | ||
| 521 | let recent_count = self.request_times | ||
| 522 | .iter() | ||
| 523 | .filter(|t| now.duration_since(**t) < window) | ||
| 524 | .count(); | ||
| 525 | |||
| 526 | recent_count < self.max_per_minute as usize | ||
| 527 | } | ||
| 528 | |||
| 529 | /// Check if there are any identifiers in the queue | ||
| 530 | pub fn has_queued_work(&self) -> bool { | ||
| 531 | !self.queue.is_empty() | ||
| 532 | } | ||
| 533 | |||
| 534 | /// Record that a request is starting | ||
| 535 | pub fn start_request(&mut self) { | ||
| 536 | self.in_flight += 1; | ||
| 537 | self.request_times.push_back(Instant::now()); | ||
| 538 | } | ||
| 539 | |||
| 540 | /// Record that a request completed | ||
| 541 | pub fn complete_request(&mut self) { | ||
| 542 | self.in_flight = self.in_flight.saturating_sub(1); | ||
| 543 | |||
| 544 | // Clean old timestamps | ||
| 545 | let now = Instant::now(); | ||
| 546 | let window = Duration::from_secs(60); | ||
| 547 | while self.request_times.front().map_or(false, |t| now.duration_since(*t) >= window) { | ||
| 548 | self.request_times.pop_front(); | ||
| 549 | } | ||
| 550 | } | ||
| 551 | |||
| 552 | /// Add an identifier to the queue | ||
| 553 | pub fn enqueue_identifier(&mut self, identifier: String, tried_urls: HashSet<String>) { | ||
| 554 | self.queue | ||
| 555 | .entry(identifier) | ||
| 556 | .and_modify(|state| { | ||
| 557 | // Merge tried_urls if already exists | ||
| 558 | state.tried_urls.extend(tried_urls.iter().cloned()); | ||
| 559 | }) | ||
| 560 | .or_insert(IdentifierQueueState { | ||
| 561 | tried_urls, | ||
| 562 | in_progress: false, | ||
| 563 | }); | ||
| 564 | } | ||
| 565 | |||
| 566 | /// Get next identifier ready for processing (round-robin, not in_progress). | ||
| 567 | /// | ||
| 568 | /// Iterates through the queue starting from round_robin_index, skipping | ||
| 569 | /// any identifiers that are already in_progress. This ensures fair | ||
| 570 | /// distribution even when some identifiers have active fetches. | ||
| 571 | pub fn next_ready_identifier(&mut self) -> Option<String> { | ||
| 572 | let len = self.queue.len(); | ||
| 573 | if len == 0 { | ||
| 574 | return None; | ||
| 575 | } | ||
| 576 | |||
| 577 | // Try each identifier starting from round_robin_index | ||
| 578 | for i in 0..len { | ||
| 579 | let index = (self.round_robin_index + i) % len; | ||
| 580 | if let Some((identifier, state)) = self.queue.get_index_mut(index) { | ||
| 581 | if !state.in_progress { | ||
| 582 | state.in_progress = true; | ||
| 583 | self.round_robin_index = (index + 1) % len; | ||
| 584 | return Some(identifier.clone()); | ||
| 585 | } | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | None // All identifiers are in_progress | ||
| 590 | } | ||
| 591 | |||
| 592 | /// Get tried URLs for an identifier | ||
| 593 | pub fn get_tried_urls(&self, identifier: &str) -> HashSet<String> { | ||
| 594 | self.queue | ||
| 595 | .get(identifier) | ||
| 596 | .map(|s| s.tried_urls.clone()) | ||
| 597 | .unwrap_or_default() | ||
| 598 | } | ||
| 599 | |||
| 600 | /// Mark a URL as tried for an identifier | ||
| 601 | pub fn mark_url_tried(&mut self, identifier: &str, url: String) { | ||
| 602 | if let Some(state) = self.queue.get_mut(identifier) { | ||
| 603 | state.tried_urls.insert(url); | ||
| 604 | } | ||
| 605 | } | ||
| 606 | |||
| 607 | /// Mark identifier as not in progress (fetch completed) | ||
| 608 | pub fn mark_identifier_not_in_progress(&mut self, identifier: &str) { | ||
| 609 | if let Some(state) = self.queue.get_mut(identifier) { | ||
| 610 | state.in_progress = false; | ||
| 611 | } | ||
| 612 | } | ||
| 613 | |||
| 614 | /// Remove an identifier from the queue entirely | ||
| 615 | pub fn remove_identifier(&mut self, identifier: &str) { | ||
| 616 | if let Some((index, _, _)) = self.queue.shift_remove_full(identifier) { | ||
| 617 | // Adjust round_robin_index if we removed an entry before it | ||
| 618 | if index < self.round_robin_index && self.round_robin_index > 0 { | ||
| 619 | self.round_robin_index -= 1; | ||
| 620 | } | ||
| 621 | // Clamp to valid range | ||
| 622 | if !self.queue.is_empty() { | ||
| 623 | self.round_robin_index = self.round_robin_index % self.queue.len(); | ||
| 624 | } else { | ||
| 625 | self.round_robin_index = 0; | ||
| 626 | } | ||
| 627 | } | ||
| 628 | } | ||
| 629 | } | ||
| 630 | ``` | ||
| 631 | |||
| 632 | ### SyncContext Trait (For Testability) | ||
| 633 | |||
| 634 | Abstract the external dependencies to enable unit testing: | ||
| 635 | |||
| 636 | ```rust | ||
| 637 | /// Abstraction over external dependencies for sync operations. | ||
| 638 | /// | ||
| 639 | /// This trait allows unit testing of sync logic by mocking: | ||
| 640 | /// - Repository data fetching | ||
| 641 | /// - OID existence checks | ||
| 642 | /// - Git fetch operations | ||
| 643 | /// - Event processing | ||
| 644 | #[async_trait] | ||
| 645 | pub trait SyncContext: Send + Sync { | ||
| 646 | /// Get repository data (announcements, clone URLs, etc.) | ||
| 647 | async fn fetch_repository_data(&self, identifier: &str) -> Result<RepositoryData>; | ||
| 648 | |||
| 649 | /// Get all OIDs needed for purgatory events with this identifier | ||
| 650 | fn collect_needed_oids(&self, identifier: &str) -> HashSet<String>; | ||
| 651 | |||
| 652 | /// Check if an OID exists locally | ||
| 653 | fn oid_exists(&self, repo_path: &Path, oid: &str) -> bool; | ||
| 654 | |||
| 655 | /// Fetch OIDs from a remote server | ||
| 656 | async fn fetch_oids(&self, repo_path: &Path, url: &str, oids: &[String]) -> Result<Vec<String>>; | ||
| 657 | |||
| 658 | /// Process newly available git data. | ||
| 659 | /// | ||
| 660 | /// This is a thin wrapper around the unified `process_newly_available_git_data` function. | ||
| 661 | /// Called after each successful OID fetch to check if any purgatory events can now be satisfied. | ||
| 662 | /// | ||
| 663 | /// See [Unified Git Data Sync](unify-git-data-sync.md) for the complete design. | ||
| 664 | async fn process_newly_available_git_data( | ||
| 665 | &self, | ||
| 666 | source_repo_path: &Path, | ||
| 667 | new_oids: &HashSet<String>, | ||
| 668 | ) -> Result<ProcessResult>; | ||
| 669 | |||
| 670 | /// Check if there are still pending events for this identifier | ||
| 671 | fn has_pending_events(&self, identifier: &str) -> bool; | ||
| 672 | |||
| 673 | /// Find the best local repo to fetch into | ||
| 674 | fn find_target_repo(&self, db_repo_data: &RepositoryData) -> Option<PathBuf>; | ||
| 675 | |||
| 676 | /// Our domain (to exclude from clone URLs) | ||
| 677 | fn our_domain(&self) -> Option<&str>; | ||
| 678 | } | ||
| 679 | |||
| 680 | /// Real implementation of SyncContext with all dependencies | ||
| 681 | pub struct RealSyncContext { | ||
| 682 | purgatory: Purgatory, | ||
| 683 | database: SharedDatabase, | ||
| 684 | git_data_path: PathBuf, | ||
| 685 | our_domain: Option<String>, | ||
| 686 | local_relay: Option<nostr_relay_builder::LocalRelay>, | ||
| 687 | } | ||
| 688 | |||
| 689 | impl RealSyncContext { | ||
| 690 | pub fn new( | ||
| 691 | purgatory: Purgatory, | ||
| 692 | database: SharedDatabase, | ||
| 693 | git_data_path: PathBuf, | ||
| 694 | our_domain: Option<String>, | ||
| 695 | local_relay: Option<nostr_relay_builder::LocalRelay>, | ||
| 696 | ) -> Self { | ||
| 697 | Self { | ||
| 698 | purgatory, | ||
| 699 | database, | ||
| 700 | git_data_path, | ||
| 701 | our_domain, | ||
| 702 | local_relay, | ||
| 703 | } | ||
| 704 | } | ||
| 705 | } | ||
| 706 | |||
| 707 | #[async_trait] | ||
| 708 | impl SyncContext for RealSyncContext { | ||
| 709 | // ... other methods ... | ||
| 710 | |||
| 711 | async fn process_newly_available_git_data( | ||
| 712 | &self, | ||
| 713 | source_repo_path: &Path, | ||
| 714 | new_oids: &HashSet<String>, | ||
| 715 | ) -> Result<ProcessResult> { | ||
| 716 | // Call the unified function that handles all post-git-data-available processing | ||
| 717 | // This is the same function called by handle_receive_pack after a git push | ||
| 718 | crate::git::process_newly_available_git_data( | ||
| 719 | source_repo_path, | ||
| 720 | new_oids, | ||
| 721 | &self.database, | ||
| 722 | self.local_relay.as_ref(), | ||
| 723 | &self.purgatory, | ||
| 724 | &self.git_data_path, | ||
| 725 | ).await | ||
| 726 | } | ||
| 727 | |||
| 728 | // ... other methods ... | ||
| 729 | } | ||
| 730 | ``` | ||
| 731 | |||
| 732 | **Note**: The `SyncContext` trait abstracts away the dependencies for testability. The real implementation (`RealSyncContext`) holds references to purgatory, database, etc., and the `process_newly_available_git_data` method delegates to the unified function. This keeps the sync logic functions (`sync_identifier_next_url`, `sync_identifier_from_url`) clean and testable with mocks. | ||
| 733 | |||
| 734 | ## Core Sync Logic | ||
| 735 | |||
| 736 | ### Two-Function Design | ||
| 737 | |||
| 738 | The sync logic is split into two functions that can be called by either the main sync loop or by DomainThrottle: | ||
| 739 | |||
| 740 | 1. **`sync_identifier_next_url`**: Pure selection logic - finds next URL to try | ||
| 741 | 2. **`sync_identifier_from_url`**: Pure fetch logic - fetches from a specific URL | ||
| 742 | |||
| 743 | This separation enables: | ||
| 744 | - Main sync loop to try non-throttled URLs immediately | ||
| 745 | - DomainThrottle to process queued identifiers when capacity frees | ||
| 746 | - Clean testability with mocked SyncContext | ||
| 747 | |||
| 748 | ### Helper: extract_domain | ||
| 749 | |||
| 750 | ```rust | ||
| 751 | /// Extract domain from a URL (e.g., "https://github.com/foo/bar.git" → "github.com") | ||
| 752 | fn extract_domain(url: &str) -> Option<String> { | ||
| 753 | url::Url::parse(url) | ||
| 754 | .ok() | ||
| 755 | .and_then(|u| u.host_str().map(|s| s.to_string())) | ||
| 756 | } | ||
| 757 | ``` | ||
| 758 | |||
| 759 | ### sync_identifier_next_url | ||
| 760 | |||
| 761 | ```rust | ||
| 762 | /// Find the next URL to try for an identifier. | ||
| 763 | /// | ||
| 764 | /// When `domain` is None: returns any non-throttled URL not in tried_urls | ||
| 765 | /// When `domain` is Some: returns a URL from that specific domain not in tried_urls | ||
| 766 | /// | ||
| 767 | /// Returns None if: | ||
| 768 | /// - No pending events for this identifier | ||
| 769 | /// - No OIDs needed (sync complete) | ||
| 770 | /// - No untried URLs available (for the specified domain or all domains) | ||
| 771 | /// - All available domains are throttled (when domain is None) | ||
| 772 | pub async fn sync_identifier_next_url<C: SyncContext>( | ||
| 773 | ctx: &C, | ||
| 774 | identifier: &str, | ||
| 775 | domain: Option<&str>, | ||
| 776 | tried_urls: &HashSet<String>, | ||
| 777 | throttle_manager: &ThrottleManager, | ||
| 778 | ) -> Option<String> { | ||
| 779 | // 1. Check if we still have pending events | ||
| 780 | if !ctx.has_pending_events(identifier) { | ||
| 781 | return None; | ||
| 782 | } | ||
| 783 | |||
| 784 | // 2. Collect needed OIDs | ||
| 785 | let needed_oids = ctx.collect_needed_oids(identifier); | ||
| 786 | if needed_oids.is_empty() { | ||
| 787 | // No OIDs needed - sync is complete | ||
| 788 | return None; | ||
| 789 | } | ||
| 790 | |||
| 791 | // 3. Get repository data | ||
| 792 | let repo_data = match ctx.fetch_repository_data(identifier).await { | ||
| 793 | Ok(data) => data, | ||
| 794 | Err(_) => return None, | ||
| 795 | }; | ||
| 796 | |||
| 797 | // 4. Collect clone URLs, excluding our domain | ||
| 798 | let all_urls: Vec<String> = repo_data | ||
| 799 | .announcements | ||
| 800 | .iter() | ||
| 801 | .flat_map(|a| a.clone_urls.iter().cloned()) | ||
| 802 | .filter(|url| ctx.our_domain().map_or(true, |d| !url.contains(d))) | ||
| 803 | .collect::<HashSet<_>>() | ||
| 804 | .into_iter() | ||
| 805 | .collect(); | ||
| 806 | |||
| 807 | // 5. Group by domain | ||
| 808 | let urls_by_domain: HashMap<String, Vec<String>> = all_urls | ||
| 809 | .iter() | ||
| 810 | .fold(HashMap::new(), |mut acc, url| { | ||
| 811 | if let Some(d) = extract_domain(url) { | ||
| 812 | acc.entry(d).or_default().push(url.clone()); | ||
| 813 | } | ||
| 814 | acc | ||
| 815 | }); | ||
| 816 | |||
| 817 | // 6. Find an available URL | ||
| 818 | match domain { | ||
| 819 | Some(specific_domain) => { | ||
| 820 | // Only look at URLs from this specific domain | ||
| 821 | urls_by_domain | ||
| 822 | .get(specific_domain) | ||
| 823 | .and_then(|urls| { | ||
| 824 | urls.iter() | ||
| 825 | .find(|url| !tried_urls.contains(*url)) | ||
| 826 | .cloned() | ||
| 827 | }) | ||
| 828 | } | ||
| 829 | None => { | ||
| 830 | // Try any non-throttled domain | ||
| 831 | for (d, domain_urls) in &urls_by_domain { | ||
| 832 | if throttle_manager.is_throttled(d) { | ||
| 833 | continue; | ||
| 834 | } | ||
| 835 | if let Some(url) = domain_urls.iter().find(|url| !tried_urls.contains(*url)) { | ||
| 836 | return Some(url.clone()); | ||
| 837 | } | ||
| 838 | } | ||
| 839 | None | ||
| 840 | } | ||
| 841 | } | ||
| 842 | } | ||
| 843 | |||
| 844 | /// Information about throttled domains with untried URLs | ||
| 845 | #[derive(Debug, Clone)] | ||
| 846 | pub struct ThrottledDomainInfo { | ||
| 847 | pub domain: String, | ||
| 848 | pub tried_urls_for_domain: HashSet<String>, | ||
| 849 | } | ||
| 850 | |||
| 851 | /// Get information about throttled domains that have untried URLs. | ||
| 852 | /// | ||
| 853 | /// Called by main sync loop to know which DomainThrottle queues to add the identifier to. | ||
| 854 | pub async fn get_throttled_domains_with_untried_urls<C: SyncContext>( | ||
| 855 | ctx: &C, | ||
| 856 | identifier: &str, | ||
| 857 | tried_urls: &HashSet<String>, | ||
| 858 | throttle_manager: &ThrottleManager, | ||
| 859 | ) -> Vec<ThrottledDomainInfo> { | ||
| 860 | let repo_data = match ctx.fetch_repository_data(identifier).await { | ||
| 861 | Ok(data) => data, | ||
| 862 | Err(_) => return vec![], | ||
| 863 | }; | ||
| 864 | |||
| 865 | let all_urls: Vec<String> = repo_data | ||
| 866 | .announcements | ||
| 867 | .iter() | ||
| 868 | .flat_map(|a| a.clone_urls.iter().cloned()) | ||
| 869 | .filter(|url| ctx.our_domain().map_or(true, |d| !url.contains(d))) | ||
| 870 | .collect::<HashSet<_>>() | ||
| 871 | .into_iter() | ||
| 872 | .collect(); | ||
| 873 | |||
| 874 | let urls_by_domain: HashMap<String, Vec<String>> = all_urls | ||
| 875 | .iter() | ||
| 876 | .fold(HashMap::new(), |mut acc, url| { | ||
| 877 | if let Some(d) = extract_domain(url) { | ||
| 878 | acc.entry(d).or_default().push(url.clone()); | ||
| 879 | } | ||
| 880 | acc | ||
| 881 | }); | ||
| 882 | |||
| 883 | urls_by_domain | ||
| 884 | .into_iter() | ||
| 885 | .filter_map(|(domain, domain_urls)| { | ||
| 886 | if !throttle_manager.is_throttled(&domain) { | ||
| 887 | return None; // Not throttled, skip | ||
| 888 | } | ||
| 889 | |||
| 890 | let untried: Vec<_> = domain_urls | ||
| 891 | .iter() | ||
| 892 | .filter(|url| !tried_urls.contains(*url)) | ||
| 893 | .collect(); | ||
| 894 | |||
| 895 | if untried.is_empty() { | ||
| 896 | return None; // All URLs tried for this domain | ||
| 897 | } | ||
| 898 | |||
| 899 | // Collect tried URLs that belong to this domain | ||
| 900 | let tried_urls_for_domain: HashSet<String> = tried_urls | ||
| 901 | .iter() | ||
| 902 | .filter(|url| extract_domain(url).as_deref() == Some(&domain)) | ||
| 903 | .cloned() | ||
| 904 | .collect(); | ||
| 905 | |||
| 906 | Some(ThrottledDomainInfo { | ||
| 907 | domain, | ||
| 908 | tried_urls_for_domain, | ||
| 909 | }) | ||
| 910 | }) | ||
| 911 | .collect() | ||
| 912 | } | ||
| 913 | ``` | ||
| 914 | |||
| 915 | ### sync_identifier_from_url | ||
| 916 | |||
| 917 | ```rust | ||
| 918 | /// Fetch git data from a specific URL for an identifier. | ||
| 919 | /// | ||
| 920 | /// This function: | ||
| 921 | /// 1. Records the request with the throttle manager | ||
| 922 | /// 2. Performs the actual git fetch | ||
| 923 | /// 3. Processes any events that can now be satisfied | ||
| 924 | /// 4. Records request completion | ||
| 925 | /// | ||
| 926 | /// Returns the number of OIDs successfully fetched. | ||
| 927 | pub async fn sync_identifier_from_url<C: SyncContext>( | ||
| 928 | ctx: &C, | ||
| 929 | identifier: &str, | ||
| 930 | url: &str, | ||
| 931 | throttle_manager: &Arc<ThrottleManager>, | ||
| 932 | ) -> usize { | ||
| 933 | let domain = match extract_domain(url) { | ||
| 934 | Some(d) => d, | ||
| 935 | None => return 0, | ||
| 936 | }; | ||
| 937 | |||
| 938 | // Get repository data for target repo path | ||
| 939 | let repo_data = match ctx.fetch_repository_data(identifier).await { | ||
| 940 | Ok(data) => data, | ||
| 941 | Err(e) => { | ||
| 942 | tracing::debug!(identifier = %identifier, error = %e, "Failed to fetch repo data"); | ||
| 943 | return 0; | ||
| 944 | } | ||
| 945 | }; | ||
| 946 | |||
| 947 | let target_repo = match ctx.find_target_repo(&repo_data) { | ||
| 948 | Some(path) => path, | ||
| 949 | None => { | ||
| 950 | tracing::debug!(identifier = %identifier, "No target repo found"); | ||
| 951 | return 0; | ||
| 952 | } | ||
| 953 | }; | ||
| 954 | |||
| 955 | // Collect needed OIDs | ||
| 956 | let needed_oids: Vec<String> = ctx.collect_needed_oids(identifier).into_iter().collect(); | ||
| 957 | if needed_oids.is_empty() { | ||
| 958 | return 0; | ||
| 959 | } | ||
| 960 | |||
| 961 | // Perform the fetch | ||
| 962 | throttle_manager.start_request(&domain); | ||
| 963 | let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; | ||
| 964 | throttle_manager.complete_request(&domain); | ||
| 965 | |||
| 966 | let oids_fetched = match fetch_result { | ||
| 967 | Ok(fetched) => { | ||
| 968 | tracing::debug!( | ||
| 969 | identifier = %identifier, | ||
| 970 | url = %url, | ||
| 971 | oids_fetched = fetched.len(), | ||
| 972 | "Fetch succeeded" | ||
| 973 | ); | ||
| 974 | fetched.len() | ||
| 975 | } | ||
| 976 | Err(e) => { | ||
| 977 | tracing::debug!( | ||
| 978 | identifier = %identifier, | ||
| 979 | url = %url, | ||
| 980 | error = %e, | ||
| 981 | "Fetch failed" | ||
| 982 | ); | ||
| 983 | 0 | ||
| 984 | } | ||
| 985 | }; | ||
| 986 | |||
| 987 | // Try to process any events that can now be satisfied | ||
| 988 | if oids_fetched > 0 { | ||
| 989 | let new_oids: HashSet<String> = needed_oids.iter().cloned().collect(); | ||
| 990 | if let Err(e) = ctx.process_newly_available_git_data(&target_repo, &new_oids).await { | ||
| 991 | tracing::warn!( | ||
| 992 | identifier = %identifier, | ||
| 993 | error = %e, | ||
| 994 | "Failed to process newly available git data" | ||
| 995 | ); | ||
| 996 | } | ||
| 997 | } | ||
| 998 | |||
| 999 | oids_fetched | ||
| 1000 | } | ||
| 1001 | ``` | ||
| 1002 | |||
| 1003 | ### process_newly_available_git_data (Unified Function) | ||
| 1004 | |||
| 1005 | This is the core function that handles the "release from purgatory" logic. It's called after each successful fetch to check if any purgatory events can now be satisfied with the available git data. | ||
| 1006 | |||
| 1007 | **Key Design Decision**: This is a **unified function** shared with the git push handler. Both `handle_receive_pack` (after git push) and `sync_identifier_from_url` (after purgatory sync fetch) call the same function. See [Unified Git Data Sync](unify-git-data-sync.md) for the complete implementation. | ||
| 1008 | |||
| 1009 | **Why unify?** | ||
| 1010 | |||
| 1011 | The post-git-data-available processing is identical regardless of how data arrived: | ||
| 1012 | |||
| 1013 | | Step | After git push | After purgatory fetch | | ||
| 1014 | |------|---------------|----------------------| | ||
| 1015 | | Discover satisfiable events | ✅ Same | ✅ Same | | ||
| 1016 | | Sync OIDs to owner repos | ✅ Same | ✅ Same | | ||
| 1017 | | Align refs (+ set HEAD) | ✅ Same | ✅ Same | | ||
| 1018 | | Save events to database | ✅ Same | ✅ Same | | ||
| 1019 | | Notify WebSocket | ✅ Same | ✅ Same | | ||
| 1020 | | Remove from purgatory | ✅ Same | ✅ Same | | ||
| 1021 | |||
| 1022 | ```rust | ||
| 1023 | /// Result of processing newly available git data | ||
| 1024 | #[derive(Debug, Default)] | ||
| 1025 | pub struct ProcessResult { | ||
| 1026 | /// Number of state events released from purgatory | ||
| 1027 | pub states_released: usize, | ||
| 1028 | /// Number of PR events released from purgatory | ||
| 1029 | pub prs_released: usize, | ||
| 1030 | /// Number of repositories synced (OIDs copied + refs aligned) | ||
| 1031 | pub repos_synced: usize, | ||
| 1032 | /// Number of refs created/updated/deleted | ||
| 1033 | pub refs_created: usize, | ||
| 1034 | pub refs_updated: usize, | ||
| 1035 | pub refs_deleted: usize, | ||
| 1036 | /// Errors encountered (non-fatal) | ||
| 1037 | pub errors: Vec<String>, | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | /// Unified processing of newly available git data. | ||
| 1041 | /// | ||
| 1042 | /// Called whenever git data becomes available, whether from: | ||
| 1043 | /// - A successful `git push` (handle_receive_pack) | ||
| 1044 | /// - Purgatory sync fetching OIDs from remote servers | ||
| 1045 | /// | ||
| 1046 | /// See unify-git-data-sync.md for complete implementation details. | ||
| 1047 | pub async fn process_newly_available_git_data( | ||
| 1048 | source_repo_path: &Path, | ||
| 1049 | new_oids: &HashSet<String>, | ||
| 1050 | database: &SharedDatabase, | ||
| 1051 | local_relay: Option<&nostr_relay_builder::LocalRelay>, | ||
| 1052 | purgatory: &Purgatory, | ||
| 1053 | git_data_path: &Path, | ||
| 1054 | ) -> Result<ProcessResult>; | ||
| 1055 | ``` | ||
| 1056 | |||
| 1057 | **Key properties of the unified function:** | ||
| 1058 | |||
| 1059 | 1. **Early release**: If we fetch from `server1.com` and get all OIDs for state event A, we immediately release A even if state event B still needs OIDs from `server2.com` | ||
| 1060 | |||
| 1061 | 2. **Idempotent**: The function can be called multiple times safely. It only processes events that are actually satisfiable. | ||
| 1062 | |||
| 1063 | 3. **Atomic per-event**: Each event is processed independently. If saving one event fails, others can still succeed. | ||
| 1064 | |||
| 1065 | 4. **Authorization at release time**: We check authorization when releasing, not when adding to purgatory. This handles the case where maintainer sets change while an event is in purgatory. | ||
| 1066 | |||
| 1067 | 5. **Handles all event types**: Both state events (kind 30618) and PR events (kind 1617/1618) are processed uniformly. | ||
| 1068 | |||
| 1069 | ### The Sync Identifier Loop (Main Sync) | ||
| 1070 | |||
| 1071 | ```rust | ||
| 1072 | /// Sync git data for an identifier. | ||
| 1073 | /// | ||
| 1074 | /// This is called by the main sync loop. It: | ||
| 1075 | /// 1. Tries all non-throttled URLs | ||
| 1076 | /// 2. Enqueues with throttled domains for later processing | ||
| 1077 | /// 3. Returns without waiting for throttled domains | ||
| 1078 | /// | ||
| 1079 | /// Returns true if sync completed (no pending events or no OIDs needed), | ||
| 1080 | /// false if events remain (will be retried after backoff). | ||
| 1081 | pub async fn sync_identifier<C: SyncContext>( | ||
| 1082 | ctx: &C, | ||
| 1083 | identifier: &str, | ||
| 1084 | throttle_manager: &Arc<ThrottleManager>, | ||
| 1085 | ) -> bool { | ||
| 1086 | let mut tried_urls: HashSet<String> = HashSet::new(); | ||
| 1087 | |||
| 1088 | // Try all non-throttled URLs | ||
| 1089 | loop { | ||
| 1090 | match sync_identifier_next_url( | ||
| 1091 | ctx, | ||
| 1092 | identifier, | ||
| 1093 | None, // Any domain | ||
| 1094 | &tried_urls, | ||
| 1095 | throttle_manager, | ||
| 1096 | ).await { | ||
| 1097 | Some(url) => { | ||
| 1098 | // Found a non-throttled URL to try | ||
| 1099 | sync_identifier_from_url(ctx, identifier, &url, throttle_manager).await; | ||
| 1100 | tried_urls.insert(url); | ||
| 1101 | |||
| 1102 | // Check if sync is now complete | ||
| 1103 | if !ctx.has_pending_events(identifier) { | ||
| 1104 | tracing::info!(identifier = %identifier, "Sync complete - no pending events"); | ||
| 1105 | return true; | ||
| 1106 | } | ||
| 1107 | |||
| 1108 | let needed_oids = ctx.collect_needed_oids(identifier); | ||
| 1109 | if needed_oids.is_empty() { | ||
| 1110 | tracing::info!(identifier = %identifier, "Sync complete - all OIDs available"); | ||
| 1111 | return true; | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | // Continue trying more URLs | ||
| 1115 | } | ||
| 1116 | None => { | ||
| 1117 | // No more non-throttled URLs available | ||
| 1118 | break; | ||
| 1119 | } | ||
| 1120 | } | ||
| 1121 | } | ||
| 1122 | |||
| 1123 | // Check if we're done (no pending events or no needed OIDs) | ||
| 1124 | if !ctx.has_pending_events(identifier) { | ||
| 1125 | return true; | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | let needed_oids = ctx.collect_needed_oids(identifier); | ||
| 1129 | if needed_oids.is_empty() { | ||
| 1130 | return true; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | // Enqueue with any throttled domains that have untried URLs | ||
| 1134 | let throttled_domains = get_throttled_domains_with_untried_urls( | ||
| 1135 | ctx, | ||
| 1136 | identifier, | ||
| 1137 | &tried_urls, | ||
| 1138 | throttle_manager, | ||
| 1139 | ).await; | ||
| 1140 | |||
| 1141 | for info in throttled_domains { | ||
| 1142 | tracing::debug!( | ||
| 1143 | identifier = %identifier, | ||
| 1144 | domain = %info.domain, | ||
| 1145 | "Enqueueing with throttled domain" | ||
| 1146 | ); | ||
| 1147 | throttle_manager.enqueue_identifier( | ||
| 1148 | &info.domain, | ||
| 1149 | identifier.to_string(), | ||
| 1150 | info.tried_urls_for_domain, | ||
| 1151 | ); | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | // Return false - events remain, will retry after backoff | ||
| 1155 | // (throttled domains will process independently) | ||
| 1156 | false | ||
| 1157 | } | ||
| 1158 | ``` | ||
| 1159 | |||
| 1160 | ### The Main Sync Loop | ||
| 1161 | |||
| 1162 | ```rust | ||
| 1163 | impl Purgatory { | ||
| 1164 | pub fn start_sync_loop( | ||
| 1165 | self: Arc<Self>, | ||
| 1166 | database: SharedDatabase, | ||
| 1167 | our_domain: Option<String>, | ||
| 1168 | local_relay: Option<nostr_relay_builder::LocalRelay>, | ||
| 1169 | throttle_manager: Arc<ThrottleManager>, | ||
| 1170 | ) -> tokio::task::JoinHandle<()> { | ||
| 1171 | tokio::spawn(async move { | ||
| 1172 | let mut interval = tokio::time::interval(Duration::from_secs(1)); | ||
| 1173 | |||
| 1174 | loop { | ||
| 1175 | interval.tick().await; | ||
| 1176 | |||
| 1177 | // Find all ready identifiers | ||
| 1178 | let ready: Vec<String> = self.sync_queue | ||
| 1179 | .iter() | ||
| 1180 | .filter(|e| e.value().is_ready()) | ||
| 1181 | .map(|e| e.key().clone()) | ||
| 1182 | .collect(); | ||
| 1183 | |||
| 1184 | for identifier in ready { | ||
| 1185 | // Check if events still exist | ||
| 1186 | if !self.has_pending_events(&identifier) { | ||
| 1187 | self.sync_queue.remove(&identifier); | ||
| 1188 | continue; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | // Mark in progress | ||
| 1192 | if let Some(mut entry) = self.sync_queue.get_mut(&identifier) { | ||
| 1193 | if entry.in_progress { | ||
| 1194 | continue; | ||
| 1195 | } | ||
| 1196 | entry.in_progress = true; | ||
| 1197 | } else { | ||
| 1198 | continue; | ||
| 1199 | } | ||
| 1200 | |||
| 1201 | // Spawn sync task | ||
| 1202 | let purgatory = self.clone(); | ||
| 1203 | let db = database.clone(); | ||
| 1204 | let domain = our_domain.clone(); | ||
| 1205 | let relay = local_relay.clone(); | ||
| 1206 | let throttle_manager = throttle_manager.clone(); | ||
| 1207 | let id = identifier.clone(); | ||
| 1208 | |||
| 1209 | tokio::spawn(async move { | ||
| 1210 | // Create the real SyncContext implementation | ||
| 1211 | let ctx = RealSyncContext::new( | ||
| 1212 | purgatory.clone(), | ||
| 1213 | db, | ||
| 1214 | domain, | ||
| 1215 | relay, | ||
| 1216 | ); | ||
| 1217 | |||
| 1218 | let complete = sync_identifier(&ctx, &id, &throttle_manager).await; | ||
| 1219 | |||
| 1220 | if complete || !purgatory.has_pending_events(&id) { | ||
| 1221 | purgatory.sync_queue.remove(&id); | ||
| 1222 | tracing::info!(identifier = %id, "Removed from sync queue"); | ||
| 1223 | } else { | ||
| 1224 | // Apply backoff - will retry later | ||
| 1225 | // (throttled domains are being processed independently) | ||
| 1226 | if let Some(mut entry) = purgatory.sync_queue.get_mut(&id) { | ||
| 1227 | entry.on_sync_complete(); | ||
| 1228 | } | ||
| 1229 | } | ||
| 1230 | }); | ||
| 1231 | } | ||
| 1232 | } | ||
| 1233 | }) | ||
| 1234 | } | ||
| 1235 | } | ||
| 1236 | ``` | ||
| 1237 | |||
| 1238 | ## Testing Strategy | ||
| 1239 | |||
| 1240 | Tests are created **only** as part of each implementation phase. See [Implementation Phases](#implementation-phases) for the complete test plan. | ||
| 1241 | |||
| 1242 | ### Design Principles | ||
| 1243 | |||
| 1244 | 1. **Tests accompany code**: Each phase specifies exactly which tests to create | ||
| 1245 | 2. **Unit tests for mechanics**: Test backoff, throttle, retry logic in isolation using mocks | ||
| 1246 | 3. **Integration tests for outcomes**: Verify events sync correctly end-to-end | ||
| 1247 | 4. **No speculative tests**: Don't create tests for code that doesn't exist yet | ||
| 1248 | |||
| 1249 | ### MockSyncContext | ||
| 1250 | |||
| 1251 | Phases 4-6 use `MockSyncContext` to test sync logic without I/O: | ||
| 1252 | |||
| 1253 | ```rust | ||
| 1254 | /// Mock context for testing sync logic | ||
| 1255 | #[cfg(test)] | ||
| 1256 | pub struct MockSyncContext { | ||
| 1257 | /// Repository data to return | ||
| 1258 | repo_data: RepositoryData, | ||
| 1259 | /// OIDs still needed (decremented when "fetched") | ||
| 1260 | needed_oids: RefCell<HashSet<String>>, | ||
| 1261 | /// Which OIDs each URL can provide | ||
| 1262 | url_provides_oids: HashMap<String, HashSet<String>>, | ||
| 1263 | /// Track fetch attempts for assertions | ||
| 1264 | fetch_log: RefCell<Vec<String>>, | ||
| 1265 | /// Whether there are pending events | ||
| 1266 | has_pending: RefCell<bool>, | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | impl MockSyncContext { | ||
| 1270 | pub fn new() -> Self; | ||
| 1271 | pub fn with_urls(self, urls: &[&str]) -> Self; | ||
| 1272 | pub fn with_needed_oids(self, oids: &[&str]) -> Self; | ||
| 1273 | pub fn url_provides(self, url: &str, oids: &[&str]) -> Self; | ||
| 1274 | } | ||
| 1275 | ``` | ||
| 1276 | |||
| 1277 | ### Test Locations | ||
| 1278 | |||
| 1279 | | Test Type | Location | Created In | | ||
| 1280 | |-----------|----------|------------| | ||
| 1281 | | SyncQueueEntry | `src/purgatory/sync/queue.rs` | Phase 1 | | ||
| 1282 | | DomainThrottle | `src/purgatory/sync/throttle.rs` | Phase 2 | | ||
| 1283 | | ThrottleManager | `src/purgatory/sync/throttle.rs` | Phase 3 | | ||
| 1284 | | Core sync functions | `src/purgatory/sync/functions.rs` | Phase 5-6 | | ||
| 1285 | | Queue integration | `src/purgatory/mod.rs` | Phase 7 | | ||
| 1286 | | Unified function helpers | `src/git/sync.rs` | Phase 9 | | ||
| 1287 | | Integration tests | `tests/purgatory_sync.rs` | Phase 12 | | ||
| 1288 | |||
| 1289 | ## Implementation Phases | ||
| 1290 | |||
| 1291 | Each phase has clear deliverables, unit tests, and success criteria. Unit tests are created **only** for the code built in that phase. | ||
| 1292 | |||
| 1293 | --- | ||
| 1294 | |||
| 1295 | ### Phase 1: SyncQueueEntry with Backoff | ||
| 1296 | |||
| 1297 | **Goal**: Implement the sync queue entry struct with backoff calculation. | ||
| 1298 | |||
| 1299 | **Files**: | ||
| 1300 | - `src/purgatory/sync/mod.rs` (new - module declaration) | ||
| 1301 | - `src/purgatory/sync/queue.rs` (new) | ||
| 1302 | |||
| 1303 | **Note**: This creates a new `sync` submodule under `src/purgatory/`. The current purgatory structure is flat (`mod.rs`, `helpers.rs`, `types.rs`). Add `pub mod sync;` to `src/purgatory/mod.rs`. | ||
| 1304 | |||
| 1305 | **Deliverables**: | ||
| 1306 | ```rust | ||
| 1307 | pub struct SyncQueueEntry { | ||
| 1308 | pub next_attempt: Instant, | ||
| 1309 | pub attempt_count: u32, | ||
| 1310 | pub in_progress: bool, | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | impl SyncQueueEntry { | ||
| 1314 | pub fn new(delay: Duration) -> Self; | ||
| 1315 | pub fn backoff(&self) -> Duration; | ||
| 1316 | pub fn is_ready(&self) -> bool; | ||
| 1317 | pub fn on_new_event(&mut self, delay: Duration); | ||
| 1318 | pub fn on_sync_complete(&mut self); | ||
| 1319 | } | ||
| 1320 | ``` | ||
| 1321 | |||
| 1322 | **Unit Tests** (2 tests): | ||
| 1323 | ```rust | ||
| 1324 | #[cfg(test)] | ||
| 1325 | mod tests { | ||
| 1326 | #[test] | ||
| 1327 | fn backoff_doubles_up_to_cap() { | ||
| 1328 | // 20s → 40s → 80s → 120s → 120s (capped) | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | #[test] | ||
| 1332 | fn new_event_resets_attempt_count() { | ||
| 1333 | // on_new_event() resets attempt_count to 0 | ||
| 1334 | } | ||
| 1335 | } | ||
| 1336 | ``` | ||
| 1337 | |||
| 1338 | **Success Criteria**: | ||
| 1339 | - [ ] `SyncQueueEntry::new()` creates entry with given delay | ||
| 1340 | - [ ] `backoff()` returns 20s, 40s, 80s, 120s, 120s for attempts 1-5 | ||
| 1341 | - [ ] `on_new_event()` resets `attempt_count` to 0 | ||
| 1342 | - [ ] `on_sync_complete()` increments `attempt_count` and updates `next_attempt` | ||
| 1343 | - [ ] Both unit tests pass | ||
| 1344 | |||
| 1345 | --- | ||
| 1346 | |||
| 1347 | ### Phase 2: DomainThrottle with Rate Limiting and Round-Robin | ||
| 1348 | |||
| 1349 | **Goal**: Implement per-domain throttling with concurrent/rate limits and fair queue processing. | ||
| 1350 | |||
| 1351 | **Files**: | ||
| 1352 | - `src/purgatory/sync/throttle.rs` (new) | ||
| 1353 | |||
| 1354 | **Deliverables**: | ||
| 1355 | ```rust | ||
| 1356 | pub struct DomainThrottle { | ||
| 1357 | domain: String, | ||
| 1358 | in_flight: u32, | ||
| 1359 | request_times: VecDeque<Instant>, | ||
| 1360 | queue: IndexMap<String, IdentifierQueueState>, | ||
| 1361 | round_robin_index: usize, | ||
| 1362 | max_concurrent: u32, | ||
| 1363 | max_per_minute: u32, | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | impl DomainThrottle { | ||
| 1367 | pub fn new(domain: String, max_concurrent: u32, max_per_minute: u32) -> Self; | ||
| 1368 | pub fn has_capacity(&self) -> bool; | ||
| 1369 | pub fn start_request(&mut self); | ||
| 1370 | pub fn complete_request(&mut self); | ||
| 1371 | pub fn enqueue_identifier(&mut self, identifier: String, tried_urls: HashSet<String>); | ||
| 1372 | pub fn next_ready_identifier(&mut self) -> Option<String>; | ||
| 1373 | pub fn mark_identifier_not_in_progress(&mut self, identifier: &str); | ||
| 1374 | pub fn remove_identifier(&mut self, identifier: &str); | ||
| 1375 | } | ||
| 1376 | ``` | ||
| 1377 | |||
| 1378 | **Unit Tests** (4 tests): | ||
| 1379 | ```rust | ||
| 1380 | #[cfg(test)] | ||
| 1381 | mod tests { | ||
| 1382 | #[test] | ||
| 1383 | fn concurrent_limit_blocks_when_saturated() { | ||
| 1384 | // has_capacity() returns false when in_flight >= max_concurrent | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | #[test] | ||
| 1388 | fn rate_limit_blocks_when_window_full() { | ||
| 1389 | // has_capacity() returns false when requests in last 60s >= max_per_minute | ||
| 1390 | // Use deterministic time (pass Instant or mock clock) | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | #[test] | ||
| 1394 | fn round_robin_processes_identifiers_fairly() { | ||
| 1395 | // Enqueue A, B, C → next_ready returns A, B, C, A, B, C... | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | #[test] | ||
| 1399 | fn skips_in_progress_identifiers() { | ||
| 1400 | // next_ready skips identifiers where in_progress=true | ||
| 1401 | } | ||
| 1402 | } | ||
| 1403 | ``` | ||
| 1404 | |||
| 1405 | **Success Criteria**: | ||
| 1406 | - [ ] Concurrent limit enforced (blocks at max_concurrent) | ||
| 1407 | - [ ] Rate limit enforced (blocks at max_per_minute within 60s window) | ||
| 1408 | - [ ] Round-robin ordering maintained across calls | ||
| 1409 | - [ ] In-progress identifiers skipped | ||
| 1410 | - [ ] All 4 unit tests pass | ||
| 1411 | |||
| 1412 | --- | ||
| 1413 | |||
| 1414 | ### Phase 3: ThrottleManager (Rate Limiting Only) | ||
| 1415 | |||
| 1416 | **Goal**: Implement the manager that owns all domain throttles and provides rate limiting. Queue processing methods (`set_context`, `process_queued_identifier`) are added in Phase 6 after `SyncContext` exists. | ||
| 1417 | |||
| 1418 | **Files**: | ||
| 1419 | - `src/purgatory/sync/throttle.rs` (extend) | ||
| 1420 | |||
| 1421 | **Deliverables**: | ||
| 1422 | ```rust | ||
| 1423 | pub struct ThrottleManager { | ||
| 1424 | throttles: DashMap<String, DomainThrottle>, | ||
| 1425 | max_concurrent_per_domain: u32, | ||
| 1426 | max_per_minute_per_domain: u32, | ||
| 1427 | // Note: ctx: OnceLock<Arc<dyn SyncContext>> added in Phase 6 | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | impl ThrottleManager { | ||
| 1431 | pub fn new(max_concurrent: u32, max_per_minute: u32) -> Self; | ||
| 1432 | pub fn is_throttled(&self, domain: &str) -> bool; | ||
| 1433 | pub fn start_request(&self, domain: &str); | ||
| 1434 | pub fn complete_request(&self, domain: &str); // Trigger logic added in Phase 6 | ||
| 1435 | pub fn enqueue_identifier(&self, domain: &str, identifier: String, tried_urls: HashSet<String>); // Trigger logic added in Phase 6 | ||
| 1436 | } | ||
| 1437 | ``` | ||
| 1438 | |||
| 1439 | **Unit Tests** (1 test): | ||
| 1440 | ```rust | ||
| 1441 | #[cfg(test)] | ||
| 1442 | mod tests { | ||
| 1443 | #[test] | ||
| 1444 | fn is_throttled_reflects_domain_capacity() { | ||
| 1445 | // is_throttled returns true when domain has no capacity | ||
| 1446 | } | ||
| 1447 | } | ||
| 1448 | ``` | ||
| 1449 | |||
| 1450 | **Success Criteria**: | ||
| 1451 | - [ ] `is_throttled()` correctly reflects domain capacity | ||
| 1452 | - [ ] `start_request()`/`complete_request()` delegate to correct domain | ||
| 1453 | - [ ] `enqueue_identifier()` creates domain throttle if needed | ||
| 1454 | - [ ] Unit test passes | ||
| 1455 | |||
| 1456 | **Note**: The `complete_request()` and `enqueue_identifier()` methods in this phase only update state. The trigger-based processing (spawning tasks when capacity frees) is added in Phase 6 after `SyncContext` is available. | ||
| 1457 | |||
| 1458 | --- | ||
| 1459 | |||
| 1460 | ### Phase 4: SyncContext Trait and MockSyncContext | ||
| 1461 | |||
| 1462 | **Goal**: Define the abstraction for sync operations and create the test mock. | ||
| 1463 | |||
| 1464 | **Files**: | ||
| 1465 | - `src/purgatory/sync/context.rs` (new) | ||
| 1466 | |||
| 1467 | **Deliverables**: | ||
| 1468 | ```rust | ||
| 1469 | #[async_trait] | ||
| 1470 | pub trait SyncContext: Send + Sync { | ||
| 1471 | async fn fetch_repository_data(&self, identifier: &str) -> Result<RepositoryData>; | ||
| 1472 | fn collect_needed_oids(&self, identifier: &str) -> HashSet<String>; | ||
| 1473 | async fn fetch_oids(&self, repo_path: &Path, url: &str, oids: &[String]) -> Result<Vec<String>>; | ||
| 1474 | async fn process_newly_available_git_data( | ||
| 1475 | &self, | ||
| 1476 | source_repo_path: &Path, | ||
| 1477 | new_oids: &HashSet<String>, | ||
| 1478 | ) -> Result<ProcessResult>; | ||
| 1479 | fn has_pending_events(&self, identifier: &str) -> bool; | ||
| 1480 | fn find_target_repo(&self, data: &RepositoryData) -> Option<PathBuf>; | ||
| 1481 | fn our_domain(&self) -> Option<&str>; | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | // Test support | ||
| 1485 | #[cfg(test)] | ||
| 1486 | pub struct MockSyncContext { ... } | ||
| 1487 | ``` | ||
| 1488 | |||
| 1489 | **Unit Tests** (0 tests): | ||
| 1490 | - This phase creates infrastructure only; tests come in Phase 5 | ||
| 1491 | |||
| 1492 | **Success Criteria**: | ||
| 1493 | - [ ] `SyncContext` trait compiles with all required methods | ||
| 1494 | - [ ] `MockSyncContext` implements `SyncContext` | ||
| 1495 | - [ ] Mock supports builder pattern for test setup | ||
| 1496 | |||
| 1497 | --- | ||
| 1498 | |||
| 1499 | ### Phase 5: Core Sync Functions | ||
| 1500 | |||
| 1501 | **Goal**: Implement `sync_identifier_next_url` and `sync_identifier_from_url`. | ||
| 1502 | |||
| 1503 | **Files**: | ||
| 1504 | - `src/purgatory/sync/functions.rs` (new) | ||
| 1505 | |||
| 1506 | **Deliverables**: | ||
| 1507 | ```rust | ||
| 1508 | pub async fn sync_identifier_next_url<C: SyncContext>( | ||
| 1509 | ctx: &C, | ||
| 1510 | identifier: &str, | ||
| 1511 | domain: Option<&str>, | ||
| 1512 | tried_urls: &HashSet<String>, | ||
| 1513 | throttle_manager: &ThrottleManager, | ||
| 1514 | ) -> Option<String>; | ||
| 1515 | |||
| 1516 | pub async fn sync_identifier_from_url<C: SyncContext>( | ||
| 1517 | ctx: &C, | ||
| 1518 | identifier: &str, | ||
| 1519 | url: &str, | ||
| 1520 | throttle_manager: &Arc<ThrottleManager>, | ||
| 1521 | ) -> usize; | ||
| 1522 | ``` | ||
| 1523 | |||
| 1524 | **Unit Tests** (3 tests): | ||
| 1525 | ```rust | ||
| 1526 | #[cfg(test)] | ||
| 1527 | mod tests { | ||
| 1528 | #[tokio::test] | ||
| 1529 | async fn next_url_skips_throttled_domains() { | ||
| 1530 | // When domain is throttled, next_url returns URL from different domain | ||
| 1531 | } | ||
| 1532 | |||
| 1533 | #[tokio::test] | ||
| 1534 | async fn next_url_skips_tried_urls() { | ||
| 1535 | // URLs in tried_urls set are not returned | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | #[tokio::test] | ||
| 1539 | async fn from_url_fetches_and_processes_on_success() { | ||
| 1540 | // Successful fetch triggers process_newly_available_git_data | ||
| 1541 | } | ||
| 1542 | } | ||
| 1543 | ``` | ||
| 1544 | |||
| 1545 | **Success Criteria**: | ||
| 1546 | - [ ] `sync_identifier_next_url` returns non-throttled, untried URL | ||
| 1547 | - [ ] `sync_identifier_next_url` returns `None` when all URLs tried or throttled | ||
| 1548 | - [ ] `sync_identifier_from_url` calls `fetch_oids` and `process_newly_available_git_data` | ||
| 1549 | - [ ] All 3 unit tests pass | ||
| 1550 | |||
| 1551 | --- | ||
| 1552 | |||
| 1553 | ### Phase 6: sync_identifier Orchestration + ThrottleManager Queue Processing | ||
| 1554 | |||
| 1555 | **Goal**: Implement the main sync loop for a single identifier AND add trigger-based queue processing to ThrottleManager. | ||
| 1556 | |||
| 1557 | **Files**: | ||
| 1558 | - `src/purgatory/sync/functions.rs` (extend) | ||
| 1559 | - `src/purgatory/sync/throttle.rs` (extend - add queue processing triggers) | ||
| 1560 | |||
| 1561 | **Deliverables**: | ||
| 1562 | ```rust | ||
| 1563 | // In functions.rs | ||
| 1564 | pub async fn sync_identifier<C: SyncContext>( | ||
| 1565 | ctx: &C, | ||
| 1566 | identifier: &str, | ||
| 1567 | throttle_manager: &Arc<ThrottleManager>, | ||
| 1568 | ) -> bool; // true if complete, false if pending | ||
| 1569 | |||
| 1570 | // In throttle.rs - add to ThrottleManager | ||
| 1571 | impl ThrottleManager { | ||
| 1572 | /// Set the sync context (called once at startup) | ||
| 1573 | pub fn set_context(&self, ctx: Arc<dyn SyncContext>); | ||
| 1574 | |||
| 1575 | /// Try to process the next queued identifier for a domain (internal) | ||
| 1576 | fn try_process_next(self: &Arc<Self>, domain: &str); | ||
| 1577 | |||
| 1578 | /// Process a single identifier from a domain's queue (internal) | ||
| 1579 | async fn process_queued_identifier(self: &Arc<Self>, domain: &str, identifier: &str); | ||
| 1580 | } | ||
| 1581 | |||
| 1582 | // Update complete_request and enqueue_identifier to trigger processing | ||
| 1583 | ``` | ||
| 1584 | |||
| 1585 | **Unit Tests** (2 tests): | ||
| 1586 | ```rust | ||
| 1587 | #[cfg(test)] | ||
| 1588 | mod tests { | ||
| 1589 | #[tokio::test] | ||
| 1590 | async fn tries_multiple_urls_until_complete() { | ||
| 1591 | // Tries URL1 (partial), URL2 (partial), URL3 (complete) → returns true | ||
| 1592 | } | ||
| 1593 | |||
| 1594 | #[tokio::test] | ||
| 1595 | async fn enqueues_throttled_domains_when_incomplete() { | ||
| 1596 | // When URLs remain but are throttled, enqueues and returns false | ||
| 1597 | } | ||
| 1598 | } | ||
| 1599 | ``` | ||
| 1600 | |||
| 1601 | **Success Criteria**: | ||
| 1602 | - [ ] Loops through available URLs until sync complete or all tried | ||
| 1603 | - [ ] Enqueues with throttled domains when OIDs still needed | ||
| 1604 | - [ ] Returns `true` when all OIDs fetched, `false` otherwise | ||
| 1605 | - [ ] `complete_request()` triggers `try_process_next()` when capacity available | ||
| 1606 | - [ ] `enqueue_identifier()` triggers `try_process_next()` when capacity available | ||
| 1607 | - [ ] Both unit tests pass | ||
| 1608 | |||
| 1609 | --- | ||
| 1610 | |||
| 1611 | ### Phase 7: Purgatory Sync Queue Integration | ||
| 1612 | |||
| 1613 | **Goal**: Add sync queue to Purgatory and implement `enqueue_sync`. | ||
| 1614 | |||
| 1615 | **Files**: | ||
| 1616 | - `src/purgatory/mod.rs` (extend) | ||
| 1617 | |||
| 1618 | **Deliverables**: | ||
| 1619 | ```rust | ||
| 1620 | impl Purgatory { | ||
| 1621 | // New field: sync_queue: Arc<DashMap<String, SyncQueueEntry>> | ||
| 1622 | |||
| 1623 | pub fn enqueue_sync(&self, identifier: &str, delay: Duration); | ||
| 1624 | pub fn has_pending_events(&self, identifier: &str) -> bool; | ||
| 1625 | } | ||
| 1626 | ``` | ||
| 1627 | |||
| 1628 | **Unit Tests** (1 test): | ||
| 1629 | ```rust | ||
| 1630 | #[cfg(test)] | ||
| 1631 | mod tests { | ||
| 1632 | #[test] | ||
| 1633 | fn enqueue_sync_debounces_rapid_calls() { | ||
| 1634 | // Multiple enqueue_sync calls within delay window result in single entry | ||
| 1635 | } | ||
| 1636 | } | ||
| 1637 | ``` | ||
| 1638 | |||
| 1639 | **Success Criteria**: | ||
| 1640 | - [ ] `enqueue_sync` adds/updates entry in sync_queue | ||
| 1641 | - [ ] Rapid calls debounce (don't create multiple entries) | ||
| 1642 | - [ ] `has_pending_events` checks both state_events and pr_events | ||
| 1643 | - [ ] Unit test passes | ||
| 1644 | |||
| 1645 | --- | ||
| 1646 | |||
| 1647 | ### Phase 8: Main Sync Loop | ||
| 1648 | |||
| 1649 | **Goal**: Implement the background sync loop that processes ready identifiers. | ||
| 1650 | |||
| 1651 | **Files**: | ||
| 1652 | - `src/purgatory/sync/loop.rs` (new) | ||
| 1653 | |||
| 1654 | **Deliverables**: | ||
| 1655 | ```rust | ||
| 1656 | impl Purgatory { | ||
| 1657 | pub fn start_sync_loop( | ||
| 1658 | self: Arc<Self>, | ||
| 1659 | ctx: Arc<dyn SyncContext>, | ||
| 1660 | throttle_manager: Arc<ThrottleManager>, | ||
| 1661 | ) -> JoinHandle<()>; | ||
| 1662 | } | ||
| 1663 | ``` | ||
| 1664 | |||
| 1665 | **Note**: The sync loop interval is hardcoded to 1 second. No configuration option needed. | ||
| 1666 | |||
| 1667 | **Unit Tests** (0 tests): | ||
| 1668 | - The sync loop is tested via integration tests; unit testing async loops is fragile | ||
| 1669 | |||
| 1670 | **Success Criteria**: | ||
| 1671 | - [ ] Loop runs every 1 second (hardcoded) | ||
| 1672 | - [ ] Finds ready identifiers and spawns sync tasks | ||
| 1673 | - [ ] Applies backoff on incomplete syncs | ||
| 1674 | - [ ] Removes completed identifiers from queue | ||
| 1675 | |||
| 1676 | --- | ||
| 1677 | |||
| 1678 | ### Phase 9: Unified `process_newly_available_git_data` Function | ||
| 1679 | |||
| 1680 | **Goal**: Implement the unified function that handles all post-git-data-available processing. | ||
| 1681 | |||
| 1682 | This is the core function described in [Unified Git Data Sync](unify-git-data-sync.md). It will be called by both: | ||
| 1683 | - `handle_receive_pack` after a successful git push | ||
| 1684 | - `RealSyncContext::process_newly_available_git_data` after purgatory sync fetches OIDs | ||
| 1685 | |||
| 1686 | **Files**: | ||
| 1687 | - `src/git/sync.rs` (extend - add `ProcessResult` alongside existing types) | ||
| 1688 | |||
| 1689 | **Note**: `src/git/sync.rs` already exists with `sync_to_owner_repos`, `align_repository_with_state`, etc. This phase extends it with the unified processing function. | ||
| 1690 | |||
| 1691 | **Deliverables**: | ||
| 1692 | ```rust | ||
| 1693 | /// Result of processing newly available git data | ||
| 1694 | #[derive(Debug, Default)] | ||
| 1695 | pub struct ProcessResult { | ||
| 1696 | pub states_released: usize, | ||
| 1697 | pub prs_released: usize, | ||
| 1698 | pub repos_synced: usize, | ||
| 1699 | pub refs_created: usize, | ||
| 1700 | pub refs_updated: usize, | ||
| 1701 | pub refs_deleted: usize, | ||
| 1702 | pub errors: Vec<String>, | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | /// Unified processing of newly available git data. | ||
| 1706 | /// | ||
| 1707 | /// Called whenever git data becomes available, whether from: | ||
| 1708 | /// - A successful `git push` (handle_receive_pack) | ||
| 1709 | /// - Purgatory sync fetching OIDs from remote servers | ||
| 1710 | pub async fn process_newly_available_git_data( | ||
| 1711 | source_repo_path: &Path, | ||
| 1712 | new_oids: &HashSet<String>, | ||
| 1713 | database: &SharedDatabase, | ||
| 1714 | local_relay: Option<&nostr_relay_builder::LocalRelay>, | ||
| 1715 | purgatory: &Purgatory, | ||
| 1716 | git_data_path: &Path, | ||
| 1717 | ) -> Result<ProcessResult>; | ||
| 1718 | |||
| 1719 | // Helper functions | ||
| 1720 | fn extract_identifier_from_repo_path(repo_path: &Path, git_data_path: &Path) -> Option<String>; | ||
| 1721 | fn extract_identifier_from_pr_event(event: &Event) -> Option<String>; | ||
| 1722 | |||
| 1723 | // Purgatory additions | ||
| 1724 | impl Purgatory { | ||
| 1725 | /// Find all PR events for an identifier. | ||
| 1726 | /// Filters pr_events entries where the identifier matches (no secondary index needed). | ||
| 1727 | pub fn find_prs_for_identifier(&self, identifier: &str) -> Vec<PrPurgatoryEntry>; | ||
| 1728 | } | ||
| 1729 | ``` | ||
| 1730 | |||
| 1731 | **Unit Tests** (3 tests): | ||
| 1732 | ```rust | ||
| 1733 | #[cfg(test)] | ||
| 1734 | mod tests { | ||
| 1735 | #[test] | ||
| 1736 | fn extract_identifier_from_repo_path_valid() { | ||
| 1737 | // {git_data_path}/{npub}/{identifier}.git → identifier | ||
| 1738 | } | ||
| 1739 | |||
| 1740 | #[test] | ||
| 1741 | fn extract_identifier_from_pr_event_valid() { | ||
| 1742 | // Event with "a" tag "30617:<pubkey>:<identifier>" → identifier | ||
| 1743 | } | ||
| 1744 | |||
| 1745 | #[test] | ||
| 1746 | fn extract_identifier_from_pr_event_missing_tag() { | ||
| 1747 | // Event without "a" tag → None | ||
| 1748 | } | ||
| 1749 | } | ||
| 1750 | ``` | ||
| 1751 | |||
| 1752 | **Success Criteria**: | ||
| 1753 | - [ ] `process_newly_available_git_data` discovers satisfiable events from purgatory | ||
| 1754 | - [ ] State events: syncs OIDs to owner repos, aligns refs, sets HEAD, saves to DB, notifies WS, removes from purgatory | ||
| 1755 | - [ ] PR events: syncs commit to owner repos, creates refs/nostr/<event-id>, saves to DB, notifies WS, removes from purgatory | ||
| 1756 | - [ ] `find_prs_for_identifier` filters pr_events by identifier correctly | ||
| 1757 | - [ ] All 3 unit tests pass | ||
| 1758 | |||
| 1759 | --- | ||
| 1760 | |||
| 1761 | ### Phase 10: Update `handle_receive_pack` to Use Unified Function | ||
| 1762 | |||
| 1763 | **Goal**: Refactor the push authorization handler to use `process_newly_available_git_data`. | ||
| 1764 | |||
| 1765 | This replaces ~100 lines of duplicated post-push processing with a single call to the unified function. | ||
| 1766 | |||
| 1767 | **Files**: | ||
| 1768 | - `src/git/handlers.rs` (modify) | ||
| 1769 | |||
| 1770 | **Before** (current code): | ||
| 1771 | ```rust | ||
| 1772 | // After git receive-pack succeeds: | ||
| 1773 | // - try_set_head_if_available() | ||
| 1774 | // - database.save_event() | ||
| 1775 | // - remove_state_event() / remove_pr() | ||
| 1776 | // - relay.notify_event() | ||
| 1777 | // - sync_to_owner_repos() | ||
| 1778 | // - sync_pr_refs_to_tagged_owner_repos() | ||
| 1779 | // ... ~100 lines of processing | ||
| 1780 | ``` | ||
| 1781 | |||
| 1782 | **After** (simplified): | ||
| 1783 | ```rust | ||
| 1784 | // After git receive-pack succeeds: | ||
| 1785 | let new_oids: HashSet<String> = pushed_refs | ||
| 1786 | .iter() | ||
| 1787 | .filter(|(_, new_oid, _)| new_oid != "0000000000000000000000000000000000000000") | ||
| 1788 | .map(|(_, new_oid, _)| new_oid.clone()) | ||
| 1789 | .collect(); | ||
| 1790 | |||
| 1791 | let result = process_newly_available_git_data( | ||
| 1792 | &repo_path, | ||
| 1793 | &new_oids, | ||
| 1794 | &database, | ||
| 1795 | Some(&relay), | ||
| 1796 | &purgatory, | ||
| 1797 | Path::new(git_data_path), | ||
| 1798 | ).await; | ||
| 1799 | |||
| 1800 | info!( | ||
| 1801 | "Processed push: {} states, {} PRs released, {} repos synced", | ||
| 1802 | result.states_released, | ||
| 1803 | result.prs_released, | ||
| 1804 | result.repos_synced | ||
| 1805 | ); | ||
| 1806 | ``` | ||
| 1807 | |||
| 1808 | **Unit Tests** (0 tests): | ||
| 1809 | - Behavior tested via existing integration tests which should continue to pass | ||
| 1810 | |||
| 1811 | **Success Criteria**: | ||
| 1812 | - [ ] `handle_receive_pack` uses `process_newly_available_git_data` | ||
| 1813 | - [ ] Duplicate code removed (~100 lines) | ||
| 1814 | - [ ] All existing push-related tests still pass | ||
| 1815 | - [ ] Push behavior unchanged (same events saved, same refs created) | ||
| 1816 | |||
| 1817 | --- | ||
| 1818 | |||
| 1819 | ### Phase 11: RealSyncContext Implementation | ||
| 1820 | |||
| 1821 | **Goal**: Implement the production `SyncContext` that connects to real systems. | ||
| 1822 | |||
| 1823 | **Files**: | ||
| 1824 | - `src/purgatory/sync/context.rs` (extend) | ||
| 1825 | |||
| 1826 | **Deliverables**: | ||
| 1827 | ```rust | ||
| 1828 | pub struct RealSyncContext { | ||
| 1829 | purgatory: Purgatory, | ||
| 1830 | database: SharedDatabase, | ||
| 1831 | git_data_path: PathBuf, | ||
| 1832 | our_domain: Option<String>, | ||
| 1833 | local_relay: Option<LocalRelay>, | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | impl SyncContext for RealSyncContext { ... } | ||
| 1837 | ``` | ||
| 1838 | |||
| 1839 | **Unit Tests** (0 tests): | ||
| 1840 | - `RealSyncContext` is tested via integration tests | ||
| 1841 | |||
| 1842 | **Success Criteria**: | ||
| 1843 | - [ ] All `SyncContext` methods implemented | ||
| 1844 | - [ ] Connects to real database, git, and relay | ||
| 1845 | - [ ] `process_newly_available_git_data` method delegates to unified function from Phase 9 | ||
| 1846 | |||
| 1847 | --- | ||
| 1848 | |||
| 1849 | ### Phase 12: Integration Tests | ||
| 1850 | |||
| 1851 | **Goal**: Verify end-to-end sync behavior with real relay instances. | ||
| 1852 | |||
| 1853 | **Files**: | ||
| 1854 | - `tests/purgatory_sync.rs` (new) | ||
| 1855 | |||
| 1856 | **Integration Tests** (5 tests): | ||
| 1857 | ```rust | ||
| 1858 | #[tokio::test] | ||
| 1859 | async fn state_event_syncs_from_remote() { | ||
| 1860 | // State event enters purgatory, git data fetched, event released | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | #[tokio::test] | ||
| 1864 | async fn pr_event_syncs_from_remote() { | ||
| 1865 | // PR event enters purgatory, commit fetched, event released | ||
| 1866 | } | ||
| 1867 | |||
| 1868 | #[tokio::test] | ||
| 1869 | async fn concurrent_state_and_pr_sync() { | ||
| 1870 | // Both event types sync correctly when arriving together | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | #[tokio::test] | ||
| 1874 | async fn partial_oid_aggregation_from_multiple_servers() { | ||
| 1875 | // OIDs aggregated when no single server has all | ||
| 1876 | } | ||
| 1877 | |||
| 1878 | #[tokio::test] | ||
| 1879 | async fn push_triggers_unified_processing() { | ||
| 1880 | // Git push triggers process_newly_available_git_data | ||
| 1881 | // Verifies Phase 10 integration | ||
| 1882 | } | ||
| 1883 | ``` | ||
| 1884 | |||
| 1885 | **Success Criteria**: | ||
| 1886 | - [ ] All 5 integration tests pass | ||
| 1887 | - [ ] State events release after git sync | ||
| 1888 | - [ ] PR events release after commit sync | ||
| 1889 | - [ ] Partial OID scenarios handled correctly | ||
| 1890 | - [ ] Push path uses unified function (same behavior as purgatory sync) | ||
| 1891 | |||
| 1892 | --- | ||
| 1893 | |||
| 1894 | ### Phase 13: Cleanup | ||
| 1895 | |||
| 1896 | **Goal**: Remove old `start_state_sync` code and wire up new system. | ||
| 1897 | |||
| 1898 | **Files**: | ||
| 1899 | - `src/purgatory/mod.rs` (modify) | ||
| 1900 | - `src/main.rs` (modify) | ||
| 1901 | |||
| 1902 | **Deliverables**: | ||
| 1903 | - Remove `start_state_sync` method | ||
| 1904 | - Wire `start_sync_loop` into application startup | ||
| 1905 | - Update `add_state` to call `enqueue_sync` | ||
| 1906 | |||
| 1907 | **Success Criteria**: | ||
| 1908 | - [ ] Old sync code removed | ||
| 1909 | - [ ] New sync loop starts on application boot | ||
| 1910 | - [ ] All existing tests still pass | ||
| 1911 | - [ ] All new tests pass | ||
| 1912 | |||
| 1913 | --- | ||
| 1914 | |||
| 1915 | ## Test Summary | ||
| 1916 | |||
| 1917 | | Phase | Unit Tests | Integration Tests | Total | | ||
| 1918 | |-------|------------|-------------------|-------| | ||
| 1919 | | 1. SyncQueueEntry | 2 | - | 2 | | ||
| 1920 | | 2. DomainThrottle | 4 | - | 4 | | ||
| 1921 | | 3. ThrottleManager (rate limiting) | 1 | - | 1 | | ||
| 1922 | | 4. SyncContext | 0 | - | 0 | | ||
| 1923 | | 5. Core Functions | 3 | - | 3 | | ||
| 1924 | | 6. sync_identifier + queue triggers | 2 | - | 2 | | ||
| 1925 | | 7. Queue Integration | 1 | - | 1 | | ||
| 1926 | | 8. Sync Loop | 0 | - | 0 | | ||
| 1927 | | 9. Unified Function | 3 | - | 3 | | ||
| 1928 | | 10. Push Handler Update | 0 | - | 0 | | ||
| 1929 | | 11. RealSyncContext | 0 | - | 0 | | ||
| 1930 | | 12. Integration | - | 5 | 5 | | ||
| 1931 | | 13. Cleanup | 0 | - | 0 | | ||
| 1932 | | **Total** | **16** | **5** | **21** | | ||
| 1933 | |||
| 1934 | ## Configuration | ||
| 1935 | |||
| 1936 | | Option | CLI Flag | Environment Variable | Default | | ||
| 1937 | |--------|----------|---------------------|---------| | ||
| 1938 | | Domain concurrent limit | `--sync-domain-concurrent` | `NGIT_SYNC_DOMAIN_CONCURRENT` | `5` | | ||
| 1939 | | Domain rate limit | `--sync-domain-rate-limit` | `NGIT_SYNC_DOMAIN_RATE_LIMIT` | `30` | | ||
| 1940 | | Default sync delay | `--sync-default-delay-secs` | `NGIT_SYNC_DEFAULT_DELAY_SECS` | `180` | | ||
| 1941 | | Immediate sync delay | `--sync-immediate-delay-ms` | `NGIT_SYNC_IMMEDIATE_DELAY_MS` | `500` | | ||
| 1942 | |||
| 1943 | **Note**: Sync loop interval is hardcoded to 1 second (not configurable). | ||
| 1944 | |||
| 1945 | ## Observability | ||
| 1946 | |||
| 1947 | ### Metrics | ||
| 1948 | |||
| 1949 | - `purgatory_sync_queue_size` - Identifiers pending sync | ||
| 1950 | - `purgatory_sync_attempts_total` - Sync attempts per identifier | ||
| 1951 | - `purgatory_sync_oids_fetched_total` - OIDs successfully fetched | ||
| 1952 | - `purgatory_domain_in_flight` - In-flight requests per domain | ||
| 1953 | - `purgatory_domain_requests_total` - Total requests per domain | ||
| 1954 | |||
| 1955 | ### Logging | ||
| 1956 | |||
| 1957 | - `INFO`: Successful sync completion, OIDs fetched | ||
| 1958 | - `DEBUG`: URL attempts, throttle decisions, backoff applied | ||
| 1959 | - `WARN`: Fetch failures, processing errors | ||
diff --git a/docs/explanation/unify-git-data-sync.md b/docs/explanation/unify-git-data-sync.md deleted file mode 100644 index fa1f983..0000000 --- a/docs/explanation/unify-git-data-sync.md +++ /dev/null | |||
| @@ -1,481 +0,0 @@ | |||
| 1 | # Unified Git Data Sync | ||
| 2 | |||
| 3 | ## Status | ||
| 4 | |||
| 5 | **Proposed** - January 2026 | ||
| 6 | |||
| 7 | ## Context | ||
| 8 | |||
| 9 | Currently, two separate code paths handle "git data is now available" scenarios: | ||
| 10 | |||
| 11 | 1. **`handle_receive_pack`** (src/git/handlers.rs) - After a successful `git push` | ||
| 12 | 2. **`sync_state_git_data`** (src/purgatory/mod.rs) - After purgatory sync fetches OIDs from remote servers | ||
| 13 | |||
| 14 | Both paths perform essentially the same post-processing: | ||
| 15 | |||
| 16 | | Step | `handle_receive_pack` | `sync_state_git_data` | | ||
| 17 | |------|----------------------|----------------------| | ||
| 18 | | Set HEAD | ✅ `try_set_head_if_available()` | ✅ (via `align_repository_with_state`) | | ||
| 19 | | Save events to DB | ✅ `database.save_event()` | ✅ `database.save_event()` | | ||
| 20 | | Remove from purgatory | ✅ `remove_state_event()` / `remove_pr()` | ✅ `remove_state_event()` | | ||
| 21 | | Notify WebSocket | ✅ `relay.notify_event()` | ✅ `relay.notify_event()` | | ||
| 22 | | Sync state to owner repos | ✅ `sync_to_owner_repos()` | ✅ `sync_to_owner_repos()` | | ||
| 23 | | Sync PR refs to owner repos | ✅ `sync_pr_refs_to_tagged_owner_repos()` | ❌ Not implemented | | ||
| 24 | |||
| 25 | This duplication creates maintenance burden and inconsistent behavior (e.g., PR sync missing from purgatory path). | ||
| 26 | |||
| 27 | ## Decision | ||
| 28 | |||
| 29 | Create a single unified function that handles all post-git-data-available processing: | ||
| 30 | |||
| 31 | ```rust | ||
| 32 | pub async fn process_newly_available_git_data( | ||
| 33 | source_repo_path: &Path, | ||
| 34 | new_oids: &HashSet<String>, | ||
| 35 | database: &SharedDatabase, | ||
| 36 | local_relay: Option<&nostr_relay_builder::LocalRelay>, | ||
| 37 | purgatory: &Purgatory, | ||
| 38 | git_data_path: &Path, | ||
| 39 | ) -> ProcessResult | ||
| 40 | ``` | ||
| 41 | |||
| 42 | ### Key Design Principles | ||
| 43 | |||
| 44 | **1. Always discover events from purgatory** | ||
| 45 | |||
| 46 | Rather than accepting pre-authorized events (which may have changed since authorization), the function always scans purgatory to find satisfiable events. This ensures consistency and handles race conditions where events change between authorization and processing. | ||
| 47 | |||
| 48 | **2. Minimal input, maximal output** | ||
| 49 | |||
| 50 | Callers only need to provide: | ||
| 51 | - `source_repo_path` - Where the git data landed | ||
| 52 | - `new_oids` - Which OIDs are now available (for efficient filtering) | ||
| 53 | |||
| 54 | The function handles everything else: finding events, syncing across repos, aligning refs, setting HEAD, saving to database, notifying subscribers, and cleaning up purgatory. | ||
| 55 | |||
| 56 | **3. Process all event types uniformly** | ||
| 57 | |||
| 58 | Both state events (kind 30618) and PR events (kind 1617/1618) are processed in the same flow, ensuring consistent behavior. | ||
| 59 | |||
| 60 | ## Architecture | ||
| 61 | |||
| 62 | ### Flow Overview | ||
| 63 | |||
| 64 | ``` | ||
| 65 | ┌─────────────────────────────────────────────────────────────────────────────────┐ | ||
| 66 | │ Git Data Becomes Available │ | ||
| 67 | │ │ | ||
| 68 | │ ┌─────────────────────┐ ┌─────────────────────┐ │ | ||
| 69 | │ │ handle_receive_pack │ │ purgatory sync │ │ | ||
| 70 | │ │ (push received) │ │ (fetch completed) │ │ | ||
| 71 | │ └──────────┬──────────┘ └──────────┬──────────┘ │ | ||
| 72 | │ │ │ │ | ||
| 73 | │ │ source_repo_path │ source_repo_path │ | ||
| 74 | │ │ new_oids │ new_oids │ | ||
| 75 | │ │ │ │ | ||
| 76 | │ └────────────────┬───────────────────┘ │ | ||
| 77 | │ │ │ | ||
| 78 | │ ▼ │ | ||
| 79 | │ ┌────────────────────────────────────────┐ │ | ||
| 80 | │ │ process_newly_available_git_data() │ │ | ||
| 81 | │ │ │ │ | ||
| 82 | │ │ 1. Extract identifier from path │ │ | ||
| 83 | │ │ 2. Fetch repository data from DB │ │ | ||
| 84 | │ │ 3. Find satisfiable state events │ │ | ||
| 85 | │ │ 4. Find satisfiable PR events │ │ | ||
| 86 | │ │ 5. For each event: │ │ | ||
| 87 | │ │ - Sync OIDs to owner repos │ │ | ||
| 88 | │ │ - Align refs (+ set HEAD) │ │ | ||
| 89 | │ │ - Save to database │ │ | ||
| 90 | │ │ - Notify WebSocket │ │ | ||
| 91 | │ │ - Remove from purgatory │ │ | ||
| 92 | │ └────────────────────────────────────────┘ │ | ||
| 93 | └─────────────────────────────────────────────────────────────────────────────────┘ | ||
| 94 | ``` | ||
| 95 | |||
| 96 | ### Event Discovery | ||
| 97 | |||
| 98 | The function discovers satisfiable events by scanning purgatory: | ||
| 99 | |||
| 100 | **For State Events:** | ||
| 101 | 1. Get all state entries for the identifier from purgatory | ||
| 102 | 2. For each entry, check if ALL required OIDs exist in source repo | ||
| 103 | 3. Quick optimization: skip if none of `new_oids` are in the state's OID set | ||
| 104 | |||
| 105 | **For PR Events:** | ||
| 106 | 1. Get all PR entries for the identifier from purgatory (via secondary index) | ||
| 107 | 2. For each entry with an event, check if the commit OID exists in source repo | ||
| 108 | 3. Quick optimization: skip if commit not in `new_oids` | ||
| 109 | |||
| 110 | ### Sync to Owner Repos | ||
| 111 | |||
| 112 | **For State Events:** | ||
| 113 | |||
| 114 | For each owner whose maintainer set authorizes the state author: | ||
| 115 | 1. Skip if a newer state already exists for that owner | ||
| 116 | 2. Copy missing OIDs from source repo to target repo | ||
| 117 | 3. Align refs (create/update/delete branches and tags) | ||
| 118 | 4. Set HEAD per state announcement | ||
| 119 | |||
| 120 | **For PR Events:** | ||
| 121 | |||
| 122 | For each owner whose maintainer set includes any tagged owner (from `a` tags): | ||
| 123 | 1. Copy commit from source repo to target repo (if missing) | ||
| 124 | 2. Create `refs/nostr/<event-id>` pointing to the commit | ||
| 125 | |||
| 126 | ## Data Structure Changes | ||
| 127 | |||
| 128 | ### PrPurgatoryEntry | ||
| 129 | |||
| 130 | Add `identifier` field for secondary index lookup: | ||
| 131 | |||
| 132 | ```rust | ||
| 133 | #[derive(Debug, Clone)] | ||
| 134 | pub struct PrPurgatoryEntry { | ||
| 135 | /// The nostr PR event, if received (None = git data arrived first) | ||
| 136 | pub event: Option<Event>, | ||
| 137 | |||
| 138 | /// The expected commit SHA from 'c' tag or actual commit pushed | ||
| 139 | pub commit: String, | ||
| 140 | |||
| 141 | /// Repository identifier extracted from 'a' tag (30617:<owner>:<identifier>) | ||
| 142 | /// Used for lookup when git data arrives | ||
| 143 | pub identifier: Option<String>, | ||
| 144 | |||
| 145 | /// When this entry was added to purgatory | ||
| 146 | pub created_at: Instant, | ||
| 147 | |||
| 148 | /// Expiry deadline | ||
| 149 | pub expires_at: Instant, | ||
| 150 | } | ||
| 151 | ``` | ||
| 152 | |||
| 153 | ### Purgatory Secondary Index | ||
| 154 | |||
| 155 | Add index for finding PR events by identifier: | ||
| 156 | |||
| 157 | ```rust | ||
| 158 | pub struct Purgatory { | ||
| 159 | /// State events indexed by repository identifier | ||
| 160 | state_events: Arc<DashMap<String, Vec<StatePurgatoryEntry>>>, | ||
| 161 | |||
| 162 | /// PR events indexed by event ID (hex string) | ||
| 163 | pr_events: Arc<DashMap<String, PrPurgatoryEntry>>, | ||
| 164 | |||
| 165 | /// Secondary index: identifier -> event_ids for PR events | ||
| 166 | pr_events_by_identifier: Arc<DashMap<String, HashSet<String>>>, | ||
| 167 | |||
| 168 | git_data_path: PathBuf, | ||
| 169 | } | ||
| 170 | ``` | ||
| 171 | |||
| 172 | ### New Purgatory Methods | ||
| 173 | |||
| 174 | ```rust | ||
| 175 | impl Purgatory { | ||
| 176 | /// Find all PR events for an identifier | ||
| 177 | pub fn find_prs_for_identifier(&self, identifier: &str) -> Vec<PrPurgatoryEntry>; | ||
| 178 | |||
| 179 | /// Add PR with automatic identifier extraction and indexing | ||
| 180 | pub fn add_pr(&self, event: Event, event_id: String, commit: String); | ||
| 181 | |||
| 182 | /// Add placeholder with optional identifier | ||
| 183 | pub fn add_pr_placeholder(&self, event_id: String, commit: String, identifier: Option<String>); | ||
| 184 | |||
| 185 | /// Remove PR (also cleans up secondary index) | ||
| 186 | pub fn remove_pr(&self, event_id: &str); | ||
| 187 | } | ||
| 188 | ``` | ||
| 189 | |||
| 190 | ## Implementation | ||
| 191 | |||
| 192 | ### Core Function | ||
| 193 | |||
| 194 | ```rust | ||
| 195 | /// Unified processing of newly available git data. | ||
| 196 | /// | ||
| 197 | /// Called whenever git data becomes available, whether from: | ||
| 198 | /// - A successful `git push` (handle_receive_pack) | ||
| 199 | /// - Purgatory sync fetching OIDs from remote servers | ||
| 200 | /// | ||
| 201 | /// # What it does | ||
| 202 | /// | ||
| 203 | /// 1. **Discover satisfiable events**: Scans purgatory for state and PR events | ||
| 204 | /// whose required OIDs are now available in `source_repo_path` | ||
| 205 | /// | ||
| 206 | /// 2. **For each satisfiable STATE event**: | ||
| 207 | /// - Find all owner repos that authorize this state's author | ||
| 208 | /// - Copy OIDs from source repo to each authorized owner repo | ||
| 209 | /// - Align refs (create/update/delete) to match state | ||
| 210 | /// - Set HEAD per state announcement | ||
| 211 | /// - Save event to database | ||
| 212 | /// - Notify WebSocket subscribers | ||
| 213 | /// - Remove from purgatory | ||
| 214 | /// | ||
| 215 | /// 3. **For each satisfiable PR event**: | ||
| 216 | /// - Find all owner repos that list tagged owners as maintainers | ||
| 217 | /// - Copy commit from source repo to each relevant owner repo | ||
| 218 | /// - Create refs/nostr/<event-id> in each repo | ||
| 219 | /// - Save event to database | ||
| 220 | /// - Notify WebSocket subscribers | ||
| 221 | /// - Remove from purgatory | ||
| 222 | pub async fn process_newly_available_git_data( | ||
| 223 | source_repo_path: &Path, | ||
| 224 | new_oids: &HashSet<String>, | ||
| 225 | database: &SharedDatabase, | ||
| 226 | local_relay: Option<&nostr_relay_builder::LocalRelay>, | ||
| 227 | purgatory: &Purgatory, | ||
| 228 | git_data_path: &Path, | ||
| 229 | ) -> ProcessResult { | ||
| 230 | let mut result = ProcessResult::default(); | ||
| 231 | |||
| 232 | // Extract identifier from repo path | ||
| 233 | let identifier = match extract_identifier_from_repo_path(source_repo_path, git_data_path) { | ||
| 234 | Some(id) => id, | ||
| 235 | None => return result, | ||
| 236 | }; | ||
| 237 | |||
| 238 | // Fetch repository data once for all operations | ||
| 239 | let db_repo_data = match fetch_repository_data(database, &identifier).await { | ||
| 240 | Ok(data) => data, | ||
| 241 | Err(e) => { | ||
| 242 | result.errors.push(format!("Failed to fetch repo data: {}", e)); | ||
| 243 | return result; | ||
| 244 | } | ||
| 245 | }; | ||
| 246 | |||
| 247 | // Process satisfiable state events | ||
| 248 | let state_result = process_satisfiable_state_events( | ||
| 249 | source_repo_path, | ||
| 250 | &identifier, | ||
| 251 | new_oids, | ||
| 252 | &db_repo_data, | ||
| 253 | database, | ||
| 254 | local_relay, | ||
| 255 | purgatory, | ||
| 256 | git_data_path, | ||
| 257 | ).await; | ||
| 258 | |||
| 259 | result.merge_state_result(state_result); | ||
| 260 | |||
| 261 | // Process satisfiable PR events | ||
| 262 | let pr_result = process_satisfiable_pr_events( | ||
| 263 | source_repo_path, | ||
| 264 | &identifier, | ||
| 265 | new_oids, | ||
| 266 | &db_repo_data, | ||
| 267 | database, | ||
| 268 | local_relay, | ||
| 269 | purgatory, | ||
| 270 | git_data_path, | ||
| 271 | ).await; | ||
| 272 | |||
| 273 | result.merge_pr_result(pr_result); | ||
| 274 | |||
| 275 | result | ||
| 276 | } | ||
| 277 | ``` | ||
| 278 | |||
| 279 | ### Result Type | ||
| 280 | |||
| 281 | ```rust | ||
| 282 | /// Result of processing newly available git data | ||
| 283 | #[derive(Debug, Default)] | ||
| 284 | pub struct ProcessResult { | ||
| 285 | /// Number of state events released from purgatory | ||
| 286 | pub states_released: usize, | ||
| 287 | /// Number of PR events released from purgatory | ||
| 288 | pub prs_released: usize, | ||
| 289 | /// Number of owner repositories synced | ||
| 290 | pub repos_synced: usize, | ||
| 291 | /// Number of refs created across all repos | ||
| 292 | pub refs_created: usize, | ||
| 293 | /// Number of refs updated across all repos | ||
| 294 | pub refs_updated: usize, | ||
| 295 | /// Number of refs deleted across all repos | ||
| 296 | pub refs_deleted: usize, | ||
| 297 | /// Errors encountered (non-fatal) | ||
| 298 | pub errors: Vec<String>, | ||
| 299 | } | ||
| 300 | ``` | ||
| 301 | |||
| 302 | ### Helper: Extract Identifier from PR Event | ||
| 303 | |||
| 304 | ```rust | ||
| 305 | /// Extract identifier from PR event's `a` tag. | ||
| 306 | /// Format: 30617:<owner_pubkey>:<identifier> | ||
| 307 | fn extract_identifier_from_pr_event(event: &Event) -> Option<String> { | ||
| 308 | event.tags.iter().find_map(|tag| { | ||
| 309 | let tag_vec = tag.clone().to_vec(); | ||
| 310 | if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") { | ||
| 311 | let parts: Vec<&str> = tag_vec[1].split(':').collect(); | ||
| 312 | if parts.len() >= 3 { | ||
| 313 | Some(parts[2].to_string()) | ||
| 314 | } else { | ||
| 315 | None | ||
| 316 | } | ||
| 317 | } else { | ||
| 318 | None | ||
| 319 | } | ||
| 320 | }) | ||
| 321 | } | ||
| 322 | ``` | ||
| 323 | |||
| 324 | ### Helper: Extract Identifier from Repo Path | ||
| 325 | |||
| 326 | ```rust | ||
| 327 | /// Extract identifier from repository path. | ||
| 328 | /// Path format: {git_data_path}/{npub}/{identifier}.git | ||
| 329 | fn extract_identifier_from_repo_path(repo_path: &Path, git_data_path: &Path) -> Option<String> { | ||
| 330 | let relative = repo_path.strip_prefix(git_data_path).ok()?; | ||
| 331 | let components: Vec<_> = relative.components().collect(); | ||
| 332 | |||
| 333 | if components.len() >= 2 { | ||
| 334 | let identifier_with_git = components[1].as_os_str().to_str()?; | ||
| 335 | Some(identifier_with_git.trim_end_matches(".git").to_string()) | ||
| 336 | } else { | ||
| 337 | None | ||
| 338 | } | ||
| 339 | } | ||
| 340 | ``` | ||
| 341 | |||
| 342 | ## Integration | ||
| 343 | |||
| 344 | ### handle_receive_pack (Simplified) | ||
| 345 | |||
| 346 | ```rust | ||
| 347 | // After git receive-pack succeeds: | ||
| 348 | |||
| 349 | // Collect new OIDs from the push | ||
| 350 | let new_oids: HashSet<String> = pushed_refs | ||
| 351 | .iter() | ||
| 352 | .filter(|(_, new_oid, _)| new_oid != "0000000000000000000000000000000000000000") | ||
| 353 | .map(|(_, new_oid, _)| new_oid.clone()) | ||
| 354 | .collect(); | ||
| 355 | |||
| 356 | // Single unified call handles everything | ||
| 357 | let result = process_newly_available_git_data( | ||
| 358 | &repo_path, | ||
| 359 | &new_oids, | ||
| 360 | &database, | ||
| 361 | Some(&relay), | ||
| 362 | &purgatory, | ||
| 363 | Path::new(git_data_path), | ||
| 364 | ).await; | ||
| 365 | |||
| 366 | info!( | ||
| 367 | "Processed push: {} states, {} PRs released, {} repos synced", | ||
| 368 | result.states_released, | ||
| 369 | result.prs_released, | ||
| 370 | result.repos_synced | ||
| 371 | ); | ||
| 372 | ``` | ||
| 373 | |||
| 374 | ### Purgatory Sync (Simplified) | ||
| 375 | |||
| 376 | ```rust | ||
| 377 | // After fetching OIDs from remote: | ||
| 378 | |||
| 379 | let new_oids: HashSet<String> = fetched_oids.into_iter().collect(); | ||
| 380 | |||
| 381 | let result = process_newly_available_git_data( | ||
| 382 | &source_repo_path, | ||
| 383 | &new_oids, | ||
| 384 | &database, | ||
| 385 | local_relay.as_ref(), | ||
| 386 | &purgatory, | ||
| 387 | &git_data_path, | ||
| 388 | ).await; | ||
| 389 | ``` | ||
| 390 | |||
| 391 | ### Integration with Purgatory Sync Redesign | ||
| 392 | |||
| 393 | The purgatory sync redesign (see `purgatory-sync-redesign.md`) uses this unified function in its `sync_identifier_from_url` implementation: | ||
| 394 | |||
| 395 | ```rust | ||
| 396 | pub async fn sync_identifier_from_url<C: SyncContext>( | ||
| 397 | ctx: &C, | ||
| 398 | identifier: &str, | ||
| 399 | url: &str, | ||
| 400 | throttle_manager: &Arc<ThrottleManager>, | ||
| 401 | ) -> usize { | ||
| 402 | // ... fetch OIDs from URL ... | ||
| 403 | |||
| 404 | let fetched_oids = ctx.fetch_oids(&target_repo, url, &needed_oids).await?; | ||
| 405 | |||
| 406 | if !fetched_oids.is_empty() { | ||
| 407 | // Use unified processing | ||
| 408 | let new_oids: HashSet<String> = fetched_oids.into_iter().collect(); | ||
| 409 | |||
| 410 | let result = process_newly_available_git_data( | ||
| 411 | &target_repo, | ||
| 412 | &new_oids, | ||
| 413 | ctx.database(), | ||
| 414 | ctx.local_relay(), | ||
| 415 | ctx.purgatory(), | ||
| 416 | ctx.git_data_path(), | ||
| 417 | ).await; | ||
| 418 | |||
| 419 | // Result already handled purgatory removal, DB saves, etc. | ||
| 420 | } | ||
| 421 | |||
| 422 | fetched_oids.len() | ||
| 423 | } | ||
| 424 | ``` | ||
| 425 | |||
| 426 | The `SyncContext` trait wraps this function in its `process_newly_available_git_data` method for testability. | ||
| 427 | |||
| 428 | ## Benefits | ||
| 429 | |||
| 430 | 1. **Single source of truth** - One function handles all post-git-data processing | ||
| 431 | 2. **Always fresh discovery** - Events discovered from purgatory at processing time | ||
| 432 | 3. **Consistent behavior** - Push and sync paths behave identically | ||
| 433 | 4. **Simpler callers** - Just pass repo_path + new_oids | ||
| 434 | 5. **Complete processing** - Handles all event types, all repo syncing, HEAD, DB, WebSocket, purgatory | ||
| 435 | 6. **PR sync parity** - PR events now synced in purgatory path (was missing) | ||
| 436 | |||
| 437 | ## Code to Remove/Simplify | ||
| 438 | |||
| 439 | After implementing the unified function: | ||
| 440 | |||
| 441 | 1. **Remove**: Most of `sync_state_git_data` in `src/purgatory/mod.rs` | ||
| 442 | 2. **Simplify**: Event handling in `handle_receive_pack` (replace ~100 lines with single call) | ||
| 443 | 3. **Internalize**: `sync_to_owner_repos` and `sync_pr_refs_to_tagged_owner_repos` become internal helpers | ||
| 444 | |||
| 445 | ## Testing Strategy | ||
| 446 | |||
| 447 | ### Unit Tests | ||
| 448 | |||
| 449 | 1. `extract_identifier_from_repo_path` - Various path formats | ||
| 450 | 2. `extract_identifier_from_pr_event` - Various tag formats | ||
| 451 | 3. Event discovery logic with mock purgatory | ||
| 452 | |||
| 453 | ### Integration Tests | ||
| 454 | |||
| 455 | 1. Push triggers processing and releases state event | ||
| 456 | 2. Push triggers processing and releases PR event | ||
| 457 | 3. Purgatory sync triggers processing | ||
| 458 | 4. Multiple events for same identifier processed correctly | ||
| 459 | 5. Cross-repo sync works for both state and PR events | ||
| 460 | |||
| 461 | ## Future Considerations | ||
| 462 | |||
| 463 | ### Batch Processing | ||
| 464 | |||
| 465 | Currently processes events one at a time. Could batch database saves and WebSocket notifications for efficiency with many events. | ||
| 466 | |||
| 467 | ### Partial Failures | ||
| 468 | |||
| 469 | Currently continues on errors and collects them in result. Could add retry logic or transaction semantics if needed. | ||
| 470 | |||
| 471 | ### Metrics | ||
| 472 | |||
| 473 | Add Prometheus metrics for: | ||
| 474 | - Events processed by type (state/PR) | ||
| 475 | - Repos synced per processing call | ||
| 476 | - Processing duration | ||
| 477 | - Errors by type | ||
| 478 | |||
| 479 | ## Related Documents | ||
| 480 | |||
| 481 | - [Purgatory Sync Redesign](purgatory-sync-redesign.md) - Uses this unified function for purgatory sync operations | ||