diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-07 11:18:19 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-01-07 11:18:19 +0000 |
| commit | 852eddcc33b59ed027e06d30456f6b9e3b9a31cb (patch) | |
| tree | 4b5a44fcb44af187a1fb6130c56b650b846562f9 | |
| parent | c43e72898b7a026a2b34b9873561bfd9200ad98b (diff) | |
docs: purgatory tweak implementation plan
| -rw-r--r-- | docs/explanation/purgatory-sync-redesign.md | 68 |
1 files changed, 54 insertions, 14 deletions
diff --git a/docs/explanation/purgatory-sync-redesign.md b/docs/explanation/purgatory-sync-redesign.md index dd59870..629c2ff 100644 --- a/docs/explanation/purgatory-sync-redesign.md +++ b/docs/explanation/purgatory-sync-redesign.md | |||
| @@ -745,6 +745,17 @@ This separation enables: | |||
| 745 | - DomainThrottle to process queued identifiers when capacity frees | 745 | - DomainThrottle to process queued identifiers when capacity frees |
| 746 | - Clean testability with mocked SyncContext | 746 | - Clean testability with mocked SyncContext |
| 747 | 747 | ||
| 748 | ### Helper: extract_domain | ||
| 749 | |||
| 750 | ```rust | ||
| 751 | /// Extract domain from a URL (e.g., "https://github.com/foo/bar.git" → "github.com") | ||
| 752 | fn extract_domain(url: &str) -> Option<String> { | ||
| 753 | url::Url::parse(url) | ||
| 754 | .ok() | ||
| 755 | .and_then(|u| u.host_str().map(|s| s.to_string())) | ||
| 756 | } | ||
| 757 | ``` | ||
| 758 | |||
| 748 | ### sync_identifier_next_url | 759 | ### sync_identifier_next_url |
| 749 | 760 | ||
| 750 | ```rust | 761 | ```rust |
| @@ -1286,8 +1297,11 @@ Each phase has clear deliverables, unit tests, and success criteria. Unit tests | |||
| 1286 | **Goal**: Implement the sync queue entry struct with backoff calculation. | 1297 | **Goal**: Implement the sync queue entry struct with backoff calculation. |
| 1287 | 1298 | ||
| 1288 | **Files**: | 1299 | **Files**: |
| 1300 | - `src/purgatory/sync/mod.rs` (new - module declaration) | ||
| 1289 | - `src/purgatory/sync/queue.rs` (new) | 1301 | - `src/purgatory/sync/queue.rs` (new) |
| 1290 | 1302 | ||
| 1303 | **Note**: This creates a new `sync` submodule under `src/purgatory/`. The current purgatory structure is flat (`mod.rs`, `helpers.rs`, `types.rs`). Add `pub mod sync;` to `src/purgatory/mod.rs`. | ||
| 1304 | |||
| 1291 | **Deliverables**: | 1305 | **Deliverables**: |
| 1292 | ```rust | 1306 | ```rust |
| 1293 | pub struct SyncQueueEntry { | 1307 | pub struct SyncQueueEntry { |
| @@ -1397,9 +1411,9 @@ mod tests { | |||
| 1397 | 1411 | ||
| 1398 | --- | 1412 | --- |
| 1399 | 1413 | ||
| 1400 | ### Phase 3: ThrottleManager | 1414 | ### Phase 3: ThrottleManager (Rate Limiting Only) |
| 1401 | 1415 | ||
| 1402 | **Goal**: Implement the manager that owns all domain throttles and provides the sync interface. | 1416 | **Goal**: Implement the manager that owns all domain throttles and provides rate limiting. Queue processing methods (`set_context`, `process_queued_identifier`) are added in Phase 6 after `SyncContext` exists. |
| 1403 | 1417 | ||
| 1404 | **Files**: | 1418 | **Files**: |
| 1405 | - `src/purgatory/sync/throttle.rs` (extend) | 1419 | - `src/purgatory/sync/throttle.rs` (extend) |
| @@ -1410,14 +1424,15 @@ pub struct ThrottleManager { | |||
| 1410 | throttles: DashMap<String, DomainThrottle>, | 1424 | throttles: DashMap<String, DomainThrottle>, |
| 1411 | max_concurrent_per_domain: u32, | 1425 | max_concurrent_per_domain: u32, |
| 1412 | max_per_minute_per_domain: u32, | 1426 | max_per_minute_per_domain: u32, |
| 1427 | // Note: ctx: OnceLock<Arc<dyn SyncContext>> added in Phase 6 | ||
| 1413 | } | 1428 | } |
| 1414 | 1429 | ||
| 1415 | impl ThrottleManager { | 1430 | impl ThrottleManager { |
| 1416 | pub fn new(max_concurrent: u32, max_per_minute: u32) -> Self; | 1431 | pub fn new(max_concurrent: u32, max_per_minute: u32) -> Self; |
| 1417 | pub fn is_throttled(&self, domain: &str) -> bool; | 1432 | pub fn is_throttled(&self, domain: &str) -> bool; |
| 1418 | pub fn start_request(&self, domain: &str); | 1433 | pub fn start_request(&self, domain: &str); |
| 1419 | pub fn complete_request(&self, domain: &str); | 1434 | pub fn complete_request(&self, domain: &str); // Trigger logic added in Phase 6 |
| 1420 | pub fn enqueue_identifier(&self, domain: &str, identifier: String, tried_urls: HashSet<String>); | 1435 | pub fn enqueue_identifier(&self, domain: &str, identifier: String, tried_urls: HashSet<String>); // Trigger logic added in Phase 6 |
| 1421 | } | 1436 | } |
| 1422 | ``` | 1437 | ``` |
| 1423 | 1438 | ||
| @@ -1438,6 +1453,8 @@ mod tests { | |||
| 1438 | - [ ] `enqueue_identifier()` creates domain throttle if needed | 1453 | - [ ] `enqueue_identifier()` creates domain throttle if needed |
| 1439 | - [ ] Unit test passes | 1454 | - [ ] Unit test passes |
| 1440 | 1455 | ||
| 1456 | **Note**: The `complete_request()` and `enqueue_identifier()` methods in this phase only update state. The trigger-based processing (spawning tasks when capacity frees) is added in Phase 6 after `SyncContext` is available. | ||
| 1457 | |||
| 1441 | --- | 1458 | --- |
| 1442 | 1459 | ||
| 1443 | ### Phase 4: SyncContext Trait and MockSyncContext | 1460 | ### Phase 4: SyncContext Trait and MockSyncContext |
| @@ -1533,20 +1550,36 @@ mod tests { | |||
| 1533 | 1550 | ||
| 1534 | --- | 1551 | --- |
| 1535 | 1552 | ||
| 1536 | ### Phase 6: sync_identifier Orchestration | 1553 | ### Phase 6: sync_identifier Orchestration + ThrottleManager Queue Processing |
| 1537 | 1554 | ||
| 1538 | **Goal**: Implement the main sync loop for a single identifier. | 1555 | **Goal**: Implement the main sync loop for a single identifier AND add trigger-based queue processing to ThrottleManager. |
| 1539 | 1556 | ||
| 1540 | **Files**: | 1557 | **Files**: |
| 1541 | - `src/purgatory/sync/functions.rs` (extend) | 1558 | - `src/purgatory/sync/functions.rs` (extend) |
| 1559 | - `src/purgatory/sync/throttle.rs` (extend - add queue processing triggers) | ||
| 1542 | 1560 | ||
| 1543 | **Deliverables**: | 1561 | **Deliverables**: |
| 1544 | ```rust | 1562 | ```rust |
| 1563 | // In functions.rs | ||
| 1545 | pub async fn sync_identifier<C: SyncContext>( | 1564 | pub async fn sync_identifier<C: SyncContext>( |
| 1546 | ctx: &C, | 1565 | ctx: &C, |
| 1547 | identifier: &str, | 1566 | identifier: &str, |
| 1548 | throttle_manager: &Arc<ThrottleManager>, | 1567 | throttle_manager: &Arc<ThrottleManager>, |
| 1549 | ) -> bool; // true if complete, false if pending | 1568 | ) -> bool; // true if complete, false if pending |
| 1569 | |||
| 1570 | // In throttle.rs - add to ThrottleManager | ||
| 1571 | impl ThrottleManager { | ||
| 1572 | /// Set the sync context (called once at startup) | ||
| 1573 | pub fn set_context(&self, ctx: Arc<dyn SyncContext>); | ||
| 1574 | |||
| 1575 | /// Try to process the next queued identifier for a domain (internal) | ||
| 1576 | fn try_process_next(self: &Arc<Self>, domain: &str); | ||
| 1577 | |||
| 1578 | /// Process a single identifier from a domain's queue (internal) | ||
| 1579 | async fn process_queued_identifier(self: &Arc<Self>, domain: &str, identifier: &str); | ||
| 1580 | } | ||
| 1581 | |||
| 1582 | // Update complete_request and enqueue_identifier to trigger processing | ||
| 1550 | ``` | 1583 | ``` |
| 1551 | 1584 | ||
| 1552 | **Unit Tests** (2 tests): | 1585 | **Unit Tests** (2 tests): |
| @@ -1569,6 +1602,8 @@ mod tests { | |||
| 1569 | - [ ] Loops through available URLs until sync complete or all tried | 1602 | - [ ] Loops through available URLs until sync complete or all tried |
| 1570 | - [ ] Enqueues with throttled domains when OIDs still needed | 1603 | - [ ] Enqueues with throttled domains when OIDs still needed |
| 1571 | - [ ] Returns `true` when all OIDs fetched, `false` otherwise | 1604 | - [ ] Returns `true` when all OIDs fetched, `false` otherwise |
| 1605 | - [ ] `complete_request()` triggers `try_process_next()` when capacity available | ||
| 1606 | - [ ] `enqueue_identifier()` triggers `try_process_next()` when capacity available | ||
| 1572 | - [ ] Both unit tests pass | 1607 | - [ ] Both unit tests pass |
| 1573 | 1608 | ||
| 1574 | --- | 1609 | --- |
| @@ -1627,11 +1662,13 @@ impl Purgatory { | |||
| 1627 | } | 1662 | } |
| 1628 | ``` | 1663 | ``` |
| 1629 | 1664 | ||
| 1665 | **Note**: The sync loop interval is hardcoded to 1 second. No configuration option needed. | ||
| 1666 | |||
| 1630 | **Unit Tests** (0 tests): | 1667 | **Unit Tests** (0 tests): |
| 1631 | - The sync loop is tested via integration tests; unit testing async loops is fragile | 1668 | - The sync loop is tested via integration tests; unit testing async loops is fragile |
| 1632 | 1669 | ||
| 1633 | **Success Criteria**: | 1670 | **Success Criteria**: |
| 1634 | - [ ] Loop runs every 1 second | 1671 | - [ ] Loop runs every 1 second (hardcoded) |
| 1635 | - [ ] Finds ready identifiers and spawns sync tasks | 1672 | - [ ] Finds ready identifiers and spawns sync tasks |
| 1636 | - [ ] Applies backoff on incomplete syncs | 1673 | - [ ] Applies backoff on incomplete syncs |
| 1637 | - [ ] Removes completed identifiers from queue | 1674 | - [ ] Removes completed identifiers from queue |
| @@ -1647,8 +1684,9 @@ This is the core function described in [Unified Git Data Sync](unify-git-data-sy | |||
| 1647 | - `RealSyncContext::process_newly_available_git_data` after purgatory sync fetches OIDs | 1684 | - `RealSyncContext::process_newly_available_git_data` after purgatory sync fetches OIDs |
| 1648 | 1685 | ||
| 1649 | **Files**: | 1686 | **Files**: |
| 1650 | - `src/git/sync.rs` (new) | 1687 | - `src/git/sync.rs` (extend - add `ProcessResult` alongside existing types) |
| 1651 | - `src/purgatory/mod.rs` (extend - add secondary index for PR events by identifier) | 1688 | |
| 1689 | **Note**: `src/git/sync.rs` already exists with `sync_to_owner_repos`, `align_repository_with_state`, etc. This phase extends it with the unified processing function. | ||
| 1652 | 1690 | ||
| 1653 | **Deliverables**: | 1691 | **Deliverables**: |
| 1654 | ```rust | 1692 | ```rust |
| @@ -1684,7 +1722,8 @@ fn extract_identifier_from_pr_event(event: &Event) -> Option<String>; | |||
| 1684 | 1722 | ||
| 1685 | // Purgatory additions | 1723 | // Purgatory additions |
| 1686 | impl Purgatory { | 1724 | impl Purgatory { |
| 1687 | /// Find all PR events for an identifier (uses secondary index) | 1725 | /// Find all PR events for an identifier. |
| 1726 | /// Filters pr_events entries where the identifier matches (no secondary index needed). | ||
| 1688 | pub fn find_prs_for_identifier(&self, identifier: &str) -> Vec<PrPurgatoryEntry>; | 1727 | pub fn find_prs_for_identifier(&self, identifier: &str) -> Vec<PrPurgatoryEntry>; |
| 1689 | } | 1728 | } |
| 1690 | ``` | 1729 | ``` |
| @@ -1714,7 +1753,7 @@ mod tests { | |||
| 1714 | - [ ] `process_newly_available_git_data` discovers satisfiable events from purgatory | 1753 | - [ ] `process_newly_available_git_data` discovers satisfiable events from purgatory |
| 1715 | - [ ] State events: syncs OIDs to owner repos, aligns refs, sets HEAD, saves to DB, notifies WS, removes from purgatory | 1754 | - [ ] State events: syncs OIDs to owner repos, aligns refs, sets HEAD, saves to DB, notifies WS, removes from purgatory |
| 1716 | - [ ] PR events: syncs commit to owner repos, creates refs/nostr/<event-id>, saves to DB, notifies WS, removes from purgatory | 1755 | - [ ] PR events: syncs commit to owner repos, creates refs/nostr/<event-id>, saves to DB, notifies WS, removes from purgatory |
| 1717 | - [ ] Secondary index `pr_events_by_identifier` maintained correctly | 1756 | - [ ] `find_prs_for_identifier` filters pr_events by identifier correctly |
| 1718 | - [ ] All 3 unit tests pass | 1757 | - [ ] All 3 unit tests pass |
| 1719 | 1758 | ||
| 1720 | --- | 1759 | --- |
| @@ -1879,10 +1918,10 @@ async fn push_triggers_unified_processing() { | |||
| 1879 | |-------|------------|-------------------|-------| | 1918 | |-------|------------|-------------------|-------| |
| 1880 | | 1. SyncQueueEntry | 2 | - | 2 | | 1919 | | 1. SyncQueueEntry | 2 | - | 2 | |
| 1881 | | 2. DomainThrottle | 4 | - | 4 | | 1920 | | 2. DomainThrottle | 4 | - | 4 | |
| 1882 | | 3. ThrottleManager | 1 | - | 1 | | 1921 | | 3. ThrottleManager (rate limiting) | 1 | - | 1 | |
| 1883 | | 4. SyncContext | 0 | - | 0 | | 1922 | | 4. SyncContext | 0 | - | 0 | |
| 1884 | | 5. Core Functions | 3 | - | 3 | | 1923 | | 5. Core Functions | 3 | - | 3 | |
| 1885 | | 6. sync_identifier | 2 | - | 2 | | 1924 | | 6. sync_identifier + queue triggers | 2 | - | 2 | |
| 1886 | | 7. Queue Integration | 1 | - | 1 | | 1925 | | 7. Queue Integration | 1 | - | 1 | |
| 1887 | | 8. Sync Loop | 0 | - | 0 | | 1926 | | 8. Sync Loop | 0 | - | 0 | |
| 1888 | | 9. Unified Function | 3 | - | 3 | | 1927 | | 9. Unified Function | 3 | - | 3 | |
| @@ -1896,12 +1935,13 @@ async fn push_triggers_unified_processing() { | |||
| 1896 | 1935 | ||
| 1897 | | Option | CLI Flag | Environment Variable | Default | | 1936 | | Option | CLI Flag | Environment Variable | Default | |
| 1898 | |--------|----------|---------------------|---------| | 1937 | |--------|----------|---------------------|---------| |
| 1899 | | Sync loop interval | `--sync-loop-interval-ms` | `NGIT_SYNC_LOOP_INTERVAL_MS` | `1000` | | ||
| 1900 | | Domain concurrent limit | `--sync-domain-concurrent` | `NGIT_SYNC_DOMAIN_CONCURRENT` | `5` | | 1938 | | Domain concurrent limit | `--sync-domain-concurrent` | `NGIT_SYNC_DOMAIN_CONCURRENT` | `5` | |
| 1901 | | Domain rate limit | `--sync-domain-rate-limit` | `NGIT_SYNC_DOMAIN_RATE_LIMIT` | `30` | | 1939 | | Domain rate limit | `--sync-domain-rate-limit` | `NGIT_SYNC_DOMAIN_RATE_LIMIT` | `30` | |
| 1902 | | Default sync delay | `--sync-default-delay-secs` | `NGIT_SYNC_DEFAULT_DELAY_SECS` | `180` | | 1940 | | Default sync delay | `--sync-default-delay-secs` | `NGIT_SYNC_DEFAULT_DELAY_SECS` | `180` | |
| 1903 | | Immediate sync delay | `--sync-immediate-delay-ms` | `NGIT_SYNC_IMMEDIATE_DELAY_MS` | `500` | | 1941 | | Immediate sync delay | `--sync-immediate-delay-ms` | `NGIT_SYNC_IMMEDIATE_DELAY_MS` | `500` | |
| 1904 | 1942 | ||
| 1943 | **Note**: Sync loop interval is hardcoded to 1 second (not configurable). | ||
| 1944 | |||
| 1905 | ## Observability | 1945 | ## Observability |
| 1906 | 1946 | ||
| 1907 | ### Metrics | 1947 | ### Metrics |