diff options
| -rw-r--r-- | docs/explanation/grasp-02-proactive-sync.md | 102 |
1 files changed, 60 insertions, 42 deletions
diff --git a/docs/explanation/grasp-02-proactive-sync.md b/docs/explanation/grasp-02-proactive-sync.md index f11148e..2a86126 100644 --- a/docs/explanation/grasp-02-proactive-sync.md +++ b/docs/explanation/grasp-02-proactive-sync.md | |||
| @@ -90,21 +90,35 @@ impl RelayState { | |||
| 90 | ### PendingSyncIndex (In-Flight Batches) | 90 | ### PendingSyncIndex (In-Flight Batches) |
| 91 | 91 | ||
| 92 | ```rust | 92 | ```rust |
| 93 | |||
| 94 | /// Method used for synchronization | ||
| 95 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||
| 96 | pub enum SyncMethod { | ||
| 97 | /// Traditional REQ+EOSE flow - waits for EOSE on subscriptions | ||
| 98 | ReqEose, | ||
| 99 | /// NIP-77 negentropy sync - confirms immediately after sync completes | ||
| 100 | Negentropy, | ||
| 101 | } | ||
| 102 | |||
| 103 | |||
| 93 | /// Tracks batches of subscriptions that are in-flight, awaiting EOSE. | 104 | /// Tracks batches of subscriptions that are in-flight, awaiting EOSE. |
| 94 | /// Each batch has its own ID and can confirm independently. | 105 | /// Each batch has its own ID and can confirm independently. |
| 95 | /// Key: relay URL | 106 | /// Key: relay URL |
| 96 | pub type PendingSyncIndex = Arc<RwLock<HashMap<String, Vec<PendingBatch>>>>; | 107 | pub type PendingSyncIndex = Arc<RwLock<HashMap<String, Vec<PendingBatch>>>>; |
| 97 | 108 | ||
| 98 | #[derive(Debug, Clone)] | ||
| 99 | pub struct PendingBatch { | 109 | pub struct PendingBatch { |
| 100 | /// Unique ID for this batch - for debugging/logging | 110 | /// Unique ID for this batch - for debugging/logging |
| 101 | pub batch_id: u64, | 111 | pub batch_id: u64, |
| 102 | /// The items this batch is syncing | 112 | /// The items this batch is syncing |
| 103 | pub items: PendingItems, | 113 | pub items: PendingItems, |
| 104 | /// Subscription IDs that must ALL receive EOSE before confirming | 114 | /// Subscription IDs that must ALL receive EOSE before confirming (for ReqEose) |
| 115 | /// Empty for Negentropy sync method | ||
| 105 | pub outstanding_subs: HashSet<SubscriptionId>, | 116 | pub outstanding_subs: HashSet<SubscriptionId>, |
| 117 | /// The sync method used for this batch | ||
| 118 | pub sync_method: SyncMethod, | ||
| 106 | } | 119 | } |
| 107 | 120 | ||
| 121 | |||
| 108 | #[derive(Debug, Clone, Default)] | 122 | #[derive(Debug, Clone, Default)] |
| 109 | pub struct PendingItems { | 123 | pub struct PendingItems { |
| 110 | pub repos: HashSet<String>, | 124 | pub repos: HashSet<String>, |
| @@ -290,7 +304,7 @@ Transforms the repo-centric `RepoSyncIndex` into a relay-centric view. For each | |||
| 290 | 304 | ||
| 291 | ```rust | 305 | ```rust |
| 292 | // Conceptual: inverts repo → relays to relay → repos | 306 | // Conceptual: inverts repo → relays to relay → repos |
| 293 | fn derive_relay_targets(repo_index: &HashMap<String, RepoSyncNeeds>) | 307 | fn derive_relay_targets(repo_index: &HashMap<String, RepoSyncNeeds>) |
| 294 | -> HashMap<String, RelaySyncNeeds> | 308 | -> HashMap<String, RelaySyncNeeds> |
| 295 | ``` | 309 | ``` |
| 296 | 310 | ||
| @@ -336,6 +350,7 @@ The filter strategy uses three layers to ensure comprehensive event coverage: | |||
| 336 | ### Combined Layer 2+3 | 350 | ### Combined Layer 2+3 |
| 337 | 351 | ||
| 338 | The [`build_layer2_and_layer3_filters()`](../../src/sync/filters.rs:152) function combines both layers. Used by: | 352 | The [`build_layer2_and_layer3_filters()`](../../src/sync/filters.rs:152) function combines both layers. Used by: |
| 353 | |||
| 339 | - `compute_actions` for incremental subscriptions | 354 | - `compute_actions` for incremental subscriptions |
| 340 | - `rebuild_layer2_and_layer3` during reconnection | 355 | - `rebuild_layer2_and_layer3` during reconnection |
| 341 | - Consolidation rebuilds (Layer 1 remains active separately) | 356 | - Consolidation rebuilds (Layer 1 remains active separately) |
| @@ -350,29 +365,29 @@ The [`SyncManager`](../../src/sync/mod.rs:308) orchestrates all sync operations. | |||
| 350 | 365 | ||
| 351 | ### Connection Lifecycle | 366 | ### Connection Lifecycle |
| 352 | 367 | ||
| 353 | | Method | Purpose | | 368 | | Method | Purpose | |
| 354 | |--------|---------| | 369 | | ------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | |
| 355 | | `handle_connect_or_reconnect()` | Unified handler for initial connect and reconnect. Determines fresh vs quick reconnect based on `last_connected` and 15-minute rule | | 370 | | `handle_connect_or_reconnect()` | Unified handler for initial connect and reconnect. Determines fresh vs quick reconnect based on `last_connected` and 15-minute rule | |
| 356 | | `handle_disconnect()` | Updates RelayState to Disconnected, sets disconnected_at, clears pending batches, records failure in health tracker | | 371 | | `handle_disconnect()` | Updates RelayState to Disconnected, sets disconnected_at, clears pending batches, records failure in health tracker | |
| 357 | | `spawn_relay_connection()` | Creates RelayConnection, subscribes to Layer 1, spawns event loop task | | 372 | | `spawn_relay_connection()` | Creates RelayConnection, subscribes to Layer 1, spawns event loop task | |
| 358 | 373 | ||
| 359 | ### Sync Operations | 374 | ### Sync Operations |
| 360 | 375 | ||
| 361 | | Method | Purpose | | 376 | | Method | Purpose | |
| 362 | |--------|---------| | 377 | | ------------------------------- | ------------------------------------------------------------------------------------------------------------------- | |
| 363 | | `handle_add_filters()` | Auto-spawns connection if needed, checks consolidation threshold (>70 filters), subscribes and creates PendingBatch | | 378 | | `handle_add_filters()` | Auto-spawns connection if needed, checks consolidation threshold (>70 filters), subscribes and creates PendingBatch | |
| 364 | | `handle_eose()` | Processes EOSE for subscription, moves items from pending to confirmed when batch completes | | 379 | | `handle_eose()` | Processes EOSE for subscription, moves items from pending to confirmed when batch completes | |
| 365 | | `recompute_actions_for_relay()` | Runs derive_relay_targets → compute_actions for a specific relay to find new items | | 380 | | `recompute_actions_for_relay()` | Runs derive_relay_targets → compute_actions for a specific relay to find new items | |
| 366 | | `rebuild_layer2_and_layer3()` | Rebuilds subscriptions from confirmed state with optional since filter | | 381 | | `rebuild_layer2_and_layer3()` | Rebuilds subscriptions from confirmed state with optional since filter | |
| 367 | 382 | ||
| 368 | ### Maintenance | 383 | ### Maintenance |
| 369 | 384 | ||
| 370 | | Method | Purpose | | 385 | | Method | Purpose | |
| 371 | |--------|---------| | 386 | | --------------------- | -------------------------------------------------------------------------- | |
| 372 | | `daily_sync()` | Full fresh sync - unsubscribes all, clears state, recomputes actions | | 387 | | `daily_sync()` | Full fresh sync - unsubscribes all, clears state, recomputes actions | |
| 373 | | `consolidate()` | Reduces filter count by unsubscribing and rebuilding with combined filters | | 388 | | `consolidate()` | Reduces filter count by unsubscribing and rebuilding with combined filters | |
| 374 | | `check_disconnects()` | Periodic check for empty relays (no repos) to disconnect | | 389 | | `check_disconnects()` | Periodic check for empty relays (no repos) to disconnect | |
| 375 | | `check_reconnects()` | Attempts reconnection for disconnected relays with pending work | | 390 | | `check_reconnects()` | Attempts reconnection for disconnected relays with pending work | |
| 376 | 391 | ||
| 377 | --- | 392 | --- |
| 378 | 393 | ||
| @@ -474,21 +489,21 @@ flowchart TB | |||
| 474 | 489 | ||
| 475 | ## Key Design Decisions | 490 | ## Key Design Decisions |
| 476 | 491 | ||
| 477 | | Decision | Choice | Rationale | | 492 | | Decision | Choice | Rationale | |
| 478 | |----------|--------|-----------| | 493 | | -------------------------- | -------------------------------------- | --------------------------------------------------------------------------- | |
| 479 | | Startup mechanism | Self-subscription only | Single code path, fresh DB behaves same as reconnect | | 494 | | Startup mechanism | Self-subscription only | Single code path, fresh DB behaves same as reconnect | |
| 480 | | Connect/reconnect handling | Unified handle_connect_or_reconnect | Single entry point for both initial and reconnect | | 495 | | Connect/reconnect handling | Unified handle_connect_or_reconnect | Single entry point for both initial and reconnect | |
| 481 | | Layer 1 handling | Separate build_announcement_filter | Connection-level: subscribe ONCE on connect, NOT rebuilt in consolidation | | 496 | | Layer 1 handling | Separate build_announcement_filter | Connection-level: subscribe ONCE on connect, NOT rebuilt in consolidation | |
| 482 | | Layer 2+3 handling | Separate rebuild_layer2_and_layer3 | Item-level: managed by compute_actions, consolidated when filter count > 70 | | 497 | | Layer 2+3 handling | Separate rebuild_layer2_and_layer3 | Item-level: managed by compute_actions, consolidated when filter count > 70 | |
| 483 | | Filter functions | since as Option parameter | Allows same functions for fresh sync and catch-up | | 498 | | Filter functions | since as Option parameter | Allows same functions for fresh sync and catch-up | |
| 484 | | Since filter | Only on catch-up paths | Initial/stale gets full history, quick reconnect catches up | | 499 | | Since filter | Only on catch-up paths | Initial/stale gets full history, quick reconnect catches up | |
| 485 | | compute_actions role | ONLY for new Layer 2+3 items | Does NOT handle Layer 1 or catch-up | | 500 | | compute_actions role | ONLY for new Layer 2+3 items | Does NOT handle Layer 1 or catch-up | |
| 486 | | Catch-up pending tracking | No PendingBatch | Items already confirmed, don't need re-confirmation | | 501 | | Catch-up pending tracking | No PendingBatch | Items already confirmed, don't need re-confirmation | |
| 487 | | Consolidation trigger | On filter add, not periodic | Check in handle_add_filters before adding new filters | | 502 | | Consolidation trigger | On filter add, not periodic | Check in handle_add_filters before adding new filters | |
| 488 | | Clear on disconnect | Clear PSI on disconnect | Cleanup at event boundary, simpler than on reconnect | | 503 | | Clear on disconnect | Clear PSI on disconnect | Cleanup at event boundary, simpler than on reconnect | |
| 489 | | 15-minute rule | Clear confirmed if disconnected >15min | Matches since filter buffer, prevents stale subscriptions | | 504 | | 15-minute rule | Clear confirmed if disconnected >15min | Matches since filter buffer, prevents stale subscriptions | |
| 490 | | Daily timer | Fresh sync (clears state) | Ensures consistency, detects drift | | 505 | | Daily timer | Fresh sync (clears state) | Ensures consistency, detects drift | |
| 491 | | NIP-77 negentropy | Try first, fallback to REQ | Efficient set reconciliation when supported | | 506 | | NIP-77 negentropy | Try first, fallback to REQ | Efficient set reconciliation when supported | |
| 492 | 507 | ||
| 493 | --- | 508 | --- |
| 494 | 509 | ||
| @@ -503,11 +518,13 @@ NIP-77 defines the negentropy protocol for efficient event set comparison. Inste | |||
| 503 | ### When Negentropy is Used | 518 | ### When Negentropy is Used |
| 504 | 519 | ||
| 505 | Negentropy sync is attempted for: | 520 | Negentropy sync is attempted for: |
| 521 | |||
| 506 | - **Initial connect** - Fresh sync without `last_connected` | 522 | - **Initial connect** - Fresh sync without `last_connected` |
| 507 | - **Daily sync** - Periodic full refresh (23-25 hour timer) | 523 | - **Daily sync** - Periodic full refresh (23-25 hour timer) |
| 508 | - **Stale reconnect** - Disconnected for more than 15 minutes | 524 | - **Stale reconnect** - Disconnected for more than 15 minutes |
| 509 | 525 | ||
| 510 | Negentropy is NOT used for: | 526 | Negentropy is NOT used for: |
| 527 | |||
| 511 | - **Quick reconnect** - Less than 15 minutes disconnected (uses REQ with `since`) | 528 | - **Quick reconnect** - Less than 15 minutes disconnected (uses REQ with `since`) |
| 512 | - **Live subscriptions** - Ongoing event streams always use REQ | 529 | - **Live subscriptions** - Ongoing event streams always use REQ |
| 513 | 530 | ||
| @@ -536,17 +553,18 @@ flowchart TB | |||
| 536 | CONNECT[Connect to relay] --> NEG{Try negentropy} | 553 | CONNECT[Connect to relay] --> NEG{Try negentropy} |
| 537 | NEG --> |success| L1[Layer 1 synced via negentropy] | 554 | NEG --> |success| L1[Layer 1 synced via negentropy] |
| 538 | NEG --> |failure| FALLBACK[Fall back to REQ+EOSE] | 555 | NEG --> |failure| FALLBACK[Fall back to REQ+EOSE] |
| 539 | 556 | ||
| 540 | L1 --> SINCE[Record timestamp = now] | 557 | L1 --> SINCE[Record timestamp = now] |
| 541 | FALLBACK --> EOSE[Wait for EOSE] | 558 | FALLBACK --> EOSE[Wait for EOSE] |
| 542 | EOSE --> SINCE | 559 | EOSE --> SINCE |
| 543 | 560 | ||
| 544 | SINCE --> LIVE[Open live REQ with since=now] | 561 | SINCE --> LIVE[Open live REQ with since=now] |
| 545 | ``` | 562 | ``` |
| 546 | 563 | ||
| 547 | ### Fallback Behavior | 564 | ### Fallback Behavior |
| 548 | 565 | ||
| 549 | If negentropy fails (relay doesn't support NIP-77, network error, etc.): | 566 | If negentropy fails (relay doesn't support NIP-77, network error, etc.): |
| 567 | |||
| 550 | 1. A warning is logged (once per relay to avoid spam) | 568 | 1. A warning is logged (once per relay to avoid spam) |
| 551 | 2. The sync falls back to traditional REQ+EOSE | 569 | 2. The sync falls back to traditional REQ+EOSE |
| 552 | 3. No error is raised - fallback is automatic | 570 | 3. No error is raised - fallback is automatic |
| @@ -555,12 +573,12 @@ If negentropy fails (relay doesn't support NIP-77, network error, etc.): | |||
| 555 | 573 | ||
| 556 | ### Key Design Decisions for Negentropy | 574 | ### Key Design Decisions for Negentropy |
| 557 | 575 | ||
| 558 | | Decision | Choice | Rationale | | 576 | | Decision | Choice | Rationale | |
| 559 | |----------|--------|-----------| | 577 | | ------------------ | --------------------------- | ------------------------------------------------- | |
| 560 | | Detection approach | Try and fallback | More reliable than NIP-11 document detection | | 578 | | Detection approach | Try and fallback | More reliable than NIP-11 document detection | |
| 561 | | When to use | Fresh/daily/stale sync only | Quick reconnect with `since` is already efficient | | 579 | | When to use | Fresh/daily/stale sync only | Quick reconnect with `since` is already efficient | |
| 562 | | Error handling | Log once, fallback silently | Avoid log spam while maintaining visibility | | 580 | | Error handling | Log once, fallback silently | Avoid log spam while maintaining visibility | |
| 563 | | Layer application | Layer 1 first | Announcements are highest priority | | 581 | | Layer application | Layer 1 first | Announcements are highest priority | |
| 564 | 582 | ||
| 565 | --- | 583 | --- |
| 566 | 584 | ||