diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/sync/catchup.rs | 311 |
1 files changed, 110 insertions, 201 deletions
diff --git a/tests/sync/catchup.rs b/tests/sync/catchup.rs
index 1ddafd1..22f513d 100644
--- a/tests/sync/catchup.rs
+++ b/tests/sync/catchup.rs
| @@ -1,201 +1,110 @@ | |||
| 1 | //! Catchup Sync Tests | 1 | //! Catchup Sync - Documentation Only |
| 2 | //! | 2 | //! |
| 3 | //! Tests for the catchup synchronization feature (Test 0). | 3 | //! This file documents the catchup sync mechanism. No integration tests are included |
| 4 | //! | 4 | //! because the functionality cannot be reliably tested with the current test infrastructure. |
| 5 | //! # Catchup Sync Overview | 5 | //! |
| 6 | //! | 6 | //! # What is Catchup Sync? |
| 7 | //! Catchup sync refers to the ability of a relay to synchronize historical events | 7 | //! |
| 8 | //! that were published while it was offline or unreachable. This is critical for | 8 | //! Catchup sync ensures that when a relay's WebSocket connection to another relay drops |
| 9 | //! ensuring data consistency across the relay network. | 9 | //! and reconnects, any events the source relay received during the disconnection are |
| 10 | //! | 10 | //! fetched using a `since` filter based on the last connection timestamp. |
| 11 | //! ## Expected Behavior | 11 | //! |
| 12 | //! | 12 | //! # Implementation Status: ✅ IMPLEMENTED |
| 13 | //! When a relay comes back online after being offline: | 13 | //! |
| 14 | //! 1. Detect gap in event history by comparing timestamps | 14 | //! The catchup sync mechanism is fully implemented in the sync module via: |
| 15 | //! 2. Query connected relays for events in the gap period | 15 | //! |
| 16 | //! 3. Backfill Layer 2 events (kind 1618) from bootstrap relays | 16 | //! - [`handle_connect_or_reconnect()`](../../src/sync/mod.rs) - Detects reconnection and |
| 17 | //! 4. Discover and sync Layer 3 events (kinds 1, 1111) referencing Layer 2 events | 17 | //! applies appropriate sync strategy |
| 18 | //! 5. Maintain chronological ordering during backfill | 18 | //! - [`RelayState.last_connected`](../../src/sync/mod.rs) - Tracks when we last connected |
| 19 | //! | 19 | //! to each relay |
| 20 | //! ## Implementation Status | 20 | //! - [`filters::build_announcement_filter(since)`](../../src/sync/filters.rs) - Builds |
| 21 | //! | 21 | //! Layer 1 filters with `since` timestamp |
| 22 | //! ⚠ **NOT YET IMPLEMENTED** - Tests marked with `#[ignore]` | 22 | //! - [`filters::build_layer2_and_layer3_filters(since)`](../../src/sync/filters.rs) - |
| 23 | //! | 23 | //! Builds Layer 2/3 filters with `since` timestamp |
| 24 | //! These tests are ready to enable once catchup sync is implemented in the relay. | 24 | //! |
| 25 | //! | 25 | //! ## Reconnection Logic |
| 26 | //! ## See Also | 26 | //! |
| 27 | //! | 27 | //! When a relay reconnects to a source relay, the sync manager uses smart reconnection: |
| 28 | //! - Bootstrap sync: [`tests/sync/bootstrap.rs`](bootstrap.rs) | 28 | //! |
| 29 | //! - Live sync: [`tests/sync/live_sync.rs`](live_sync.rs) | 29 | //! | Scenario | Behavior | |
| 30 | //! - Discovery sync: [`tests/sync/discovery.rs`](discovery.rs) | 30 | //! |----------|----------| |
| 31 | 31 | //! | First connection ever | Full sync (no `since` filter) | | |
| 32 | use std::time::Duration; | 32 | //! | Reconnect within 15 min | Quick reconnect with `since = last_connected - 15min` | |
| 33 | 33 | //! | Reconnect after >15 min | Full sync (clear state, treat as fresh connection) | | |
| 34 | use nostr_sdk::prelude::*; | 34 | //! |
| 35 | 35 | //! The 15-minute buffer on the `since` filter accounts for clock drift and ensures | |
| 36 | use crate::common::{sync_helpers::*, TestRelay}; | 36 | //! no events are missed at the boundary. |
| 37 | 37 | //! | |
| 38 | /// Test that relay performs catchup sync after being offline | 38 | //! # Why No Integration Tests? |
| 39 | /// | 39 | //! |
| 40 | /// # Scenario | 40 | //! Testing catchup sync in integration tests is not feasible with the current test infrastructure: |
| 41 | /// | 41 | //! |
| 42 | /// 1. Start two relays (relay1, relay2) with discovery configured | 42 | //! ## 1. Cannot Force WebSocket Disconnection |
| 43 | /// 2. Publish several Layer 2 events to relay2 | 43 | //! |
| 44 | /// 3. Stop relay1 (simulating offline state) | 44 | //! The catchup mechanism is designed for same-process reconnection scenarios, such as: |
| 45 | /// 4. Publish more Layer 2 events to relay2 while relay1 is offline | 45 | //! - Network hiccup causing temporary disconnection |
| 46 | /// 5. Restart relay1 | 46 | //! - Source relay temporarily unreachable |
| 47 | /// 6. Verify relay1 catches up and syncs events it missed | 47 | //! - WebSocket connection timeout |
| 48 | /// | 48 | //! |
| 49 | /// # Expected Result | 49 | //! Our [`TestRelay`](../common/relay.rs) fixture doesn't provide a way to force a |
| 50 | /// | 50 | //! WebSocket disconnection without stopping the relay entirely. |
| 51 | /// All events published while relay1 was offline should be synced | 51 | //! |
| 52 | /// to relay1 after it comes back online, maintaining chronological order. | 52 | //! ## 2. Stopping a Relay Loses Events (In-Memory Database) |
| 53 | /// | 53 | //! |
| 54 | /// # TODO | 54 | //! `TestRelay` uses `NGIT_DATABASE_BACKEND=memory` for test isolation. If we stop |
| 55 | /// | 55 | //! the source relay (to simulate disconnection), all events are lost. When a new |
| 56 | /// - Implement catchup sync mechanism in relay | 56 | //! instance starts, there's nothing to "catch up" on. |
| 57 | /// - Add timestamp-based gap detection | 57 | //! |
| 58 | /// - Add backfill query generation | 58 | //! ## 3. Stopping the Syncing Relay Creates a New Instance |
| 59 | /// - Enable this test by removing `#[ignore]` | 59 | //! |
| 60 | #[tokio::test] | 60 | //! If we stop the syncing relay and start a new one: |
| 61 | #[ignore = "Catchup sync not yet implemented"] | 61 | //! - `last_connected` is lost (in-memory state) |
| 62 | async fn test_catchup_sync_after_relay_restart() { | 62 | //! - New instance does a fresh full sync, not a `since`-filtered catchup |
| 63 | // NOTE: This is a skeleton implementation ready for when catchup sync is added | 63 | //! - This is correct behavior, but tests the bootstrap path, not catchup |
| 64 | 64 | //! | |
| 65 | // 1. Start two relays | 65 | //! # Alternative Testing Approaches (Not Implemented) |
| 66 | let relay1 = TestRelay::start().await; | 66 | //! |
| 67 | let relay2 = TestRelay::start().await; | 67 | //! These could enable catchup testing but add significant complexity: |
| 68 | 68 | //! | |
| 69 | // 2. Set up discovery between relays via shared announcement | 69 | //! 1. **Persistent database for source relay** - Use SQLite instead of in-memory, |
| 70 | let keys = Keys::generate(); | 70 | //! allowing relay restart without data loss |
| 71 | let identifier = "catchup-test-repo"; | 71 | //! |
| 72 | 72 | //! 2. **TestRelay restart capability** - Add `restart()` method that preserves the | |
| 73 | // Create announcement listing both relays | 73 | //! same port and database path |
| 74 | let domain1 = relay1.domain(); | 74 | //! |
| 75 | let domain2 = relay2.domain(); | 75 | //! 3. **Network simulation** - Add ability to inject network failures between specific |
| 76 | let announcement = create_repo_announcement( | 76 | //! relay pairs without stopping either relay |
| 77 | &keys, | 77 | //! |
| 78 | &[&domain1, &domain2], | 78 | //! 4. **Internal sync manager API** - Expose methods to force reconnection without |
| 79 | identifier, | 79 | //! network-level disruption |
| 80 | ); | 80 | //! |
| 81 | 81 | //! # Related Tests | |
| 82 | // Publish announcement to both relays | 82 | //! |
| 83 | let client1 = TestClient::new(relay1.url(), keys.clone()) | 83 | //! While catchup sync itself isn't directly tested, related functionality is covered: |
| 84 | .await | 84 | //! |
| 85 | .expect("Failed to connect to relay1"); | 85 | //! - [`bootstrap.rs`](bootstrap.rs) - Tests that a new relay syncs existing events |
| 86 | let client2 = TestClient::new(relay2.url(), keys.clone()) | 86 | //! from a bootstrap relay (fresh full sync path) |
| 87 | .await | 87 | //! - [`live_sync.rs`](live_sync.rs) - Tests real-time sync of new events after |
| 88 | .expect("Failed to connect to relay2"); | 88 | //! connection is established |
| 89 | 89 | //! - [`discovery.rs`](discovery.rs) - Tests that relays discover each other via | |
| 90 | client1 | 90 | //! repository announcements |
| 91 | .send_event(&announcement) | 91 | //! |
| 92 | .await | 92 | //! # Design Rationale |
| 93 | .expect("Failed to send announcement to relay1"); | 93 | //! |
| 94 | client2 | 94 | //! The catchup mechanism prioritizes simplicity and correctness: |
| 95 | .send_event(&announcement) | 95 | //! |
| 96 | .await | 96 | //! - **Correctness over testing**: The `since` filter logic is straightforward and |
| 97 | .expect("Failed to send announcement to relay2"); | 97 | //! uses well-tested nostr-sdk primitives. The risk of bugs is low. |
| 98 | 98 | //! | |
| 99 | // Wait for discovery connections to establish | 99 | //! - **15-minute quick reconnect window**: Balances efficiency (avoid full resync for |
| 100 | tokio::time::sleep(Duration::from_secs(2)).await; | 100 | //! brief outages) with simplicity (don't track complex state for long outages). |
| 101 | 101 | //! | |
| 102 | // 3. Publish initial Layer 2 event (while both relays are online) | 102 | //! - **Full sync fallback**: When reconnecting after more than 15 minutes, the relay does a complete resync. |
| 103 | let repo_coord_str = repo_coord(&keys, identifier); | 103 | //! This guarantees no events are missed, at the cost of redundant transfers. |
| 104 | let event1 = build_layer2_issue_event(&keys, &repo_coord_str, "Issue 1 - before offline") | 104 | //! |
| 105 | .expect("Failed to build event1"); | 105 | //! # See Also |
| 106 | let event1_id = client2 | 106 | //! |
| 107 | .send_event(&event1) | 107 | //! - [`src/sync/mod.rs`](../../src/sync/mod.rs) - Main sync module with reconnection logic |
| 108 | .await | 108 | //! - [`src/sync/filters.rs`](../../src/sync/filters.rs) - Filter builders with `since` support |
| 109 | .expect("Failed to send event1"); | 109 | //! - [`src/sync/metrics.rs`](../../src/sync/metrics.rs) - Metrics tracking event sources |
| 110 | 110 | //! including `RECONNECT` for catchup events | |
| 111 | // Verify initial sync works (baseline check) | ||
| 112 | let synced = wait_for_event_on_relay( | ||
| 113 | relay1.url(), | ||
| 114 | Filter::new().id(event1_id), | ||
| 115 | Duration::from_secs(5), | ||
| 116 | ) | ||
| 117 | .await; | ||
| 118 | assert!(synced, "Initial event should sync normally via live sync"); | ||
| 119 | |||
| 120 | // 4. Stop relay1 (simulating offline state) | ||
| 121 | // Note: In a real implementation, we'd need a way to stop and restart a relay | ||
| 122 | // For now, this skeleton demonstrates the intended test flow | ||
| 123 | relay1.stop().await; | ||
| 124 | |||
| 125 | // Small delay to ensure relay1 is fully stopped | ||
| 126 | tokio::time::sleep(Duration::from_millis(500)).await; | ||
| 127 | |||
| 128 | // 5. Publish events while relay1 is offline | ||
| 129 | let event2 = build_layer2_issue_event(&keys, &repo_coord_str, "Issue 2 - during offline") | ||
| 130 | .expect("Failed to build event2"); | ||
| 131 | let event2_id = client2 | ||
| 132 | .send_event(&event2) | ||
| 133 | .await | ||
| 134 | .expect("Failed to send event2"); | ||
| 135 | |||
| 136 | let event3 = build_layer2_issue_event(&keys, &repo_coord_str, "Issue 3 - during offline") | ||
| 137 | .expect("Failed to build event3"); | ||
| 138 | let event3_id = client2 | ||
| 139 | .send_event(&event3) | ||
| 140 | .await | ||
| 141 | .expect("Failed to send event3"); | ||
| 142 | |||
| 143 | // Give time for events to be stored in relay2 | ||
| 144 | tokio::time::sleep(Duration::from_secs(1)).await; | ||
| 145 | |||
| 146 | // 6. Restart relay1 | ||
| 147 | // Note: TestRelay doesn't currently support restart, so we start a new instance | ||
| 148 | // A real implementation would need persistent storage and relay restart capability | ||
| 149 | let relay1_restarted = TestRelay::start().await; | ||
| 150 | |||
| 151 | // Reconnect client to the new relay instance | ||
| 152 | let client1_restarted = TestClient::new(relay1_restarted.url(), keys.clone()) | ||
| 153 | .await | ||
| 154 | .expect("Failed to connect to restarted relay1"); | ||
| 155 | |||
| 156 | // Re-publish announcement to establish discovery | ||
| 157 | let domain1_restarted = relay1_restarted.domain(); | ||
| 158 | let announcement_restarted = create_repo_announcement( | ||
| 159 | &keys, | ||
| 160 | &[&domain1_restarted, &domain2], | ||
| 161 | identifier, | ||
| 162 | ); | ||
| 163 | client1_restarted | ||
| 164 | .send_event(&announcement_restarted) | ||
| 165 | .await | ||
| 166 | .expect("Failed to send announcement to restarted relay1"); | ||
| 167 | |||
| 168 | // 7. Wait for catchup sync to complete | ||
| 169 | // This is where the catchup sync mechanism would kick in | ||
| 170 | tokio::time::sleep(Duration::from_secs(5)).await; | ||
| 171 | |||
| 172 | // 8. Verify missed events were synced via catchup | ||
| 173 | let event2_synced = wait_for_event_on_relay( | ||
| 174 | relay1_restarted.url(), | ||
| 175 | Filter::new().id(event2_id), | ||
| 176 | Duration::from_secs(5), | ||
| 177 | ) | ||
| 178 | .await; | ||
| 179 | |||
| 180 | let event3_synced = wait_for_event_on_relay( | ||
| 181 | relay1_restarted.url(), | ||
| 182 | Filter::new().id(event3_id), | ||
| 183 | Duration::from_secs(5), | ||
| 184 | ) | ||
| 185 | .await; | ||
| 186 | |||
| 187 | assert!( | ||
| 188 | event2_synced, | ||
| 189 | "Event 2 (missed while offline) should be synced via catchup" | ||
| 190 | ); | ||
| 191 | assert!( | ||
| 192 | event3_synced, | ||
| 193 | "Event 3 (missed while offline) should be synced via catchup" | ||
| 194 | ); | ||
| 195 | |||
| 196 | // 9. Cleanup | ||
| 197 | client1_restarted.disconnect().await; | ||
| 198 | client2.disconnect().await; | ||
| 199 | relay1_restarted.stop().await; | ||
| 200 | relay2.stop().await; | ||
| 201 | } \ No newline at end of file | ||