upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summary | refs | log | tree | commit | diff
path: root/tests/proactive_sync_resilience.rs
diff options
context:
space:
mode:
author: DanConwayDev <DanConwayDev@protonmail.com> 2025-12-04 17:58:31 +0000
committer: DanConwayDev <DanConwayDev@protonmail.com> 2025-12-04 17:58:31 +0000
commit: f639ecfac6687c9e8de4e3f305e168b2e4e1bb87 (patch)
tree: cfcbf16a937a59048930ccaf8557f78ed5576bde /tests/proactive_sync_resilience.rs
parent: bf558b0dc17e14f96eea624ea5591315a2909154 (diff)
feat(sync): Phase 3 - resilience and health tracking
- Add RelayHealthTracker with DashMap
- Implement exponential backoff (5s -> 1h max)
- Handle dead relays (24h failures -> daily retry)
- Add startup jitter to prevent thundering herd
- Add NGIT_SYNC_MAX_BACKOFF_SECS config
Diffstat (limited to 'tests/proactive_sync_resilience.rs')
-rw-r--r--tests/proactive_sync_resilience.rs476
1 files changed, 476 insertions, 0 deletions
diff --git a/tests/proactive_sync_resilience.rs b/tests/proactive_sync_resilience.rs
new file mode 100644
index 0000000..60b18dd
--- /dev/null
+++ b/tests/proactive_sync_resilience.rs
@@ -0,0 +1,476 @@
1//! Integration tests for GRASP-02 Phase 3: Resilience & Health Tracking
2//!
3//! Tests verify:
4//! - Exponential backoff on connection failures (5s → 1h max)
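5//! - Relays stay Degraded (not Dead) while fewer than 24h of failures have elapsed
6//! - Successful connection resets to Healthy
7//! - Dead state is covered only via its display string (the 24h transition and the once-per-day retry are not exercised here)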
8//! - Health state tracking is thread-safe
9
10use std::time::{Duration, Instant};
11
12use ngit_grasp::sync::health::{HealthState, RelayHealthTracker};
13
/// A relay's first recorded failure moves it into the `Degraded` state
/// with a failure count of one.
#[test]
fn test_single_failure_causes_degraded_state() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // A relay that has never been seen may be dialled straight away.
    assert!(health.should_attempt_connection(relay));

    health.record_failure(relay);

    // Exactly one failure: degraded, streak of one.
    assert_eq!(health.get_state(relay), HealthState::Degraded);
    assert_eq!(health.get_failure_count(relay), 1);
}
30
/// A success after a run of failures returns the relay to `Healthy` and
/// clears its failure count.
#[test]
fn test_success_resets_to_healthy() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // Three failures in a row.
    for _ in 0..3 {
        health.record_failure(relay);
    }

    assert_eq!(health.get_state(relay), HealthState::Degraded);
    assert_eq!(health.get_failure_count(relay), 3);

    // One success wipes the slate clean.
    health.record_success(relay);

    assert_eq!(health.get_state(relay), HealthState::Healthy);
    assert_eq!(health.get_failure_count(relay), 0);
    assert!(health.should_attempt_connection(relay));
}
52
/// Backoff doubles with every failure: failure `n` waits `5 * 2^(n-1)`
/// seconds, checked here for failures 1 through 10 (5s up to 2560s), all of
/// which are below the one-hour cap passed in.
#[test]
fn test_exponential_backoff_calculation() {
    let max_backoff = 3600u64; // 1 hour

    // Starts at 5s for the first failure and doubles each step.
    let mut expected_secs = 5u64;
    for failures in 1..=10 {
        assert_eq!(
            RelayHealthTracker::get_backoff_duration(failures, max_backoff),
            Duration::from_secs(expected_secs)
        );
        expected_secs *= 2;
    }
}
118
/// Large failure counts never yield a backoff above `max_backoff`.
#[test]
fn test_backoff_capped_at_maximum() {
    let max_backoff = 3600u64; // 1 hour
    let ceiling = Duration::from_secs(max_backoff);

    // Each of these failure counts is expected to hit the cap exactly.
    for failures in [15, 20, 100] {
        assert_eq!(
            RelayHealthTracker::get_backoff_duration(failures, max_backoff),
            ceiling
        );
    }
}
140
/// Test that a custom `max_backoff` is respected, both by the pure backoff
/// calculation and by a tracker constructed with that maximum.
#[test]
fn test_custom_max_backoff() {
    let custom_max = 60u64; // 1 minute max
    let cap = Duration::from_secs(custom_max);

    // The pure calculation caps at the custom max after several failures.
    assert_eq!(RelayHealthTracker::get_backoff_duration(10, custom_max), cap);

    // A tracker built with the custom max should apply the same cap.
    let tracker = RelayHealthTracker::with_max_backoff(custom_max);
    let url = "wss://test-relay.example.com";

    // Simulate many failures.
    for _ in 0..20 {
        tracker.record_failure(url);
    }

    // Should still be degraded (not dead without 24h of failures).
    assert_eq!(tracker.get_state(url), HealthState::Degraded);

    // The state check alone would pass with any cap, so also verify the
    // tracker's own backoff window stays within the custom maximum.
    // NOTE(review): assumes the tracker adds no jitter on top of the capped
    // backoff - confirm against `RelayHealthTracker`.
    let remaining = tracker
        .get_remaining_backoff(url)
        .expect("backoff should be active immediately after a failure");
    assert!(
        remaining <= cap,
        "remaining backoff {:?} exceeds custom max {:?}",
        remaining,
        cap
    );
}
164
/// Straight after a failure the tracker refuses a new attempt and reports a
/// non-zero remaining backoff.
#[test]
fn test_backoff_blocks_immediate_reconnection() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // Nothing recorded yet: the first attempt is allowed.
    assert!(health.should_attempt_connection(relay));

    health.record_failure(relay);

    // The backoff window is now open, so a retry is refused.
    assert!(!health.should_attempt_connection(relay));

    // There must be some strictly positive time left to wait.
    assert!(matches!(
        health.get_remaining_backoff(relay),
        Some(left) if left > Duration::ZERO
    ));
}
185
/// Health records are kept per relay URL: events on one relay do not affect
/// the state or failure count of another.
#[test]
fn test_multiple_relays_independent() {
    let health = RelayHealthTracker::with_defaults();
    let flaky = "wss://relay1.example.com";
    let solid = "wss://relay2.example.com";
    let wobbly = "wss://relay3.example.com";

    // relay1 fails three times in a row.
    for _ in 0..3 {
        health.record_failure(flaky);
    }

    // relay2 connects fine.
    health.record_success(solid);

    // relay3 fails a single time.
    health.record_failure(wobbly);

    // Each relay reflects only its own history.
    assert_eq!(health.get_state(flaky), HealthState::Degraded);
    assert_eq!(health.get_failure_count(flaky), 3);

    assert_eq!(health.get_state(solid), HealthState::Healthy);
    assert_eq!(health.get_failure_count(solid), 0);

    assert_eq!(health.get_state(wobbly), HealthState::Degraded);
    assert_eq!(health.get_failure_count(wobbly), 1);
}
215
/// Ten back-to-back failures leave the relay `Degraded`, and `is_dead`
/// reports false for it.
#[test]
fn test_is_dead_false_for_degraded() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // Rack up a burst of failures with no real time passing.
    (0..10).for_each(|_| {
        health.record_failure(relay);
    });

    // Degraded, but not dead: 24h of failures has not elapsed.
    assert_eq!(health.get_state(relay), HealthState::Degraded);
    assert!(!health.is_dead(relay));
}
231
/// `get_tracked_relays` lists every URL that has had a success or failure
/// recorded against it.
#[test]
fn test_get_tracked_relays() {
    let health = RelayHealthTracker::with_defaults();
    let relays = [
        "wss://relay1.example.com",
        "wss://relay2.example.com",
        "wss://relay3.example.com",
    ];

    // Touch each relay once, mixing successes and a failure.
    health.record_success(relays[0]);
    health.record_failure(relays[1]);
    health.record_success(relays[2]);

    let tracked = health.get_tracked_relays();
    assert_eq!(tracked.len(), relays.len());
    for relay in relays {
        assert!(tracked.contains(&relay.to_string()));
    }
}
248
/// After a recorded success, `get_health` yields a record that is `Healthy`,
/// has a success timestamp, and shows zero consecutive failures.
#[test]
fn test_get_health_returns_clone() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    health.record_success(relay);

    // The relay is now tracked, so a record must come back.
    let record = health
        .get_health(relay)
        .expect("health record should exist after record_success");

    assert_eq!(record.state, HealthState::Healthy);
    assert!(record.last_success_time.is_some());
    assert_eq!(record.consecutive_failures, 0);
}
267
/// Asking for the health of a relay that was never recorded yields `None`.
#[test]
fn test_get_health_nonexistent() {
    let health = RelayHealthTracker::with_defaults();

    assert!(health.get_health("wss://nonexistent.example.com").is_none());
}
276
/// A relay the tracker has never seen is allowed a connection attempt.
#[test]
fn test_new_relay_allows_connection() {
    let unseen = "wss://brand-new-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // No history means no backoff.
    assert!(health.should_attempt_connection(unseen));
}
285
/// Each `HealthState` variant renders as its lowercase name via `to_string`.
#[test]
fn test_health_state_display() {
    let cases = [
        (HealthState::Healthy, "healthy"),
        (HealthState::Degraded, "degraded"),
        (HealthState::Dead, "dead"),
    ];

    for (state, label) in cases {
        assert_eq!(state.to_string(), label);
    }
}
293
/// Test thread safety with genuinely concurrent access.
///
/// This runs on OS threads rather than async tasks: `#[tokio::test]` uses a
/// single-threaded runtime by default, and tasks with no `.await` point run
/// to completion one after another, so a task-based version never touches
/// the tracker from two threads at once. A barrier releases all workers
/// together to maximise overlap.
#[test]
fn test_concurrent_health_tracking() {
    const WORKERS: usize = 10;

    let tracker = RelayHealthTracker::with_defaults();
    let url = "wss://concurrent-test-relay.example.com";
    let start_line = std::sync::Barrier::new(WORKERS);

    // Scoped threads may borrow `tracker` directly; the scope joins every
    // worker before returning and panics if any worker panicked.
    std::thread::scope(|scope| {
        for i in 0..WORKERS {
            let tracker = &tracker;
            let start_line = &start_line;
            scope.spawn(move || {
                // Hold every worker here until all of them are ready.
                start_line.wait();

                // Even workers report failures, odd workers report successes.
                if i % 2 == 0 {
                    tracker.record_failure(url);
                } else {
                    tracker.record_success(url);
                }

                // Interleave reads with the writes above.
                let _ = tracker.get_state(url);
                let _ = tracker.should_attempt_connection(url);
            });
        }
    });

    // Tracker should still be usable and must know about the relay. The final
    // state depends on thread interleaving, so it is not asserted.
    assert!(tracker.get_health(url).is_some());
}
329
/// The failure count grows by one per failure, drops to zero on success,
/// and starts again from one on the next failure.
#[test]
fn test_failure_streak_tracking() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // After the n-th consecutive failure the count must read n.
    for expected_streak in 1..=5 {
        health.record_failure(relay);
        assert_eq!(health.get_failure_count(relay), expected_streak);
    }

    // A success ends the streak.
    health.record_success(relay);
    assert_eq!(health.get_failure_count(relay), 0);

    // The next failure begins a fresh streak.
    health.record_failure(relay);
    assert_eq!(health.get_failure_count(relay), 1);
}
350
/// A degraded relay that then succeeds is `Healthy` again, may be dialled,
/// and has no backoff left.
#[test]
fn test_recovery_from_degraded() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // One failure is enough to degrade the relay.
    health.record_failure(relay);
    assert_eq!(health.get_state(relay), HealthState::Degraded);

    // A success brings it all the way back.
    health.record_success(relay);
    assert_eq!(health.get_state(relay), HealthState::Healthy);
    assert!(health.should_attempt_connection(relay));
    assert!(health.get_remaining_backoff(relay).is_none());
}
367
/// `get_remaining_backoff` is `Some` after a failure and `None` again once a
/// success has been recorded.
#[test]
fn test_no_remaining_backoff_after_success() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // A failure opens a backoff window.
    health.record_failure(relay);
    assert!(health.get_remaining_backoff(relay).is_some());

    // A success closes it.
    health.record_success(relay);
    assert!(health.get_remaining_backoff(relay).is_none());
}
382
/// Walks one relay through a connect, drop, repeated failed retries, and
/// eventual reconnect, checking state and failure count at each step.
#[test]
fn test_realistic_connection_lifecycle() {
    let relay = "wss://production-relay.example.com";
    let health = RelayHealthTracker::with_max_backoff(60); // 1 minute max for test

    // The relay connects on the first try.
    health.record_success(relay);
    assert_eq!(health.get_state(relay), HealthState::Healthy);

    // The connection drops: the first failure degrades the relay.
    health.record_failure(relay);
    assert_eq!(health.get_state(relay), HealthState::Degraded);
    assert_eq!(health.get_failure_count(relay), 1);

    // Two further retries fail; the streak keeps counting up.
    for streak in 2..=3 {
        health.record_failure(relay);
        assert_eq!(health.get_failure_count(relay), streak);
    }

    // The relay finally reconnects and is fully restored.
    health.record_success(relay);
    assert_eq!(health.get_state(relay), HealthState::Healthy);
    assert_eq!(health.get_failure_count(relay), 0);
    assert!(health.should_attempt_connection(relay));
}
412
/// Checks the full backoff schedule against a one-hour cap: doubling from
/// 5s through failure 10, then pinned at 3600s for failures 11 and 12.
#[test]
fn test_backoff_timing_sequence() {
    // Cap of 3600s (1 hour).
    let cap_secs = 3600u64;

    // (failure count, expected backoff in seconds)
    let schedule = [
        (1, 5),
        (2, 10),
        (3, 20),
        (4, 40),
        (5, 80),
        (6, 160),    // ~2.7 min
        (7, 320),    // ~5.3 min
        (8, 640),    // ~10.7 min
        (9, 1280),   // ~21.3 min
        (10, 2560),  // ~42.7 min
        (11, 3600),  // first capped value (1 hour)
        (12, 3600),  // remains capped
    ];

    for (failures, want_secs) in schedule {
        let got = RelayHealthTracker::get_backoff_duration(failures, cap_secs);
        assert_eq!(
            got,
            Duration::from_secs(want_secs),
            "Failed for {} failures",
            failures
        );
    }
}
443
/// The timestamps stored in the health record fall between `Instant`s
/// sampled just before and just after the corresponding `record_*` call.
#[test]
fn test_timestamp_tracking() {
    let relay = "wss://test-relay.example.com";
    let health = RelayHealthTracker::with_defaults();

    // Bracket the success with two clock readings.
    let success_lo = Instant::now();
    health.record_success(relay);
    let success_hi = Instant::now();

    let success_time = health
        .get_health(relay)
        .unwrap()
        .last_success_time
        .unwrap();

    // The recorded success time lies inside the bracket.
    assert!(success_time >= success_lo && success_time <= success_hi);

    // Bracket the failure in the same way.
    let failure_lo = Instant::now();
    health.record_failure(relay);
    let failure_hi = Instant::now();

    let record = health.get_health(relay).unwrap();
    let failure_time = record.last_failure_time.unwrap();
    let first_failure = record.first_failure_time.unwrap();

    // Both the latest and the first failure time lie inside the bracket.
    assert!(failure_time >= failure_lo && failure_time <= failure_hi);
    assert!(first_failure >= failure_lo && first_failure <= failure_hi);
}