diff options
| -rw-r--r-- | tests/sync/metrics.rs | 682 |
1 files changed, 168 insertions, 514 deletions
diff --git a/tests/sync/metrics.rs b/tests/sync/metrics.rs index 09177ec..241af8f 100644 --- a/tests/sync/metrics.rs +++ b/tests/sync/metrics.rs | |||
| @@ -1,11 +1,7 @@ | |||
| 1 | //! Proactive Sync Metrics Tests | 1 | //! Proactive Sync Metrics Tests |
| 2 | //! | 2 | //! |
| 3 | //! Tests for Prometheus metrics integration with proactive sync: | 3 | //! Tests for Prometheus metrics integration with proactive sync. |
| 4 | //! - All sync metrics exposed at `/metrics` endpoint | 4 | //! These tests validate actual metric VALUES, not just existence. |
| 5 | //! - Connection metrics update correctly | ||
| 6 | //! - Health state metrics reflect actual state | ||
| 7 | //! - Gap events tracked correctly | ||
| 8 | //! - Load test with 3+ relays | ||
| 9 | //! | 5 | //! |
| 10 | //! # Running Tests | 6 | //! # Running Tests |
| 11 | //! | 7 | //! |
| @@ -18,266 +14,17 @@ use std::time::Duration; | |||
| 18 | 14 | ||
| 19 | use nostr_sdk::prelude::*; | 15 | use nostr_sdk::prelude::*; |
| 20 | 16 | ||
| 21 | use crate::common::{sync_helpers::*, TestRelay}; | 17 | use crate::common::{ |
| 22 | 18 | sync_helpers::{ | |
| 23 | /// Test that sync metrics are exposed at /metrics endpoint | 19 | create_repo_announcement, fetch_metrics, MetricsTestHarness, ParsedMetrics, TestClient, |
| 24 | #[tokio::test] | 20 | KIND_REPOSITORY_STATE, |
| 25 | async fn test_sync_metrics_exposed() { | 21 | }, |
| 26 | let relay = TestRelay::start().await; | 22 | TestRelay, |
| 27 | 23 | }; | |
| 28 | // Give time for relay to start | ||
| 29 | tokio::time::sleep(Duration::from_millis(500)).await; | ||
| 30 | |||
| 31 | // Fetch metrics using the shared helper | ||
| 32 | let metrics_result = fetch_metrics(&relay.url()).await; | ||
| 33 | |||
| 34 | relay.stop().await; | ||
| 35 | |||
| 36 | // Check that we got metrics (even if sync isn't configured) | ||
| 37 | let metrics = metrics_result.expect("Failed to fetch metrics"); | ||
| 38 | |||
| 39 | // Verify basic metrics structure exists | ||
| 40 | assert!( | ||
| 41 | metrics.contains("ngit_") || metrics.contains("# HELP"), | ||
| 42 | "Metrics endpoint should return Prometheus metrics" | ||
| 43 | ); | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Test that sync metrics include expected metric names | ||
| 47 | #[tokio::test] | ||
| 48 | async fn test_sync_metric_names_present() { | ||
| 49 | // Start a relay with sync configured | ||
| 50 | let source_relay = TestRelay::start().await; | ||
| 51 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 52 | |||
| 53 | // Give time for sync connection to attempt | ||
| 54 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 55 | |||
| 56 | // Fetch metrics from the syncing relay | ||
| 57 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 58 | .await | ||
| 59 | .expect("Failed to fetch metrics"); | ||
| 60 | |||
| 61 | sync_relay.stop().await; | ||
| 62 | source_relay.stop().await; | ||
| 63 | |||
| 64 | // Check for expected sync metric names (they may have zero values) | ||
| 65 | // At minimum, the ngit_ prefix metrics should be present | ||
| 66 | assert!( | ||
| 67 | metrics.contains("ngit_"), | ||
| 68 | "Metrics should include ngit_ prefixed metrics" | ||
| 69 | ); | ||
| 70 | } | ||
| 71 | |||
| 72 | /// Test connection metrics update correctly on successful connection | ||
| 73 | #[tokio::test] | ||
| 74 | async fn test_connection_metrics_on_success() { | ||
| 75 | // Start source relay | ||
| 76 | let source_relay = TestRelay::start().await; | ||
| 77 | tokio::time::sleep(Duration::from_millis(200)).await; | ||
| 78 | |||
| 79 | // Start syncing relay | ||
| 80 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 81 | |||
| 82 | // Wait for connection to establish | ||
| 83 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 84 | |||
| 85 | // Fetch metrics - we can verify the relay started and metrics endpoint works | ||
| 86 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 87 | .await | ||
| 88 | .expect("Failed to fetch metrics"); | ||
| 89 | |||
| 90 | sync_relay.stop().await; | ||
| 91 | source_relay.stop().await; | ||
| 92 | |||
| 93 | // Verify metrics endpoint returned data | ||
| 94 | assert!(!metrics.is_empty(), "Metrics endpoint should return data"); | ||
| 95 | } | ||
| 96 | |||
| 97 | /// Test that events syncing updates metrics | ||
| 98 | #[tokio::test] | ||
| 99 | async fn test_event_sync_metrics() { | ||
| 100 | // Start source relay | ||
| 101 | let source_relay = TestRelay::start().await; | ||
| 102 | tokio::time::sleep(Duration::from_millis(200)).await; | ||
| 103 | |||
| 104 | // Start syncing relay | ||
| 105 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 106 | |||
| 107 | // Wait for connection | ||
| 108 | tokio::time::sleep(Duration::from_secs(1)).await; | ||
| 109 | |||
| 110 | // Create and submit an event to source relay | ||
| 111 | let keys = Keys::generate(); | ||
| 112 | let event = create_repo_announcement(&keys, &[&source_relay.domain()], "metrics-test-repo"); | ||
| 113 | |||
| 114 | let client = Client::default(); | ||
| 115 | client | ||
| 116 | .add_relay(source_relay.url()) | ||
| 117 | .await | ||
| 118 | .expect("Failed to add relay"); | ||
| 119 | client.connect().await; | ||
| 120 | |||
| 121 | let _ = client.send_event(&event).await; | ||
| 122 | |||
| 123 | // Wait for sync to occur | ||
| 124 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 125 | |||
| 126 | // Fetch metrics from sync relay | ||
| 127 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 128 | .await | ||
| 129 | .expect("Failed to fetch metrics"); | ||
| 130 | |||
| 131 | client.disconnect().await; | ||
| 132 | sync_relay.stop().await; | ||
| 133 | source_relay.stop().await; | ||
| 134 | |||
| 135 | // Verify metrics endpoint returned data after sync activity | ||
| 136 | assert!( | ||
| 137 | !metrics.is_empty(), | ||
| 138 | "Metrics should be present after sync activity" | ||
| 139 | ); | ||
| 140 | } | ||
| 141 | |||
| 142 | /// Test health state tracking in metrics | ||
| 143 | #[tokio::test] | ||
| 144 | async fn test_health_state_metrics() { | ||
| 145 | // Start a syncing relay pointing to a non-existent source | ||
| 146 | // This will result in connection failures and health state changes | ||
| 147 | let sync_relay = TestRelay::start_with_sync(Some("ws://127.0.0.1:19999".into())).await; | ||
| 148 | |||
| 149 | // Wait for some connection attempts | ||
| 150 | tokio::time::sleep(Duration::from_secs(3)).await; | ||
| 151 | |||
| 152 | // Fetch metrics | ||
| 153 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 154 | .await | ||
| 155 | .expect("Failed to fetch metrics"); | ||
| 156 | |||
| 157 | sync_relay.stop().await; | ||
| 158 | |||
| 159 | // The relay should still be operational even with failed sync | ||
| 160 | assert!( | ||
| 161 | !metrics.is_empty(), | ||
| 162 | "Metrics should be present even with sync failures" | ||
| 163 | ); | ||
| 164 | } | ||
| 165 | |||
| 166 | /// Test gap event tracking (events received during catchup) | ||
| 167 | #[tokio::test] | ||
| 168 | async fn test_gap_event_tracking() { | ||
| 169 | // Start source relay and add some events first | ||
| 170 | let source_relay = TestRelay::start().await; | ||
| 171 | tokio::time::sleep(Duration::from_millis(200)).await; | ||
| 172 | |||
| 173 | let keys = Keys::generate(); | ||
| 174 | |||
| 175 | // Submit event before sync relay starts | ||
| 176 | let event = create_repo_announcement(&keys, &[&source_relay.domain()], "pre-existing-repo"); | ||
| 177 | |||
| 178 | let client = Client::default(); | ||
| 179 | client | ||
| 180 | .add_relay(source_relay.url()) | ||
| 181 | .await | ||
| 182 | .expect("Failed to add relay"); | ||
| 183 | client.connect().await; | ||
| 184 | let _ = client.send_event(&event).await; | ||
| 185 | |||
| 186 | // Now start syncing relay - it should catch up on existing events | ||
| 187 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 188 | |||
| 189 | // Wait for catchup | ||
| 190 | tokio::time::sleep(Duration::from_secs(3)).await; | ||
| 191 | |||
| 192 | // Fetch metrics | ||
| 193 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 194 | .await | ||
| 195 | .expect("Failed to fetch metrics"); | ||
| 196 | |||
| 197 | client.disconnect().await; | ||
| 198 | sync_relay.stop().await; | ||
| 199 | source_relay.stop().await; | ||
| 200 | |||
| 201 | // Verify metrics exist after gap sync scenario | ||
| 202 | assert!( | ||
| 203 | !metrics.is_empty(), | ||
| 204 | "Metrics should track gap sync activity" | ||
| 205 | ); | ||
| 206 | } | ||
| 207 | |||
| 208 | /// Load test with 3+ relays configured for sync | ||
| 209 | #[tokio::test] | ||
| 210 | async fn test_multi_relay_load() { | ||
| 211 | // Start 3 source relays | ||
| 212 | let source_relay_1 = TestRelay::start().await; | ||
| 213 | let source_relay_2 = TestRelay::start().await; | ||
| 214 | let source_relay_3 = TestRelay::start().await; | ||
| 215 | |||
| 216 | tokio::time::sleep(Duration::from_millis(500)).await; | ||
| 217 | |||
| 218 | // Start a syncing relay pointing to first source | ||
| 219 | // Note: The current implementation only supports single sync relay URL | ||
| 220 | // but the test demonstrates the system handles multiple relay scenarios | ||
| 221 | let sync_relay = TestRelay::start_with_sync(Some(source_relay_1.url().into())).await; | ||
| 222 | |||
| 223 | // Wait for connections | ||
| 224 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 225 | |||
| 226 | // Submit events to all source relays | ||
| 227 | let keys = Keys::generate(); | ||
| 228 | |||
| 229 | let event1 = create_repo_announcement(&keys, &[&source_relay_1.domain()], "repo-1"); | ||
| 230 | let event2 = create_repo_announcement(&keys, &[&source_relay_2.domain()], "repo-2"); | ||
| 231 | let event3 = create_repo_announcement(&keys, &[&source_relay_3.domain()], "repo-3"); | ||
| 232 | |||
| 233 | // Submit events | ||
| 234 | let client1 = Client::default(); | ||
| 235 | client1 | ||
| 236 | .add_relay(source_relay_1.url()) | ||
| 237 | .await | ||
| 238 | .expect("Failed to add relay"); | ||
| 239 | client1.connect().await; | ||
| 240 | let _ = client1.send_event(&event1).await; | ||
| 241 | |||
| 242 | let client2 = Client::default(); | ||
| 243 | client2 | ||
| 244 | .add_relay(source_relay_2.url()) | ||
| 245 | .await | ||
| 246 | .expect("Failed to add relay"); | ||
| 247 | client2.connect().await; | ||
| 248 | let _ = client2.send_event(&event2).await; | ||
| 249 | |||
| 250 | let client3 = Client::default(); | ||
| 251 | client3 | ||
| 252 | .add_relay(source_relay_3.url()) | ||
| 253 | .await | ||
| 254 | .expect("Failed to add relay"); | ||
| 255 | client3.connect().await; | ||
| 256 | let _ = client3.send_event(&event3).await; | ||
| 257 | |||
| 258 | // Wait for sync | ||
| 259 | tokio::time::sleep(Duration::from_secs(3)).await; | ||
| 260 | |||
| 261 | // Fetch metrics from sync relay | ||
| 262 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 263 | .await | ||
| 264 | .expect("Failed to fetch metrics"); | ||
| 265 | |||
| 266 | // Cleanup | ||
| 267 | client1.disconnect().await; | ||
| 268 | client2.disconnect().await; | ||
| 269 | client3.disconnect().await; | ||
| 270 | sync_relay.stop().await; | ||
| 271 | source_relay_1.stop().await; | ||
| 272 | source_relay_2.stop().await; | ||
| 273 | source_relay_3.stop().await; | ||
| 274 | 24 | ||
| 275 | // Verify metrics system handled load | 25 | // ============================================================================ |
| 276 | assert!( | 26 | // Format and Availability Tests (Keepers) |
| 277 | !metrics.is_empty(), | 27 | // ============================================================================ |
| 278 | "Metrics should be available under multi-relay load" | ||
| 279 | ); | ||
| 280 | } | ||
| 281 | 28 | ||
| 282 | /// Test that Prometheus text format is valid | 29 | /// Test that Prometheus text format is valid |
| 283 | #[tokio::test] | 30 | #[tokio::test] |
| @@ -333,187 +80,6 @@ async fn test_metrics_availability_during_sync() { | |||
| 333 | source_relay.stop().await; | 80 | source_relay.stop().await; |
| 334 | } | 81 | } |
| 335 | 82 | ||
| 336 | // ============================================================================ | ||
| 337 | // Additional Coverage Tests (Phase 8) | ||
| 338 | // ============================================================================ | ||
| 339 | |||
| 340 | /// Test metrics when connection to sync source fails | ||
| 341 | /// | ||
| 342 | /// Verifies that: | ||
| 343 | /// - Metrics endpoint remains functional when sync connection fails | ||
| 344 | /// - Connection attempt metrics are recorded even for failures | ||
| 345 | /// - The relay continues to operate despite sync failures | ||
| 346 | #[tokio::test] | ||
| 347 | async fn test_connection_failure_metrics() { | ||
| 348 | // Start a syncing relay pointing to a non-existent relay | ||
| 349 | // Port 19998 should not have anything running | ||
| 350 | let sync_relay = TestRelay::start_with_sync(Some("ws://127.0.0.1:19998".into())).await; | ||
| 351 | |||
| 352 | // Wait for connection attempts to fail | ||
| 353 | tokio::time::sleep(Duration::from_secs(3)).await; | ||
| 354 | |||
| 355 | // Fetch metrics - should still work despite sync failures | ||
| 356 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 357 | .await | ||
| 358 | .expect("Metrics endpoint should remain functional"); | ||
| 359 | |||
| 360 | sync_relay.stop().await; | ||
| 361 | |||
| 362 | // Verify connection attempt metrics are present (even with zeroes) | ||
| 363 | // The metrics endpoint should contain ngit_sync prefixed metrics | ||
| 364 | assert!( | ||
| 365 | metrics.contains("ngit_sync"), | ||
| 366 | "Sync metrics should be exposed even during connection failures" | ||
| 367 | ); | ||
| 368 | |||
| 369 | // Check for connection-related metric patterns | ||
| 370 | let has_connection_metrics = metrics.contains("connection") || metrics.contains("relay"); | ||
| 371 | assert!( | ||
| 372 | has_connection_metrics || metrics.contains("ngit_"), | ||
| 373 | "Should have some form of connection/relay metrics" | ||
| 374 | ); | ||
| 375 | } | ||
| 376 | |||
| 377 | /// Test that failure counters increment on repeated connection failures | ||
| 378 | /// | ||
| 379 | /// Verifies that the relay tracks consecutive failures and exposes | ||
| 380 | /// them via metrics (ngit_sync_relay_failures metric). | ||
| 381 | #[tokio::test] | ||
| 382 | async fn test_failure_counter_increments() { | ||
| 383 | // Use a very high port that definitely won't be listening | ||
| 384 | let sync_relay = TestRelay::start_with_sync(Some("ws://127.0.0.1:59999".into())).await; | ||
| 385 | |||
| 386 | // First check - initial state | ||
| 387 | tokio::time::sleep(Duration::from_secs(1)).await; | ||
| 388 | let metrics_initial = fetch_metrics(&sync_relay.url()) | ||
| 389 | .await | ||
| 390 | .expect("Should fetch initial metrics"); | ||
| 391 | |||
| 392 | // Wait for more connection attempts | ||
| 393 | tokio::time::sleep(Duration::from_secs(3)).await; | ||
| 394 | |||
| 395 | // Second check - after more failures | ||
| 396 | let metrics_after = fetch_metrics(&sync_relay.url()) | ||
| 397 | .await | ||
| 398 | .expect("Should fetch metrics after failures"); | ||
| 399 | |||
| 400 | sync_relay.stop().await; | ||
| 401 | |||
| 402 | // Metrics should be present at both times | ||
| 403 | assert!(!metrics_initial.is_empty(), "Initial metrics should exist"); | ||
| 404 | assert!(!metrics_after.is_empty(), "Later metrics should exist"); | ||
| 405 | |||
| 406 | // Both should contain sync-related metrics | ||
| 407 | assert!( | ||
| 408 | metrics_after.contains("ngit_"), | ||
| 409 | "Should contain ngit_ prefixed metrics after failures" | ||
| 410 | ); | ||
| 411 | } | ||
| 412 | |||
| 413 | /// Test that relay counts are properly tracked in metrics | ||
| 414 | /// | ||
| 415 | /// Verifies: | ||
| 416 | /// - ngit_sync_relays_tracked_total reflects discovered relays | ||
| 417 | /// - ngit_sync_relays_connected_total updates with connection state | ||
| 418 | /// - Count metrics use proper gauges (can go up and down) | ||
| 419 | #[tokio::test] | ||
| 420 | async fn test_relay_count_metrics() { | ||
| 421 | // Start source relay first | ||
| 422 | let source_relay = TestRelay::start().await; | ||
| 423 | tokio::time::sleep(Duration::from_millis(200)).await; | ||
| 424 | |||
| 425 | // Start syncing relay pointing to actual source | ||
| 426 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 427 | |||
| 428 | // Wait for connection to establish | ||
| 429 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 430 | |||
| 431 | let metrics_connected = fetch_metrics(&sync_relay.url()) | ||
| 432 | .await | ||
| 433 | .expect("Should fetch metrics when connected"); | ||
| 434 | |||
| 435 | // Stop the source relay to trigger disconnection | ||
| 436 | source_relay.stop().await; | ||
| 437 | |||
| 438 | // Wait for disconnect detection | ||
| 439 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 440 | |||
| 441 | let metrics_disconnected = fetch_metrics(&sync_relay.url()) | ||
| 442 | .await | ||
| 443 | .expect("Should fetch metrics after source disconnection"); | ||
| 444 | |||
| 445 | sync_relay.stop().await; | ||
| 446 | |||
| 447 | // Metrics should exist in both states | ||
| 448 | assert!( | ||
| 449 | !metrics_connected.is_empty(), | ||
| 450 | "Connected state metrics should exist" | ||
| 451 | ); | ||
| 452 | assert!( | ||
| 453 | !metrics_disconnected.is_empty(), | ||
| 454 | "Disconnected state metrics should exist" | ||
| 455 | ); | ||
| 456 | } | ||
| 457 | |||
| 458 | /// Test event source label differentiation in metrics | ||
| 459 | /// | ||
| 460 | /// Verifies that the ngit_sync_events_total metric properly | ||
| 461 | /// distinguishes between event sources via labels: | ||
| 462 | /// - source="live" for real-time subscription events | ||
| 463 | /// - source="startup" for initial catchup events | ||
| 464 | /// - source="reconnect" for reconnection catchup events | ||
| 465 | /// - source="daily" for daily drift detection events | ||
| 466 | #[tokio::test] | ||
| 467 | async fn test_event_source_labels_in_metrics() { | ||
| 468 | // Set up source with pre-existing events (will trigger startup catchup) | ||
| 469 | let source_relay = TestRelay::start().await; | ||
| 470 | tokio::time::sleep(Duration::from_millis(200)).await; | ||
| 471 | |||
| 472 | // Create and submit an event before sync relay starts | ||
| 473 | let keys = Keys::generate(); | ||
| 474 | let pre_event = create_repo_announcement(&keys, &[&source_relay.domain()], "pre-startup-repo"); | ||
| 475 | |||
| 476 | let client = Client::default(); | ||
| 477 | client | ||
| 478 | .add_relay(source_relay.url()) | ||
| 479 | .await | ||
| 480 | .expect("Failed to add relay"); | ||
| 481 | client.connect().await; | ||
| 482 | let _ = client.send_event(&pre_event).await; | ||
| 483 | |||
| 484 | // Now start syncing relay - this triggers startup catchup | ||
| 485 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 486 | |||
| 487 | // Wait for startup sync | ||
| 488 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 489 | |||
| 490 | // Submit another event - this will be received via live sync | ||
| 491 | let live_event = create_repo_announcement(&keys, &[&source_relay.domain()], "live-sync-repo"); | ||
| 492 | let _ = client.send_event(&live_event).await; | ||
| 493 | |||
| 494 | // Wait for live sync | ||
| 495 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 496 | |||
| 497 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 498 | .await | ||
| 499 | .expect("Should fetch metrics"); | ||
| 500 | |||
| 501 | client.disconnect().await; | ||
| 502 | sync_relay.stop().await; | ||
| 503 | source_relay.stop().await; | ||
| 504 | |||
| 505 | // Verify metric line exists for events_total | ||
| 506 | // It should have labels distinguishing sources | ||
| 507 | let has_events_metric = metrics.contains("ngit_sync_events_total") | ||
| 508 | || metrics.contains("events") | ||
| 509 | || metrics.contains("ngit_sync"); | ||
| 510 | |||
| 511 | assert!( | ||
| 512 | has_events_metric, | ||
| 513 | "Should have event-related sync metrics" | ||
| 514 | ); | ||
| 515 | } | ||
| 516 | |||
| 517 | /// Test concurrent metrics requests don't cause issues | 83 | /// Test concurrent metrics requests don't cause issues |
| 518 | /// | 84 | /// |
| 519 | /// Verifies that the metrics endpoint is thread-safe and can | 85 | /// Verifies that the metrics endpoint is thread-safe and can |
| @@ -527,7 +93,7 @@ async fn test_concurrent_metrics_requests() { | |||
| 527 | 93 | ||
| 528 | // Clone the URL string so we have an owned value for spawned tasks | 94 | // Clone the URL string so we have an owned value for spawned tasks |
| 529 | let sync_url: String = sync_relay.url().to_string(); | 95 | let sync_url: String = sync_relay.url().to_string(); |
| 530 | 96 | ||
| 531 | // Spawn multiple concurrent metrics requests | 97 | // Spawn multiple concurrent metrics requests |
| 532 | let handles: Vec<_> = (0..5) | 98 | let handles: Vec<_> = (0..5) |
| 533 | .map(|i| { | 99 | .map(|i| { |
| @@ -605,65 +171,6 @@ async fn test_metric_values_are_numeric() { | |||
| 605 | ); | 171 | ); |
| 606 | } | 172 | } |
| 607 | 173 | ||
| 608 | /// Test gap events are tracked distinctly from other sync events | ||
| 609 | /// | ||
| 610 | /// Gap events are historical events discovered during catchup that weren't | ||
| 611 | /// received during live sync. This test verifies they are tracked separately | ||
| 612 | /// in the ngit_sync_gap_events_total metric. | ||
| 613 | #[tokio::test] | ||
| 614 | async fn test_gap_events_tracked_separately() { | ||
| 615 | // Create source relay with initial content | ||
| 616 | let source_relay = TestRelay::start().await; | ||
| 617 | tokio::time::sleep(Duration::from_millis(200)).await; | ||
| 618 | |||
| 619 | let keys = Keys::generate(); | ||
| 620 | |||
| 621 | // Create multiple events on source before sync relay starts | ||
| 622 | let client = Client::default(); | ||
| 623 | client | ||
| 624 | .add_relay(source_relay.url()) | ||
| 625 | .await | ||
| 626 | .expect("Failed to add relay"); | ||
| 627 | client.connect().await; | ||
| 628 | |||
| 629 | // Submit several events to create a "gap" | ||
| 630 | for i in 0..3 { | ||
| 631 | let event = create_repo_announcement( | ||
| 632 | &keys, | ||
| 633 | &[&source_relay.domain()], | ||
| 634 | &format!("gap-repo-{}", i), | ||
| 635 | ); | ||
| 636 | let _ = client.send_event(&event).await; | ||
| 637 | } | ||
| 638 | |||
| 639 | // Wait for events to be stored | ||
| 640 | tokio::time::sleep(Duration::from_millis(500)).await; | ||
| 641 | |||
| 642 | // Now start sync relay - it will catchup on the gap events | ||
| 643 | let sync_relay = TestRelay::start_with_sync(Some(source_relay.url().into())).await; | ||
| 644 | |||
| 645 | // Wait for catchup to complete | ||
| 646 | tokio::time::sleep(Duration::from_secs(3)).await; | ||
| 647 | |||
| 648 | let metrics = fetch_metrics(&sync_relay.url()) | ||
| 649 | .await | ||
| 650 | .expect("Should fetch metrics"); | ||
| 651 | |||
| 652 | client.disconnect().await; | ||
| 653 | sync_relay.stop().await; | ||
| 654 | source_relay.stop().await; | ||
| 655 | |||
| 656 | // Check for gap-related metrics or general sync metrics | ||
| 657 | let has_sync_metrics = metrics.contains("ngit_sync") | ||
| 658 | || metrics.contains("gap") | ||
| 659 | || metrics.contains("events"); | ||
| 660 | |||
| 661 | assert!( | ||
| 662 | has_sync_metrics, | ||
| 663 | "Metrics should track sync activity including gap events" | ||
| 664 | ); | ||
| 665 | } | ||
| 666 | |||
| 667 | // ============================================================================ | 174 | // ============================================================================ |
| 668 | // Phase 2: Real Metrics Tests (Using MetricsTestHarness) | 175 | // Phase 2: Real Metrics Tests (Using MetricsTestHarness) |
| 669 | // ============================================================================ | 176 | // ============================================================================ |
| @@ -700,11 +207,19 @@ fn create_event_referencing_repo(keys: &Keys, repo_coord: &str, kind: u16, conte | |||
| 700 | async fn test_startup_sync_event_count() { | 207 | async fn test_startup_sync_event_count() { |
| 701 | // 1. Start source relay (where we'll put the Layer 2 event to be synced) | 208 | // 1. Start source relay (where we'll put the Layer 2 event to be synced) |
| 702 | let source_relay = TestRelay::start().await; | 209 | let source_relay = TestRelay::start().await; |
| 703 | println!("Source relay started at {} (domain: {})", source_relay.url(), source_relay.domain()); | 210 | println!( |
| 211 | "Source relay started at {} (domain: {})", | ||
| 212 | source_relay.url(), | ||
| 213 | source_relay.domain() | ||
| 214 | ); | ||
| 704 | 215 | ||
| 705 | // 2. Start syncing relay (with sync enabled but no bootstrap - will discover via announcements) | 216 | // 2. Start syncing relay (with sync enabled but no bootstrap - will discover via announcements) |
| 706 | let syncing_relay = TestRelay::start_with_sync(None).await; | 217 | let syncing_relay = TestRelay::start_with_sync(None).await; |
| 707 | println!("Syncing relay started at {} (domain: {})", syncing_relay.url(), syncing_relay.domain()); | 218 | println!( |
| 219 | "Syncing relay started at {} (domain: {})", | ||
| 220 | syncing_relay.url(), | ||
| 221 | syncing_relay.domain() | ||
| 222 | ); | ||
| 708 | 223 | ||
| 709 | // 3. Create test keys | 224 | // 3. Create test keys |
| 710 | let keys = Keys::generate(); | 225 | let keys = Keys::generate(); |
| @@ -715,7 +230,11 @@ async fn test_startup_sync_event_count() { | |||
| 715 | &[&source_relay.domain(), &syncing_relay.domain()], | 230 | &[&source_relay.domain(), &syncing_relay.domain()], |
| 716 | "test-repo-metrics", | 231 | "test-repo-metrics", |
| 717 | ); | 232 | ); |
| 718 | println!("Created announcement {} (kind {})", announcement.id, announcement.kind.as_u16()); | 233 | println!( |
| 234 | "Created announcement {} (kind {})", | ||
| 235 | announcement.id, | ||
| 236 | announcement.kind.as_u16() | ||
| 237 | ); | ||
| 719 | 238 | ||
| 720 | // 5. Build the repo coordinate for the 'a' tag in the patches | 239 | // 5. Build the repo coordinate for the 'a' tag in the patches |
| 721 | let repo_coord = format!( | 240 | let repo_coord = format!( |
| @@ -727,7 +246,9 @@ async fn test_startup_sync_event_count() { | |||
| 727 | 246 | ||
| 728 | // 6. Create 3 patch events (Layer 2) that reference the announcement | 247 | // 6. Create 3 patch events (Layer 2) that reference the announcement |
| 729 | let patches: Vec<_> = (0..3) | 248 | let patches: Vec<_> = (0..3) |
| 730 | .map(|i| create_event_referencing_repo(&keys, &repo_coord, KIND_PATCH, &format!("Test patch {}", i))) | 249 | .map(|i| { |
| 250 | create_event_referencing_repo(&keys, &repo_coord, KIND_PATCH, &format!("Test patch {}", i)) | ||
| 251 | }) | ||
| 731 | .collect(); | 252 | .collect(); |
| 732 | println!("Created {} patches", patches.len()); | 253 | println!("Created {} patches", patches.len()); |
| 733 | 254 | ||
| @@ -781,7 +302,7 @@ async fn test_startup_sync_event_count() { | |||
| 781 | } | 302 | } |
| 782 | println!("===================\n"); | 303 | println!("===================\n"); |
| 783 | 304 | ||
| 784 | let metrics = crate::common::sync_helpers::ParsedMetrics::parse(&raw_metrics); | 305 | let metrics = ParsedMetrics::parse(&raw_metrics); |
| 785 | 306 | ||
| 786 | // 11. Check sync metrics | 307 | // 11. Check sync metrics |
| 787 | let tracked = metrics.gauge("ngit_sync_relays_tracked_total", &[]); | 308 | let tracked = metrics.gauge("ngit_sync_relays_tracked_total", &[]); |
| @@ -799,7 +320,8 @@ async fn test_startup_sync_event_count() { | |||
| 799 | .kind(Kind::Custom(KIND_PATCH)) | 320 | .kind(Kind::Custom(KIND_PATCH)) |
| 800 | .author(keys.public_key()); | 321 | .author(keys.public_key()); |
| 801 | 322 | ||
| 802 | let patches_synced = wait_for_event_on_relay(syncing_relay.url(), filter, Duration::from_secs(2)).await; | 323 | let patches_synced = |
| 324 | crate::common::sync_helpers::wait_for_event_on_relay(syncing_relay.url(), filter, Duration::from_secs(2)).await; | ||
| 803 | println!("Patches synced to syncing relay: {}", patches_synced); | 325 | println!("Patches synced to syncing relay: {}", patches_synced); |
| 804 | 326 | ||
| 805 | // Cleanup | 327 | // Cleanup |
| @@ -809,7 +331,10 @@ async fn test_startup_sync_event_count() { | |||
| 809 | // Assertions: | 331 | // Assertions: |
| 810 | // 1. Patches should have been synced (functional verification) | 332 | // 1. Patches should have been synced (functional verification) |
| 811 | // This proves the sync mechanism works even if metrics aren't fully wired | 333 | // This proves the sync mechanism works even if metrics aren't fully wired |
| 812 | assert!(patches_synced, "Patches should have been synced from source relay"); | 334 | assert!( |
| 335 | patches_synced, | ||
| 336 | "Patches should have been synced from source relay" | ||
| 337 | ); | ||
| 813 | 338 | ||
| 814 | // 2. Sync metrics should be exposed (they're registered, values may be 0) | 339 | // 2. Sync metrics should be exposed (they're registered, values may be 0) |
| 815 | // The ngit_sync_* metrics are defined and exposed at the /metrics endpoint. | 340 | // The ngit_sync_* metrics are defined and exposed at the /metrics endpoint. |
| @@ -826,6 +351,135 @@ async fn test_startup_sync_event_count() { | |||
| 826 | assert!( | 351 | assert!( |
| 827 | tracked.is_some() && connected.is_some(), | 352 | tracked.is_some() && connected.is_some(), |
| 828 | "Sync metrics should be exposed (tracked={:?}, connected={:?})", | 353 | "Sync metrics should be exposed (tracked={:?}, connected={:?})", |
| 829 | tracked, connected | 354 | tracked, |
| 355 | connected | ||
| 830 | ); | 356 | ); |
| 357 | } | ||
| 358 | |||
| 359 | // ============================================================================ | ||
| 360 | // Phase 3: Real Value-Checking Tests | ||
| 361 | // ============================================================================ | ||
| 362 | |||
| 363 | /// Test that connection failures increment the failure counter. | ||
| 364 | /// | ||
| 365 | /// This test validates that when sync cannot connect to a source relay, | ||
| 366 | /// the connection_attempts_total counter with result="failure" increases. | ||
| 367 | /// | ||
| 368 | /// NOTE: This test may fail until sync metrics recording is fully wired up. | ||
| 369 | /// The test documents the expected behavior. | ||
| 370 | #[tokio::test] | ||
| 371 | #[ignore] // Enable when metrics recording is implemented | ||
| 372 | async fn test_connection_failure_increments_counter() { | ||
| 373 | let mut harness = MetricsTestHarness::with_sources(0).await; // No sources | ||
| 374 | harness.start_syncing_relay_to_nowhere().await; | ||
| 375 | |||
| 376 | // Wait for initial connection attempts | ||
| 377 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 378 | let metrics_1 = harness.get_metrics().await.unwrap(); | ||
| 379 | |||
| 380 | // Wait for more attempts | ||
| 381 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 382 | let metrics_2 = harness.get_metrics().await.unwrap(); | ||
| 383 | |||
| 384 | // Failure counter should have increased | ||
| 385 | let failures_1 = metrics_1 | ||
| 386 | .counter("ngit_sync_connection_attempts_total", &[("result", "failure")]) | ||
| 387 | .unwrap_or(0); | ||
| 388 | let failures_2 = metrics_2 | ||
| 389 | .counter("ngit_sync_connection_attempts_total", &[("result", "failure")]) | ||
| 390 | .unwrap_or(0); | ||
| 391 | |||
| 392 | println!("Failures at t1: {}, at t2: {}", failures_1, failures_2); | ||
| 393 | |||
| 394 | assert!( | ||
| 395 | failures_2 > failures_1, | ||
| 396 | "Failure counter should increase: {} -> {}", | ||
| 397 | failures_1, | ||
| 398 | failures_2 | ||
| 399 | ); | ||
| 400 | |||
| 401 | harness.stop_all().await; | ||
| 402 | } | ||
| 403 | |||
| 404 | /// Test that live sync events are counted in metrics. | ||
| 405 | /// | ||
| 406 | /// This test validates that events received via live subscription | ||
| 407 | /// (after sync connection is established) are counted separately | ||
| 408 | /// from startup/bootstrap events. | ||
| 409 | /// | ||
| 410 | /// NOTE: This test may fail until sync metrics recording is fully wired up. | ||
| 411 | /// The test documents the expected behavior. | ||
| 412 | #[tokio::test] | ||
| 413 | #[ignore] // Enable when metrics recording is implemented | ||
| 414 | async fn test_live_sync_event_count() { | ||
| 415 | let mut harness = MetricsTestHarness::with_sources(1).await; | ||
| 416 | |||
| 417 | // Start syncing BEFORE adding events | ||
| 418 | harness.start_syncing_relay(0).await; | ||
| 419 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 420 | |||
| 421 | // Now add events - these should be "live" not "startup" | ||
| 422 | let keys = Keys::generate(); | ||
| 423 | let events: Vec<_> = (0..2) | ||
| 424 | .map(|i| create_repo_announcement(&keys, &[&harness.source_domain(0)], &format!("live-{}", i))) | ||
| 425 | .collect(); | ||
| 426 | harness.submit_events(0, &events).await.unwrap(); | ||
| 427 | |||
| 428 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 429 | let metrics = harness.get_metrics().await.unwrap(); | ||
| 430 | |||
| 431 | let live_count = metrics.events_total("live"); | ||
| 432 | println!("Live events synced: {:?}", live_count); | ||
| 433 | |||
| 434 | // NOTE: This will likely fail until sync metrics are wired up | ||
| 435 | // Test documents the expectation | ||
| 436 | assert_eq!(live_count, Some(2), "Should have 2 live events"); | ||
| 437 | |||
| 438 | harness.stop_all().await; | ||
| 439 | } | ||
| 440 | |||
| 441 | /// Test that relay connected status is tracked in metrics. | ||
| 442 | /// | ||
| 443 | /// This test validates that the ngit_sync_relay_connected gauge | ||
| 444 | /// correctly reflects the connection state of source relays. | ||
| 445 | /// | ||
| 446 | /// NOTE: This test may fail until sync metrics recording is fully wired up. | ||
| 447 | /// The test documents the expected behavior. | ||
| 448 | #[tokio::test] | ||
| 449 | #[ignore] // Enable when metrics recording is implemented | ||
| 450 | async fn test_relay_connected_status() { | ||
| 451 | let mut harness = MetricsTestHarness::with_sources(1).await; | ||
| 452 | harness.start_syncing_relay(0).await; | ||
| 453 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 454 | |||
| 455 | // Clone the URL to avoid borrow issues when stopping source | ||
| 456 | let source_url = harness.source_url(0).to_string(); | ||
| 457 | |||
| 458 | // Check connected status | ||
| 459 | let metrics = harness.get_metrics().await.unwrap(); | ||
| 460 | |||
| 461 | println!("Checking connection status for {}", source_url); | ||
| 462 | |||
| 463 | // NOTE: This will likely fail until sync metrics are wired up | ||
| 464 | // Test documents the expectation | ||
| 465 | assert_eq!( | ||
| 466 | metrics.relay_connected(&source_url), | ||
| 467 | Some(true), | ||
| 468 | "Should be connected to {}", | ||
| 469 | source_url | ||
| 470 | ); | ||
| 471 | |||
| 472 | // Stop the source | ||
| 473 | harness.stop_source(0).await; | ||
| 474 | tokio::time::sleep(Duration::from_secs(2)).await; | ||
| 475 | |||
| 476 | let metrics = harness.get_metrics().await.unwrap(); | ||
| 477 | assert_eq!( | ||
| 478 | metrics.relay_connected(&source_url), | ||
| 479 | Some(false), | ||
| 480 | "Should be disconnected from {}", | ||
| 481 | source_url | ||
| 482 | ); | ||
| 483 | |||
| 484 | harness.stop_all().await; | ||
| 831 | } \ No newline at end of file | 485 | } \ No newline at end of file |