diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2025-12-19 15:53:48 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2025-12-19 15:59:23 +0000 |
| commit | 02a90c109d4d08c6a54184f821c100f4eba92545 (patch) | |
| tree | a8c07978d5a7c58e2776cf057cc89e1233de1eee /src/sync/metrics.rs | |
| parent | 565715adf14cafd0f0155d553f583581334a8dac (diff) | |
Simplify sync metrics to track only newly saved events
Replace broken event counting that occurred before duplicate/policy checks
with accurate tracking of events that are new, accepted, and saved.
Changes:
- Added ProcessResult enum to track event processing outcomes
- Modified process_event_static() to return ProcessResult
- Replaced events_total (with source labels) with events_synced_total
- Removed gap_events_total and event_source module
- Removed eose_received flag (EOSE is per-subscription, not suitable)
- Updated all tests to use new simplified API
The new ngit_sync_events_synced_total metric only counts events that:
1. Are new (not duplicates)
2. Pass write policy validation
3. Are successfully saved to database
All 165 tests pass (124 lib + 41 integration)
Diffstat (limited to 'src/sync/metrics.rs')
| -rw-r--r-- | src/sync/metrics.rs | 128 |
1 files changed, 19 insertions, 109 deletions
diff --git a/src/sync/metrics.rs b/src/sync/metrics.rs index c3bebfc..22c9192 100644 --- a/src/sync/metrics.rs +++ b/src/sync/metrics.rs | |||
| @@ -3,12 +3,11 @@ | |||
| 3 | //! This module provides comprehensive sync monitoring metrics including: | 3 | //! This module provides comprehensive sync monitoring metrics including: |
| 4 | //! - Connection status and attempts per relay | 4 | //! - Connection status and attempts per relay |
| 5 | //! - Health state tracking (Healthy/Degraded/Dead) | 5 | //! - Health state tracking (Healthy/Degraded/Dead) |
| 6 | //! - Event sync tracking by source (live/startup/reconnect/daily catchup) | 6 | //! - Event sync tracking (only newly saved events) |
| 7 | //! - Gap events filled during catchup operations | ||
| 8 | //! | 7 | //! |
| 9 | //! All metrics follow the `ngit_sync_` prefix convention. | 8 | //! All metrics follow the `ngit_sync_` prefix convention. |
| 10 | 9 | ||
| 11 | use prometheus::{IntCounterVec, IntGauge, IntGaugeVec, Opts, Registry}; | 10 | use prometheus::{IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Opts, Registry}; |
| 12 | 11 | ||
| 13 | use super::health::HealthState; | 12 | use super::health::HealthState; |
| 14 | 13 | ||
| @@ -31,10 +30,8 @@ pub struct SyncMetrics { | |||
| 31 | relay_failures: IntGaugeVec, | 30 | relay_failures: IntGaugeVec, |
| 32 | 31 | ||
| 33 | // === Event metrics === | 32 | // === Event metrics === |
| 34 | /// Events synced by source (live/startup/reconnect/daily) | 33 | /// Total events synced (newly saved events only) |
| 35 | events_total: IntCounterVec, | 34 | events_synced_total: IntCounter, |
| 36 | /// Gap events filled during catchup, by relay | ||
| 37 | gap_events_total: IntCounterVec, | ||
| 38 | 35 | ||
| 39 | // === Summary metrics === | 36 | // === Summary metrics === |
| 40 | /// Total relays discovered and tracked | 37 | /// Total relays discovered and tracked |
| @@ -91,23 +88,11 @@ impl SyncMetrics { | |||
| 91 | registry.register(Box::new(relay_failures.clone()))?; | 88 | registry.register(Box::new(relay_failures.clone()))?; |
| 92 | 89 | ||
| 93 | // Event metrics | 90 | // Event metrics |
| 94 | let events_total = IntCounterVec::new( | 91 | let events_synced_total = IntCounter::with_opts(Opts::new( |
| 95 | Opts::new( | 92 | "ngit_sync_events_synced_total", |
| 96 | "ngit_sync_events_total", | 93 | "Total events synced (newly saved events only)", |
| 97 | "Total events synced by source type", | 94 | ))?; |
| 98 | ), | 95 | registry.register(Box::new(events_synced_total.clone()))?; |
| 99 | &["source"], | ||
| 100 | )?; | ||
| 101 | registry.register(Box::new(events_total.clone()))?; | ||
| 102 | |||
| 103 | let gap_events_total = IntCounterVec::new( | ||
| 104 | Opts::new( | ||
| 105 | "ngit_sync_gap_events_total", | ||
| 106 | "Gap events filled during catchup by relay", | ||
| 107 | ), | ||
| 108 | &["relay"], | ||
| 109 | )?; | ||
| 110 | registry.register(Box::new(gap_events_total.clone()))?; | ||
| 111 | 96 | ||
| 112 | // Summary metrics | 97 | // Summary metrics |
| 113 | let relays_tracked_total = IntGauge::with_opts(Opts::new( | 98 | let relays_tracked_total = IntGauge::with_opts(Opts::new( |
| @@ -133,8 +118,7 @@ impl SyncMetrics { | |||
| 133 | connection_attempts_total, | 118 | connection_attempts_total, |
| 134 | relay_status, | 119 | relay_status, |
| 135 | relay_failures, | 120 | relay_failures, |
| 136 | events_total, | 121 | events_synced_total, |
| 137 | gap_events_total, | ||
| 138 | relays_tracked_total, | 122 | relays_tracked_total, |
| 139 | relays_connected_total, | 123 | relays_connected_total, |
| 140 | relays_dead_total, | 124 | relays_dead_total, |
| @@ -242,51 +226,12 @@ impl SyncMetrics { | |||
| 242 | 226 | ||
| 243 | // === Event Recording Methods === | 227 | // === Event Recording Methods === |
| 244 | 228 | ||
| 245 | /// Record a synced event by source type. | 229 | /// Record a successfully synced event (newly saved to database). |
| 246 | /// | ||
| 247 | /// # Arguments | ||
| 248 | /// | ||
| 249 | /// * `source` - The event source type. Use constants from [`event_source`]: | ||
| 250 | /// - [`event_source::LIVE`] - Real-time subscription events | ||
| 251 | /// - [`event_source::STARTUP`] - Events from startup catchup | ||
| 252 | /// - [`event_source::RECONNECT`] - Events from reconnection catchup | ||
| 253 | /// - [`event_source::DAILY`] - Events from daily catchup | ||
| 254 | pub fn record_event(&self, source: &str) { | ||
| 255 | self.events_total.with_label_values(&[source]).inc(); | ||
| 256 | } | ||
| 257 | |||
| 258 | /// Record multiple events synced by source type. | ||
| 259 | /// | ||
| 260 | /// # Arguments | ||
| 261 | /// | 230 | /// |
| 262 | /// * `source` - The event source type (see [`record_event`](Self::record_event)) | 231 | /// Only events that are new AND pass write policy should be counted. |
| 263 | /// * `count` - Number of events to record | 232 | /// Duplicates and rejected events are not counted. |
| 264 | pub fn record_events(&self, source: &str, count: u64) { | 233 | pub fn record_synced_event(&self) { |
| 265 | self.events_total.with_label_values(&[source]).inc_by(count); | 234 | self.events_synced_total.inc(); |
| 266 | } | ||
| 267 | |||
| 268 | /// Record a gap event filled during catchup. | ||
| 269 | /// | ||
| 270 | /// Gap events are historical events discovered during catchup that weren't | ||
| 271 | /// received during live sync. | ||
| 272 | /// | ||
| 273 | /// # Arguments | ||
| 274 | /// | ||
| 275 | /// * `relay` - The relay URL from which the gap event was received | ||
| 276 | pub fn record_gap_event(&self, relay: &str) { | ||
| 277 | self.gap_events_total.with_label_values(&[relay]).inc(); | ||
| 278 | } | ||
| 279 | |||
| 280 | /// Record multiple gap events filled during catchup. | ||
| 281 | /// | ||
| 282 | /// # Arguments | ||
| 283 | /// | ||
| 284 | /// * `relay` - The relay URL from which the gap events were received | ||
| 285 | /// * `count` - Number of gap events to record | ||
| 286 | pub fn record_gap_events(&self, relay: &str, count: u64) { | ||
| 287 | self.gap_events_total | ||
| 288 | .with_label_values(&[relay]) | ||
| 289 | .inc_by(count); | ||
| 290 | } | 235 | } |
| 291 | 236 | ||
| 292 | // === Summary Recording Methods === | 237 | // === Summary Recording Methods === |
| @@ -317,24 +262,6 @@ impl SyncMetrics { | |||
| 317 | } | 262 | } |
| 318 | } | 263 | } |
| 319 | 264 | ||
| 320 | /// Event source types for metrics tracking. | ||
| 321 | /// | ||
| 322 | /// These constants are used as labels for the `ngit_sync_events_total` metric | ||
| 323 | /// to categorize events by how they were discovered. | ||
| 324 | pub mod event_source { | ||
| 325 | /// Real-time subscription events received during live sync. | ||
| 326 | pub const LIVE: &str = "live"; | ||
| 327 | |||
| 328 | /// Events from startup catchup when the relay first starts. | ||
| 329 | pub const STARTUP: &str = "startup"; | ||
| 330 | |||
| 331 | /// Events from reconnection catchup after a relay reconnects. | ||
| 332 | pub const RECONNECT: &str = "reconnect"; | ||
| 333 | |||
| 334 | /// Events from daily catchup for drift detection. | ||
| 335 | pub const DAILY: &str = "daily"; | ||
| 336 | } | ||
| 337 | |||
| 338 | #[cfg(test)] | 265 | #[cfg(test)] |
| 339 | mod tests { | 266 | mod tests { |
| 340 | use super::*; | 267 | use super::*; |
| @@ -400,18 +327,10 @@ mod tests { | |||
| 400 | let registry = create_test_registry(); | 327 | let registry = create_test_registry(); |
| 401 | let metrics = SyncMetrics::register(&registry).unwrap(); | 328 | let metrics = SyncMetrics::register(&registry).unwrap(); |
| 402 | 329 | ||
| 403 | // Record single events | 330 | // Record synced events |
| 404 | metrics.record_event(event_source::LIVE); | 331 | metrics.record_synced_event(); |
| 405 | metrics.record_event(event_source::STARTUP); | 332 | metrics.record_synced_event(); |
| 406 | metrics.record_event(event_source::RECONNECT); | 333 | metrics.record_synced_event(); |
| 407 | metrics.record_event(event_source::DAILY); | ||
| 408 | |||
| 409 | // Record multiple events | ||
| 410 | metrics.record_events(event_source::STARTUP, 10); | ||
| 411 | |||
| 412 | // Record gap events | ||
| 413 | metrics.record_gap_event("wss://relay1.example.com"); | ||
| 414 | metrics.record_gap_events("wss://relay2.example.com", 5); | ||
| 415 | } | 334 | } |
| 416 | 335 | ||
| 417 | #[test] | 336 | #[test] |
| @@ -432,15 +351,6 @@ mod tests { | |||
| 432 | } | 351 | } |
| 433 | 352 | ||
| 434 | #[test] | 353 | #[test] |
| 435 | fn test_event_source_constants() { | ||
| 436 | // Verify constants have expected values | ||
| 437 | assert_eq!(event_source::LIVE, "live"); | ||
| 438 | assert_eq!(event_source::STARTUP, "startup"); | ||
| 439 | assert_eq!(event_source::RECONNECT, "reconnect"); | ||
| 440 | assert_eq!(event_source::DAILY, "daily"); | ||
| 441 | } | ||
| 442 | |||
| 443 | #[test] | ||
| 444 | fn test_duplicate_registration_fails() { | 354 | fn test_duplicate_registration_fails() { |
| 445 | let registry = create_test_registry(); | 355 | let registry = create_test_registry(); |
| 446 | 356 | ||