From 73a366cbd7be4edf9c74194cd0891c80a15236a5 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 11:41:10 +0000 Subject: Add structured logging for migration analysis - Add [PARSE_FAIL] logging when event parsing fails - Add [PURGATORY_EXPIRED] logging when repos expire from purgatory - Logs include: kind, event_id, repo, npub, reason - Supports Phase 4 migration scripts (30-extract-*.sh) - All 382 tests pass --- src/nostr/builder.rs | 135 +++++++++++++++++++++++++++++++++++++++++++++------ src/purgatory/mod.rs | 103 ++++++++++++++++++++++++++++++++------- 2 files changed, 206 insertions(+), 32 deletions(-) diff --git a/src/nostr/builder.rs b/src/nostr/builder.rs index 34014db..629c111 100644 --- a/src/nostr/builder.rs +++ b/src/nostr/builder.rs @@ -98,6 +98,62 @@ impl Nip34WritePolicy { self.ctx.set_local_relay(relay); } + /// Extract repository identifier from event's 'd' tag. + /// + /// Used for structured logging when parsing fails - we try to extract + /// the identifier even if full parsing failed. + fn extract_identifier_from_event(event: &Event) -> String { + use nostr_relay_builder::prelude::TagKind; + event + .tags + .iter() + .find(|t| t.kind() == TagKind::d()) + .and_then(|t| t.content()) + .map(|s| s.to_string()) + .unwrap_or_else(|| "unknown".to_string()) + } + + /// Extract ALL repository identifiers from PR event's 'a' tags. + /// + /// PR events can reference multiple repositories via multiple 'a' tags + /// (e.g., when there are multiple maintainers). Each tag has format + /// `30617::`. + /// + /// Returns a vector of unique identifiers, or `["unknown"]` if none found. + fn extract_repos_from_pr_event(event: &Event) -> Vec { + let repos: Vec = event + .tags + .iter() + .filter_map(|tag| { + let tag_vec = tag.clone().to_vec(); + if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") { + // Format: 30617:: + let parts: Vec<&str> = tag_vec[1].split(':').collect(); + if parts.len() >= 3 { + Some(parts[2].to_string()) + } else { + None + } + } else { + None + } + }) + .collect(); + + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + let unique_repos: Vec = repos + .into_iter() + .filter(|r| seen.insert(r.clone())) + .collect(); + + if unique_repos.is_empty() { + vec!["unknown".to_string()] + } else { + unique_repos + } + } + /// Handle repository announcement event async fn handle_announcement(&self, event: &Event) -> WritePolicyResult { let event_id_str = event.id.to_bech32().unwrap_or_else(|_| event.id.to_hex()); @@ -129,10 +185,18 @@ impl Nip34WritePolicy { WritePolicyResult::Accept } Err(e) => { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts tracing::warn!( - "Failed to parse repository announcement {}: {}", - event_id_str, - e + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub ); WritePolicyResult::reject(format!("Failed to parse announcement: {}", e)) } @@ -157,10 +221,18 @@ impl Nip34WritePolicy { WritePolicyResult::Accept } Err(e) => { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts tracing::warn!( - "Failed to parse maintainer announcement {}: {}", - event_id_str, - e + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub ); WritePolicyResult::reject(format!("Failed to parse announcement: {}", e)) } @@ -183,8 +255,6 @@ impl Nip34WritePolicy { /// * `event` - The state event to validate /// * `is_synced` - True if this event came from proactive sync (vs user-submitted) async fn handle_state(&self, event: &Event, is_synced: bool) -> WritePolicyResult { - let event_id_str = event.id.to_bech32().unwrap_or_else(|_| event.id.to_hex()); - match self.state_policy.validate(event) { StateResult::Accept => { // Process state alignment asynchronously @@ -195,7 +265,19 @@ impl Nip34WritePolicy { { Ok(poilicy_result) => poilicy_result, Err(e) => { - tracing::warn!("Failed to process state event {}: {}", event_id_str, e); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub + ); // reject if processing failed WritePolicyResult::Reject { status: false, @@ -205,7 +287,19 @@ impl Nip34WritePolicy { } } StateResult::Reject(reason) => { - tracing::warn!("Rejected repository state {}: {}", event_id_str, reason); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + reason, + repo, + npub + ); WritePolicyResult::reject(reason) } } @@ -323,11 +417,22 @@ impl Nip34WritePolicy { } Err(e) => { // Error checking git data - reject event - tracing::warn!( - "Failed to check git data for PR event {}: {}", - event_id_str, - e - ); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Extract ALL repo identifiers from 'a' tags for PR events + // (PR events can reference multiple repos when there are multiple maintainers) + let repos = Self::extract_repos_from_pr_event(event); + // Structured log for migration scripts - log once per repo + for repo in &repos { + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"git data check failed: {}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub + ); + } WritePolicyResult::reject(format!("Failed to check git data: {}", e)) } } diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index 47798a6..8b75351 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs @@ -21,6 +21,7 @@ pub use types::{PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; use dashmap::DashMap; use nostr_sdk::prelude::*; +use nostr_sdk::ToBech32; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::collections::HashSet; @@ -608,6 +609,9 @@ impl Purgatory { /// prevent infinite re-sync loops. Events that expire without finding git data /// will be filtered out during future negentropy/REQ sync operations. /// + /// Emits structured `[PURGATORY_EXPIRED]` log entries for each expired event + /// to support migration scripts and operational monitoring. + /// /// # Returns /// Tuple of (num_state_removed, num_pr_removed) pub fn cleanup(&self) -> (usize, usize) { @@ -615,18 +619,24 @@ impl Purgatory { let mut state_removed = 0; // Remove expired state events and mark them as expired - self.state_events.retain(|_, entries| { + self.state_events.retain(|identifier, entries| { let original_len = entries.len(); - // Collect event IDs before removing - let expired_ids: Vec = entries - .iter() - .filter(|entry| entry.expires_at <= now) - .map(|entry| entry.event.id) - .collect(); - // Mark as expired to prevent re-sync - for event_id in expired_ids { - self.mark_expired(event_id); + // Log and collect expired entries before removing + for entry in entries.iter().filter(|e| e.expires_at <= now) { + let npub = entry.author.to_bech32().unwrap_or_else(|_| entry.author.to_hex()); + let event_id_short = &entry.event.id.to_hex()[..12]; + + // Structured log for migration scripts + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16() + ); + + self.mark_expired(entry.event.id); } // Remove expired entries @@ -636,21 +646,80 @@ impl Purgatory { }); // Remove expired PR events and mark them as expired - let expired_prs: Vec<(String, Option)> = self + let expired_prs: Vec<_> = self .pr_events .iter() .filter(|entry| entry.value().expires_at <= now) .map(|entry| { - let event_id = entry.value().event.as_ref().map(|e| e.id); - (entry.key().clone(), event_id) + let pr_entry = entry.value(); + let event_id_str = entry.key().clone(); + let event_opt = pr_entry.event.clone(); + let commit = pr_entry.commit.clone(); + (event_id_str, event_opt, commit) }) .collect(); let pr_removed = expired_prs.len(); - for (event_id_str, event_id_opt) in expired_prs { - // Mark actual PR events as expired (not placeholders) - if let Some(event_id) = event_id_opt { - self.mark_expired(event_id); + for (event_id_str, event_opt, commit) in expired_prs { + // Log structured entry for PR events (not placeholders) + if let Some(ref event) = event_opt { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + + // Extract ALL repo identifiers from 'a' tags + // (PR events can reference multiple repos when there are multiple maintainers) + let repos: Vec = event + .tags + .iter() + .filter_map(|tag| { + let tag_vec = tag.clone().to_vec(); + if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") { + // Format: 30617:: + let parts: Vec<&str> = tag_vec[1].split(':').collect(); + if parts.len() >= 3 { + Some(parts[2].to_string()) + } else { + None + } + } else { + None + } + }) + .collect(); + + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + let unique_repos: Vec = repos + .into_iter() + .filter(|r| seen.insert(r.clone())) + .collect(); + + let repos_to_log = if unique_repos.is_empty() { + vec!["unknown".to_string()] + } else { + unique_repos + }; + + // Structured log for migration scripts - log once per repo + for repo in &repos_to_log { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)] + ); + } + + self.mark_expired(event.id); + } else { + // Placeholder (git data arrived first, but PR event never came) + tracing::debug!( + "[PURGATORY_EXPIRED] placeholder event_id={} commit={} reason=\"PR event not received within 30 minutes\"", + &event_id_str[..event_id_str.len().min(12)], + &commit[..commit.len().min(12)] + ); } self.pr_events.remove(&event_id_str); } -- cgit v1.2.3