From 73a366cbd7be4edf9c74194cd0891c80a15236a5 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 23 Jan 2026 11:41:10 +0000 Subject: Add structured logging for migration analysis - Add [PARSE_FAIL] logging when event parsing fails - Add [PURGATORY_EXPIRED] logging when repos expire from purgatory - Logs include: kind, event_id, repo, npub, reason - Supports Phase 4 migration scripts (30-extract-*.sh) - All 382 tests pass --- src/nostr/builder.rs | 135 +++++++++++++++++++++++++++++++++++++++++++++------ src/purgatory/mod.rs | 103 ++++++++++++++++++++++++++++++++------- 2 files changed, 206 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/nostr/builder.rs b/src/nostr/builder.rs index 34014db..629c111 100644 --- a/src/nostr/builder.rs +++ b/src/nostr/builder.rs @@ -98,6 +98,62 @@ impl Nip34WritePolicy { self.ctx.set_local_relay(relay); } + /// Extract repository identifier from event's 'd' tag. + /// + /// Used for structured logging when parsing fails - we try to extract + /// the identifier even if full parsing failed. + fn extract_identifier_from_event(event: &Event) -> String { + use nostr_relay_builder::prelude::TagKind; + event + .tags + .iter() + .find(|t| t.kind() == TagKind::d()) + .and_then(|t| t.content()) + .map(|s| s.to_string()) + .unwrap_or_else(|| "unknown".to_string()) + } + + /// Extract ALL repository identifiers from PR event's 'a' tags. + /// + /// PR events can reference multiple repositories via multiple 'a' tags + /// (e.g., when there are multiple maintainers). Each tag has format + /// `30617::`. + /// + /// Returns a vector of unique identifiers, or `["unknown"]` if none found. + fn extract_repos_from_pr_event(event: &Event) -> Vec { + let repos: Vec = event + .tags + .iter() + .filter_map(|tag| { + let tag_vec = tag.clone().to_vec(); + if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") { + // Format: 30617:: + let parts: Vec<&str> = tag_vec[1].split(':').collect(); + if parts.len() >= 3 { + Some(parts[2].to_string()) + } else { + None + } + } else { + None + } + }) + .collect(); + + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + let unique_repos: Vec = repos + .into_iter() + .filter(|r| seen.insert(r.clone())) + .collect(); + + if unique_repos.is_empty() { + vec!["unknown".to_string()] + } else { + unique_repos + } + } + /// Handle repository announcement event async fn handle_announcement(&self, event: &Event) -> WritePolicyResult { let event_id_str = event.id.to_bech32().unwrap_or_else(|_| event.id.to_hex()); @@ -129,10 +185,18 @@ impl Nip34WritePolicy { WritePolicyResult::Accept } Err(e) => { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts tracing::warn!( - "Failed to parse repository announcement {}: {}", - event_id_str, - e + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub ); WritePolicyResult::reject(format!("Failed to parse announcement: {}", e)) } @@ -157,10 +221,18 @@ impl Nip34WritePolicy { WritePolicyResult::Accept } Err(e) => { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts tracing::warn!( - "Failed to parse maintainer announcement {}: {}", - event_id_str, - e + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub ); WritePolicyResult::reject(format!("Failed to parse announcement: {}", e)) } @@ -183,8 +255,6 @@ impl Nip34WritePolicy { /// * `event` - The state event to validate /// * `is_synced` - True if this event came from proactive sync (vs user-submitted) async fn handle_state(&self, event: &Event, is_synced: bool) -> WritePolicyResult { - let event_id_str = event.id.to_bech32().unwrap_or_else(|_| event.id.to_hex()); - match self.state_policy.validate(event) { StateResult::Accept => { // Process state alignment asynchronously @@ -195,7 +265,19 @@ impl Nip34WritePolicy { { Ok(poilicy_result) => poilicy_result, Err(e) => { - tracing::warn!("Failed to process state event {}: {}", event_id_str, e); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub + ); // reject if processing failed WritePolicyResult::Reject { status: false, @@ -205,7 +287,19 @@ impl Nip34WritePolicy { } } StateResult::Reject(reason) => { - tracing::warn!("Rejected repository state {}: {}", event_id_str, reason); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Try to extract repo identifier from 'd' tag even if parsing failed + let repo = Self::extract_identifier_from_event(event); + // Structured log for migration scripts + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"{}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + reason, + repo, + npub + ); WritePolicyResult::reject(reason) } } @@ -323,11 +417,22 @@ impl Nip34WritePolicy { } Err(e) => { // Error checking git data - reject event - tracing::warn!( - "Failed to check git data for PR event {}: {}", - event_id_str, - e - ); + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + // Extract ALL repo identifiers from 'a' tags for PR events + // (PR events can reference multiple repos when there are multiple maintainers) + let repos = Self::extract_repos_from_pr_event(event); + // Structured log for migration scripts - log once per repo + for repo in &repos { + tracing::warn!( + "[PARSE_FAIL] kind={} event_id={}... reason=\"git data check failed: {}\" repo={} npub={}", + event.kind.as_u16(), + event_id_short, + e, + repo, + npub + ); + } WritePolicyResult::reject(format!("Failed to check git data: {}", e)) } } diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index 47798a6..8b75351 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs @@ -21,6 +21,7 @@ pub use types::{PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; use dashmap::DashMap; use nostr_sdk::prelude::*; +use nostr_sdk::ToBech32; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::collections::HashSet; @@ -608,6 +609,9 @@ impl Purgatory { /// prevent infinite re-sync loops. Events that expire without finding git data /// will be filtered out during future negentropy/REQ sync operations. /// + /// Emits structured `[PURGATORY_EXPIRED]` log entries for each expired event + /// to support migration scripts and operational monitoring. + /// /// # Returns /// Tuple of (num_state_removed, num_pr_removed) pub fn cleanup(&self) -> (usize, usize) { @@ -615,18 +619,24 @@ impl Purgatory { let mut state_removed = 0; // Remove expired state events and mark them as expired - self.state_events.retain(|_, entries| { + self.state_events.retain(|identifier, entries| { let original_len = entries.len(); - // Collect event IDs before removing - let expired_ids: Vec = entries - .iter() - .filter(|entry| entry.expires_at <= now) - .map(|entry| entry.event.id) - .collect(); - // Mark as expired to prevent re-sync - for event_id in expired_ids { - self.mark_expired(event_id); + // Log and collect expired entries before removing + for entry in entries.iter().filter(|e| e.expires_at <= now) { + let npub = entry.author.to_bech32().unwrap_or_else(|_| entry.author.to_hex()); + let event_id_short = &entry.event.id.to_hex()[..12]; + + // Structured log for migration scripts + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16() + ); + + self.mark_expired(entry.event.id); } // Remove expired entries @@ -636,21 +646,80 @@ impl Purgatory { }); // Remove expired PR events and mark them as expired - let expired_prs: Vec<(String, Option)> = self + let expired_prs: Vec<_> = self .pr_events .iter() .filter(|entry| entry.value().expires_at <= now) .map(|entry| { - let event_id = entry.value().event.as_ref().map(|e| e.id); - (entry.key().clone(), event_id) + let pr_entry = entry.value(); + let event_id_str = entry.key().clone(); + let event_opt = pr_entry.event.clone(); + let commit = pr_entry.commit.clone(); + (event_id_str, event_opt, commit) }) .collect(); let pr_removed = expired_prs.len(); - for (event_id_str, event_id_opt) in expired_prs { - // Mark actual PR events as expired (not placeholders) - if let Some(event_id) = event_id_opt { - self.mark_expired(event_id); + for (event_id_str, event_opt, commit) in expired_prs { + // Log structured entry for PR events (not placeholders) + if let Some(ref event) = event_opt { + let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); + let event_id_short = &event.id.to_hex()[..12]; + + // Extract ALL repo identifiers from 'a' tags + // (PR events can reference multiple repos when there are multiple maintainers) + let repos: Vec = event + .tags + .iter() + .filter_map(|tag| { + let tag_vec = tag.clone().to_vec(); + if tag_vec.len() >= 2 && tag_vec[0] == "a" && tag_vec[1].starts_with("30617:") { + // Format: 30617:: + let parts: Vec<&str> = tag_vec[1].split(':').collect(); + if parts.len() >= 3 { + Some(parts[2].to_string()) + } else { + None + } + } else { + None + } + }) + .collect(); + + // Deduplicate while preserving order + let mut seen = std::collections::HashSet::new(); + let unique_repos: Vec = repos + .into_iter() + .filter(|r| seen.insert(r.clone())) + .collect(); + + let repos_to_log = if unique_repos.is_empty() { + vec!["unknown".to_string()] + } else { + unique_repos + }; + + // Structured log for migration scripts - log once per repo + for repo in &repos_to_log { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)] + ); + } + + self.mark_expired(event.id); + } else { + // Placeholder (git data arrived first, but PR event never came) + tracing::debug!( + "[PURGATORY_EXPIRED] placeholder event_id={} commit={} reason=\"PR event not received within 30 minutes\"", + &event_id_str[..event_id_str.len().min(12)], + &commit[..commit.len().min(12)] + ); } self.pr_events.remove(&event_id_str); } -- cgit v1.2.3 From e93bf707bb5f8d690393449cee1b402f123ac923 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 09:39:48 +0000 Subject: fix: git naughty list DNS failure identication caught a production bug where npub in url string contained "dns" triggering false positive --- src/sync/naughty_list.rs | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 35fcc0f..097affe 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -114,11 +114,15 @@ impl NaughtyListTracker { pub fn classify_error(error: &str) -> Option { let error_lower = error.to_lowercase(); - // DNS lookup failures + // DNS lookup failures - use specific patterns to avoid false positives + // from URLs containing "dns" (e.g., npubs like "...cdns7..." or domains) if error_lower.contains("failed to lookup address") || error_lower.contains("name or service not known") || error_lower.contains("nodename nor servname provided") - || (error_lower.contains("dns") && !error_lower.contains("timeout")) + || error_lower.contains("dns error") + || error_lower.contains("dns lookup") + || error_lower.contains("dns resolution") + || error_lower.contains("getaddrinfo") { return Some(NaughtyCategory::DnsLookupFailed); } @@ -373,6 +377,34 @@ mod tests { NaughtyListTracker::classify_error("network unreachable"), None ); + + // Repository not found is transient (not an infrastructure issue) + assert_eq!( + NaughtyListTracker::classify_error( + "fatal: repository 'https://example.com/repo.git/' not found" + ), + None + ); + } + + #[test] + fn test_classify_false_positive_npub_with_dns() { + // This npub contains "dns" in its encoding: npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cDNS7jezx0 + // A "not found" error with this npub should NOT be classified as DNS failure + let error = "fatal: repository 'https://git.shakespeare.diy/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; + assert_eq!( + NaughtyListTracker::classify_error(error), + None, + "npub containing 'dns' should not trigger DNS failure classification" + ); + + // Same for relay.ngit.dev + let error2 = "fatal: repository 'https://relay.ngit.dev/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; + assert_eq!( + NaughtyListTracker::classify_error(error2), + None, + "npub containing 'dns' should not trigger DNS failure classification" + ); } #[test] -- cgit v1.2.3 From 1ae97cd85aec95f6270f853b28e48774cefc6bf6 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 16:17:55 +0000 Subject: feat: add NGIT_LOG_LEVEL configuration option Add proper log level configuration following standard approach: - CLI flag: --log-level - Environment variable: NGIT_LOG_LEVEL - Default: info - Supports simple levels (error, warn, info, debug, trace) - Supports filter expressions (e.g., ngit_grasp=debug,actix_web=info) Configuration is now consistent across all four sources: 1. src/config.rs - Config struct with log_level field 2. docs/reference/configuration.md - Full documentation 3. nix/module.nix - NixOS module with logLevel option 4. .env.example - Example configuration file This replaces the previous RUST_LOG approach with proper integration into the ngit-grasp configuration system, enabling trace logging from CLI, environment variables, or NixOS configuration. --- .env.example | 7 +++++-- docs/reference/configuration.md | 37 +++++++++++++++++++++++++------------ nix/module.nix | 11 ++++++++--- src/config.rs | 5 +++++ src/main.rs | 16 ++++++++-------- 5 files changed, 51 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/.env.example b/.env.example index e152b89..01854f4 100644 --- a/.env.example +++ b/.env.example @@ -101,9 +101,12 @@ # LOGGING # ============================================================================ -# Rust log level (not a ngit-grasp config, but useful for debugging) +# Log level for application logging +# CLI: --log-level +# Default: info # Options: error, warn, info, debug, trace -# RUST_LOG=info +# Can also use filter expressions: ngit_grasp=debug,actix_web=info +# NGIT_LOG_LEVEL=info # ============================================================================ # PROACTIVE SYNC (GRASP-02) diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index b24b498..b09b20f 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -1041,10 +1041,10 @@ Per-connection limits (built-in to relay-builder, not configurable): ### Logging Configuration -#### `RUST_LOG` +#### `NGIT_LOG_LEVEL` -**Description:** Logging level and filters (standard Rust environment variable) -**Type:** String (log level or filter) +**Description:** Logging level and filters for application logging +**Type:** String (log level or filter expression) **Default:** `info` **Required:** No @@ -1052,17 +1052,17 @@ Per-connection limits (built-in to relay-builder, not configurable): ```bash # Simple levels -RUST_LOG=error # Errors only -RUST_LOG=warn # Warnings and errors -RUST_LOG=info # Info, warnings, errors -RUST_LOG=debug # Debug and above -RUST_LOG=trace # Everything +NGIT_LOG_LEVEL=error # Errors only +NGIT_LOG_LEVEL=warn # Warnings and errors +NGIT_LOG_LEVEL=info # Info, warnings, errors (default) +NGIT_LOG_LEVEL=debug # Debug and above +NGIT_LOG_LEVEL=trace # Everything (very verbose) -# Module-specific -RUST_LOG=ngit_grasp=debug,actix_web=info +# Module-specific filtering +NGIT_LOG_LEVEL=ngit_grasp=debug,actix_web=info # Complex filters -RUST_LOG=debug,hyper=info,tokio=warn +NGIT_LOG_LEVEL=debug,hyper=info,tokio=warn ``` **Log levels (most to least verbose):** @@ -1073,12 +1073,25 @@ RUST_LOG=debug,hyper=info,tokio=warn 4. `warn` - Warnings about potential issues 5. `error` - Errors only +**CLI flag:** + +```bash +ngit-grasp --log-level trace +``` + **Production recommendation:** ```bash -RUST_LOG=info,ngit_grasp=debug +NGIT_LOG_LEVEL=info ``` +**Notes:** + +- Uses Rust's `tracing` crate filter syntax +- Supports module-level filtering (e.g., `ngit_grasp=debug,hyper=info`) +- `trace` level can significantly impact performance +- For production, `info` or `warn` is recommended + --- ### Security Configuration (Planned) diff --git a/nix/module.nix b/nix/module.nix index 4a6fc94..89d58de 100644 --- a/nix/module.nix +++ b/nix/module.nix @@ -127,9 +127,14 @@ let }; logLevel = mkOption { - type = types.enum [ "trace" "debug" "info" "warn" "error" ]; + type = types.str; default = "info"; - description = "Logging level for RUST_LOG environment variable"; + example = "debug"; + description = '' + Logging level for application logging. + Can be a simple level (trace, debug, info, warn, error) or a filter expression. + Examples: "info", "debug", "ngit_grasp=debug,actix_web=info" + ''; }; syncMaxBackoffSecs = mkOption { @@ -334,7 +339,7 @@ let NGIT_REPOSITORY_BLACKLIST = concatStringsSep "," cfg.repositoryBlacklist; NGIT_EVENT_BLACKLIST = concatStringsSep "," cfg.eventBlacklist; NGIT_MAX_CONNECTIONS = toString cfg.maxConnections; - RUST_LOG = cfg.logLevel; + NGIT_LOG_LEVEL = cfg.logLevel; } // optionalAttrs (cfg.relayName != null) { NGIT_RELAY_NAME = cfg.relayName; } // optionalAttrs (cfg.archiveReadOnly != null) { diff --git a/src/config.rs b/src/config.rs index 271a340..df7a7ef 100644 --- a/src/config.rs +++ b/src/config.rs @@ -500,6 +500,10 @@ pub struct Config { /// Prevents connection exhaustion DoS attacks #[arg(long, env = "NGIT_MAX_CONNECTIONS", default_value_t = 4096)] pub max_connections: usize, + + /// Log level for application logging + #[arg(long, env = "NGIT_LOG_LEVEL", default_value = "info")] + pub log_level: String, } impl Config { @@ -782,6 +786,7 @@ impl Config { repository_blacklist: String::new(), event_blacklist: String::new(), max_connections: 500, + log_level: "debug".to_string(), } } } diff --git a/src/main.rs b/src/main.rs index 5e5b83a..105b861 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,8 @@ use std::{path::PathBuf, sync::Arc}; use anyhow::Result; use tokio::signal; -use tracing::{error, info, warn, Level}; -use tracing_subscriber::FmtSubscriber; +use tracing::{error, info, warn}; +use tracing_subscriber::{EnvFilter, FmtSubscriber}; use ngit_grasp::{ config::{Config, DatabaseBackend}, @@ -17,16 +17,16 @@ use ngit_grasp::{ #[tokio::main] async fn main() -> Result<()> { - // Initialize tracing + // Load configuration first (priority: CLI flags > env vars > .env file > defaults) + let config = Config::load()?; + + // Initialize tracing with configured log level let subscriber = FmtSubscriber::builder() - .with_max_level(Level::DEBUG) + .with_env_filter(EnvFilter::new(&config.log_level)) .finish(); tracing::subscriber::set_global_default(subscriber)?; - info!("Starting ngit-grasp with nostr-relay-builder..."); - - // Load configuration (priority: CLI flags > env vars > .env file > defaults) - let config = Config::load()?; + info!("Starting ngit-grasp with log level: {}", config.log_level); // Validate configuration and fail fast on fatal errors // Recoverable issues (e.g., malformed whitelist entries) are logged as warnings -- cgit v1.2.3 From 905ebd838a9ff8cc777cf3b3b6306066e8c177fc Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Mon, 26 Jan 2026 17:20:11 +0000 Subject: fix: load existing events from database on startup with two-pass queries Previously, SelfSubscriber only saw events returned by the WebSocket subscription to the local relay, which has limits on the number of events returned. This caused repos with announcements in the database to never get Layer 2/3 filters created, resulting in missing state events. Now, on startup, we query the database directly with two separate queries: 1. Query announcements (30617) to populate repo_sync_index 2. Query root events (1617/1618/1621) to create Layer 3 filters Both queries use .since(last_connected) if available for incremental loading on reconnect. Filters are created inline and made mutable to support the .since() clause, rather than using a shared create_event_filter() method. Fixes the issue where state events were missing for repos like cashbird and creative-space that had announcements in the database but weren't returned by the WebSocket subscription. --- src/sync/mod.rs | 1 + src/sync/self_subscriber.rs | 167 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 145 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/sync/mod.rs b/src/sync/mod.rs index bc8c428..226e681 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -1442,6 +1442,7 @@ impl SyncManager { self.service_domain.clone(), Arc::clone(&self.repo_sync_index), action_tx, + self.database.clone(), ); let subscriber_shutdown = shutdown_tx.subscribe(); tokio::spawn(async move { self_subscriber.run(Some(subscriber_shutdown)).await }); diff --git a/src/sync/self_subscriber.rs b/src/sync/self_subscriber.rs index 3cc408d..e9505f1 100644 --- a/src/sync/self_subscriber.rs +++ b/src/sync/self_subscriber.rs @@ -16,6 +16,8 @@ use nostr_sdk::Timestamp; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, mpsc}; +use crate::nostr::builder::SharedDatabase; + use super::{AddFilters, RepoSyncIndex, RepoSyncNeeds}; // ============================================================================= @@ -98,6 +100,8 @@ pub struct SelfSubscriber { action_tx: mpsc::Sender, /// Last time we connected - used for since filter on reconnect last_connected: Option, + /// Database for querying existing events on startup + database: SharedDatabase, } impl SelfSubscriber { @@ -108,11 +112,13 @@ impl SelfSubscriber { /// * `relay_domain` - Our service domain (used for filtering relevant repos) /// * `repo_sync_index` - Shared index to update with discovered repos /// * `action_tx` - Channel to send AddFilters actions to the SyncManager + /// * `database` - Database for querying existing events on startup pub fn new( own_relay_url: String, relay_domain: String, repo_sync_index: RepoSyncIndex, action_tx: mpsc::Sender, + database: SharedDatabase, ) -> Self { Self { own_relay_url, @@ -120,6 +126,7 @@ impl SelfSubscriber { repo_sync_index, action_tx, last_connected: None, + database, } } @@ -135,6 +142,127 @@ impl SelfSubscriber { .unwrap_or(Duration::from_millis(5000)) } + /// Load existing events from database on startup + /// + /// Queries the database with two separate queries to build the initial + /// PendingUpdates state. This ensures all repos get Layer 2/3 filters + /// created, not just those returned by the WebSocket subscription + /// (which has limits on the number of events returned). + /// + /// Query order: + /// 1. First query: Get announcements (30617) to populate repo_sync_index + /// with repos and their relays + /// 2. Second query: Get root events (1617/1618/1621) for handle_root_event() + /// to add root event IDs for Layer 3 filter creation + /// + /// Both queries use `.since(last_connected)` if available for incremental + /// loading on reconnect. + /// + /// Returns a PendingUpdates containing all repos that need Layer 2/3 filters. + async fn load_existing_events(&self) -> PendingUpdates { + let mut pending = PendingUpdates::new(); + + // Log whether this is a full or incremental load + if let Some(since) = self.last_connected { + tracing::info!( + since = %since, + "Loading events incrementally from database (reconnect)" + ); + } else { + tracing::info!("Loading all events from database (first connection)"); + } + + // First query: Get announcements to populate repo_sync_index + let mut announcement_filter = Filter::new().kind(Kind::GitRepoAnnouncement); + if let Some(timestamp) = self.last_connected { + announcement_filter = announcement_filter.since(timestamp); + } + + let announcements = match self.database.query(announcement_filter).await { + Ok(events) => { + tracing::info!( + count = events.len(), + "Loaded announcements from database" + ); + events + } + Err(e) => { + tracing::error!( + error = %e, + "Failed to query announcements from database" + ); + return pending; + } + }; + + // Process announcements + let mut announcements_loaded = 0; + for event in announcements.iter() { + if let Some(repo_id) = Self::extract_repo_id(event) { + let relays = Self::extract_relay_urls(event); + pending.add_repo(repo_id, relays, HashSet::new()); + announcements_loaded += 1; + } + } + + // Update repo_sync_index with announcements BEFORE querying root events + { + let mut index = self.repo_sync_index.write().await; + for (repo_id, needs) in &pending.repos { + let entry = index + .entry(repo_id.clone()) + .or_insert_with(|| RepoSyncNeeds { + relays: HashSet::new(), + root_events: HashSet::new(), + }); + entry.relays.extend(needs.relays.clone()); + } + } + + // Second query: Get root events for handle_root_event() + let mut root_filter = Filter::new().kinds(vec![ + Kind::GitPatch, + Kind::GitIssue, + Kind::GitPullRequest, + ]); + if let Some(timestamp) = self.last_connected { + root_filter = root_filter.since(timestamp); + } + + let root_events = match self.database.query(root_filter).await { + Ok(events) => { + tracing::info!( + count = events.len(), + "Loaded root events from database" + ); + events + } + Err(e) => { + tracing::error!( + error = %e, + "Failed to query root events from database" + ); + // Continue with just announcements + return pending; + } + }; + + // Process root events + let mut root_events_processed = 0; + for event in root_events.iter() { + self.handle_root_event(event, &mut pending).await; + root_events_processed += 1; + } + + tracing::info!( + announcements_loaded = announcements_loaded, + root_events_processed = root_events_processed, + "Processed existing events from database" + ); + + pending + } + /// Process a relay pool notification /// /// Handles incoming events from the subscription, queueing 30617 announcements @@ -276,33 +404,22 @@ impl SelfSubscriber { // Subscribe to announcement and root event kinds // Per v4 spec: 30617, 1617, 1618, 1621 (NOT 30618) // Plus kind 10317 (User Grasp List) for GRASP discovery - // Check if we have a last_connected time for reconnect filtering - let filter = if let Some(last) = self.last_connected { + let mut filter = Filter::new().kinds(vec![ + Kind::GitRepoAnnouncement, + Kind::GitPatch, + Kind::GitIssue, + Kind::GitPullRequest, + Kind::GitUserGraspList, + ]); + if let Some(timestamp) = self.last_connected { // Quick reconnect - use since filter (15 min buffer) - let since = Timestamp::from(last.as_secs().saturating_sub(15 * 60)); + let since = Timestamp::from(timestamp.as_secs().saturating_sub(15 * 60)); tracing::debug!( since = %since, "Using since filter for reconnect" ); - Filter::new() - .kinds(vec![ - Kind::GitRepoAnnouncement, // Repository Announcements - Kind::GitPatch, // Patches - Kind::GitIssue, // Issues - Kind::GitPullRequest, // Pull Requests - Kind::GitUserGraspList, // User Grasp List - ]) - .since(since) - } else { - // First connection - no since filter - Filter::new().kinds(vec![ - Kind::GitRepoAnnouncement, // Repository Announcements - Kind::GitPatch, // Patches - Kind::GitIssue, // Issues - Kind::GitPullRequest, // Pull Requests - Kind::GitUserGraspList, // User Grasp List - ]) - }; + filter = filter.since(since); + } // Update last_connected AFTER creating filter but BEFORE subscribing self.last_connected = Some(Timestamp::now()); @@ -323,7 +440,11 @@ impl SelfSubscriber { let mut notifications = client.notifications(); let batch_window = Self::get_batch_window(); - let mut pending = PendingUpdates::new(); + + // Load existing events from database on startup + // This ensures all repos get Layer 2/3 filters created, not just those + // returned by the WebSocket subscription (which has limits) + let mut pending = self.load_existing_events().await; // Timer does NOT reset on new events - use interval let mut timer = tokio::time::interval(batch_window); -- cgit v1.2.3 From 04056a12110928e406d2aca456fc3169ae39f8ad Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 07:37:35 +0000 Subject: increase git throttle limits to 60/m --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index 105b861..6c9da05 100644 --- a/src/main.rs +++ b/src/main.rs @@ -187,8 +187,8 @@ async fn main() -> Result<()> { )); // Create throttle manager for rate limiting remote git servers - // Default: 5 concurrent requests per domain, 30 requests per minute per domain - let throttle_manager = Arc::new(ThrottleManager::new(5, 30)); + // Default: 5 concurrent requests per domain, 60 requests per minute per domain + let throttle_manager = Arc::new(ThrottleManager::new(5, 60)); throttle_manager.set_context(sync_ctx.clone()); throttle_manager.set_git_naughty_list(git_naughty_list.clone()); -- cgit v1.2.3 From 6e5b7eb84b3ca8a902ac4bcbab9c2a9f9ecdee51 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 09:16:41 +0000 Subject: fix(sync): Remove .since() filter from database queries in load_existing_events() Root cause: `last_connected` was set to Timestamp::now() BEFORE load_existing_events() was called (line 425), causing the database query to filter out all existing events with .since(current_time). The query became: SELECT * FROM events WHERE created_at >= Result: 0 events returned (nothing has created_at in the future) Solution: Remove .since() filter from database queries entirely. The `last_connected` field is now only used for WebSocket subscription filters to avoid re-fetching events from remote relays on reconnect. Rationale for this approach over reordering operations: - Database queries are fast (indexed by kind and created_at) - Loading all events on startup ensures consistency - Eliminates subtle ordering dependency that could break in refactoring - Cleaner mental model: database = full load, WebSocket = incremental This fixes the issue where ~190 state events weren't being fetched after deploying the database query fix (commit 4162c90). Evidence: Production logs showed "Loaded announcements from database count=0" when there should have been hundreds of announcements. --- src/sync/self_subscriber.rs | 42 ++++++++---------------------------------- 1 file changed, 8 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/sync/self_subscriber.rs b/src/sync/self_subscriber.rs index e9505f1..86e4583 100644 --- a/src/sync/self_subscriber.rs +++ b/src/sync/self_subscriber.rs @@ -155,35 +155,18 @@ impl SelfSubscriber { /// 2. Second query: Get root events (1617/1618/1621) for handle_root_event() /// to add root event IDs for Layer 3 filter creation /// - /// Both queries use `.since(last_connected)` if available for incremental - /// loading on reconnect. - /// /// Returns a PendingUpdates containing all repos that need Layer 2/3 filters. async fn load_existing_events(&self) -> PendingUpdates { let mut pending = PendingUpdates::new(); - // Log whether this is a full or incremental load - if let Some(since) = self.last_connected { - tracing::info!( - since = %since, - "Loading events incrementally from database (reconnect)" - ); - } else { - tracing::info!("Loading all events from database (first connection)"); - } + tracing::info!("Loading all events from database"); - // First query: Get announcements to populate repo_sync_index - let mut announcement_filter = Filter::new().kind(Kind::GitRepoAnnouncement); - if let Some(timestamp) = self.last_connected { - announcement_filter = announcement_filter.since(timestamp); - } + // First query: Get all announcements to populate repo_sync_index + let announcement_filter = Filter::new().kind(Kind::GitRepoAnnouncement); let announcements = match self.database.query(announcement_filter).await { Ok(events) => { - tracing::info!( - count = events.len(), - "Loaded announcements from database" - ); + tracing::info!(count = events.len(), "Loaded announcements from database"); events } Err(e) => { @@ -219,22 +202,13 @@ impl SelfSubscriber { } } - // Second query: Get root events for handle_root_event() - let mut root_filter = Filter::new().kinds(vec![ - Kind::GitPatch, - Kind::GitIssue, - Kind::GitPullRequest, - ]); - if let Some(timestamp) = self.last_connected { - root_filter = root_filter.since(timestamp); - } + // Second query: Get all root events for handle_root_event() + let root_filter = + Filter::new().kinds(vec![Kind::GitPatch, Kind::GitIssue, Kind::GitPullRequest]); let root_events = match self.database.query(root_filter).await { Ok(events) => { - tracing::info!( - count = events.len(), - "Loaded root events from database" - ); + tracing::info!(count = events.len(), "Loaded root events from database"); events } Err(e) => { -- cgit v1.2.3 From dd9b00c644853a8db0ec463a7e1eddabd6634e41 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 11:06:09 +0000 Subject: fix: improve logging to enable migration script to detect announcement parse failures --- src/sync/mod.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/sync/mod.rs b/src/sync/mod.rs index 226e681..a0dfa59 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -2812,6 +2812,7 @@ impl SyncManager { event_id = %event.id, kind = %event.kind.as_u16(), identifier = %identifier, + pubkey = %event.pubkey, "Added rejected announcement to two-tier index" ); } -- cgit v1.2.3 From ddcba2b350615e6d6ad7028b570206efb42f0338 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 11:15:58 +0000 Subject: fix: prevent false positives in naughty list classification Strip URLs (http://, https://, git://, ws://, wss://) from error messages before classification to prevent false positives from repository names, paths, or identifiers containing keywords like 'ssl', 'certificate', etc. - Add strip_urls() function to remove URLs before pattern matching - Add WebSocket protocol support (ws://, wss://) for relay errors - Filter remote warnings that don't indicate infrastructure problems - Use more specific SSL/TLS patterns to avoid npub substring matches - Reduce test suite from 40 to 13 tests, keeping only edge cases Fixes false positives seen in production: - git.shakespeare.diy: 'repository not found' with npub containing 'ssl' - relay.ngit.dev: HTTP 500 error with npub containing 'ssl' - gitnostr.com: remote permission warning misclassified as protocol error --- src/sync/naughty_list.rs | 428 +++++++++++++++++++++++++---------------------- 1 file changed, 232 insertions(+), 196 deletions(-) (limited to 'src') diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 097affe..60ab949 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -101,6 +101,69 @@ impl NaughtyListTracker { Self::new(12) } + /// Strip URLs from an error message to prevent false positives from URL components. + /// + /// URLs can contain path components, repository names, or user identifiers that + /// accidentally match error patterns (e.g., "my-openssl-project", "ssl-team", + /// "certificate-manager"). By stripping URLs before classification, we ensure + /// only the actual error message text is analyzed. + /// + /// Handles: http://, https://, git://, ws://, wss:// + fn strip_urls(error: &str) -> String { + let mut result = String::with_capacity(error.len()); + let mut chars = error.chars().peekable(); + + while let Some(c) = chars.next() { + // Check for URL start patterns + let potential_url = match c { + 'h' => { + // Check for http:// or https:// + let rest: String = chars.clone().take(7).collect(); + rest.starts_with("ttp://") || rest.starts_with("ttps://") + } + 'g' => { + // Check for git:// + let rest: String = chars.clone().take(5).collect(); + rest.starts_with("it://") + } + 'w' => { + // Check for ws:// or wss:// + let rest: String = chars.clone().take(5).collect(); + rest.starts_with("s://") || rest.starts_with("ss://") + } + _ => false, + }; + + if potential_url { + // Found URL start, consume until URL end + result.push_str("[URL]"); + + // Skip until we hit a URL terminator + loop { + match chars.peek() { + Some(&ch) if Self::is_url_char(ch) => { + chars.next(); + } + _ => break, + } + } + } else { + result.push(c); + } + } + + result + } + + /// Check if a character can be part of a URL + #[inline] + fn is_url_char(c: char) -> bool { + // URLs end at whitespace, quotes, or certain brackets + // This is conservative - real URLs can contain more, but git errors + // typically have URLs followed by these terminators + !matches!(c, ' ' | '\t' | '\n' | '\r' | '"' | '\'' | '>' | ']' | ')') + } + /// Classify an error string into a naughty category or return None for transient errors /// /// # Arguments @@ -112,10 +175,32 @@ impl NaughtyListTracker { /// - `Some(NaughtyCategory)` if the error indicates a persistent infrastructure issue /// - `None` if the error is a transient network issue (use HealthTracker backoff) pub fn classify_error(error: &str) -> Option { - let error_lower = error.to_lowercase(); + // Filter out remote warnings - these are informational messages from the remote + // server that don't indicate infrastructure problems with the domain itself. + // Example: "remote: warning: unable to access '/root/.config/git/attributes': Permission denied" + // These warnings are about the remote server's internal configuration, not connectivity. + let filtered_error: String = error + .lines() + .filter(|line| { + let line_lower = line.to_lowercase(); + // Keep lines that are NOT remote warnings + !(line_lower.starts_with("remote: warning:") + || line_lower.starts_with("warning: remote")) + }) + .collect::>() + .join("\n"); + + // If after filtering we have no content, this was just warnings - not a real error + if filtered_error.trim().is_empty() { + return None; + } + + // Strip URLs to prevent false positives from URL components + // (e.g., repository named "openssl-test" or path containing "certificate") + let url_stripped = Self::strip_urls(&filtered_error); + let error_lower = url_stripped.to_lowercase(); - // DNS lookup failures - use specific patterns to avoid false positives - // from URLs containing "dns" (e.g., npubs like "...cdns7..." or domains) + // DNS lookup failures if error_lower.contains("failed to lookup address") || error_lower.contains("name or service not known") || error_lower.contains("nodename nor servname provided") @@ -129,8 +214,17 @@ impl NaughtyListTracker { // TLS certificate errors if error_lower.contains("certificate") - || error_lower.contains("ssl") - || error_lower.contains("tls") + || error_lower.contains("ssl error") + || error_lower.contains("ssl certificate") + || error_lower.contains("ssl handshake") + || error_lower.contains("ssl_error") + || error_lower.contains("tls error") + || error_lower.contains("tls handshake") + || error_lower.contains("tls alert") + || error_lower.contains("tls_error") + || error_lower.contains("openssl") + || error_lower.contains("schannel") + || error_lower.contains("secure channel") { // Exclude timeout errors that mention TLS if !error_lower.contains("timeout") && !error_lower.contains("timed out") { @@ -294,211 +388,216 @@ impl NaughtyListTracker { mod tests { use super::*; + // ========================================================================= + // URL STRIPPING TESTS + // ========================================================================= + #[test] - fn test_classify_dns_errors() { - assert_eq!( - NaughtyListTracker::classify_error("failed to lookup address information"), - Some(NaughtyCategory::DnsLookupFailed) - ); + fn test_strip_urls_basic_protocols() { + // HTTP/HTTPS assert_eq!( - NaughtyListTracker::classify_error("Name or service not known"), - Some(NaughtyCategory::DnsLookupFailed) - ); - assert_eq!( - NaughtyListTracker::classify_error("nodename nor servname provided"), - Some(NaughtyCategory::DnsLookupFailed) + NaughtyListTracker::strip_urls("error: https://example.com/repo.git failed"), + "error: [URL] failed" ); assert_eq!( - NaughtyListTracker::classify_error("dns error: NXDOMAIN"), - Some(NaughtyCategory::DnsLookupFailed) + NaughtyListTracker::strip_urls("error: http://example.com/path failed"), + "error: [URL] failed" ); - } - #[test] - fn test_classify_tls_errors() { + // Git protocol assert_eq!( - NaughtyListTracker::classify_error("certificate not valid for 'example.com'"), - Some(NaughtyCategory::TlsCertificateInvalid) + NaughtyListTracker::strip_urls("fatal: git://github.com/user/repo.git not found"), + "fatal: [URL] not found" ); + + // WebSocket protocols (used for relay URLs) assert_eq!( - NaughtyListTracker::classify_error("SSL certificate problem"), - Some(NaughtyCategory::TlsCertificateInvalid) + NaughtyListTracker::strip_urls("error: wss://relay.example.com failed"), + "error: [URL] failed" ); assert_eq!( - NaughtyListTracker::classify_error("TLS handshake failed"), - Some(NaughtyCategory::TlsCertificateInvalid) + NaughtyListTracker::strip_urls("error: ws://localhost:8080 failed"), + "error: [URL] failed" ); + } - // TLS timeout should NOT be classified as naughty - assert_eq!( - NaughtyListTracker::classify_error("TLS connection timed out"), - None - ); + #[test] + fn test_strip_urls_multiple() { + let error = "failed to clone https://a.com/repo.git and wss://relay.com"; + let stripped = NaughtyListTracker::strip_urls(error); + assert_eq!(stripped, "failed to clone [URL] and [URL]"); } #[test] - fn test_classify_protocol_errors() { - assert_eq!( - NaughtyListTracker::classify_error("websocket protocol error"), - Some(NaughtyCategory::ProtocolError) - ); + fn test_strip_urls_preserves_error_text() { + let error = + "fatal: unable to access 'https://example.com/repo.git/': SSL certificate problem"; + let stripped = NaughtyListTracker::strip_urls(error); + assert!(stripped.contains("SSL certificate problem")); + assert!(!stripped.contains("example.com")); + } + + // ========================================================================= + // EDGE CASES: TIMEOUT/CONNECTION EXCEPTIONS + // These are the "unusual rules" where a pattern matches but should be excluded + // ========================================================================= + + #[test] + fn test_tls_timeout_not_naughty() { + // TLS errors with timeout should NOT be classified as naughty + // (timeout is transient, not a certificate problem) assert_eq!( - NaughtyListTracker::classify_error("invalid frame header"), - Some(NaughtyCategory::ProtocolError) + NaughtyListTracker::classify_error("TLS connection timed out"), + None ); - - // WebSocket connection errors should NOT be classified as naughty assert_eq!( - NaughtyListTracker::classify_error("websocket connection refused"), + NaughtyListTracker::classify_error("SSL handshake timeout"), None ); } #[test] - fn test_classify_transient_errors() { - // Timeouts are transient + fn test_websocket_connection_errors_not_naughty() { + // WebSocket connection errors are transient, not protocol violations assert_eq!( - NaughtyListTracker::classify_error("connection timed out"), + NaughtyListTracker::classify_error("websocket connection refused"), None ); assert_eq!( - NaughtyListTracker::classify_error("operation timed out"), + NaughtyListTracker::classify_error("websocket connection timeout"), None ); + } - // Connection refused is transient + #[test] + fn test_remote_warnings_filtered() { + // Remote warnings should be filtered out before classification + let warning_only = + "remote: warning: unable to access '/root/.config/git/attributes': Permission denied"; + assert_eq!(NaughtyListTracker::classify_error(warning_only), None); + + // But real errors after warnings should still be classified + let warning_with_error = "remote: warning: something\nfatal: failed to lookup address"; assert_eq!( - NaughtyListTracker::classify_error("connection refused"), - None + NaughtyListTracker::classify_error(warning_with_error), + Some(NaughtyCategory::DnsLookupFailed) ); + } - // Generic network errors are transient - assert_eq!( - NaughtyListTracker::classify_error("network unreachable"), - None - ); + // ========================================================================= + // INTEGRATION: FULL CLASSIFICATION FLOW + // Verify URL stripping + classification work together correctly + // ========================================================================= + + #[test] + fn test_url_with_keywords_not_false_positive() { + // URLs containing keywords should NOT trigger classification + let cases = [ + ("https://example.com/my-openssl-project.git", "not found"), + ("https://example.com/ssl-team/repo.git", "not found"), + ("https://example.com/certificate-manager.git", "not found"), + ("https://example.com/dns-tools.git", "not found"), + ("wss://relay-tls-test.example.com", "connection refused"), + ]; + + for (url, suffix) in cases { + let error = format!("fatal: repository '{}/' {}", url, suffix); + assert_eq!( + NaughtyListTracker::classify_error(&error), + None, + "URL '{}' should not trigger false positive", + url + ); + } + } - // Repository not found is transient (not an infrastructure issue) + #[test] + fn test_real_errors_still_detected() { + // Real errors in the message text (not URL) should still be detected assert_eq!( NaughtyListTracker::classify_error( - "fatal: repository 'https://example.com/repo.git/' not found" + "fatal: 'https://example.com/repo.git': SSL certificate problem" ), - None + Some(NaughtyCategory::TlsCertificateInvalid) ); - } - - #[test] - fn test_classify_false_positive_npub_with_dns() { - // This npub contains "dns" in its encoding: npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cDNS7jezx0 - // A "not found" error with this npub should NOT be classified as DNS failure - let error = "fatal: repository 'https://git.shakespeare.diy/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; assert_eq!( - NaughtyListTracker::classify_error(error), - None, - "npub containing 'dns' should not trigger DNS failure classification" + NaughtyListTracker::classify_error( + "fatal: 'https://example.com/repo.git': failed to lookup address" + ), + Some(NaughtyCategory::DnsLookupFailed) ); - - // Same for relay.ngit.dev - let error2 = "fatal: repository 'https://relay.ngit.dev/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found"; assert_eq!( - NaughtyListTracker::classify_error(error2), - None, - "npub containing 'dns' should not trigger DNS failure classification" + NaughtyListTracker::classify_error("websocket protocol error"), + Some(NaughtyCategory::ProtocolError) ); } #[test] - fn test_record_new_entry() { - let tracker = NaughtyListTracker::with_defaults(); - let url = "wss://bad-relay.example.com"; - - let is_new = tracker.record( - url, - NaughtyCategory::DnsLookupFailed, - "failed to lookup address".to_string(), + fn test_url_with_keyword_and_real_error() { + // URL contains keyword AND there's a real error - should detect the error + let error = "fatal: 'https://example.com/ssl-tools/repo.git': SSL certificate problem"; + assert_eq!( + NaughtyListTracker::classify_error(error), + Some(NaughtyCategory::TlsCertificateInvalid) ); - - assert!(is_new); - assert!(tracker.is_naughty(url)); - - let entry = tracker.get_entry(url).unwrap(); - assert_eq!(entry.category, NaughtyCategory::DnsLookupFailed); - assert_eq!(entry.occurrence_count, 1); } + // ========================================================================= + // TRACKER FUNCTIONALITY + // ========================================================================= + #[test] - fn test_record_updates_existing() { + fn test_tracker_record_and_update() { let tracker = NaughtyListTracker::with_defaults(); let url = "wss://bad-relay.example.com"; // First occurrence - let is_new1 = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 1".to_string()); - assert!(is_new1); + let is_new = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 1".to_string()); + assert!(is_new); + assert!(tracker.is_naughty(url)); - // Second occurrence + // Second occurrence updates existing let is_new2 = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 2".to_string()); assert!(!is_new2); let entry = tracker.get_entry(url).unwrap(); assert_eq!(entry.occurrence_count, 2); - assert_eq!(entry.reason, "error 2"); // Updated to latest + assert_eq!(entry.reason, "error 2"); } #[test] - fn test_is_naughty() { - let tracker = NaughtyListTracker::with_defaults(); - let url = "wss://bad-relay.example.com"; - - assert!(!tracker.is_naughty(url)); + fn test_tracker_expiration() { + let tracker = NaughtyListTracker::new(0); // Expire immediately tracker.record( - url, - NaughtyCategory::TlsCertificateInvalid, - "cert error".to_string(), + "wss://relay.example.com", + NaughtyCategory::DnsLookupFailed, + "error".to_string(), ); - assert!(tracker.is_naughty(url)); - } - - #[test] - fn test_get_all() { - let tracker = NaughtyListTracker::with_defaults(); + // Entry exists but is expired + assert!(!tracker.is_naughty("wss://relay.example.com")); - tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "dns error".to_string(), - ); - tracker.record( - "wss://relay2.example.com", - NaughtyCategory::TlsCertificateInvalid, - "tls error".to_string(), - ); + std::thread::sleep(std::time::Duration::from_millis(10)); - let all = tracker.get_all(); - assert_eq!(all.len(), 2); + let expired = tracker.expire_old_entries(); + assert_eq!(expired.len(), 1); + assert_eq!(tracker.total_count(), 0); } #[test] - fn test_count_by_category() { + fn test_tracker_counts() { let tracker = NaughtyListTracker::with_defaults(); + tracker.record("wss://r1.com", NaughtyCategory::DnsLookupFailed, "e".into()); + tracker.record("wss://r2.com", NaughtyCategory::DnsLookupFailed, "e".into()); tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - tracker.record( - "wss://relay2.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - tracker.record( - "wss://relay3.example.com", + "wss://r3.com", NaughtyCategory::TlsCertificateInvalid, - "error".to_string(), + "e".into(), ); + assert_eq!(tracker.total_count(), 3); assert_eq!( tracker.count_by_category(NaughtyCategory::DnsLookupFailed), 2 @@ -507,74 +606,11 @@ mod tests { tracker.count_by_category(NaughtyCategory::TlsCertificateInvalid), 1 ); - assert_eq!(tracker.count_by_category(NaughtyCategory::ProtocolError), 0); - } - - #[test] - fn test_total_count() { - let tracker = NaughtyListTracker::with_defaults(); - assert_eq!(tracker.total_count(), 0); - - tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - assert_eq!(tracker.total_count(), 1); - - tracker.record( - "wss://relay2.example.com", - NaughtyCategory::TlsCertificateInvalid, - "error".to_string(), - ); - assert_eq!(tracker.total_count(), 2); - } - - #[test] - fn test_expire_old_entries() { - // Use very short expiration for testing - let tracker = NaughtyListTracker::new(0); // Expire immediately (0 hours) - - tracker.record( - "wss://relay1.example.com", - NaughtyCategory::DnsLookupFailed, - "error".to_string(), - ); - - // Entry should exist in the map - assert_eq!(tracker.total_count(), 1); - - // But is_naughty should return false since it's already expired (0 hours) - assert!(!tracker.is_naughty("wss://relay1.example.com")); - - // Sleep to ensure time passes - std::thread::sleep(std::time::Duration::from_millis(10)); - - // Expire old entries (should remove the 0-hour expired entry) - let expired = tracker.expire_old_entries(); - assert_eq!(expired.len(), 1); - assert_eq!(expired[0], "wss://relay1.example.com"); - - // Entry should be gone - assert!(!tracker.is_naughty("wss://relay1.example.com")); - assert_eq!(tracker.total_count(), 0); + assert_eq!(tracker.get_all().len(), 3); } #[test] fn test_category_display() { - assert_eq!( - NaughtyCategory::DnsLookupFailed.to_string(), - "dns_lookup_failed" - ); - assert_eq!( - NaughtyCategory::TlsCertificateInvalid.to_string(), - "tls_certificate_invalid" - ); - assert_eq!(NaughtyCategory::ProtocolError.to_string(), "protocol_error"); - } - - #[test] - fn test_category_as_str() { assert_eq!( NaughtyCategory::DnsLookupFailed.as_str(), "dns_lookup_failed" -- cgit v1.2.3 From a1573c6018c2e81795dc87d36011604dfed80936 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 13:56:45 +0000 Subject: fix: prevent git protocol errors from triggering naughty list Change protocol error detection to only match WebSocket-specific errors (websocket, invalid frame) instead of generic 'protocol' keyword which was incorrectly catching transient git protocol errors. Git protocol errors like 'fatal: protocol error: bad line length' are transient network issues that should use backoff/retry, not permanent naughty list blocking. Only WebSocket/Nostr protocol violations indicate persistent infrastructure problems. Fixes production false positive: - relay.ngit.dev: git protocol error + remote warning misclassified Add production test cases for git protocol errors and warning combinations. --- src/sync/naughty_list.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs index 60ab949..0abb986 100644 --- a/src/sync/naughty_list.rs +++ b/src/sync/naughty_list.rs @@ -232,12 +232,12 @@ impl NaughtyListTracker { } } - // Protocol errors - if error_lower.contains("websocket") - || error_lower.contains("protocol") - || error_lower.contains("invalid frame") - { - // Exclude connection errors + // Protocol errors - specifically WebSocket/Nostr protocol violations + // Note: We check for "websocket" specifically, NOT generic "protocol" keyword + // because git errors often contain "protocol error" (e.g., "fatal: protocol error: bad line length") + // which are transient network issues, not persistent infrastructure problems. + if error_lower.contains("websocket") || error_lower.contains("invalid frame") { + // Exclude connection errors (transient) if !error_lower.contains("connection") && !error_lower.contains("timeout") && !error_lower.contains("refused") @@ -622,3 +622,68 @@ mod tests { assert_eq!(NaughtyCategory::ProtocolError.as_str(), "protocol_error"); } } + +#[cfg(test)] +mod production_tests { + use super::*; + + /// Production case from relay.ngit.dev - remote warning should not be classified + #[test] + fn test_classify_production_relay_ngit_dev_warning() { + let error = + "remote: warning: unable to access '/root/.config/git/attributes': Permission denied"; + assert_eq!(NaughtyListTracker::classify_error(error), None); + } + + /// Git protocol errors are transient, not persistent infrastructure issues + #[test] + fn test_git_protocol_errors_not_naughty() { + // These are common git protocol errors that should NOT be classified as naughty + let git_protocol_errors = [ + "fatal: protocol error: bad line length character: remo", + "fatal: protocol error: expected old/new/ref, got 'shallow", + "fatal: git upload-pack: protocol error", + "error: protocol error: bad pack header", + "fatal: protocol error: bad band #3", + ]; + + for error in git_protocol_errors { + assert_eq!( + NaughtyListTracker::classify_error(error), + None, + "Git protocol error should not be classified as naughty: {}", + error + ); + } + } + + /// Remote warning followed by git protocol error - both should be filtered/ignored + #[test] + fn test_warning_with_git_protocol_error() { + let error = "remote: warning: unable to access '/root/.config/git/attributes': Permission denied\nfatal: protocol error: bad line length character: remo"; + assert_eq!( + NaughtyListTracker::classify_error(error), + None, + "Warning + git protocol error should not be classified as naughty" + ); + } + + /// WebSocket protocol errors ARE naughty (persistent infrastructure issues) + #[test] + fn test_websocket_errors_still_naughty() { + let websocket_errors = [ + "websocket protocol error", + "websocket handshake failed", + "invalid frame received", + ]; + + for error in websocket_errors { + assert_eq!( + NaughtyListTracker::classify_error(error), + Some(NaughtyCategory::ProtocolError), + "WebSocket error should be classified as protocol_error: {}", + error + ); + } + } +} -- cgit v1.2.3 From 3a7fa1d1288c28eae0ee58b4c448c672ec3b69c2 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 19:43:41 +0000 Subject: fix: return HTTP 200 with ERR pkt-line for git protocol errors Previously, all git upload-pack/receive-pack failures returned HTTP 500, but the git smart HTTP protocol requires protocol-level errors (like "not our ref") to be returned as HTTP 200 OK with an ERR pkt-line in the response body. Changes: - Add build_git_protocol_error_response() to create HTTP 200 responses with properly formatted ERR pkt-line ("ERR \n") - Add is_git_protocol_error() to detect protocol errors (exit code 128 with stderr content) vs transport errors - Update handle_upload_pack() and handle_receive_pack() to return protocol errors as HTTP 200 with ERR pkt-line - Keep HTTP 500 for actual transport errors (spawn failures, I/O errors, signals) This allows git clients to properly parse and display protocol error messages instead of seeing generic HTTP 500 errors. --- src/git/handlers.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'src') diff --git a/src/git/handlers.rs b/src/git/handlers.rs index 017eee4..e3a6ad4 100644 --- a/src/git/handlers.rs +++ b/src/git/handlers.rs @@ -99,6 +99,42 @@ pub async fn handle_info_refs( .unwrap()) } +/// Build an HTTP 200 OK response with an ERR pkt-line for git protocol errors. +/// +/// Per the git smart HTTP protocol spec, protocol-level errors (like "not our ref") +/// should be returned as HTTP 200 OK with the error message in pkt-line format: +/// `PKT-LINE("ERR" SP explanation-text)` +/// +/// This allows git clients to properly parse and display the error message. +fn build_git_protocol_error_response( + service: GitService, + error_message: &str, +) -> Response> { + // Format: "ERR \n" + let err_content = format!("ERR {}\n", error_message.trim()); + let err_pktline = PktLine::data(err_content.as_bytes()).encode(); + + Response::builder() + .status(StatusCode::OK) + .header("content-type", service.result_content_type()) + .header("cache-control", "no-cache") + .body(Full::new(Bytes::from(err_pktline))) + .unwrap() +} + +/// Check if a git process failure is a protocol error (vs transport error). +/// +/// Protocol errors are communicated via stderr when git exits with code 128. +/// These should be returned to the client as HTTP 200 with ERR pkt-line. +/// +/// Transport errors (process spawn failures, I/O errors, signals) should +/// remain as HTTP 500 errors. +fn is_git_protocol_error(exit_code: Option, stderr: &[u8]) -> bool { + // Git uses exit code 128 for protocol/usage errors + // If there's stderr content, it's a protocol error message + exit_code == Some(128) && !stderr.is_empty() +} + /// Handle POST /git-upload-pack (clone/fetch) pub async fn handle_upload_pack( repo_path: PathBuf, @@ -150,6 +186,21 @@ pub async fn handle_upload_pack( if !status.success() { let stderr_str = String::from_utf8_lossy(&stderr_output); + + // Check if this is a git protocol error (exit code 128 with stderr) + // Protocol errors should be returned as HTTP 200 with ERR pkt-line + if is_git_protocol_error(status.code(), &stderr_output) { + warn!( + "Git upload-pack protocol error (returning ERR pkt-line): {}", + stderr_str + ); + return Ok(build_git_protocol_error_response( + GitService::UploadPack, + &stderr_str, + )); + } + + // Transport errors (spawn failures, signals, etc.) remain as HTTP 500 error!("Git upload-pack failed: {}", stderr_str); return Err(GitError::GitFailed(status.code())); } @@ -277,6 +328,21 @@ pub async fn handle_receive_pack( if !status.success() { let stderr_str = String::from_utf8_lossy(&stderr_output); + + // Check if this is a git protocol error (exit code 128 with stderr) + // Protocol errors should be returned as HTTP 200 with ERR pkt-line + if is_git_protocol_error(status.code(), &stderr_output) { + warn!( + "Git receive-pack protocol error (returning ERR pkt-line): {}", + stderr_str + ); + return Ok(build_git_protocol_error_response( + GitService::ReceivePack, + &stderr_str, + )); + } + + // Transport errors (spawn failures, signals, etc.) remain as HTTP 500 error!("Git receive-pack failed: {}", stderr_str); return Err(GitError::GitFailed(status.code())); } -- cgit v1.2.3 From 51c331f26ad3c8c422b41267e3695c8f2295510e Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:05:50 +0000 Subject: feat: implement OID retry logic for 'not our ref' errors Add retry loop in fetch_oids that handles git's behavior of stopping at the first missing OID. When a 'not our ref' error occurs: - Parse the missing OID from stderr - Remove it from the fetch list and track it as missing - Retry with remaining OIDs until success or all OIDs exhausted This ensures we fetch all available OIDs even when some are missing from the remote, rather than failing the entire batch. Also improves error reporting: - Include URL in all error messages for easier debugging - Log stderr even when domain is already on naughty list --- src/purgatory/sync/context.rs | 163 +++++++++++++++++++++++++----------------- 1 file changed, 96 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index 33c2d12..0df8be0 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs @@ -361,59 +361,60 @@ impl SyncContext for RealSyncContext { let naughty_list = self.git_naughty_list.clone(); tokio::task::spawn_blocking(move || -> Result> { - // git fetch ... - fetch all OIDs with full history - let mut args = vec!["fetch", &url]; - args.extend(missing_oids.iter().map(|s| s.as_str())); - - let output = Command::new("git") - .args(&args) - .current_dir(&repo_path) - .output(); - - match output { - Ok(result) if result.status.success() => { - // Count how many OIDs we now have - let fetched: Vec = missing_oids - .iter() - .filter(|oid| crate::git::oid_exists(&repo_path, oid)) - .cloned() - .collect(); - - debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); - - Ok(fetched) + let mut remaining_oids = missing_oids.clone(); + let mut missing_from_remote: Vec = Vec::new(); + + // Retry loop: keep fetching until success or no OIDs left + loop { + if remaining_oids.is_empty() { + // All OIDs were missing from remote + debug!( + url = %url, + missing_count = missing_from_remote.len(), + "All requested OIDs missing from remote" + ); + return Ok(vec![]); } - Ok(result) => { - let stderr = String::from_utf8_lossy(&result.stderr); - - // Extract domain and classify error for naughty list - if let Some(domain) = extract_domain(&url) { - if let Some(category) = NaughtyListTracker::classify_error(&stderr) { - let is_new = naughty_list.record(&domain, category, stderr.to_string()); - - if is_new { - tracing::warn!( - domain = %domain, - category = %category, - error = %stderr, - "Git remote domain added to naughty list" - ); - } else { - debug!( - domain = %domain, - category = %category, - "Git remote domain still on naughty list" - ); - } + + // git fetch ... - fetch all OIDs with full history + let mut args = vec!["fetch".to_string(), url.clone()]; + args.extend(remaining_oids.iter().cloned()); + + let output = Command::new("git") + .args(&args) + .current_dir(&repo_path) + .output(); + + match output { + Ok(result) if result.status.success() => { + // Fetch succeeded - count how many OIDs we now have + let fetched: Vec = missing_oids + .iter() + .filter(|oid| crate::git::oid_exists(&repo_path, oid)) + .cloned() + .collect(); + + if !missing_from_remote.is_empty() { + debug!( + url = %url, + fetched_count = fetched.len(), + missing_count = missing_from_remote.len(), + missing_oids = ?missing_from_remote, + "Fetch completed after retries - some OIDs were missing from remote" + ); + } else { + debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); } + + return Ok(fetched); } + Ok(result) => { + let stderr = String::from_utf8_lossy(&result.stderr); - // Check for "not our ref" errors and provide a clearer error message - let error_msg = if stderr.contains("upload-pack: not our ref") { - // Parse out the missing OID from stderr (git only reports one at a time) - let missing_oid = stderr - .lines() - .find_map(|line| { + // Check for "not our ref" error - this is retryable + if stderr.contains("upload-pack: not our ref") { + // Parse out the missing OID from stderr + let missing_oid = stderr.lines().find_map(|line| { if line.contains("not our ref") { // Extract the OID from lines like: // "fatal: remote error: upload-pack: not our ref " @@ -423,32 +424,60 @@ impl SyncContext for RealSyncContext { } }); - let total_requested = missing_oids.len(); + if let Some(ref oid) = missing_oid { + // Remove the missing OID and retry with remaining + remaining_oids.retain(|o| o != oid); + missing_from_remote.push(oid.clone()); - if let Some(oid) = missing_oid { - if total_requested > 1 { - // BUG: Git stops at first missing OID, so we don't know if the others exist - // We need retry logic to fetch remaining OIDs individually - tracing::warn!( + debug!( url = %url, missing_oid = %oid, - total_requested = total_requested, - "Git fetch failed on first missing OID - other requested OIDs may exist but were not fetched. Retry logic needed." + remaining_count = remaining_oids.len(), + "OID not found on remote, retrying with remaining OIDs" ); - format!("remote missing oid {} (BUG: {} other oids not attempted)", oid, total_requested - 1) - } else { - format!("remote missing only oid requested: {}", oid) + + continue; // Retry with remaining OIDs + } + } + + // Non-retryable error - record to naughty list and return error + if let Some(domain) = extract_domain(&url) { + if let Some(category) = NaughtyListTracker::classify_error(&stderr) { + let is_new = + naughty_list.record(&domain, category, stderr.to_string()); + + if is_new { + tracing::warn!( + domain = %domain, + category = %category, + error = %stderr, + "Git remote domain added to naughty list" + ); + } else { + debug!( + domain = %domain, + category = %category, + error = %stderr, + "Git fetch failed (domain on naughty list)" + ); + } } - } else { - format!("git fetch failed: {}", stderr) } - } else { - format!("git fetch failed: {}", stderr) - }; - Err(anyhow::anyhow!("{}", error_msg)) + return Err(anyhow::anyhow!( + "git fetch failed for {}: {}", + url, + stderr + )); + } + Err(e) => { + return Err(anyhow::anyhow!( + "git fetch command error for {}: {}", + url, + e + )) + } } - Err(e) => Err(anyhow::anyhow!("git fetch command error: {}", e)), } }) .await -- cgit v1.2.3 From 847acdecb9c28a5307123b9ee685b769a598cfc1 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:40:46 +0000 Subject: fix: distinguish 0 OIDs fetched from successful fetch in logging When fetch_oids returns Ok(vec![]) (all requested OIDs missing from remote), the log message now says 'Fetch returned no OIDs (not available on remote)' instead of the misleading 'Fetch succeeded' with oids_fetched=0. --- src/purgatory/sync/functions.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 65d29af..2b7e71f 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs @@ -369,7 +369,7 @@ pub async fn sync_identifier_from_url( throttle_manager.complete_request(&domain); let oids_fetched = match fetch_result { - Ok(fetched) => { + Ok(fetched) if !fetched.is_empty() => { debug!( identifier = %identifier, url = %url, @@ -378,6 +378,14 @@ pub async fn sync_identifier_from_url( ); fetched.len() } + Ok(_) => { + debug!( + identifier = %identifier, + url = %url, + "Fetch returned no OIDs (not available on remote)" + ); + 0 + } Err(e) => { debug!( identifier = %identifier, -- cgit v1.2.3 From 6d920cae2704016869500889a92b358d845b69e1 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:42:25 +0000 Subject: improve logging --- src/purgatory/sync/context.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/purgatory/sync/context.rs b/src/purgatory/sync/context.rs index 0df8be0..904f8af 100644 --- a/src/purgatory/sync/context.rs +++ b/src/purgatory/sync/context.rs @@ -403,7 +403,7 @@ impl SyncContext for RealSyncContext { "Fetch completed after retries - some OIDs were missing from remote" ); } else { - debug!(fetched_count = fetched.len(), "Successfully fetched OIDs"); + debug!(url = %url, fetched_count = fetched.len(), "Successfully fetched OIDs"); } return Ok(fetched); @@ -418,7 +418,9 @@ impl SyncContext for RealSyncContext { if line.contains("not our ref") { // Extract the OID from lines like: // "fatal: remote error: upload-pack: not our ref " - line.split("not our ref").nth(1).map(|s| s.trim().to_string()) + line.split("not our ref") + .nth(1) + .map(|s| s.trim().to_string()) } else { None } @@ -464,11 +466,7 @@ impl SyncContext for RealSyncContext { } } - return Err(anyhow::anyhow!( - "git fetch failed for {}: {}", - url, - stderr - )); + return Err(anyhow::anyhow!("git fetch failed for {}: {}", url, stderr)); } Err(e) => { return Err(anyhow::anyhow!( -- cgit v1.2.3 From efc3da477d4edb9d1334718e3e20d197ba711468 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Tue, 27 Jan 2026 21:55:34 +0000 Subject: fix: pass actually fetched OIDs to process_newly_available_git_data Previously, sync_identifier_from_url passed all needed OIDs to process_newly_available_git_data, not just the OIDs that were successfully fetched. This caused incorrect logging (new_oids_count would show all needed OIDs, not just fetched ones). While this didn't break functionality (the actual processing uses can_apply_state which checks the repository on disk), it made debugging confusing. Changes: - Rename oids_fetched to fetched_oids and change type from usize to Vec - Return Vec from match arms instead of counts - Pass fetched_oids (not needed_oids) to process_newly_available_git_data - Return fetched_oids.len() at the end This ensures logging accurately reflects which OIDs were actually fetched from the remote. --- src/purgatory/sync/functions.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/purgatory/sync/functions.rs b/src/purgatory/sync/functions.rs index 2b7e71f..9207d58 100644 --- a/src/purgatory/sync/functions.rs +++ b/src/purgatory/sync/functions.rs @@ -368,7 +368,7 @@ pub async fn sync_identifier_from_url( let fetch_result = ctx.fetch_oids(&target_repo, url, &needed_oids).await; throttle_manager.complete_request(&domain); - let oids_fetched = match fetch_result { + let fetched_oids = match fetch_result { Ok(fetched) if !fetched.is_empty() => { debug!( identifier = %identifier, @@ -376,7 +376,7 @@ pub async fn sync_identifier_from_url( oids_fetched = fetched.len(), "Fetch succeeded" ); - fetched.len() + fetched } Ok(_) => { debug!( @@ -384,7 +384,7 @@ pub async fn sync_identifier_from_url( url = %url, "Fetch returned no OIDs (not available on remote)" ); - 0 + vec![] } Err(e) => { debug!( @@ -393,13 +393,13 @@ pub async fn sync_identifier_from_url( error = %e, "Fetch failed" ); - 0 + vec![] } }; // Try to process any events that can now be satisfied - if oids_fetched > 0 { - let new_oids: HashSet = needed_oids.into_iter().collect(); + if !fetched_oids.is_empty() { + let new_oids: HashSet = fetched_oids.iter().cloned().collect(); if let Err(e) = ctx .process_newly_available_git_data(&target_repo, &new_oids) .await @@ -412,7 +412,7 @@ pub async fn sync_identifier_from_url( } } - oids_fetched + fetched_oids.len() } /// Sync git data for an identifier. -- cgit v1.2.3 From f148b3a0e4b032c0acf835cda6d2935e19b9f67e Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Wed, 28 Jan 2026 21:00:14 +0000 Subject: feat(purgatory): track event source for filtered expiry logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add EventSource enum (Direct/Sync) to purgatory entries to distinguish between user-submitted events and sync-fetched events. This enables: - WARN-level logging for direct submissions that expire (user should know) - DEBUG-level logging for sync-fetched expirations (expected behavior) - Source upgrade from Sync→Direct if user submits after sync - Expiry timer reset on source upgrade (fresh 30-min window for user) The source is included in [PURGATORY_EXPIRED] logs as source=direct or source=sync for easy filtering. --- src/nostr/builder.rs | 2 +- src/nostr/policy/state.rs | 2 +- src/purgatory/mod.rs | 206 ++++++++++++++++++++++++++++++++++------------ src/purgatory/types.rs | 30 +++++++ 4 files changed, 187 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/nostr/builder.rs b/src/nostr/builder.rs index 629c111..9211972 100644 --- a/src/nostr/builder.rs +++ b/src/nostr/builder.rs @@ -399,7 +399,7 @@ impl Nip34WritePolicy { // Add to purgatory self.ctx .purgatory - .add_pr(event.clone(), event.id.to_hex(), commit.clone()); + .add_pr(event.clone(), event.id.to_hex(), commit.clone(), is_synced); WritePolicyResult::Reject { status: true, // Client sees OK diff --git a/src/nostr/policy/state.rs b/src/nostr/policy/state.rs index f94f004..52f0483 100644 --- a/src/nostr/policy/state.rs +++ b/src/nostr/policy/state.rs @@ -207,7 +207,7 @@ impl StatePolicy { // (add_state automatically enqueues for background sync) self.ctx .purgatory - .add_state(event.clone(), state.identifier.clone(), event.pubkey); + .add_state(event.clone(), state.identifier.clone(), event.pubkey, is_synced); tracing::info!( "state event added to purgatory: eventid: {}, identifier: {}", diff --git a/src/purgatory/mod.rs b/src/purgatory/mod.rs index 8b75351..d442ad8 100644 --- a/src/purgatory/mod.rs +++ b/src/purgatory/mod.rs @@ -17,7 +17,7 @@ pub mod sync; mod types; pub use helpers::{can_apply_state, can_satisfy_state, extract_refs_from_state, get_unpushed_refs}; -pub use types::{PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; +pub use types::{EventSource, PrPurgatoryEntry, RefPair, RefUpdate, StatePurgatoryEntry}; use dashmap::DashMap; use nostr_sdk::prelude::*; @@ -58,6 +58,9 @@ struct SerializableStatePurgatoryEntry { created_at_offset_secs: u64, /// Duration offset from saved_at for expires_at expires_at_offset_secs: u64, + /// Source of this event (direct submission vs sync) + #[serde(default)] + source: types::EventSource, } /// Serializable wrapper for `PrPurgatoryEntry` with time offsets. @@ -75,6 +78,9 @@ struct SerializablePrPurgatoryEntry { created_at_offset_secs: u64, /// Duration offset from saved_at for expires_at expires_at_offset_secs: u64, + /// Source of this event (direct submission vs sync) + #[serde(default)] + source: types::EventSource, } /// Serializable purgatory state for disk persistence. @@ -271,11 +277,38 @@ impl Purgatory { /// For sync-triggered events, the SyncManager calls `enqueue_sync_immediate` separately /// to override this delay. /// + /// If an event already exists in purgatory with `Sync` source and the new submission + /// is direct (`!from_sync`), the source is upgraded to `Direct` without extending expiry. + /// /// # Arguments /// * `event` - The state event (kind 30618) to hold /// * `identifier` - The repository identifier from the 'd' tag /// * `author` - The event author's public key - pub fn add_state(&self, event: Event, identifier: String, author: PublicKey) { + /// * `from_sync` - True if this event came from proactive sync (vs user-submitted) + pub fn add_state(&self, event: Event, identifier: String, author: PublicKey, from_sync: bool) { + let source = if from_sync { + types::EventSource::Sync + } else { + types::EventSource::Direct + }; + + // Check if event already exists - if so, potentially upgrade source + if let Some(mut entries) = self.state_events.get_mut(&identifier) { + if let Some(existing) = entries.iter_mut().find(|e| e.event.id == event.id) { + // Upgrade source from Sync to Direct if new submission is direct + if existing.source == types::EventSource::Sync && !from_sync { + existing.source = types::EventSource::Direct; + existing.expires_at = Instant::now() + DEFAULT_EXPIRY; + tracing::debug!( + event_id = %event.id, + identifier = %identifier, + "Upgraded purgatory entry source from Sync to Direct, reset expiry" + ); + } + return; // Event already exists, don't add duplicate + } + } + let now = Instant::now(); let entry = StatePurgatoryEntry { event, @@ -283,6 +316,7 @@ impl Purgatory { author, created_at: now, expires_at: now + DEFAULT_EXPIRY, + source, }; self.state_events @@ -302,11 +336,35 @@ impl Purgatory { /// Automatically enqueues the referenced repository identifier for background sync /// with the default delay (3 minutes), giving time for a git push to arrive. /// + /// If an event already exists in purgatory with `Sync` source and the new submission + /// is direct (`!from_sync`), the source is upgraded to `Direct` without extending expiry. + /// /// # Arguments /// * `event` - The PR event (kind 1617/1618) to hold /// * `event_id` - The event ID (hex string) from the 'e' tag /// * `commit` - The commit SHA from the 'c' tag - pub fn add_pr(&self, event: Event, event_id: String, commit: String) { + /// * `from_sync` - True if this event came from proactive sync (vs user-submitted) + pub fn add_pr(&self, event: Event, event_id: String, commit: String, from_sync: bool) { + let source = if from_sync { + types::EventSource::Sync + } else { + types::EventSource::Direct + }; + + // Check if event already exists - if so, potentially upgrade source + if let Some(mut existing) = self.pr_events.get_mut(&event_id) { + // Upgrade source from Sync to Direct if new submission is direct + if existing.source == types::EventSource::Sync && !from_sync { + existing.source = types::EventSource::Direct; + existing.expires_at = Instant::now() + DEFAULT_EXPIRY; + tracing::debug!( + event_id = %event_id, + "Upgraded PR purgatory entry source from Sync to Direct, reset expiry" + ); + } + return; // Event already exists, don't add duplicate + } + // Extract identifier from the event's `a` tag for sync enqueueing let identifier = crate::git::sync::extract_identifier_from_pr_event(&event); @@ -316,6 +374,7 @@ impl Purgatory { commit, created_at: now, expires_at: now + DEFAULT_EXPIRY, + source, }; self.pr_events.insert(event_id, entry); @@ -329,6 +388,8 @@ impl Purgatory { /// Add a PR placeholder (git data arrived before PR event). /// /// Creates a placeholder entry waiting for the corresponding PR event. + /// Placeholders are always marked as `Direct` source since they originate + /// from git pushes (direct user action). /// /// # Arguments /// * `event_id` - The expected event ID (from git ref name) @@ -340,6 +401,7 @@ impl Purgatory { commit, created_at: now, expires_at: now + DEFAULT_EXPIRY, + source: types::EventSource::Direct, // Git pushes are direct user actions }; self.pr_events.insert(event_id, entry); @@ -626,15 +688,29 @@ impl Purgatory { for entry in entries.iter().filter(|e| e.expires_at <= now) { let npub = entry.author.to_bech32().unwrap_or_else(|_| entry.author.to_hex()); let event_id_short = &entry.event.id.to_hex()[..12]; + let source_str = if entry.source.is_direct() { "direct" } else { "sync" }; // Structured log for migration scripts - tracing::warn!( - "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} reason=\"git data not received within 30 minutes\"", - identifier, - npub, - event_id_short, - entry.event.kind.as_u16() - ); + // Direct submissions log at WARN, synced events at DEBUG + if entry.source.is_direct() { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} source={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16(), + source_str + ); + } else { + tracing::debug!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} source={} reason=\"git data not received within 30 minutes\"", + identifier, + npub, + event_id_short, + entry.event.kind.as_u16(), + source_str + ); + } self.mark_expired(entry.event.id); } @@ -655,16 +731,18 @@ impl Purgatory { let event_id_str = entry.key().clone(); let event_opt = pr_entry.event.clone(); let commit = pr_entry.commit.clone(); - (event_id_str, event_opt, commit) + let source = pr_entry.source; + (event_id_str, event_opt, commit, source) }) .collect(); let pr_removed = expired_prs.len(); - for (event_id_str, event_opt, commit) in expired_prs { + for (event_id_str, event_opt, commit, source) in expired_prs { // Log structured entry for PR events (not placeholders) if let Some(ref event) = event_opt { let npub = event.pubkey.to_bech32().unwrap_or_else(|_| event.pubkey.to_hex()); let event_id_short = &event.id.to_hex()[..12]; + let source_str = if source.is_direct() { "direct" } else { "sync" }; // Extract ALL repo identifiers from 'a' tags // (PR events can reference multiple repos when there are multiple maintainers) @@ -701,22 +779,37 @@ impl Purgatory { }; // Structured log for migration scripts - log once per repo + // Direct submissions log at WARN, synced events at DEBUG for repo in &repos_to_log { - tracing::warn!( - "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} reason=\"git data not received within 30 minutes\"", - repo, - npub, - event_id_short, - event.kind.as_u16(), - &commit[..commit.len().min(12)] - ); + if source.is_direct() { + tracing::warn!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} source={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)], + source_str + ); + } else { + tracing::debug!( + "[PURGATORY_EXPIRED] repo={} npub={} event_id={}... kind={} commit={} source={} reason=\"git data not received within 30 minutes\"", + repo, + npub, + event_id_short, + event.kind.as_u16(), + &commit[..commit.len().min(12)], + source_str + ); + } } self.mark_expired(event.id); } else { // Placeholder (git data arrived first, but PR event never came) + // Placeholders are always Direct source (from git push) tracing::debug!( - "[PURGATORY_EXPIRED] placeholder event_id={} commit={} reason=\"PR event not received within 30 minutes\"", + "[PURGATORY_EXPIRED] placeholder event_id={} commit={} source=direct reason=\"PR event not received within 30 minutes\"", &event_id_str[..event_id_str.len().min(12)], &commit[..commit.len().min(12)] ); @@ -869,6 +962,7 @@ impl Purgatory { author: e.author, created_at_offset_secs: created_offset.as_secs(), expires_at_offset_secs: expires_offset.as_secs(), + source: e.source, } }) .collect(); @@ -891,6 +985,7 @@ impl Purgatory { commit: e.commit.clone(), created_at_offset_secs: created_offset.as_secs(), expires_at_offset_secs: expires_offset.as_secs(), + source: e.source, }; pr_events.insert(event_id, serializable); } @@ -992,6 +1087,7 @@ impl Purgatory { author: e.author, created_at, expires_at, + source: e.source, } }) .collect(); @@ -1017,6 +1113,7 @@ impl Purgatory { commit: e.commit, created_at, expires_at, + source: e.source, }; self.pr_events.insert(event_id, entry); @@ -1074,8 +1171,8 @@ mod tests { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event.clone(), "test-repo".to_string(), keys.public_key()); - purgatory.add_pr(event, "test-event-id".to_string(), "abc123".to_string()); + purgatory.add_state(event.clone(), "test-repo".to_string(), keys.public_key(), false); + purgatory.add_pr(event, "test-event-id".to_string(), "abc123".to_string(), false); let (state_count, pr_count) = purgatory.count(); assert_eq!(state_count, 1); @@ -1126,7 +1223,7 @@ mod tests { let event = EventBuilder::text_note("state") .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event, "test-repo".to_string(), keys.public_key()); + purgatory.add_state(event, "test-repo".to_string(), keys.public_key(), false); // Now should have pending events assert!(purgatory.has_pending_events("test-repo")); @@ -1156,7 +1253,7 @@ mod tests { .sign_with_keys(&keys) .unwrap(); - purgatory.add_pr(event, "pr-event-id".to_string(), "commit123".to_string()); + purgatory.add_pr(event, "pr-event-id".to_string(), "commit123".to_string(), false); // Now should have pending events for test-repo assert!(purgatory.has_pending_events("test-repo")); @@ -1221,6 +1318,7 @@ fn test_pr_event_vs_placeholder() { event.clone(), "event-id-1".to_string(), "commit-abc".to_string(), + false, ); // Add a placeholder (no event) @@ -1277,8 +1375,9 @@ fn test_cleanup_removes_expired_entries() { state_event.clone(), "test-repo".to_string(), keys.public_key(), + false, ); - purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string()); + purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string(), false); purgatory.add_pr_placeholder("pr-456".to_string(), "commit-def".to_string()); // Verify entries are there @@ -1325,8 +1424,8 @@ fn test_cleanup_preserves_non_expired_entries() { .unwrap(); // Add fresh entries - purgatory.add_state(state_event, "test-repo".to_string(), keys.public_key()); - purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string()); + purgatory.add_state(state_event, "test-repo".to_string(), keys.public_key(), false); + purgatory.add_pr(pr_event, "pr-123".to_string(), "commit-abc".to_string(), false); // Run cleanup let (state_removed, pr_removed) = purgatory.cleanup(); @@ -1356,8 +1455,8 @@ fn test_cleanup_mixed_expired_and_fresh() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event1, "test-repo".to_string(), keys.public_key()); - purgatory.add_state(event2, "test-repo".to_string(), keys.public_key()); + purgatory.add_state(event1, "test-repo".to_string(), keys.public_key(), false); + purgatory.add_state(event2, "test-repo".to_string(), keys.public_key(), false); // Expire only the first one if let Some(mut entries) = purgatory.state_events.get_mut("test-repo") { @@ -1374,8 +1473,8 @@ fn test_cleanup_mixed_expired_and_fresh() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_pr(pr1, "pr-1".to_string(), "commit-1".to_string()); - purgatory.add_pr(pr2, "pr-2".to_string(), "commit-2".to_string()); + purgatory.add_pr(pr1, "pr-1".to_string(), "commit-1".to_string(), false); + purgatory.add_pr(pr2, "pr-2".to_string(), "commit-2".to_string(), false); // Expire only first PR if let Some(mut entry) = purgatory.pr_events.get_mut("pr-1") { @@ -1407,8 +1506,8 @@ fn test_remove_expired_legacy_method() { .unwrap(); let pr_event = EventBuilder::text_note("pr").sign_with_keys(&keys).unwrap(); - purgatory.add_state(state_event, "repo".to_string(), keys.public_key()); - purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string()); + purgatory.add_state(state_event, "repo".to_string(), keys.public_key(), false); + purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string(), false); // Expire both if let Some(mut entries) = purgatory.state_events.get_mut("repo") { @@ -1442,8 +1541,8 @@ fn test_expired_event_tracking() { let pr_event_id = pr_event.id; // Add events to purgatory - purgatory.add_state(state_event, "repo".to_string(), keys.public_key()); - purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string()); + purgatory.add_state(state_event, "repo".to_string(), keys.public_key(), false); + purgatory.add_pr(pr_event, "pr-id".to_string(), "commit".to_string(), false); // Events should not be marked as expired yet assert!(!purgatory.is_expired(&state_event_id)); @@ -1495,7 +1594,7 @@ fn test_cleanup_expired_events() { let event2_id = event2.id; // Add and immediately expire event1 - purgatory.add_state(event1, "repo1".to_string(), keys.public_key()); + purgatory.add_state(event1, "repo1".to_string(), keys.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo1") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); @@ -1504,7 +1603,7 @@ fn test_cleanup_expired_events() { purgatory.cleanup(); // Add and expire event2 (will be more recent) - purgatory.add_state(event2, "repo2".to_string(), keys.public_key()); + purgatory.add_state(event2, "repo2".to_string(), keys.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo2") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); @@ -1546,7 +1645,7 @@ fn test_expired_events_prevent_readdition() { let event_id = event.id; // Add event to purgatory - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Expire it if let Some(mut entries) = purgatory.state_events.get_mut("repo") { @@ -1566,7 +1665,7 @@ fn test_expired_events_prevent_readdition() { // This simulates what negentropy/REQ+EOSE should do: // Check if event is in event_ids() before adding if !ids.contains(&event_id) { - purgatory.add_state(event, "repo".to_string(), keys.public_key()); + purgatory.add_state(event, "repo".to_string(), keys.public_key(), false); } // Event should NOT be re-added @@ -1609,7 +1708,7 @@ fn test_user_can_resubmit_expired_event() { let event_id = event.id; // Add event to purgatory - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Expire it if let Some(mut entries) = purgatory.state_events.get_mut("repo") { @@ -1658,8 +1757,8 @@ async fn test_save_and_restore_state_events() { let event1_id = event1.id; let event2_id = event2.id; - purgatory.add_state(event1.clone(), "test-repo".to_string(), keys.public_key()); - purgatory.add_state(event2.clone(), "test-repo".to_string(), keys.public_key()); + purgatory.add_state(event1.clone(), "test-repo".to_string(), keys.public_key(), false); + purgatory.add_state(event2.clone(), "test-repo".to_string(), keys.public_key(), false); // Save to disk purgatory.save_to_disk(&state_file).unwrap(); @@ -1721,6 +1820,7 @@ async fn test_save_and_restore_pr_events() { pr_event.clone(), "pr-event-id".to_string(), "commit-abc".to_string(), + false, ); // Save to disk @@ -1790,7 +1890,7 @@ async fn test_save_and_restore_expired_events() { let event_id = event.id; // Add and expire event - purgatory.add_state(event, "repo".to_string(), keys.public_key()); + purgatory.add_state(event, "repo".to_string(), keys.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); @@ -1929,7 +2029,7 @@ async fn test_downtime_calculation() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Get original expiry time let original_entries = purgatory.find_state("repo"); @@ -1985,7 +2085,7 @@ async fn test_expiry_times_preserved() { .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key()); + purgatory.add_state(event.clone(), "repo".to_string(), keys.public_key(), false); // Manually set expiry to a specific time in the future let custom_expiry = Instant::now() + Duration::from_secs(600); // 10 minutes @@ -2044,16 +2144,19 @@ async fn test_multiple_state_events_same_identifier() { event1.clone(), "shared-repo".to_string(), keys1.public_key(), + false, ); purgatory.add_state( event2.clone(), "shared-repo".to_string(), keys2.public_key(), + false, ); purgatory.add_state( event3.clone(), "shared-repo".to_string(), keys3.public_key(), + false, ); // Save to disk @@ -2100,6 +2203,7 @@ async fn test_mixed_pr_events_and_placeholders() { pr_event.clone(), "pr-with-event".to_string(), "commit-abc".to_string(), + false, ); // Add PR placeholder @@ -2145,7 +2249,7 @@ async fn test_file_cleanup_after_successful_restore() { let event = EventBuilder::text_note("test") .sign_with_keys(&keys) .unwrap(); - purgatory.add_state(event, "repo".to_string(), keys.public_key()); + purgatory.add_state(event, "repo".to_string(), keys.public_key(), false); // Save to disk purgatory.save_to_disk(&state_file).unwrap(); @@ -2179,8 +2283,8 @@ async fn test_comprehensive_roundtrip() { .sign_with_keys(&keys2) .unwrap(); - purgatory.add_state(state1.clone(), "repo1".to_string(), keys1.public_key()); - purgatory.add_state(state2.clone(), "repo2".to_string(), keys2.public_key()); + purgatory.add_state(state1.clone(), "repo1".to_string(), keys1.public_key(), false); + purgatory.add_state(state2.clone(), "repo2".to_string(), keys2.public_key(), false); // Add PR event let tags = vec![Tag::custom( @@ -2191,7 +2295,7 @@ async fn test_comprehensive_roundtrip() { .tags(tags) .sign_with_keys(&keys1) .unwrap(); - purgatory.add_pr(pr_event.clone(), "pr-1".to_string(), "commit-1".to_string()); + purgatory.add_pr(pr_event.clone(), "pr-1".to_string(), "commit-1".to_string(), false); // Add PR placeholder purgatory.add_pr_placeholder("pr-2".to_string(), "commit-2".to_string()); @@ -2201,7 +2305,7 @@ async fn test_comprehensive_roundtrip() { .sign_with_keys(&keys1) .unwrap(); let expired_id = expired_event.id; - purgatory.add_state(expired_event, "repo3".to_string(), keys1.public_key()); + purgatory.add_state(expired_event, "repo3".to_string(), keys1.public_key(), false); if let Some(mut entries) = purgatory.state_events.get_mut("repo3") { for entry in entries.iter_mut() { entry.expires_at = Instant::now() - Duration::from_secs(1); diff --git a/src/purgatory/types.rs b/src/purgatory/types.rs index 919504b..e37a3e1 100644 --- a/src/purgatory/types.rs +++ b/src/purgatory/types.rs @@ -8,6 +8,28 @@ use nostr_sdk::prelude::*; use serde::{Deserialize, Serialize}; use std::time::Instant; +/// Source of an event entering purgatory. +/// +/// Tracks whether an event was submitted directly by a user or fetched via +/// proactive sync from another relay. This distinction is used for: +/// - Filtered logging: Direct submissions log at WARN level, synced at DEBUG +/// - Operational monitoring: Helps identify user-facing issues vs sync noise +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum EventSource { + /// Event was published directly to this relay by a user + #[default] + Direct, + /// Event was fetched via proactive sync from another relay + Sync, +} + +impl EventSource { + /// Returns true if this is a direct submission (not synced) + pub fn is_direct(&self) -> bool { + matches!(self, EventSource::Direct) + } +} + /// Default value for Instant fields during deserialization fn instant_now() -> Instant { Instant::now() @@ -86,6 +108,10 @@ pub struct StatePurgatoryEntry { /// Expiry deadline (30 min from creation, may be extended) #[serde(skip, default = "instant_now")] pub expires_at: Instant, + + /// Source of this event (direct submission vs sync) + #[serde(default)] + pub source: EventSource, } /// Entry for a PR event (kind 1617/1618) or placeholder waiting in purgatory. @@ -112,4 +138,8 @@ pub struct PrPurgatoryEntry { /// Expiry deadline (30 min from creation, may be extended) #[serde(skip, default = "instant_now")] pub expires_at: Instant, + + /// Source of this event (direct submission vs sync) + #[serde(default)] + pub source: EventSource, } -- cgit v1.2.3