upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDanConwayDev <DanConwayDev@protonmail.com>2026-01-27 11:15:58 +0000
committerDanConwayDev <DanConwayDev@protonmail.com>2026-01-27 20:38:20 +0000
commitddcba2b350615e6d6ad7028b570206efb42f0338 (patch)
tree1664c293c8de6a04fe5e6e4d16949e54d8ecdd2a /src
parentdd9b00c644853a8db0ec463a7e1eddabd6634e41 (diff)
fix: prevent false positives in naughty list classification
Strip URLs (http://, https://, git://, ws://, wss://) from error messages before classification to prevent false positives from repository names, paths, or identifiers containing keywords like 'ssl', 'certificate', etc. - Add strip_urls() function to remove URLs before pattern matching - Add WebSocket protocol support (ws://, wss://) for relay errors - Filter remote warnings that don't indicate infrastructure problems - Use more specific SSL/TLS patterns to avoid npub substring matches - Reduce test suite from 40 to 13 tests, keeping only edge cases Fixes false positives seen in production: - git.shakespeare.diy: 'repository not found' with npub containing 'ssl' - relay.ngit.dev: HTTP 500 error with npub containing 'ssl' - gitnostr.com: remote permission warning misclassified as protocol error
Diffstat (limited to 'src')
-rw-r--r--src/sync/naughty_list.rs428
1 files changed, 232 insertions, 196 deletions
diff --git a/src/sync/naughty_list.rs b/src/sync/naughty_list.rs
index 097affe..60ab949 100644
--- a/src/sync/naughty_list.rs
+++ b/src/sync/naughty_list.rs
@@ -101,6 +101,69 @@ impl NaughtyListTracker {
101 Self::new(12) 101 Self::new(12)
102 } 102 }
103 103
104 /// Strip URLs from an error message to prevent false positives from URL components.
105 ///
106 /// URLs can contain path components, repository names, or user identifiers that
107 /// accidentally match error patterns (e.g., "my-openssl-project", "ssl-team",
108 /// "certificate-manager"). By stripping URLs before classification, we ensure
109 /// only the actual error message text is analyzed.
110 ///
111 /// Handles: http://, https://, git://, ws://, wss://
112 fn strip_urls(error: &str) -> String {
113 let mut result = String::with_capacity(error.len());
114 let mut chars = error.chars().peekable();
115
116 while let Some(c) = chars.next() {
117 // Check for URL start patterns
118 let potential_url = match c {
119 'h' => {
120 // Check for http:// or https://
121 let rest: String = chars.clone().take(7).collect();
122 rest.starts_with("ttp://") || rest.starts_with("ttps://")
123 }
124 'g' => {
125 // Check for git://
126 let rest: String = chars.clone().take(5).collect();
127 rest.starts_with("it://")
128 }
129 'w' => {
130 // Check for ws:// or wss://
131 let rest: String = chars.clone().take(5).collect();
132 rest.starts_with("s://") || rest.starts_with("ss://")
133 }
134 _ => false,
135 };
136
137 if potential_url {
138 // Found URL start, consume until URL end
139 result.push_str("[URL]");
140
141 // Skip until we hit a URL terminator
142 loop {
143 match chars.peek() {
144 Some(&ch) if Self::is_url_char(ch) => {
145 chars.next();
146 }
147 _ => break,
148 }
149 }
150 } else {
151 result.push(c);
152 }
153 }
154
155 result
156 }
157
158 /// Check if a character can be part of a URL
159 #[inline]
160 fn is_url_char(c: char) -> bool {
161 // URLs end at whitespace, quotes, or certain brackets
162 // This is conservative - real URLs can contain more, but git errors
163 // typically have URLs followed by these terminators
164 !matches!(c, ' ' | '\t' | '\n' | '\r' | '"' | '\'' | '>' | ']' | ')')
165 }
166
104 /// Classify an error string into a naughty category or return None for transient errors 167 /// Classify an error string into a naughty category or return None for transient errors
105 /// 168 ///
106 /// # Arguments 169 /// # Arguments
@@ -112,10 +175,32 @@ impl NaughtyListTracker {
112 /// - `Some(NaughtyCategory)` if the error indicates a persistent infrastructure issue 175 /// - `Some(NaughtyCategory)` if the error indicates a persistent infrastructure issue
113 /// - `None` if the error is a transient network issue (use HealthTracker backoff) 176 /// - `None` if the error is a transient network issue (use HealthTracker backoff)
114 pub fn classify_error(error: &str) -> Option<NaughtyCategory> { 177 pub fn classify_error(error: &str) -> Option<NaughtyCategory> {
115 let error_lower = error.to_lowercase(); 178 // Filter out remote warnings - these are informational messages from the remote
179 // server that don't indicate infrastructure problems with the domain itself.
180 // Example: "remote: warning: unable to access '/root/.config/git/attributes': Permission denied"
181 // These warnings are about the remote server's internal configuration, not connectivity.
182 let filtered_error: String = error
183 .lines()
184 .filter(|line| {
185 let line_lower = line.to_lowercase();
186 // Keep lines that are NOT remote warnings
187 !(line_lower.starts_with("remote: warning:")
188 || line_lower.starts_with("warning: remote"))
189 })
190 .collect::<Vec<_>>()
191 .join("\n");
192
193 // If after filtering we have no content, this was just warnings - not a real error
194 if filtered_error.trim().is_empty() {
195 return None;
196 }
197
198 // Strip URLs to prevent false positives from URL components
199 // (e.g., repository named "openssl-test" or path containing "certificate")
200 let url_stripped = Self::strip_urls(&filtered_error);
201 let error_lower = url_stripped.to_lowercase();
116 202
117 // DNS lookup failures - use specific patterns to avoid false positives 203 // DNS lookup failures
118 // from URLs containing "dns" (e.g., npubs like "...cdns7..." or domains)
119 if error_lower.contains("failed to lookup address") 204 if error_lower.contains("failed to lookup address")
120 || error_lower.contains("name or service not known") 205 || error_lower.contains("name or service not known")
121 || error_lower.contains("nodename nor servname provided") 206 || error_lower.contains("nodename nor servname provided")
@@ -129,8 +214,17 @@ impl NaughtyListTracker {
129 214
130 // TLS certificate errors 215 // TLS certificate errors
131 if error_lower.contains("certificate") 216 if error_lower.contains("certificate")
132 || error_lower.contains("ssl") 217 || error_lower.contains("ssl error")
133 || error_lower.contains("tls") 218 || error_lower.contains("ssl certificate")
219 || error_lower.contains("ssl handshake")
220 || error_lower.contains("ssl_error")
221 || error_lower.contains("tls error")
222 || error_lower.contains("tls handshake")
223 || error_lower.contains("tls alert")
224 || error_lower.contains("tls_error")
225 || error_lower.contains("openssl")
226 || error_lower.contains("schannel")
227 || error_lower.contains("secure channel")
134 { 228 {
135 // Exclude timeout errors that mention TLS 229 // Exclude timeout errors that mention TLS
136 if !error_lower.contains("timeout") && !error_lower.contains("timed out") { 230 if !error_lower.contains("timeout") && !error_lower.contains("timed out") {
@@ -294,211 +388,216 @@ impl NaughtyListTracker {
294mod tests { 388mod tests {
295 use super::*; 389 use super::*;
296 390
391 // =========================================================================
392 // URL STRIPPING TESTS
393 // =========================================================================
394
297 #[test] 395 #[test]
298 fn test_classify_dns_errors() { 396 fn test_strip_urls_basic_protocols() {
299 assert_eq!( 397 // HTTP/HTTPS
300 NaughtyListTracker::classify_error("failed to lookup address information"),
301 Some(NaughtyCategory::DnsLookupFailed)
302 );
303 assert_eq!( 398 assert_eq!(
304 NaughtyListTracker::classify_error("Name or service not known"), 399 NaughtyListTracker::strip_urls("error: https://example.com/repo.git failed"),
305 Some(NaughtyCategory::DnsLookupFailed) 400 "error: [URL] failed"
306 );
307 assert_eq!(
308 NaughtyListTracker::classify_error("nodename nor servname provided"),
309 Some(NaughtyCategory::DnsLookupFailed)
310 ); 401 );
311 assert_eq!( 402 assert_eq!(
312 NaughtyListTracker::classify_error("dns error: NXDOMAIN"), 403 NaughtyListTracker::strip_urls("error: http://example.com/path failed"),
313 Some(NaughtyCategory::DnsLookupFailed) 404 "error: [URL] failed"
314 ); 405 );
315 }
316 406
317 #[test] 407 // Git protocol
318 fn test_classify_tls_errors() {
319 assert_eq!( 408 assert_eq!(
320 NaughtyListTracker::classify_error("certificate not valid for 'example.com'"), 409 NaughtyListTracker::strip_urls("fatal: git://github.com/user/repo.git not found"),
321 Some(NaughtyCategory::TlsCertificateInvalid) 410 "fatal: [URL] not found"
322 ); 411 );
412
413 // WebSocket protocols (used for relay URLs)
323 assert_eq!( 414 assert_eq!(
324 NaughtyListTracker::classify_error("SSL certificate problem"), 415 NaughtyListTracker::strip_urls("error: wss://relay.example.com failed"),
325 Some(NaughtyCategory::TlsCertificateInvalid) 416 "error: [URL] failed"
326 ); 417 );
327 assert_eq!( 418 assert_eq!(
328 NaughtyListTracker::classify_error("TLS handshake failed"), 419 NaughtyListTracker::strip_urls("error: ws://localhost:8080 failed"),
329 Some(NaughtyCategory::TlsCertificateInvalid) 420 "error: [URL] failed"
330 ); 421 );
422 }
331 423
332 // TLS timeout should NOT be classified as naughty 424 #[test]
333 assert_eq!( 425 fn test_strip_urls_multiple() {
334 NaughtyListTracker::classify_error("TLS connection timed out"), 426 let error = "failed to clone https://a.com/repo.git and wss://relay.com";
335 None 427 let stripped = NaughtyListTracker::strip_urls(error);
336 ); 428 assert_eq!(stripped, "failed to clone [URL] and [URL]");
337 } 429 }
338 430
339 #[test] 431 #[test]
340 fn test_classify_protocol_errors() { 432 fn test_strip_urls_preserves_error_text() {
341 assert_eq!( 433 let error =
342 NaughtyListTracker::classify_error("websocket protocol error"), 434 "fatal: unable to access 'https://example.com/repo.git/': SSL certificate problem";
343 Some(NaughtyCategory::ProtocolError) 435 let stripped = NaughtyListTracker::strip_urls(error);
344 ); 436 assert!(stripped.contains("SSL certificate problem"));
437 assert!(!stripped.contains("example.com"));
438 }
439
440 // =========================================================================
441 // EDGE CASES: TIMEOUT/CONNECTION EXCEPTIONS
442 // These are the "unusual rules" where a pattern matches but should be excluded
443 // =========================================================================
444
445 #[test]
446 fn test_tls_timeout_not_naughty() {
447 // TLS errors with timeout should NOT be classified as naughty
448 // (timeout is transient, not a certificate problem)
345 assert_eq!( 449 assert_eq!(
346 NaughtyListTracker::classify_error("invalid frame header"), 450 NaughtyListTracker::classify_error("TLS connection timed out"),
347 Some(NaughtyCategory::ProtocolError) 451 None
348 ); 452 );
349
350 // WebSocket connection errors should NOT be classified as naughty
351 assert_eq!( 453 assert_eq!(
352 NaughtyListTracker::classify_error("websocket connection refused"), 454 NaughtyListTracker::classify_error("SSL handshake timeout"),
353 None 455 None
354 ); 456 );
355 } 457 }
356 458
357 #[test] 459 #[test]
358 fn test_classify_transient_errors() { 460 fn test_websocket_connection_errors_not_naughty() {
359 // Timeouts are transient 461 // WebSocket connection errors are transient, not protocol violations
360 assert_eq!( 462 assert_eq!(
361 NaughtyListTracker::classify_error("connection timed out"), 463 NaughtyListTracker::classify_error("websocket connection refused"),
362 None 464 None
363 ); 465 );
364 assert_eq!( 466 assert_eq!(
365 NaughtyListTracker::classify_error("operation timed out"), 467 NaughtyListTracker::classify_error("websocket connection timeout"),
366 None 468 None
367 ); 469 );
470 }
368 471
369 // Connection refused is transient 472 #[test]
473 fn test_remote_warnings_filtered() {
474 // Remote warnings should be filtered out before classification
475 let warning_only =
476 "remote: warning: unable to access '/root/.config/git/attributes': Permission denied";
477 assert_eq!(NaughtyListTracker::classify_error(warning_only), None);
478
479 // But real errors after warnings should still be classified
480 let warning_with_error = "remote: warning: something\nfatal: failed to lookup address";
370 assert_eq!( 481 assert_eq!(
371 NaughtyListTracker::classify_error("connection refused"), 482 NaughtyListTracker::classify_error(warning_with_error),
372 None 483 Some(NaughtyCategory::DnsLookupFailed)
373 ); 484 );
485 }
374 486
375 // Generic network errors are transient 487 // =========================================================================
376 assert_eq!( 488 // INTEGRATION: FULL CLASSIFICATION FLOW
377 NaughtyListTracker::classify_error("network unreachable"), 489 // Verify URL stripping + classification work together correctly
378 None 490 // =========================================================================
379 ); 491
492 #[test]
493 fn test_url_with_keywords_not_false_positive() {
494 // URLs containing keywords should NOT trigger classification
495 let cases = [
496 ("https://example.com/my-openssl-project.git", "not found"),
497 ("https://example.com/ssl-team/repo.git", "not found"),
498 ("https://example.com/certificate-manager.git", "not found"),
499 ("https://example.com/dns-tools.git", "not found"),
500 ("wss://relay-tls-test.example.com", "connection refused"),
501 ];
502
503 for (url, suffix) in cases {
504 let error = format!("fatal: repository '{}/' {}", url, suffix);
505 assert_eq!(
506 NaughtyListTracker::classify_error(&error),
507 None,
508 "URL '{}' should not trigger false positive",
509 url
510 );
511 }
512 }
380 513
381 // Repository not found is transient (not an infrastructure issue) 514 #[test]
515 fn test_real_errors_still_detected() {
516 // Real errors in the message text (not URL) should still be detected
382 assert_eq!( 517 assert_eq!(
383 NaughtyListTracker::classify_error( 518 NaughtyListTracker::classify_error(
384 "fatal: repository 'https://example.com/repo.git/' not found" 519 "fatal: 'https://example.com/repo.git': SSL certificate problem"
385 ), 520 ),
386 None 521 Some(NaughtyCategory::TlsCertificateInvalid)
387 ); 522 );
388 }
389
390 #[test]
391 fn test_classify_false_positive_npub_with_dns() {
392 // This npub contains "dns" in its encoding: npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cDNS7jezx0
393 // A "not found" error with this npub should NOT be classified as DNS failure
394 let error = "fatal: repository 'https://git.shakespeare.diy/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found";
395 assert_eq!( 523 assert_eq!(
396 NaughtyListTracker::classify_error(error), 524 NaughtyListTracker::classify_error(
397 None, 525 "fatal: 'https://example.com/repo.git': failed to lookup address"
398 "npub containing 'dns' should not trigger DNS failure classification" 526 ),
527 Some(NaughtyCategory::DnsLookupFailed)
399 ); 528 );
400
401 // Same for relay.ngit.dev
402 let error2 = "fatal: repository 'https://relay.ngit.dev/npub17plqkxhsv66g8quxxc9p5t9mxazzn20m426exqnl8lxnh5a4cdns7jezx0/kuboslopp%20by%20Shakespeare.git/' not found";
403 assert_eq!( 529 assert_eq!(
404 NaughtyListTracker::classify_error(error2), 530 NaughtyListTracker::classify_error("websocket protocol error"),
405 None, 531 Some(NaughtyCategory::ProtocolError)
406 "npub containing 'dns' should not trigger DNS failure classification"
407 ); 532 );
408 } 533 }
409 534
410 #[test] 535 #[test]
411 fn test_record_new_entry() { 536 fn test_url_with_keyword_and_real_error() {
412 let tracker = NaughtyListTracker::with_defaults(); 537 // URL contains keyword AND there's a real error - should detect the error
413 let url = "wss://bad-relay.example.com"; 538 let error = "fatal: 'https://example.com/ssl-tools/repo.git': SSL certificate problem";
414 539 assert_eq!(
415 let is_new = tracker.record( 540 NaughtyListTracker::classify_error(error),
416 url, 541 Some(NaughtyCategory::TlsCertificateInvalid)
417 NaughtyCategory::DnsLookupFailed,
418 "failed to lookup address".to_string(),
419 ); 542 );
420
421 assert!(is_new);
422 assert!(tracker.is_naughty(url));
423
424 let entry = tracker.get_entry(url).unwrap();
425 assert_eq!(entry.category, NaughtyCategory::DnsLookupFailed);
426 assert_eq!(entry.occurrence_count, 1);
427 } 543 }
428 544
545 // =========================================================================
546 // TRACKER FUNCTIONALITY
547 // =========================================================================
548
429 #[test] 549 #[test]
430 fn test_record_updates_existing() { 550 fn test_tracker_record_and_update() {
431 let tracker = NaughtyListTracker::with_defaults(); 551 let tracker = NaughtyListTracker::with_defaults();
432 let url = "wss://bad-relay.example.com"; 552 let url = "wss://bad-relay.example.com";
433 553
434 // First occurrence 554 // First occurrence
435 let is_new1 = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 1".to_string()); 555 let is_new = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 1".to_string());
436 assert!(is_new1); 556 assert!(is_new);
557 assert!(tracker.is_naughty(url));
437 558
438 // Second occurrence 559 // Second occurrence updates existing
439 let is_new2 = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 2".to_string()); 560 let is_new2 = tracker.record(url, NaughtyCategory::DnsLookupFailed, "error 2".to_string());
440 assert!(!is_new2); 561 assert!(!is_new2);
441 562
442 let entry = tracker.get_entry(url).unwrap(); 563 let entry = tracker.get_entry(url).unwrap();
443 assert_eq!(entry.occurrence_count, 2); 564 assert_eq!(entry.occurrence_count, 2);
444 assert_eq!(entry.reason, "error 2"); // Updated to latest 565 assert_eq!(entry.reason, "error 2");
445 } 566 }
446 567
447 #[test] 568 #[test]
448 fn test_is_naughty() { 569 fn test_tracker_expiration() {
449 let tracker = NaughtyListTracker::with_defaults(); 570 let tracker = NaughtyListTracker::new(0); // Expire immediately
450 let url = "wss://bad-relay.example.com";
451
452 assert!(!tracker.is_naughty(url));
453 571
454 tracker.record( 572 tracker.record(
455 url, 573 "wss://relay.example.com",
456 NaughtyCategory::TlsCertificateInvalid, 574 NaughtyCategory::DnsLookupFailed,
457 "cert error".to_string(), 575 "error".to_string(),
458 ); 576 );
459 577
460 assert!(tracker.is_naughty(url)); 578 // Entry exists but is expired
461 } 579 assert!(!tracker.is_naughty("wss://relay.example.com"));
462
463 #[test]
464 fn test_get_all() {
465 let tracker = NaughtyListTracker::with_defaults();
466 580
467 tracker.record( 581 std::thread::sleep(std::time::Duration::from_millis(10));
468 "wss://relay1.example.com",
469 NaughtyCategory::DnsLookupFailed,
470 "dns error".to_string(),
471 );
472 tracker.record(
473 "wss://relay2.example.com",
474 NaughtyCategory::TlsCertificateInvalid,
475 "tls error".to_string(),
476 );
477 582
478 let all = tracker.get_all(); 583 let expired = tracker.expire_old_entries();
479 assert_eq!(all.len(), 2); 584 assert_eq!(expired.len(), 1);
585 assert_eq!(tracker.total_count(), 0);
480 } 586 }
481 587
482 #[test] 588 #[test]
483 fn test_count_by_category() { 589 fn test_tracker_counts() {
484 let tracker = NaughtyListTracker::with_defaults(); 590 let tracker = NaughtyListTracker::with_defaults();
485 591
592 tracker.record("wss://r1.com", NaughtyCategory::DnsLookupFailed, "e".into());
593 tracker.record("wss://r2.com", NaughtyCategory::DnsLookupFailed, "e".into());
486 tracker.record( 594 tracker.record(
487 "wss://relay1.example.com", 595 "wss://r3.com",
488 NaughtyCategory::DnsLookupFailed,
489 "error".to_string(),
490 );
491 tracker.record(
492 "wss://relay2.example.com",
493 NaughtyCategory::DnsLookupFailed,
494 "error".to_string(),
495 );
496 tracker.record(
497 "wss://relay3.example.com",
498 NaughtyCategory::TlsCertificateInvalid, 596 NaughtyCategory::TlsCertificateInvalid,
499 "error".to_string(), 597 "e".into(),
500 ); 598 );
501 599
600 assert_eq!(tracker.total_count(), 3);
502 assert_eq!( 601 assert_eq!(
503 tracker.count_by_category(NaughtyCategory::DnsLookupFailed), 602 tracker.count_by_category(NaughtyCategory::DnsLookupFailed),
504 2 603 2
@@ -507,75 +606,12 @@ mod tests {
507 tracker.count_by_category(NaughtyCategory::TlsCertificateInvalid), 606 tracker.count_by_category(NaughtyCategory::TlsCertificateInvalid),
508 1 607 1
509 ); 608 );
510 assert_eq!(tracker.count_by_category(NaughtyCategory::ProtocolError), 0); 609 assert_eq!(tracker.get_all().len(), 3);
511 }
512
513 #[test]
514 fn test_total_count() {
515 let tracker = NaughtyListTracker::with_defaults();
516 assert_eq!(tracker.total_count(), 0);
517
518 tracker.record(
519 "wss://relay1.example.com",
520 NaughtyCategory::DnsLookupFailed,
521 "error".to_string(),
522 );
523 assert_eq!(tracker.total_count(), 1);
524
525 tracker.record(
526 "wss://relay2.example.com",
527 NaughtyCategory::TlsCertificateInvalid,
528 "error".to_string(),
529 );
530 assert_eq!(tracker.total_count(), 2);
531 }
532
533 #[test]
534 fn test_expire_old_entries() {
535 // Use very short expiration for testing
536 let tracker = NaughtyListTracker::new(0); // Expire immediately (0 hours)
537
538 tracker.record(
539 "wss://relay1.example.com",
540 NaughtyCategory::DnsLookupFailed,
541 "error".to_string(),
542 );
543
544 // Entry should exist in the map
545 assert_eq!(tracker.total_count(), 1);
546
547 // But is_naughty should return false since it's already expired (0 hours)
548 assert!(!tracker.is_naughty("wss://relay1.example.com"));
549
550 // Sleep to ensure time passes
551 std::thread::sleep(std::time::Duration::from_millis(10));
552
553 // Expire old entries (should remove the 0-hour expired entry)
554 let expired = tracker.expire_old_entries();
555 assert_eq!(expired.len(), 1);
556 assert_eq!(expired[0], "wss://relay1.example.com");
557
558 // Entry should be gone
559 assert!(!tracker.is_naughty("wss://relay1.example.com"));
560 assert_eq!(tracker.total_count(), 0);
561 } 610 }
562 611
563 #[test] 612 #[test]
564 fn test_category_display() { 613 fn test_category_display() {
565 assert_eq!( 614 assert_eq!(
566 NaughtyCategory::DnsLookupFailed.to_string(),
567 "dns_lookup_failed"
568 );
569 assert_eq!(
570 NaughtyCategory::TlsCertificateInvalid.to_string(),
571 "tls_certificate_invalid"
572 );
573 assert_eq!(NaughtyCategory::ProtocolError.to_string(), "protocol_error");
574 }
575
576 #[test]
577 fn test_category_as_str() {
578 assert_eq!(
579 NaughtyCategory::DnsLookupFailed.as_str(), 615 NaughtyCategory::DnsLookupFailed.as_str(),
580 "dns_lookup_failed" 616 "dns_lookup_failed"
581 ); 617 );