diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-09 15:24:17 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-09 15:24:17 +0000 |
| commit | 2d74b9ca69b3a1e0b9a2359c12cc2d1979fc6130 (patch) | |
| tree | 61180841310feaca54c1661552d88347a0bebd72 | |
| parent | 28168a7701c897a5b6af13bc472d6f5902e0a96d (diff) | |
fix: reject identifiers with whitespace and URL-decode path components
Two bugs allowed a repository announcement with a space-containing
identifier ('kuboslopp by Shakespeare') to enter purgatory and create
a bare repo on disk, but then fail to serve git data over HTTP.
Bug 1 (serving): parse_git_url and parse_repo_url did not percent-decode
the URL path before resolving the filesystem path. A client requesting
/npub.../kuboslopp%20by%20Shakespeare.git/info/refs had the identifier
extracted as 'kuboslopp%20by%20Shakespeare' (literal %20), which did not
match the on-disk directory 'kuboslopp by Shakespeare.git'.
Fix: add percent_decode() in src/git/mod.rs and apply it to the repo
component in both parse_git_url and parse_repo_url.
Bug 2 (validation): validate_announcement did not check that the
identifier is safe as a filesystem path component and URL segment.
Identifiers containing whitespace, path separators, null bytes, or
reserved names (. / ..) should be rejected at acceptance time.
Fix: add validate_identifier() in src/nostr/events.rs and call it from
validate_announcement before any other policy checks.
| -rw-r--r-- | src/git/mod.rs | 69 | ||||
| -rw-r--r-- | src/http/mod.rs | 19 | ||||
| -rw-r--r-- | src/nostr/events.rs | 117 |
3 files changed, 191 insertions, 14 deletions
diff --git a/src/git/mod.rs b/src/git/mod.rs index 1255b6f..999d3c8 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs | |||
| @@ -451,12 +451,42 @@ pub fn get_repository_head(repo_path: &Path) -> Option<String> { | |||
| 451 | } | 451 | } |
| 452 | } | 452 | } |
| 453 | 453 | ||
| 454 | /// Decode percent-encoded characters in a URL path component. | ||
| 455 | /// | ||
| 456 | /// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. | ||
| 457 | pub fn percent_decode(s: &str) -> String { | ||
| 458 | let bytes = s.as_bytes(); | ||
| 459 | let mut out = Vec::with_capacity(bytes.len()); | ||
| 460 | let mut i = 0; | ||
| 461 | while i < bytes.len() { | ||
| 462 | if bytes[i] == b'%' && i + 2 < bytes.len() { | ||
| 463 | if let (Some(hi), Some(lo)) = ( | ||
| 464 | (bytes[i + 1] as char).to_digit(16), | ||
| 465 | (bytes[i + 2] as char).to_digit(16), | ||
| 466 | ) { | ||
| 467 | out.push((hi * 16 + lo) as u8); | ||
| 468 | i += 3; | ||
| 469 | continue; | ||
| 470 | } | ||
| 471 | } | ||
| 472 | out.push(bytes[i]); | ||
| 473 | i += 1; | ||
| 474 | } | ||
| 475 | String::from_utf8(out).unwrap_or_else(|_| s.to_string()) | ||
| 476 | } | ||
| 477 | |||
| 454 | /// Extract npub and identifier from a Git URL path | 478 | /// Extract npub and identifier from a Git URL path |
| 455 | /// | 479 | /// |
| 456 | /// Parses paths like `/<npub>/<identifier>.git/info/refs` | 480 | /// Parses paths like `/<npub>/<identifier>.git/info/refs` |
| 457 | /// | 481 | /// |
| 482 | /// The identifier component is percent-decoded so that URLs like | ||
| 483 | /// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path | ||
| 484 | /// `my repo.git` (though such identifiers should be rejected at announcement | ||
| 485 | /// validation time — see `validate_announcement`). | ||
| 486 | /// | ||
| 458 | /// Returns (npub, identifier, subpath) where subpath is the part after .git/ | 487 | /// Returns (npub, identifier, subpath) where subpath is the part after .git/ |
| 459 | pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { | 488 | /// and identifier has been percent-decoded. |
| 489 | pub fn parse_git_url(path: &str) -> Option<(String, String, String)> { | ||
| 460 | // Remove leading slash | 490 | // Remove leading slash |
| 461 | let path = path.strip_prefix('/').unwrap_or(path); | 491 | let path = path.strip_prefix('/').unwrap_or(path); |
| 462 | 492 | ||
| @@ -467,12 +497,15 @@ pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { | |||
| 467 | return None; | 497 | return None; |
| 468 | } | 498 | } |
| 469 | 499 | ||
| 470 | let npub = parts[0]; | 500 | let npub = parts[0].to_string(); |
| 471 | let repo_part = parts[1]; | 501 | let repo_part = percent_decode(parts[1]); |
| 472 | let subpath = parts[2]; | 502 | let subpath = parts[2].to_string(); |
| 473 | 503 | ||
| 474 | // Extract identifier (remove .git suffix if present for the middle part) | 504 | // Extract identifier (remove .git suffix if present for the middle part) |
| 475 | let identifier = repo_part.strip_suffix(".git").unwrap_or(repo_part); | 505 | let identifier = repo_part |
| 506 | .strip_suffix(".git") | ||
| 507 | .unwrap_or(&repo_part) | ||
| 508 | .to_string(); | ||
| 476 | 509 | ||
| 477 | Some((npub, identifier, subpath)) | 510 | Some((npub, identifier, subpath)) |
| 478 | } | 511 | } |
| @@ -613,6 +646,32 @@ mod tests { | |||
| 613 | } | 646 | } |
| 614 | 647 | ||
| 615 | #[test] | 648 | #[test] |
| 649 | fn test_parse_git_url_percent_encoded_identifier() { | ||
| 650 | // Identifiers with spaces encoded as %20 must be decoded so the | ||
| 651 | // filesystem path lookup finds the correct directory. | ||
| 652 | let (npub, id, subpath) = | ||
| 653 | parse_git_url("/npub17plqk/kuboslopp%20by%20Shakespeare.git/info/refs").unwrap(); | ||
| 654 | assert_eq!(npub, "npub17plqk"); | ||
| 655 | assert_eq!(id, "kuboslopp by Shakespeare"); | ||
| 656 | assert_eq!(subpath, "info/refs"); | ||
| 657 | } | ||
| 658 | |||
| 659 | #[test] | ||
| 660 | fn test_percent_decode_basic() { | ||
| 661 | assert_eq!(percent_decode("hello%20world"), "hello world"); | ||
| 662 | assert_eq!(percent_decode("no-encoding"), "no-encoding"); | ||
| 663 | assert_eq!(percent_decode("a%2Fb"), "a/b"); | ||
| 664 | assert_eq!(percent_decode("%41%42%43"), "ABC"); | ||
| 665 | } | ||
| 666 | |||
| 667 | #[test] | ||
| 668 | fn test_percent_decode_invalid_sequence_passthrough() { | ||
| 669 | // Incomplete or invalid sequences are left as-is | ||
| 670 | assert_eq!(percent_decode("foo%2"), "foo%2"); | ||
| 671 | assert_eq!(percent_decode("foo%zz"), "foo%zz"); | ||
| 672 | } | ||
| 673 | |||
| 674 | #[test] | ||
| 616 | fn test_commit_exists_nonexistent() { | 675 | fn test_commit_exists_nonexistent() { |
| 617 | let (_temp_dir, repo_path) = create_test_repo(); | 676 | let (_temp_dir, repo_path) = create_test_repo(); |
| 618 | assert!(!commit_exists( | 677 | assert!(!commit_exists( |
diff --git a/src/http/mod.rs b/src/http/mod.rs index c397365..154d6c5 100644 --- a/src/http/mod.rs +++ b/src/http/mod.rs | |||
| @@ -42,8 +42,11 @@ const ICON_PNG: &[u8] = include_bytes!("../../static/icon.png"); | |||
| 42 | /// | 42 | /// |
| 43 | /// Parses paths like `/<npub>/<identifier>.git` (for repository webpage/404) | 43 | /// Parses paths like `/<npub>/<identifier>.git` (for repository webpage/404) |
| 44 | /// | 44 | /// |
| 45 | /// The identifier is percent-decoded so that URLs like `/npub1.../my%20repo.git` | ||
| 46 | /// resolve to the correct filesystem path. | ||
| 47 | /// | ||
| 45 | /// Returns (npub, identifier) if the path matches a repository URL pattern | 48 | /// Returns (npub, identifier) if the path matches a repository URL pattern |
| 46 | fn parse_repo_url(path: &str) -> Option<(&str, &str)> { | 49 | fn parse_repo_url(path: &str) -> Option<(String, String)> { |
| 47 | // Remove leading slash | 50 | // Remove leading slash |
| 48 | let path = path.strip_prefix('/').unwrap_or(path); | 51 | let path = path.strip_prefix('/').unwrap_or(path); |
| 49 | 52 | ||
| @@ -56,7 +59,7 @@ fn parse_repo_url(path: &str) -> Option<(&str, &str)> { | |||
| 56 | } | 59 | } |
| 57 | 60 | ||
| 58 | let npub = parts[0]; | 61 | let npub = parts[0]; |
| 59 | let repo_part = parts[1]; | 62 | let repo_part = git::percent_decode(parts[1]); |
| 60 | 63 | ||
| 61 | // The repo part must end with .git | 64 | // The repo part must end with .git |
| 62 | if !repo_part.ends_with(".git") { | 65 | if !repo_part.ends_with(".git") { |
| @@ -69,14 +72,17 @@ fn parse_repo_url(path: &str) -> Option<(&str, &str)> { | |||
| 69 | } | 72 | } |
| 70 | 73 | ||
| 71 | // Extract identifier (remove .git suffix) | 74 | // Extract identifier (remove .git suffix) |
| 72 | let identifier = repo_part.strip_suffix(".git").unwrap_or(repo_part); | 75 | let identifier = repo_part |
| 76 | .strip_suffix(".git") | ||
| 77 | .unwrap_or(&repo_part) | ||
| 78 | .to_string(); | ||
| 73 | 79 | ||
| 74 | // Identifier must not be empty | 80 | // Identifier must not be empty |
| 75 | if identifier.is_empty() { | 81 | if identifier.is_empty() { |
| 76 | return None; | 82 | return None; |
| 77 | } | 83 | } |
| 78 | 84 | ||
| 79 | Some((npub, identifier)) | 85 | Some((npub.to_string(), identifier)) |
| 80 | } | 86 | } |
| 81 | 87 | ||
| 82 | /// Add CORS headers to a response builder | 88 | /// Add CORS headers to a response builder |
| @@ -160,9 +166,6 @@ impl Service<Request<Incoming>> for HttpService { | |||
| 160 | 166 | ||
| 161 | // Check for Git HTTP requests first | 167 | // Check for Git HTTP requests first |
| 162 | if let Some((npub, identifier, subpath)) = git::parse_git_url(&path) { | 168 | if let Some((npub, identifier, subpath)) = git::parse_git_url(&path) { |
| 163 | let npub = npub.to_string(); | ||
| 164 | let identifier = identifier.to_string(); | ||
| 165 | let subpath = subpath.to_string(); | ||
| 166 | 169 | ||
| 167 | // Extract Git-Protocol header for protocol v2 support | 170 | // Extract Git-Protocol header for protocol v2 support |
| 168 | let git_protocol = req | 171 | let git_protocol = req |
| @@ -391,8 +394,6 @@ impl Service<Request<Incoming>> for HttpService { | |||
| 391 | // GRASP-01: "SHOULD serve a webpage at the same endpoint linking to git nostr client(s) | 394 | // GRASP-01: "SHOULD serve a webpage at the same endpoint linking to git nostr client(s) |
| 392 | // to browse the repository and a 404 page for repositories it doesn't host" | 395 | // to browse the repository and a 404 page for repositories it doesn't host" |
| 393 | if let Some((npub, identifier)) = parse_repo_url(&path) { | 396 | if let Some((npub, identifier)) = parse_repo_url(&path) { |
| 394 | let npub = npub.to_string(); | ||
| 395 | let identifier = identifier.to_string(); | ||
| 396 | let config = self.config.clone(); | 397 | let config = self.config.clone(); |
| 397 | let repo_path = git::resolve_repo_path(&git_data_path, &npub, &identifier); | 398 | let repo_path = git::resolve_repo_path(&git_data_path, &npub, &identifier); |
| 398 | 399 | ||
diff --git a/src/nostr/events.rs b/src/nostr/events.rs index 00e4486..88ed6ae 100644 --- a/src/nostr/events.rs +++ b/src/nostr/events.rs | |||
| @@ -361,6 +361,52 @@ impl RepositoryState { | |||
| 361 | } | 361 | } |
| 362 | } | 362 | } |
| 363 | 363 | ||
| 364 | /// Validate that a repository identifier is safe for use as a filesystem path component | ||
| 365 | /// and as a URL path segment without percent-encoding. | ||
| 366 | /// | ||
| 367 | /// Rejects identifiers that: | ||
| 368 | /// - Are empty | ||
| 369 | /// - Contain path separators (`/`, `\`) | ||
| 370 | /// - Contain null bytes | ||
| 371 | /// - Contain whitespace (spaces, tabs, newlines, etc.) — these require percent-encoding | ||
| 372 | /// in URLs and cause a mismatch between the stored path and the URL-decoded request | ||
| 373 | /// - Are `.` or `..` (directory traversal) | ||
| 374 | /// | ||
| 375 | /// NIP-34 recommends kebab-case identifiers; this function enforces the minimum | ||
| 376 | /// safety constraints needed for correct filesystem and HTTP serving behaviour. | ||
| 377 | pub fn validate_identifier(identifier: &str) -> Result<(), String> { | ||
| 378 | if identifier.is_empty() { | ||
| 379 | return Err("identifier must not be empty".to_string()); | ||
| 380 | } | ||
| 381 | if identifier == "." || identifier == ".." { | ||
| 382 | return Err(format!( | ||
| 383 | "identifier '{}' is a reserved path component", | ||
| 384 | identifier | ||
| 385 | )); | ||
| 386 | } | ||
| 387 | for ch in identifier.chars() { | ||
| 388 | if ch == '/' || ch == '\\' { | ||
| 389 | return Err(format!( | ||
| 390 | "identifier '{}' contains path separator '{}'", | ||
| 391 | identifier, ch | ||
| 392 | )); | ||
| 393 | } | ||
| 394 | if ch == '\0' { | ||
| 395 | return Err(format!( | ||
| 396 | "identifier '{}' contains a null byte", | ||
| 397 | identifier | ||
| 398 | )); | ||
| 399 | } | ||
| 400 | if ch.is_whitespace() { | ||
| 401 | return Err(format!( | ||
| 402 | "identifier '{}' contains whitespace — use hyphens instead (e.g. 'my-repo')", | ||
| 403 | identifier | ||
| 404 | )); | ||
| 405 | } | ||
| 406 | } | ||
| 407 | Ok(()) | ||
| 408 | } | ||
| 409 | |||
| 364 | /// Validate a repository announcement according to GRASP-01 and GRASP-05 | 410 | /// Validate a repository announcement according to GRASP-01 and GRASP-05 |
| 365 | /// | 411 | /// |
| 366 | /// Returns: | 412 | /// Returns: |
| @@ -405,6 +451,11 @@ pub fn validate_announcement( | |||
| 405 | Err(e) => return AnnouncementResult::Reject(format!("Invalid announcement: {}", e)), | 451 | Err(e) => return AnnouncementResult::Reject(format!("Invalid announcement: {}", e)), |
| 406 | }; | 452 | }; |
| 407 | 453 | ||
| 454 | // Validate identifier is safe for filesystem and URL use | ||
| 455 | if let Err(reason) = validate_identifier(&announcement.identifier) { | ||
| 456 | return AnnouncementResult::Reject(format!("Invalid identifier: {}", reason)); | ||
| 457 | } | ||
| 458 | |||
| 408 | // Get validated configs (config.validate() must be called at startup) | 459 | // Get validated configs (config.validate() must be called at startup) |
| 409 | let archive_config = config.archive_config(); | 460 | let archive_config = config.archive_config(); |
| 410 | let repository_config = config.repository_config(); | 461 | let repository_config = config.repository_config(); |
| @@ -1511,4 +1562,70 @@ mod tests { | |||
| 1511 | let result = validate_announcement(&event, &config); | 1562 | let result = validate_announcement(&event, &config); |
| 1512 | assert!(matches!(result, AnnouncementResult::Accept)); | 1563 | assert!(matches!(result, AnnouncementResult::Accept)); |
| 1513 | } | 1564 | } |
| 1565 | |||
| 1566 | // ------------------------------------------------------------------------- | ||
| 1567 | // validate_identifier tests | ||
| 1568 | // ------------------------------------------------------------------------- | ||
| 1569 | |||
| 1570 | #[test] | ||
| 1571 | fn test_validate_identifier_valid() { | ||
| 1572 | assert!(validate_identifier("my-repo").is_ok()); | ||
| 1573 | assert!(validate_identifier("my_repo").is_ok()); | ||
| 1574 | assert!(validate_identifier("repo123").is_ok()); | ||
| 1575 | assert!(validate_identifier("kuboslopp").is_ok()); | ||
| 1576 | } | ||
| 1577 | |||
| 1578 | #[test] | ||
| 1579 | fn test_validate_identifier_rejects_empty() { | ||
| 1580 | assert!(validate_identifier("").is_err()); | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | #[test] | ||
| 1584 | fn test_validate_identifier_rejects_dot_components() { | ||
| 1585 | assert!(validate_identifier(".").is_err()); | ||
| 1586 | assert!(validate_identifier("..").is_err()); | ||
| 1587 | } | ||
| 1588 | |||
| 1589 | #[test] | ||
| 1590 | fn test_validate_identifier_rejects_path_separators() { | ||
| 1591 | assert!(validate_identifier("foo/bar").is_err()); | ||
| 1592 | assert!(validate_identifier("foo\\bar").is_err()); | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | #[test] | ||
| 1596 | fn test_validate_identifier_rejects_whitespace() { | ||
| 1597 | assert!(validate_identifier("kuboslopp by Shakespeare").is_err()); | ||
| 1598 | assert!(validate_identifier("my\trepo").is_err()); | ||
| 1599 | assert!(validate_identifier("my\nrepo").is_err()); | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | #[test] | ||
| 1603 | fn test_validate_announcement_rejects_identifier_with_spaces() { | ||
| 1604 | use crate::config::Config; | ||
| 1605 | use crate::nostr::policy::AnnouncementResult; | ||
| 1606 | |||
| 1607 | let keys = create_test_keys(); | ||
| 1608 | // Identifier contains spaces — should be rejected regardless of clone/relay tags | ||
| 1609 | let event = create_announcement_event( | ||
| 1610 | &keys, | ||
| 1611 | "kuboslopp by Shakespeare", | ||
| 1612 | vec!["https://gitnostr.com/alice/kuboslopp%20by%20Shakespeare.git"], | ||
| 1613 | vec!["wss://gitnostr.com"], | ||
| 1614 | ); | ||
| 1615 | |||
| 1616 | let config = Config { | ||
| 1617 | domain: "gitnostr.com".to_string(), | ||
| 1618 | ..Config::for_testing() | ||
| 1619 | }; | ||
| 1620 | let result = validate_announcement(&event, &config); | ||
| 1621 | if let AnnouncementResult::Reject(reason) = result { | ||
| 1622 | assert!( | ||
| 1623 | reason.contains("whitespace") || reason.contains("identifier"), | ||
| 1624 | "unexpected rejection reason: {}", | ||
| 1625 | reason | ||
| 1626 | ); | ||
| 1627 | } else { | ||
| 1628 | panic!("Expected Reject for identifier with spaces, got {:?}", result); | ||
| 1629 | } | ||
| 1630 | } | ||
| 1514 | } | 1631 | } |