upleb.uk

Public git repos — served from a NIP-34 GRASP relay at git.upleb.uk

summaryrefslogtreecommitdiff
path: root/src/git/mod.rs
diff options
context:
space:
mode:
author: DanConwayDev <DanConwayDev@protonmail.com> 2026-04-09 15:24:17 +0000
committer: DanConwayDev <DanConwayDev@protonmail.com> 2026-04-09 15:24:17 +0000
commit: 2d74b9ca69b3a1e0b9a2359c12cc2d1979fc6130 (patch)
tree: 61180841310feaca54c1661552d88347a0bebd72 /src/git/mod.rs
parent: 28168a7701c897a5b6af13bc472d6f5902e0a96d (diff)
fix: reject identifiers with whitespace and URL-decode path components
Two bugs allowed a repository announcement with a space-containing identifier ('kuboslopp by Shakespeare') to enter purgatory and create a bare repo on disk, but then fail to serve git data over HTTP. Bug 1 (serving): parse_git_url and parse_repo_url did not percent-decode the URL path before resolving the filesystem path. A client requesting /npub.../kuboslopp%20by%20Shakespeare.git/info/refs had the identifier extracted as 'kuboslopp%20by%20Shakespeare' (literal %20), which did not match the on-disk directory 'kuboslopp by Shakespeare.git'. Fix: add percent_decode() in src/git/mod.rs and apply it to the repo component in both parse_git_url and parse_repo_url. Bug 2 (validation): validate_announcement did not check that the identifier is safe as a filesystem path component and URL segment. Identifiers containing whitespace, path separators, null bytes, or reserved names (. / ..) should be rejected at acceptance time. Fix: add validate_identifier() in src/nostr/events.rs and call it from validate_announcement before any other policy checks.
Diffstat (limited to 'src/git/mod.rs')
-rw-r--r-- src/git/mod.rs 69
1 files changed, 64 insertions, 5 deletions
diff --git a/src/git/mod.rs b/src/git/mod.rs
index 1255b6f..999d3c8 100644
--- a/src/git/mod.rs
+++ b/src/git/mod.rs
@@ -451,12 +451,42 @@ pub fn get_repository_head(repo_path: &Path) -> Option<String> {
451 } 451 }
452} 452}
453 453
/// Percent-decode a single URL path component.
///
/// Each well-formed `%XX` escape (two hex digits, upper or lower case) is
/// replaced by the byte it encodes, e.g. `%20` becomes a space. A `%` that is
/// not followed by two hex digits is copied through unchanged.
///
/// If the decoded bytes are not valid UTF-8, the original input is returned
/// unmodified (no partial decoding).
///
/// No byte value is treated specially: `%2F` decodes to `/`, and any other
/// escape (including `%00`) decodes to its raw byte. Callers that use the
/// result as a filesystem path component need to validate it themselves.
pub fn percent_decode(s: &str) -> String {
    let mut rest = s.as_bytes();
    let mut decoded = Vec::with_capacity(rest.len());

    while let Some((&byte, tail)) = rest.split_first() {
        if byte == b'%' {
            // An escape needs two more bytes, and both must be hex digits.
            if let [hi, lo, after @ ..] = tail {
                let nibbles = (
                    char::from(*hi).to_digit(16),
                    char::from(*lo).to_digit(16),
                );
                if let (Some(high), Some(low)) = nibbles {
                    // Both nibbles are < 16, so the combined value fits in a u8.
                    decoded.push(((high << 4) | low) as u8);
                    rest = after;
                    continue;
                }
            }
        }
        // Ordinary byte, or a `%` that does not start a valid escape.
        decoded.push(byte);
        rest = tail;
    }

    match String::from_utf8(decoded) {
        Ok(text) => text,
        // Decoded bytes are not UTF-8: hand back the input untouched.
        Err(_) => s.to_owned(),
    }
}
477
454/// Extract npub and identifier from a Git URL path 478/// Extract npub and identifier from a Git URL path
455/// 479///
456/// Parses paths like `/<npub>/<identifier>.git/info/refs` 480/// Parses paths like `/<npub>/<identifier>.git/info/refs`
457/// 481///
482/// The identifier component is percent-decoded so that URLs like
483/// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path
484/// `my repo.git` (though such identifiers should be rejected at announcement
485/// validation time — see `validate_announcement`).
486///
458/// Returns (npub, identifier, subpath) where subpath is the part after .git/ 487/// Returns (npub, identifier, subpath) where subpath is the part after .git/
459pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { 488/// and identifier has been percent-decoded.
489pub fn parse_git_url(path: &str) -> Option<(String, String, String)> {
460 // Remove leading slash 490 // Remove leading slash
461 let path = path.strip_prefix('/').unwrap_or(path); 491 let path = path.strip_prefix('/').unwrap_or(path);
462 492
@@ -467,12 +497,15 @@ pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> {
467 return None; 497 return None;
468 } 498 }
469 499
470 let npub = parts[0]; 500 let npub = parts[0].to_string();
471 let repo_part = parts[1]; 501 let repo_part = percent_decode(parts[1]);
472 let subpath = parts[2]; 502 let subpath = parts[2].to_string();
473 503
474 // Extract identifier (remove .git suffix if present for the middle part) 504 // Extract identifier (remove .git suffix if present for the middle part)
475 let identifier = repo_part.strip_suffix(".git").unwrap_or(repo_part); 505 let identifier = repo_part
506 .strip_suffix(".git")
507 .unwrap_or(&repo_part)
508 .to_string();
476 509
477 Some((npub, identifier, subpath)) 510 Some((npub, identifier, subpath))
478} 511}
@@ -613,6 +646,32 @@ mod tests {
613 } 646 }
614 647
615 #[test] 648 #[test]
649 fn test_parse_git_url_percent_encoded_identifier() {
650 // Identifiers with spaces encoded as %20 must be decoded so the
651 // filesystem path lookup finds the correct directory.
652 let (npub, id, subpath) =
653 parse_git_url("/npub17plqk/kuboslopp%20by%20Shakespeare.git/info/refs").unwrap();
654 assert_eq!(npub, "npub17plqk");
655 assert_eq!(id, "kuboslopp by Shakespeare");
656 assert_eq!(subpath, "info/refs");
657 }
658
659 #[test]
660 fn test_percent_decode_basic() {
661 assert_eq!(percent_decode("hello%20world"), "hello world");
662 assert_eq!(percent_decode("no-encoding"), "no-encoding");
663 assert_eq!(percent_decode("a%2Fb"), "a/b");
664 assert_eq!(percent_decode("%41%42%43"), "ABC");
665 }
666
667 #[test]
668 fn test_percent_decode_invalid_sequence_passthrough() {
669 // Incomplete or invalid sequences are left as-is
670 assert_eq!(percent_decode("foo%2"), "foo%2");
671 assert_eq!(percent_decode("foo%zz"), "foo%zz");
672 }
673
674 #[test]
616 fn test_commit_exists_nonexistent() { 675 fn test_commit_exists_nonexistent() {
617 let (_temp_dir, repo_path) = create_test_repo(); 676 let (_temp_dir, repo_path) = create_test_repo();
618 assert!(!commit_exists( 677 assert!(!commit_exists(