From 2d74b9ca69b3a1e0b9a2359c12cc2d1979fc6130 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Thu, 9 Apr 2026 15:24:17 +0000 Subject: fix: reject identifiers with whitespace and URL-decode path components Two bugs allowed a repository announcement with a space-containing identifier ('kuboslopp by Shakespeare') to enter purgatory and create a bare repo on disk, but then fail to serve git data over HTTP. Bug 1 (serving): parse_git_url and parse_repo_url did not percent-decode the URL path before resolving the filesystem path. A client requesting /npub.../kuboslopp%20by%20Shakespeare.git/info/refs had the identifier extracted as 'kuboslopp%20by%20Shakespeare' (literal %20), which did not match the on-disk directory 'kuboslopp by Shakespeare.git'. Fix: add percent_decode() in src/git/mod.rs and apply it to the repo component in both parse_git_url and parse_repo_url. Bug 2 (validation): validate_announcement did not check that the identifier is safe as a filesystem path component and URL segment. Identifiers containing whitespace, path separators, null bytes, or reserved names (. / ..) should be rejected at acceptance time. Fix: add validate_identifier() in src/nostr/events.rs and call it from validate_announcement before any other policy checks. --- src/git/mod.rs | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 5 deletions(-) (limited to 'src/git') diff --git a/src/git/mod.rs b/src/git/mod.rs index 1255b6f..999d3c8 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -451,12 +451,42 @@ pub fn get_repository_head(repo_path: &Path) -> Option { } } +/// Decode percent-encoded characters in a URL path component. +/// +/// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. +pub fn percent_decode(s: &str) -> String { + let bytes = s.as_bytes(); + let mut out = Vec::with_capacity(bytes.len()); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'%' && i + 2 < bytes.len() { + if let (Some(hi), Some(lo)) = ( + (bytes[i + 1] as char).to_digit(16), + (bytes[i + 2] as char).to_digit(16), + ) { + out.push((hi * 16 + lo) as u8); + i += 3; + continue; + } + } + out.push(bytes[i]); + i += 1; + } + String::from_utf8(out).unwrap_or_else(|_| s.to_string()) +} + /// Extract npub and identifier from a Git URL path /// /// Parses paths like `//.git/info/refs` /// +/// The identifier component is percent-decoded so that URLs like +/// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path +/// `my repo.git` (though such identifiers should be rejected at announcement +/// validation time — see `validate_announcement`). +/// /// Returns (npub, identifier, subpath) where subpath is the part after .git/ -pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { +/// and identifier has been percent-decoded. +pub fn parse_git_url(path: &str) -> Option<(String, String, String)> { // Remove leading slash let path = path.strip_prefix('/').unwrap_or(path); @@ -467,12 +497,15 @@ pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { return None; } - let npub = parts[0]; - let repo_part = parts[1]; - let subpath = parts[2]; + let npub = parts[0].to_string(); + let repo_part = percent_decode(parts[1]); + let subpath = parts[2].to_string(); // Extract identifier (remove .git suffix if present for the middle part) - let identifier = repo_part.strip_suffix(".git").unwrap_or(repo_part); + let identifier = repo_part + .strip_suffix(".git") + .unwrap_or(&repo_part) + .to_string(); Some((npub, identifier, subpath)) } @@ -612,6 +645,32 @@ mod tests { assert!(parse_git_url("/npub1abc/repo").is_none()); } + #[test] + fn test_parse_git_url_percent_encoded_identifier() { + // Identifiers with spaces encoded as %20 must be decoded so the + // filesystem path lookup finds the correct directory. + let (npub, id, subpath) = + parse_git_url("/npub17plqk/kuboslopp%20by%20Shakespeare.git/info/refs").unwrap(); + assert_eq!(npub, "npub17plqk"); + assert_eq!(id, "kuboslopp by Shakespeare"); + assert_eq!(subpath, "info/refs"); + } + + #[test] + fn test_percent_decode_basic() { + assert_eq!(percent_decode("hello%20world"), "hello world"); + assert_eq!(percent_decode("no-encoding"), "no-encoding"); + assert_eq!(percent_decode("a%2Fb"), "a/b"); + assert_eq!(percent_decode("%41%42%43"), "ABC"); + } + + #[test] + fn test_percent_decode_invalid_sequence_passthrough() { + // Incomplete or invalid sequences are left as-is + assert_eq!(percent_decode("foo%2"), "foo%2"); + assert_eq!(percent_decode("foo%zz"), "foo%zz"); + } + #[test] fn test_commit_exists_nonexistent() { let (_temp_dir, repo_path) = create_test_repo(); -- cgit v1.2.3