From 2d74b9ca69b3a1e0b9a2359c12cc2d1979fc6130 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Thu, 9 Apr 2026 15:24:17 +0000 Subject: fix: reject identifiers with whitespace and URL-decode path components Two bugs allowed a repository announcement with a space-containing identifier ('kuboslopp by Shakespeare') to enter purgatory and create a bare repo on disk, but then fail to serve git data over HTTP. Bug 1 (serving): parse_git_url and parse_repo_url did not percent-decode the URL path before resolving the filesystem path. A client requesting /npub.../kuboslopp%20by%20Shakespeare.git/info/refs had the identifier extracted as 'kuboslopp%20by%20Shakespeare' (literal %20), which did not match the on-disk directory 'kuboslopp by Shakespeare.git'. Fix: add percent_decode() in src/git/mod.rs and apply it to the repo component in both parse_git_url and parse_repo_url. Bug 2 (validation): validate_announcement did not check that the identifier is safe as a filesystem path component and URL segment. Identifiers containing whitespace, path separators, null bytes, or reserved names (. / ..) should be rejected at acceptance time. Fix: add validate_identifier() in src/nostr/events.rs and call it from validate_announcement before any other policy checks. --- src/git/mod.rs | 69 ++++++++++++++++++++++++++++--- src/http/mod.rs | 19 +++++---- src/nostr/events.rs | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 14 deletions(-) diff --git a/src/git/mod.rs b/src/git/mod.rs index 1255b6f..999d3c8 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -451,12 +451,42 @@ pub fn get_repository_head(repo_path: &Path) -> Option { } } +/// Decode percent-encoded characters in a URL path component. +/// +/// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. +pub fn percent_decode(s: &str) -> String { + let bytes = s.as_bytes(); + let mut out = Vec::with_capacity(bytes.len()); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'%' && i + 2 < bytes.len() { + if let (Some(hi), Some(lo)) = ( + (bytes[i + 1] as char).to_digit(16), + (bytes[i + 2] as char).to_digit(16), + ) { + out.push((hi * 16 + lo) as u8); + i += 3; + continue; + } + } + out.push(bytes[i]); + i += 1; + } + String::from_utf8(out).unwrap_or_else(|_| s.to_string()) +} + /// Extract npub and identifier from a Git URL path /// /// Parses paths like `//.git/info/refs` /// +/// The identifier component is percent-decoded so that URLs like +/// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path +/// `my repo.git` (though such identifiers should be rejected at announcement +/// validation time — see `validate_announcement`). +/// /// Returns (npub, identifier, subpath) where subpath is the part after .git/ -pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { +/// and identifier has been percent-decoded. +pub fn parse_git_url(path: &str) -> Option<(String, String, String)> { // Remove leading slash let path = path.strip_prefix('/').unwrap_or(path); @@ -467,12 +497,15 @@ pub fn parse_git_url(path: &str) -> Option<(&str, &str, &str)> { return None; } - let npub = parts[0]; - let repo_part = parts[1]; - let subpath = parts[2]; + let npub = parts[0].to_string(); + let repo_part = percent_decode(parts[1]); + let subpath = parts[2].to_string(); // Extract identifier (remove .git suffix if present for the middle part) - let identifier = repo_part.strip_suffix(".git").unwrap_or(repo_part); + let identifier = repo_part + .strip_suffix(".git") + .unwrap_or(&repo_part) + .to_string(); Some((npub, identifier, subpath)) } @@ -612,6 +645,32 @@ mod tests { assert!(parse_git_url("/npub1abc/repo").is_none()); } + #[test] + fn test_parse_git_url_percent_encoded_identifier() { + // Identifiers with spaces encoded as %20 must be decoded so the + // filesystem path lookup finds the correct directory. + let (npub, id, subpath) = + parse_git_url("/npub17plqk/kuboslopp%20by%20Shakespeare.git/info/refs").unwrap(); + assert_eq!(npub, "npub17plqk"); + assert_eq!(id, "kuboslopp by Shakespeare"); + assert_eq!(subpath, "info/refs"); + } + + #[test] + fn test_percent_decode_basic() { + assert_eq!(percent_decode("hello%20world"), "hello world"); + assert_eq!(percent_decode("no-encoding"), "no-encoding"); + assert_eq!(percent_decode("a%2Fb"), "a/b"); + assert_eq!(percent_decode("%41%42%43"), "ABC"); + } + + #[test] + fn test_percent_decode_invalid_sequence_passthrough() { + // Incomplete or invalid sequences are left as-is + assert_eq!(percent_decode("foo%2"), "foo%2"); + assert_eq!(percent_decode("foo%zz"), "foo%zz"); + } + #[test] fn test_commit_exists_nonexistent() { let (_temp_dir, repo_path) = create_test_repo(); diff --git a/src/http/mod.rs b/src/http/mod.rs index c397365..154d6c5 100644 --- a/src/http/mod.rs +++ b/src/http/mod.rs @@ -42,8 +42,11 @@ const ICON_PNG: &[u8] = include_bytes!("../../static/icon.png"); /// /// Parses paths like `//.git` (for repository webpage/404) /// +/// The identifier is percent-decoded so that URLs like `/npub1.../my%20repo.git` +/// resolve to the correct filesystem path. +/// /// Returns (npub, identifier) if the path matches a repository URL pattern -fn parse_repo_url(path: &str) -> Option<(&str, &str)> { +fn parse_repo_url(path: &str) -> Option<(String, String)> { // Remove leading slash let path = path.strip_prefix('/').unwrap_or(path); @@ -56,7 +59,7 @@ fn parse_repo_url(path: &str) -> Option<(&str, &str)> { } let npub = parts[0]; - let repo_part = parts[1]; + let repo_part = git::percent_decode(parts[1]); // The repo part must end with .git if !repo_part.ends_with(".git") { @@ -69,14 +72,17 @@ fn parse_repo_url(path: &str) -> Option<(&str, &str)> { } // Extract identifier (remove .git suffix) - let identifier = repo_part.strip_suffix(".git").unwrap_or(repo_part); + let identifier = repo_part + .strip_suffix(".git") + .unwrap_or(&repo_part) + .to_string(); // Identifier must not be empty if identifier.is_empty() { return None; } - Some((npub, identifier)) + Some((npub.to_string(), identifier)) } /// Add CORS headers to a response builder @@ -160,9 +166,6 @@ impl Service> for HttpService { // Check for Git HTTP requests first if let Some((npub, identifier, subpath)) = git::parse_git_url(&path) { - let npub = npub.to_string(); - let identifier = identifier.to_string(); - let subpath = subpath.to_string(); // Extract Git-Protocol header for protocol v2 support let git_protocol = req @@ -391,8 +394,6 @@ impl Service> for HttpService { // GRASP-01: "SHOULD serve a webpage at the same endpoint linking to git nostr client(s) // to browse the repository and a 404 page for repositories it doesn't host" if let Some((npub, identifier)) = parse_repo_url(&path) { - let npub = npub.to_string(); - let identifier = identifier.to_string(); let config = self.config.clone(); let repo_path = git::resolve_repo_path(&git_data_path, &npub, &identifier); diff --git a/src/nostr/events.rs b/src/nostr/events.rs index 00e4486..88ed6ae 100644 --- a/src/nostr/events.rs +++ b/src/nostr/events.rs @@ -361,6 +361,52 @@ impl RepositoryState { } } +/// Validate that a repository identifier is safe for use as a filesystem path component +/// and as a URL path segment without percent-encoding. +/// +/// Rejects identifiers that: +/// - Are empty +/// - Contain path separators (`/`, `\`) +/// - Contain null bytes +/// - Contain whitespace (spaces, tabs, newlines, etc.) — these require percent-encoding +/// in URLs and cause a mismatch between the stored path and the URL-decoded request +/// - Are `.` or `..` (directory traversal) +/// +/// NIP-34 recommends kebab-case identifiers; this function enforces the minimum +/// safety constraints needed for correct filesystem and HTTP serving behaviour. +pub fn validate_identifier(identifier: &str) -> Result<(), String> { + if identifier.is_empty() { + return Err("identifier must not be empty".to_string()); + } + if identifier == "." || identifier == ".." { + return Err(format!( + "identifier '{}' is a reserved path component", + identifier + )); + } + for ch in identifier.chars() { + if ch == '/' || ch == '\\' { + return Err(format!( + "identifier '{}' contains path separator '{}'", + identifier, ch + )); + } + if ch == '\0' { + return Err(format!( + "identifier '{}' contains a null byte", + identifier + )); + } + if ch.is_whitespace() { + return Err(format!( + "identifier '{}' contains whitespace — use hyphens instead (e.g. 'my-repo')", + identifier + )); + } + } + Ok(()) +} + /// Validate a repository announcement according to GRASP-01 and GRASP-05 /// /// Returns: @@ -405,6 +451,11 @@ pub fn validate_announcement( Err(e) => return AnnouncementResult::Reject(format!("Invalid announcement: {}", e)), }; + // Validate identifier is safe for filesystem and URL use + if let Err(reason) = validate_identifier(&announcement.identifier) { + return AnnouncementResult::Reject(format!("Invalid identifier: {}", reason)); + } + // Get validated configs (config.validate() must be called at startup) let archive_config = config.archive_config(); let repository_config = config.repository_config(); @@ -1511,4 +1562,70 @@ mod tests { let result = validate_announcement(&event, &config); assert!(matches!(result, AnnouncementResult::Accept)); } + + // ------------------------------------------------------------------------- + // validate_identifier tests + // ------------------------------------------------------------------------- + + #[test] + fn test_validate_identifier_valid() { + assert!(validate_identifier("my-repo").is_ok()); + assert!(validate_identifier("my_repo").is_ok()); + assert!(validate_identifier("repo123").is_ok()); + assert!(validate_identifier("kuboslopp").is_ok()); + } + + #[test] + fn test_validate_identifier_rejects_empty() { + assert!(validate_identifier("").is_err()); + } + + #[test] + fn test_validate_identifier_rejects_dot_components() { + assert!(validate_identifier(".").is_err()); + assert!(validate_identifier("..").is_err()); + } + + #[test] + fn test_validate_identifier_rejects_path_separators() { + assert!(validate_identifier("foo/bar").is_err()); + assert!(validate_identifier("foo\\bar").is_err()); + } + + #[test] + fn test_validate_identifier_rejects_whitespace() { + assert!(validate_identifier("kuboslopp by Shakespeare").is_err()); + assert!(validate_identifier("my\trepo").is_err()); + assert!(validate_identifier("my\nrepo").is_err()); + } + + #[test] + fn test_validate_announcement_rejects_identifier_with_spaces() { + use crate::config::Config; + use crate::nostr::policy::AnnouncementResult; + + let keys = create_test_keys(); + // Identifier contains spaces — should be rejected regardless of clone/relay tags + let event = create_announcement_event( + &keys, + "kuboslopp by Shakespeare", + vec!["https://gitnostr.com/alice/kuboslopp%20by%20Shakespeare.git"], + vec!["wss://gitnostr.com"], + ); + + let config = Config { + domain: "gitnostr.com".to_string(), + ..Config::for_testing() + }; + let result = validate_announcement(&event, &config); + if let AnnouncementResult::Reject(reason) = result { + assert!( + reason.contains("whitespace") || reason.contains("identifier"), + "unexpected rejection reason: {}", + reason + ); + } else { + panic!("Expected Reject for identifier with spaces, got {:?}", result); + } + } } -- cgit v1.2.3