From dfd20a39a7ddaea07103cac45d4d79bc7e6ce0d7 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 10 Apr 2026 16:42:35 +0000 Subject: fix: accept any d-tag identifier; percent-encode in URLs NIP-01 places no restriction on d tag characters and NIP-34 only recommends kebab-case without mandating it. Rejecting identifiers with whitespace or other URL-unsafe characters was therefore overly strict. The correct approach (per NIP-34 PR #2312 and GRASP-01) is to store identifiers verbatim on disk and percent-encode them when constructing URLs. The previous commit already handled the incoming direction (percent-decoding URL paths before filesystem lookup); this commit handles the outgoing direction and removes the validation restriction. Changes: - validate_identifier: drop whitespace rejection; only reject chars that are unsafe as filesystem directory names (/, \, null, . / ..) - git/mod.rs: add percent_encode() alongside percent_decode() - landing.rs: percent-encode identifier in nostr:// clone URL and gitworkshop link (also fixes a pre-existing bug where the clone URL displayed literal '{npub}' / '{identifier}' instead of the values) --- src/git/mod.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) (limited to 'src/git') diff --git a/src/git/mod.rs b/src/git/mod.rs index 999d3c8..156f125 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -451,6 +451,29 @@ pub fn get_repository_head(repo_path: &Path) -> Option { } } +/// Percent-encode a string for use as a URL path segment (RFC 3986 §2.1). +/// +/// Encodes all bytes that are not unreserved characters (`A-Z a-z 0-9 - _ . ~`). +/// This is suitable for encoding a repository identifier in a `nostr://` URL or +/// an HTTP path component such as `/<npub>/<identifier>.git`.
+pub fn percent_encode(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for byte in s.bytes() { + match byte { + // RFC 3986 unreserved characters — never encoded + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + out.push(byte as char); + } + _ => { + out.push('%'); + out.push(char::from_digit((byte >> 4) as u32, 16).unwrap().to_ascii_uppercase()); + out.push(char::from_digit((byte & 0xf) as u32, 16).unwrap().to_ascii_uppercase()); + } + } + } + out +} + /// Decode percent-encoded characters in a URL path component. /// /// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. @@ -481,8 +504,8 @@ pub fn percent_decode(s: &str) -> String { /// /// The identifier component is percent-decoded so that URLs like /// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path -/// `my repo.git` (though such identifiers should be rejected at announcement -/// validation time — see `validate_announcement`). +/// `my repo.git`. Per NIP-34 and GRASP-01, identifiers MUST be percent-encoded +/// in URLs; they are stored verbatim on disk. /// /// Returns (npub, identifier, subpath) where subpath is the part after .git/ /// and identifier has been percent-decoded.
@@ -671,6 +694,30 @@ mod tests { assert_eq!(percent_decode("foo%zz"), "foo%zz"); } + #[test] + fn test_percent_encode_basic() { + assert_eq!(percent_encode("my-repo"), "my-repo"); + assert_eq!(percent_encode("my_repo"), "my_repo"); + assert_eq!(percent_encode("repo123"), "repo123"); + assert_eq!(percent_encode("hello world"), "hello%20world"); + assert_eq!(percent_encode("kuboslopp by Shakespeare"), "kuboslopp%20by%20Shakespeare"); + } + + #[test] + fn test_percent_encode_special_chars() { + assert_eq!(percent_encode("a/b"), "a%2Fb"); + assert_eq!(percent_encode("a\\b"), "a%5Cb"); + assert_eq!(percent_encode("a b\tc"), "a%20b%09c"); + } + + #[test] + fn test_percent_encode_decode_roundtrip() { + let identifiers = ["my-repo", "my repo", "kuboslopp by Shakespeare", "a/b", "foo\0bar"]; + for id in &identifiers { + assert_eq!(percent_decode(&percent_encode(id)), *id); + } + } + #[test] fn test_commit_exists_nonexistent() { let (_temp_dir, repo_path) = create_test_repo(); -- cgit v1.2.3