diff options
| author | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-10 16:42:35 +0000 |
|---|---|---|
| committer | DanConwayDev <DanConwayDev@protonmail.com> | 2026-04-10 16:42:35 +0000 |
| commit | dfd20a39a7ddaea07103cac45d4d79bc7e6ce0d7 (patch) | |
| tree | f4d3c38c09c7b27a25f6b6933c9de0e42149c82f /src/git/mod.rs | |
| parent | 2d74b9ca69b3a1e0b9a2359c12cc2d1979fc6130 (diff) | |
fix: accept any d-tag identifier; percent-encode in URLs
NIP-01 places no restriction on d tag characters and NIP-34 only
recommends kebab-case without mandating it. Rejecting identifiers with
whitespace or other URL-unsafe characters was therefore overly strict.
The correct approach (per NIP-34 PR #2312 and GRASP-01) is to store
identifiers verbatim on disk and percent-encode them when constructing
URLs. The previous commit already handled the incoming direction
(percent-decoding URL paths before filesystem lookup); this commit
handles the outgoing direction and removes the validation restriction.
Changes:
- validate_identifier: drop whitespace rejection; only reject identifiers
that are unsafe as filesystem directory names (those containing `/`, `\` or
a NUL byte, or equal to `.` or `..`)
- git/mod.rs: add percent_encode() alongside percent_decode()
- landing.rs: percent-encode identifier in nostr:// clone URL and
gitworkshop link (also fixes a pre-existing bug where the clone URL
displayed literal '{npub}' / '{identifier}' instead of the values)
Diffstat (limited to 'src/git/mod.rs')
| -rw-r--r-- | src/git/mod.rs | 51 |
1 files changed, 49 insertions, 2 deletions
diff --git a/src/git/mod.rs b/src/git/mod.rs index 999d3c8..156f125 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs | |||
| @@ -451,6 +451,29 @@ pub fn get_repository_head(repo_path: &Path) -> Option<String> { | |||
| 451 | } | 451 | } |
| 452 | } | 452 | } |
| 453 | 453 | ||
/// Percent-encode a string for use as a URL path segment (RFC 3986 §2.1).
///
/// The input is processed byte-by-byte over its UTF-8 representation. Bytes
/// in the RFC 3986 §2.3 unreserved set (`A-Z a-z 0-9 - _ . ~`) are copied
/// through unchanged; every other byte (including `%`, `/`, whitespace, NUL
/// and each byte of a multi-byte UTF-8 sequence) is written as `%XX` using
/// uppercase hexadecimal digits.
///
/// This is suitable for encoding a repository identifier in a `nostr://` URL or
/// an HTTP path component such as `/<npub>/<encoded-identifier>.git`.
///
/// For example, `"my repo"` becomes `"my%20repo"`, `"a/b"` becomes `"a%2Fb"`
/// and `"é"` becomes `"%C3%A9"`. The empty string encodes to the empty string.
#[must_use]
pub fn percent_encode(s: &str) -> String {
    // Uppercase hex digits indexed by nibble value; a table lookup cannot fail,
    // so no `unwrap()` is needed to produce the two digits.
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // `s.len()` is a lower bound: each escaped byte expands to three bytes and
    // the string grows on demand when that happens.
    let mut out = String::with_capacity(s.len());
    for byte in s.bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
            // RFC 3986 unreserved characters are never encoded.
            out.push(char::from(byte));
        } else {
            out.push('%');
            out.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            out.push(char::from(HEX_UPPER[usize::from(byte & 0x0f)]));
        }
    }
    out
}
| 476 | |||
| 454 | /// Decode percent-encoded characters in a URL path component. | 477 | /// Decode percent-encoded characters in a URL path component. |
| 455 | /// | 478 | /// |
| 456 | /// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. | 479 | /// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. |
| @@ -481,8 +504,8 @@ pub fn percent_decode(s: &str) -> String { | |||
| 481 | /// | 504 | /// |
| 482 | /// The identifier component is percent-decoded so that URLs like | 505 | /// The identifier component is percent-decoded so that URLs like |
| 483 | /// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path | 506 | /// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path |
| 484 | /// `my repo.git` (though such identifiers should be rejected at announcement | 507 | /// `my repo.git`. Per NIP-34 and GRASP-01, identifiers MUST be percent-encoded |
| 485 | /// validation time — see `validate_announcement`). | 508 | /// in URLs; they are stored verbatim on disk. |
| 486 | /// | 509 | /// |
| 487 | /// Returns (npub, identifier, subpath) where subpath is the part after .git/ | 510 | /// Returns (npub, identifier, subpath) where subpath is the part after .git/ |
| 488 | /// and identifier has been percent-decoded. | 511 | /// and identifier has been percent-decoded. |
| @@ -672,6 +695,30 @@ mod tests { | |||
| 672 | } | 695 | } |
| 673 | 696 | ||
#[test]
fn test_percent_encode_basic() {
    // Identifiers made only of unreserved characters pass through untouched.
    for &plain in &["my-repo", "my_repo", "repo123"] {
        assert_eq!(percent_encode(plain), plain);
    }

    // Spaces are the common real-world case: each one becomes `%20`.
    let with_spaces = [
        ("hello world", "hello%20world"),
        ("kuboslopp by Shakespeare", "kuboslopp%20by%20Shakespeare"),
    ];
    for &(raw, expected) in &with_spaces {
        assert_eq!(percent_encode(raw), expected);
    }
}
| 705 | |||
#[test]
fn test_percent_encode_special_chars() {
    // Path separators and control characters must be escaped with uppercase hex.
    let cases = [
        ("a/b", "a%2Fb"),
        ("a\\b", "a%5Cb"),
        ("a b\tc", "a%20b%09c"),
    ];
    for &(raw, expected) in &cases {
        assert_eq!(percent_encode(raw), expected);
    }
}
| 712 | |||
#[test]
fn test_percent_encode_decode_roundtrip() {
    // Decoding an encoded identifier must give back the exact original string,
    // including separators and an embedded NUL byte.
    let samples = ["my-repo", "my repo", "kuboslopp by Shakespeare", "a/b", "foo\0bar"];
    for &original in samples.iter() {
        let encoded = percent_encode(original);
        let decoded = percent_decode(&encoded);
        assert_eq!(decoded, original);
    }
}
| 720 | |||
| 721 | #[test] | ||
| 675 | fn test_commit_exists_nonexistent() { | 722 | fn test_commit_exists_nonexistent() { |
| 676 | let (_temp_dir, repo_path) = create_test_repo(); | 723 | let (_temp_dir, repo_path) = create_test_repo(); |
| 677 | assert!(!commit_exists( | 724 | assert!(!commit_exists( |