From dfd20a39a7ddaea07103cac45d4d79bc7e6ce0d7 Mon Sep 17 00:00:00 2001 From: DanConwayDev Date: Fri, 10 Apr 2026 16:42:35 +0000 Subject: fix: accept any d-tag identifier; percent-encode in URLs NIP-01 places no restriction on d tag characters and NIP-34 only recommends kebab-case without mandating it. Rejecting identifiers with whitespace or other URL-unsafe characters was therefore overly strict. The correct approach (per NIP-34 PR #2312 and GRASP-01) is to store identifiers verbatim on disk and percent-encode them when constructing URLs. The previous commit already handled the incoming direction (percent-decoding URL paths before filesystem lookup); this commit handles the outgoing direction and removes the validation restriction. Changes: - validate_identifier: drop whitespace rejection; only reject chars that are unsafe as filesystem directory names (/, \, null, . / ..) - git/mod.rs: add percent_encode() alongside percent_decode() - landing.rs: percent-encode identifier in nostr:// clone URL and gitworkshop link (also fixes a pre-existing bug where the clone URL displayed literal '{npub}' / '{identifier}' instead of the values) --- CHANGELOG.md | 4 ++++ src/git/mod.rs | 51 +++++++++++++++++++++++++++++++++++++++++++-- src/http/landing.rs | 6 ++++-- src/nostr/events.rs | 59 ++++++++++++++++++++++------------------------------- 4 files changed, 81 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 217781c..633ddbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Repository identifiers containing characters that require percent-encoding in URLs (e.g. spaces, emoji) are now accepted and served correctly. NIP-01 places no restriction on `d` tag values and NIP-34 only recommends kebab-case without mandating it, so rejecting non-kebab identifiers was overly strict. Identifiers are stored verbatim on disk and percent-encoded when used in URLs, per the `nostr://` clone URL spec formalised in [NIP-34 PR #2312](https://github.com/nostr-protocol/nips/pull/2312) and the GRASP-01 HTTP path spec. The landing page clone URL now also correctly percent-encodes the identifier. + ### Changed - Remove arbitrary default max connections limit; when `NGIT_MAX_CONNECTIONS` is unset the relay imposes no connection cap, deferring to OS fd limits and infrastructure controls diff --git a/src/git/mod.rs b/src/git/mod.rs index 999d3c8..156f125 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -451,6 +451,29 @@ pub fn get_repository_head(repo_path: &Path) -> Option { } } +/// Percent-encode a string for use as a URL path segment (RFC 3986 §2.1). +/// +/// Encodes all bytes that are not unreserved characters (`A-Z a-z 0-9 - _ . ~`). +/// This is suitable for encoding a repository identifier in a `nostr://` URL or +/// an HTTP path component such as `//.git`. +pub fn percent_encode(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for byte in s.bytes() { + match byte { + // RFC 3986 unreserved characters — never encoded + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + out.push(byte as char); + } + _ => { + out.push('%'); + out.push(char::from_digit((byte >> 4) as u32, 16).unwrap().to_ascii_uppercase()); + out.push(char::from_digit((byte & 0xf) as u32, 16).unwrap().to_ascii_uppercase()); + } + } + } + out +} + /// Decode percent-encoded characters in a URL path component. /// /// Handles `%XX` sequences (e.g. `%20` → space). Invalid sequences are left as-is. @@ -481,8 +504,8 @@ pub fn percent_decode(s: &str) -> String { /// /// The identifier component is percent-decoded so that URLs like /// `/npub1.../my%20repo.git/info/refs` resolve to the filesystem path -/// `my repo.git` (though such identifiers should be rejected at announcement -/// validation time — see `validate_announcement`). +/// `my repo.git`. Per NIP-34 and GRASP-01, identifiers MUST be percent-encoded +/// in URLs; they are stored verbatim on disk. /// /// Returns (npub, identifier, subpath) where subpath is the part after .git/ /// and identifier has been percent-decoded. @@ -671,6 +694,30 @@ mod tests { assert_eq!(percent_decode("foo%zz"), "foo%zz"); } + #[test] + fn test_percent_encode_basic() { + assert_eq!(percent_encode("my-repo"), "my-repo"); + assert_eq!(percent_encode("my_repo"), "my_repo"); + assert_eq!(percent_encode("repo123"), "repo123"); + assert_eq!(percent_encode("hello world"), "hello%20world"); + assert_eq!(percent_encode("kuboslopp by Shakespeare"), "kuboslopp%20by%20Shakespeare"); + } + + #[test] + fn test_percent_encode_special_chars() { + assert_eq!(percent_encode("a/b"), "a%2Fb"); + assert_eq!(percent_encode("a\\b"), "a%5Cb"); + assert_eq!(percent_encode("a b\tc"), "a%20b%09c"); + } + + #[test] + fn test_percent_encode_decode_roundtrip() { + let identifiers = ["my-repo", "my repo", "kuboslopp by Shakespeare", "a/b", "foo\0bar"]; + for id in &identifiers { + assert_eq!(percent_decode(&percent_encode(id)), *id); + } + } + #[test] fn test_commit_exists_nonexistent() { let (_temp_dir, repo_path) = create_test_repo(); diff --git a/src/http/landing.rs b/src/http/landing.rs index 5fc1e6e..042be5e 100644 --- a/src/http/landing.rs +++ b/src/http/landing.rs @@ -2,6 +2,7 @@ /// /// Generates HTML landing page for the Nostr relay. use crate::config::Config; +use crate::git::percent_encode; use crate::http::nip11::RelayInformationDocument; use std::collections::HashMap; @@ -847,7 +848,7 @@ pub fn get_repo_html(config: &Config, npub: &str, identifier: &str) -> String {
curl -Ls https://ngit.dev/install.sh | bash
-
git clone nostr://{{npub}}//{{identifier}}
+
git clone nostr://{npub}//{encoded_identifier}
@@ -867,7 +868,7 @@ pub fn get_repo_html(config: &Config, npub: &str, identifier: &str) -> String { // Construct gitworkshop link: gitworkshop.dev/npub/relayref/identifier const gitworkshopLink = document.getElementById('gitworkshop-link'); - gitworkshopLink.setAttribute('href', 'https://gitworkshop.dev/{npub}/' + relayref + '/{identifier}'); + gitworkshopLink.setAttribute('href', 'https://gitworkshop.dev/{npub}/' + relayref + '/{encoded_identifier}'); // Set footer domain var footerDomain = document.getElementById('footer-domain'); @@ -882,6 +883,7 @@ pub fn get_repo_html(config: &Config, npub: &str, identifier: &str) -> String { relay_name = config.relay_name(), npub = npub, identifier = identifier, + encoded_identifier = percent_encode(identifier), version = get_version(), theme_toggle = get_theme_toggle_html(), theme_script = get_theme_script(), diff --git a/src/nostr/events.rs b/src/nostr/events.rs index 88ed6ae..77a9d9f 100644 --- a/src/nostr/events.rs +++ b/src/nostr/events.rs @@ -361,19 +361,20 @@ impl RepositoryState { } } -/// Validate that a repository identifier is safe for use as a filesystem path component -/// and as a URL path segment without percent-encoding. +/// Validate that a repository identifier is safe for use as a filesystem path component. /// -/// Rejects identifiers that: -/// - Are empty -/// - Contain path separators (`/`, `\`) -/// - Contain null bytes -/// - Contain whitespace (spaces, tabs, newlines, etc.) — these require percent-encoding -/// in URLs and cause a mismatch between the stored path and the URL-decoded request -/// - Are `.` or `..` (directory traversal) +/// NIP-34 places no restriction on `d` tag characters beyond NIP-01 (any string). +/// Identifiers are stored on disk verbatim and percent-encoded when used in URLs +/// (per NIP-34 `nostr://` spec and GRASP-01 HTTP path spec). This function only +/// rejects identifiers that cannot safely be stored as a filesystem directory name: /// -/// NIP-34 recommends kebab-case identifiers; this function enforces the minimum -/// safety constraints needed for correct filesystem and HTTP serving behaviour. +/// - Empty string +/// - Path separators (`/`, `\`) — would escape the repository directory +/// - Null bytes — rejected by most filesystems +/// - `.` or `..` — reserved path components (directory traversal) +/// +/// Whitespace and other characters that require percent-encoding in URLs are +/// explicitly allowed here; callers are responsible for encoding them in URLs. pub fn validate_identifier(identifier: &str) -> Result<(), String> { if identifier.is_empty() { return Err("identifier must not be empty".to_string()); @@ -397,12 +398,6 @@ pub fn validate_identifier(identifier: &str) -> Result<(), String> { identifier )); } - if ch.is_whitespace() { - return Err(format!( - "identifier '{}' contains whitespace — use hyphens instead (e.g. 'my-repo')", - identifier - )); - } } Ok(()) } @@ -1573,6 +1568,11 @@ mod tests { assert!(validate_identifier("my_repo").is_ok()); assert!(validate_identifier("repo123").is_ok()); assert!(validate_identifier("kuboslopp").is_ok()); + // Whitespace is valid — identifiers are stored verbatim on disk and + // percent-encoded when used in URLs (NIP-34 / GRASP-01) + assert!(validate_identifier("kuboslopp by Shakespeare").is_ok()); + assert!(validate_identifier("my\trepo").is_ok()); + assert!(validate_identifier("my repo").is_ok()); } #[test] @@ -1593,19 +1593,12 @@ mod tests { } #[test] - fn test_validate_identifier_rejects_whitespace() { - assert!(validate_identifier("kuboslopp by Shakespeare").is_err()); - assert!(validate_identifier("my\trepo").is_err()); - assert!(validate_identifier("my\nrepo").is_err()); - } - - #[test] - fn test_validate_announcement_rejects_identifier_with_spaces() { + fn test_validate_announcement_accepts_identifier_with_spaces() { use crate::config::Config; use crate::nostr::policy::AnnouncementResult; let keys = create_test_keys(); - // Identifier contains spaces — should be rejected regardless of clone/relay tags + // Identifier contains spaces — valid per NIP-34; must be percent-encoded in URLs let event = create_announcement_event( &keys, "kuboslopp by Shakespeare", @@ -1618,14 +1611,10 @@ mod tests { ..Config::for_testing() }; let result = validate_announcement(&event, &config); - if let AnnouncementResult::Reject(reason) = result { - assert!( - reason.contains("whitespace") || reason.contains("identifier"), - "unexpected rejection reason: {}", - reason - ); - } else { - panic!("Expected Reject for identifier with spaces, got {:?}", result); - } + assert!( + matches!(result, AnnouncementResult::Accept), + "Expected Accept for identifier with spaces, got {:?}", + result + ); } } -- cgit v1.2.3