From ac2f6a6cf9c2368f1c6a87c1716751fdf7496707 Mon Sep 17 00:00:00 2001
From: Alex Gleason <alex@alexgleason.me>
Date: Tue, 17 Mar 2026 15:33:00 -0500
Subject: nip44: allow encryption of payloads larger than 65535 bytes

Extend the v2 padding format with a backwards-compatible sentinel:
when the first 2 bytes of the length prefix are zero, the next 4
bytes encode the plaintext length as a big-endian u32. This raises
the maximum from 65535 bytes to 2^32-1 bytes without requiring a
version bump.

Fixes from nostr-protocol/nips#1907:
- Fix off-by-one: use >= 65536 (not > 65536) for the extended path,
  since u16 can only represent 0..65535
- Fix padding validation: use dynamic prefix_len (2 or 6) instead of
  hardcoded 2 in the unpad() size check
- Fix len(d) typo in decode_payload (should be len(data))
- Remove upper-bound size checks in decode_payload that would reject
  large payloads
- Add write_u32_be, read_uint16_be, read_uint32_be to function list
- Add extended_prefix_threshold constant
- Update size range comments for both small and large payload paths
---
 44.md | 62 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/44.md b/44.md
index a7c13f1..4fe3cc1 100644
--- a/44.md
+++ b/44.md
@@ -84,10 +84,12 @@ NIP-44 version 2 has the following design characteristics:
    - Slice 76-byte HKDF output into: `chacha_key` (bytes 0..32), `chacha_nonce` (bytes 32..44), `hmac_key` (bytes 44..76)
 4. Add padding
    - Content must be encoded from UTF-8 into byte array
-   - Validate plaintext length. Minimum is 1 byte, maximum is 65535 bytes
-   - Padding format is: `[plaintext_length: u16][plaintext][zero_bytes]`
+   - Validate plaintext length. Minimum is 1 byte, maximum is 4,294,967,295 bytes
    - Padding algorithm is related to powers-of-two, with min padded msg size of 32 bytes
-   - Plaintext length is encoded in big-endian as first 2 bytes of the padded blob
+   - Plaintext length prefix is encoded in big-endian:
+     - If length is less than 65536: prefix is 2 bytes (`u16`), format is `[plaintext_length: u16][plaintext][zero_bytes]`
+     - If length is 65536 or greater: prefix is 6 bytes (2 zero bytes + `u32`), format is `[0x00, 0x00][plaintext_length: u32][plaintext][zero_bytes]`
+   - A zero value in the first 2 bytes signals the extended format; since valid plaintext is at least 1 byte, a u16 length of 0 is otherwise invalid
 5. Encrypt padded content
    - Use ChaCha20, with key and nonce from step 3
 6. Calculate MAC (message authentication code)
@@ -112,8 +114,8 @@ validation rules, refer to BIP-340.
 2. Decode base64
    - Base64 is decoded into `version, nonce, ciphertext, mac`
    - If the version is unknown, implementations must indicate that the encryption version is not supported
-   - Validate length of base64 message to prevent DoS on base64 decoder: it can be in range from 132 to 87472 chars
-   - Validate length of decoded message to verify output of the decoder: it can be in range from 99 to 65603 bytes
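+   - Validate minimum length of base64 message before decoding: it must be at least 132 chars. The format no longer defines a fixed upper bound, so implementations that need DoS protection on the base64 decoder should enforce their own maximum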
+   - Validate minimum length of decoded message to verify output of the decoder: it must be at least 99 bytes
 3. Calculate conversation key
    - See step 1 of [encryption](#Encryption)
 4. Calculate message keys
@@ -124,8 +126,10 @@ validation rules, refer to BIP-340.
 6. Decrypt ciphertext
    - Use ChaCha20 with key and nonce from step 3
 7. Remove padding
-   - Read the first two BE bytes of plaintext that correspond to plaintext length
-   - Verify that the length of sliced plaintext matches the value of the two BE bytes
+   - Read the first 2 bytes as a big-endian u16
+     - If zero, read the next 4 bytes as a big-endian u32 plaintext length (6-byte prefix total)
+     - Otherwise, use those 2 bytes as the u16 plaintext length (2-byte prefix total)
+   - Verify that the length of sliced plaintext matches the decoded length
    - Verify that calculated padding from step 3 of the [encryption](#Encryption) process matches the actual padding
 
 ### Details
@@ -149,7 +153,8 @@ validation rules, refer to BIP-340.
     `i`-th byte (inclusive) to the `j`-th byte (exclusive) of `x`.
 - Constants `c`:
   - `min_plaintext_size` is 1. 1 byte msg is padded to 32 bytes.
-  - `max_plaintext_size` is 65535 (64kB - 1). It is padded to 65536 bytes.
+  - `max_plaintext_size` is 4294967295 (2^32 - 1).
+  - `extended_prefix_threshold` is 65536. Lengths below this use a 2-byte u16 prefix; lengths at or above use a 6-byte prefix (2 zero bytes + u32).
 - Functions
   - `base64_encode(string)` and `base64_decode(bytes)` are Base64 ([RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648), with padding)
   - `concat` refers to byte array concatenation
@@ -157,6 +162,9 @@ validation rules, refer to BIP-340.
   - `utf8_encode(string)` and `utf8_decode(bytes)` transform string to byte array and back
   - `write_u8(number)` restricts number to values 0..255 and encodes into Big-Endian uint8 byte array
   - `write_u16_be(number)` restricts number to values 0..65535 and encodes into Big-Endian uint16 byte array
+  - `write_u32_be(number)` restricts number to values 0..4294967295 and encodes into Big-Endian uint32 byte array
+  - `read_uint16_be(bytes)` reads 2 bytes as a Big-Endian unsigned 16-bit integer
+  - `read_uint32_be(bytes)` reads 4 bytes as a Big-Endian unsigned 32-bit integer
   - `zeros(length)` creates byte array of length `length >= 0`, filled with zeros
   - `floor(number)` and `log2(number)` are well-known mathematical methods
 
@@ -181,35 +189,45 @@ def calc_padded_len(unpadded_len):
 # Converts unpadded plaintext to padded bytearray
 def pad(plaintext):
   unpadded = utf8_encode(plaintext)
-  unpadded_len = len(plaintext)
+  unpadded_len = len(unpadded)
   if (unpadded_len < c.min_plaintext_size or
       unpadded_len > c.max_plaintext_size): raise Exception('invalid plaintext length')
-  prefix = write_u16_be(unpadded_len)
+  if unpadded_len >= c.extended_prefix_threshold:
+    prefix = concat([0, 0], write_u32_be(unpadded_len))  # 6 bytes
+  else:
+    prefix = write_u16_be(unpadded_len)                   # 2 bytes
   suffix = zeros(calc_padded_len(unpadded_len) - unpadded_len)
   return concat(prefix, unpadded, suffix)
 
 # Converts padded bytearray to unpadded plaintext
 def unpad(padded):
-  unpadded_len = read_uint16_be(padded[0:2])
-  unpadded = padded[2:2+unpadded_len]
+  first_two = read_uint16_be(padded[0:2])
+  if first_two == 0:
+    unpadded_len = read_uint32_be(padded[2:6])
+    prefix_len = 6
+  else:
+    unpadded_len = first_two
+    prefix_len = 2
+  unpadded = padded[prefix_len:prefix_len+unpadded_len]
   if (unpadded_len == 0 or
       len(unpadded) != unpadded_len or
-      len(padded) != 2 + calc_padded_len(unpadded_len)): raise Exception('invalid padding')
+      len(padded) != prefix_len + calc_padded_len(unpadded_len)): raise Exception('invalid padding')
   return utf8_decode(unpadded)
 
-# metadata: always 65b (version: 1b, nonce: 32b, max: 32b)
-# plaintext: 1b to 0xffff
-# padded plaintext: 32b to 0xffff
-# ciphertext: 32b+2 to 0xffff+2
-# raw payload: 99 (65+32+2) to 65603 (65+0xffff+2)
-# compressed payload (base64): 132b to 87472b
+# metadata: always 65b (version: 1b, nonce: 32b, mac: 32b)
+# plaintext: 1b to 0xffffffff
+# padded plaintext (small, <65536): 32b to 0x10000, with 2b prefix -> 34b to 0x10000+2
+# padded plaintext (large, >=65536): 0x10000 to 0x100000000, with 6b prefix -> 0x10006 to 0x100000000+6
+# ciphertext: same as padded plaintext (chacha20 doesn't change length)
+# raw payload (small): 99 (65+34) to 65603 (65+0x10000+2)
+# raw payload (large): 65607 (65+0x10006) to 4294967367 (65+0x100000000+6)
 def decode_payload(payload):
   plen = len(payload)
   if plen == 0 or payload[0] == '#': raise Exception('unknown version')
-  if plen < 132 or plen > 87472: raise Exception('invalid payload size')
+  if plen < 132: raise Exception('invalid payload size')
   data = base64_decode(payload)
-  dlen = len(d)
-  if dlen < 99 or dlen > 65603: raise Exception('invalid data size');
+  dlen = len(data)
+  if dlen < 99: raise Exception('invalid data size');
   vers = data[0]
   if vers != 2: raise Exception('unknown version ' + vers)
   nonce = data[1:33]
-- 
cgit v1.2.3