From ac2f6a6cf9c2368f1c6a87c1716751fdf7496707 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 17 Mar 2026 15:33:00 -0500 Subject: nip44: allow encryption of payloads larger than 65535 bytes Extend the v2 padding format with a backwards-compatible sentinel: when the first 2 bytes of the length prefix are zero, the next 4 bytes encode the plaintext length as a big-endian u32. This raises the maximum from 65535 bytes to 2^32-1 bytes without requiring a version bump. Fixes from nostr-protocol/nips#1907: - Fix off-by-one: use >= 65536 (not > 65536) for the extended path, since u16 can only represent 0..65535 - Fix padding validation: use dynamic prefix_len (2 or 6) instead of hardcoded 2 in the unpad() size check - Fix len(d) typo in decode_payload (should be len(data)) - Remove upper-bound size checks in decode_payload that would reject large payloads - Add write_u32_be, read_uint16_be, read_uint32_be to function list - Add extended_prefix_threshold constant - Update size range comments for both small and large payload paths --- 44.md | 62 ++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/44.md b/44.md index a7c13f1..4fe3cc1 100644 --- a/44.md +++ b/44.md @@ -84,10 +84,12 @@ NIP-44 version 2 has the following design characteristics: - Slice 76-byte HKDF output into: `chacha_key` (bytes 0..32), `chacha_nonce` (bytes 32..44), `hmac_key` (bytes 44..76) 4. Add padding - Content must be encoded from UTF-8 into byte array - - Validate plaintext length. Minimum is 1 byte, maximum is 65535 bytes - - Padding format is: `[plaintext_length: u16][plaintext][zero_bytes]` + - Validate plaintext length. 
Minimum is 1 byte, maximum is 4,294,967,295 bytes - Padding algorithm is related to powers-of-two, with min padded msg size of 32 bytes - - Plaintext length is encoded in big-endian as first 2 bytes of the padded blob + - Plaintext length prefix is encoded in big-endian: + - If length is less than 65536: prefix is 2 bytes (`u16`), format is `[plaintext_length: u16][plaintext][zero_bytes]` + - If length is 65536 or greater: prefix is 6 bytes (2 zero bytes + `u32`), format is `[0x00, 0x00][plaintext_length: u32][plaintext][zero_bytes]` + - A zero value in the first 2 bytes signals the extended format; since valid plaintext is at least 1 byte, a u16 length of 0 is otherwise invalid 5. Encrypt padded content - Use ChaCha20, with key and nonce from step 3 6. Calculate MAC (message authentication code) @@ -112,8 +114,8 @@ validation rules, refer to BIP-340. 2. Decode base64 - Base64 is decoded into `version, nonce, ciphertext, mac` - If the version is unknown, implementations must indicate that the encryption version is not supported - - Validate length of base64 message to prevent DoS on base64 decoder: it can be in range from 132 to 87472 chars - - Validate length of decoded message to verify output of the decoder: it can be in range from 99 to 65603 bytes + - Validate minimum length of base64 message before passing it to the base64 decoder: it must be at least 132 chars. No protocol-level maximum applies anymore, so implementations that need DoS protection on the base64 decoder must enforce their own upper bound + - Validate minimum length of decoded message to verify output of the decoder: it must be at least 99 bytes 3. Calculate conversation key - See step 1 of [encryption](#Encryption) 4. Calculate message keys @@ -124,8 +126,10 @@ validation rules, refer to BIP-340. 6. Decrypt ciphertext - Use ChaCha20 with key and nonce from step 3 7.
Remove padding - - Read the first two BE bytes of plaintext that correspond to plaintext length - - Verify that the length of sliced plaintext matches the value of the two BE bytes + - Read the first 2 bytes as a big-endian u16 + - If zero, read the next 4 bytes as a big-endian u32 plaintext length (6-byte prefix total) + - Otherwise, use those 2 bytes as the u16 plaintext length (2-byte prefix total) + - Verify that the length of sliced plaintext matches the decoded length - Verify that calculated padding from step 3 of the [encryption](#Encryption) process matches the actual padding ### Details @@ -149,7 +153,8 @@ validation rules, refer to BIP-340. `i`-th byte (inclusive) to the `j`-th byte (exclusive) of `x`. - Constants `c`: - `min_plaintext_size` is 1. 1 byte msg is padded to 32 bytes. - - `max_plaintext_size` is 65535 (64kB - 1). It is padded to 65536 bytes. + - `max_plaintext_size` is 4294967295 (2^32 - 1). + - `extended_prefix_threshold` is 65536. Lengths below this use a 2-byte u16 prefix; lengths at or above use a 6-byte prefix (2 zero bytes + u32). - Functions - `base64_encode(string)` and `base64_decode(bytes)` are Base64 ([RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648), with padding) - `concat` refers to byte array concatenation @@ -157,6 +162,9 @@ validation rules, refer to BIP-340. 
- `utf8_encode(string)` and `utf8_decode(bytes)` transform string to byte array and back - `write_u8(number)` restricts number to values 0..255 and encodes into Big-Endian uint8 byte array - `write_u16_be(number)` restricts number to values 0..65535 and encodes into Big-Endian uint16 byte array + - `write_u32_be(number)` restricts number to values 0..4294967295 and encodes into Big-Endian uint32 byte array + - `read_uint16_be(bytes)` reads 2 bytes as a Big-Endian unsigned 16-bit integer + - `read_uint32_be(bytes)` reads 4 bytes as a Big-Endian unsigned 32-bit integer - `zeros(length)` creates byte array of length `length >= 0`, filled with zeros - `floor(number)` and `log2(number)` are well-known mathematical methods @@ -181,35 +189,45 @@ def calc_padded_len(unpadded_len): # Converts unpadded plaintext to padded bytearray def pad(plaintext): unpadded = utf8_encode(plaintext) - unpadded_len = len(plaintext) + unpadded_len = len(unpadded) if (unpadded_len < c.min_plaintext_size or unpadded_len > c.max_plaintext_size): raise Exception('invalid plaintext length') - prefix = write_u16_be(unpadded_len) + if unpadded_len >= c.extended_prefix_threshold: + prefix = concat([0, 0], write_u32_be(unpadded_len)) # 6 bytes + else: + prefix = write_u16_be(unpadded_len) # 2 bytes suffix = zeros(calc_padded_len(unpadded_len) - unpadded_len) return concat(prefix, unpadded, suffix) # Converts padded bytearray to unpadded plaintext def unpad(padded): - unpadded_len = read_uint16_be(padded[0:2]) - unpadded = padded[2:2+unpadded_len] + first_two = read_uint16_be(padded[0:2]) + if first_two == 0: + unpadded_len = read_uint32_be(padded[2:6]) + prefix_len = 6 + else: + unpadded_len = first_two + prefix_len = 2 + unpadded = padded[prefix_len:prefix_len+unpadded_len] if (unpadded_len == 0 or len(unpadded) != unpadded_len or - len(padded) != 2 + calc_padded_len(unpadded_len)): raise Exception('invalid padding') + len(padded) != prefix_len + calc_padded_len(unpadded_len)): raise 
Exception('invalid padding') return utf8_decode(unpadded) -# metadata: always 65b (version: 1b, nonce: 32b, max: 32b) -# plaintext: 1b to 0xffff -# padded plaintext: 32b to 0xffff -# ciphertext: 32b+2 to 0xffff+2 -# raw payload: 99 (65+32+2) to 65603 (65+0xffff+2) -# compressed payload (base64): 132b to 87472b +# metadata: always 65b (version: 1b, nonce: 32b, mac: 32b) +# plaintext: 1b to 0xffffffff +# padded plaintext (small, <65536): 32b to 0x10000, with 2b prefix -> 34b to 0x10000+2 +# padded plaintext (large, >=65536): 0x10000 to 0x100000000, with 6b prefix -> 0x10006 to 0x100000000+6 +# ciphertext: same as padded plaintext (chacha20 doesn't change length) +# raw payload (small): 99 (65+34) to 65603 (65+0x10000+2) +# raw payload (large): 65607 (65+0x10006) to 4294967367 (65+0x100000000+6) def decode_payload(payload): plen = len(payload) if plen == 0 or payload[0] == '#': raise Exception('unknown version') - if plen < 132 or plen > 87472: raise Exception('invalid payload size') + if plen < 132: raise Exception('invalid payload size') data = base64_decode(payload) - dlen = len(d) - if dlen < 99 or dlen > 65603: raise Exception('invalid data size'); + dlen = len(data) + if dlen < 99: raise Exception('invalid data size'); vers = data[0] if vers != 2: raise Exception('unknown version ' + vers) nonce = data[1:33] -- cgit v1.2.3