From 5d076060a5cb4cf1826b2c04dd86234a3770dd80 Mon Sep 17 00:00:00 2001
From: Yarchik
Date: Tue, 23 Jun 2026 20:19:09 +0100
Subject: [PATCH] fix: sort dictionary keys bytewise per BEP 3

encode.dict and encode.dictMap sorted keys with Array.prototype.sort(),
which orders by UTF-16 code unit. BEP 3 requires dictionary keys to be
sorted as raw byte strings (bytewise on their UTF-8 encoding).

The two orders diverge for keys containing an astral (non-BMP) code
point: its leading UTF-16 surrogate (0xD800-0xDBFF) sorts before a BMP
key in U+E000-U+FFFF, but its first UTF-8 byte (0xF0+) sorts after that
key's 0xEE/0xEF byte. Because a torrent info-hash is SHA-1 of the
bencoded info dictionary, a dict with such a key produces a
non-canonical encoding and a wrong hash.

Sort by the bytes that are actually emitted: the UTF-8 encoding for
string and number keys, the raw bytes for Buffer keys. ASCII/BMP-only
dictionaries are unaffected.
---
 lib/encode.js       | 14 ++++++++++++--
 test/encode.test.js |  8 ++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/lib/encode.js b/lib/encode.js
index 0667e1d..b210690 100644
--- a/lib/encode.js
+++ b/lib/encode.js
@@ -74,13 +74,23 @@ encode.number = function (buffers, data) {
   }
 }
 
+function compareKeysByBytes (a, b) {
+  const ab = ArrayBuffer.isView(a) ? a : text2arr(String(a))
+  const bb = ArrayBuffer.isView(b) ? b : text2arr(String(b))
+  const length = Math.min(ab.length, bb.length)
+  for (let i = 0; i < length; i++) {
+    if (ab[i] !== bb[i]) return ab[i] - bb[i]
+  }
+  return ab.length - bb.length
+}
+
 encode.dict = function (buffers, data) {
   buffers.push(buffD)
 
   let j = 0
   let k
   // fix for issue #13 - sorted dicts
-  const keys = Object.keys(data).sort()
+  const keys = Object.keys(data).sort(compareKeysByBytes)
   const kl = keys.length
 
   for (; j < kl; j++) {
@@ -96,7 +106,7 @@ encode.dict = function (buffers, data) {
 encode.dictMap = function (buffers, data) {
   buffers.push(buffD)
 
-  const keys = Array.from(data.keys()).sort()
+  const keys = Array.from(data.keys()).sort(compareKeysByBytes)
 
   for (const key of keys) {
     if (data.get(key) == null) continue
diff --git a/test/encode.test.js b/test/encode.test.js
index cb6f3ab..8e9e371 100644
--- a/test/encode.test.js
+++ b/test/encode.test.js
@@ -22,6 +22,14 @@ test('bencode#encode()', function (t) {
     t.equal(Buffer.from(bencode.encode(data)).toString(), 'd7:integeri12345e6:string11:Hello Worlde')
   })
 
+  t.test('should sort dictionary keys bytewise per BEP 3, including astral keys', function (t) {
+    t.plan(1)
+    // '\uE000' (UTF-8 ee 80 80) must sort before '\u{1F600}' (UTF-8 f0 9f 98 80),
+    // even though the astral key's leading UTF-16 surrogate (d83d) sorts first.
+    const data = { '\u{1F600}': 2, '\uE000': 1 }
+    t.equal(Buffer.from(bencode.encode(data)).toString('hex'), '64333aee8080693165343af09f988069326565')
+  })
+
   t.test('should force keys to be strings', function (t) {
     t.plan(1)
     const data = {