encoding: add base32 support (denoland/deno#3855)

caspervonb · Jan 24, 2021 · 2e1f67c · 2e1f67c
1 parent 2ae1d56
commit 2e1f67c
Show file tree

Hide file tree

Showing 3 changed files with 379 additions and 0 deletions.
diff --git a/encoding/README.md b/encoding/README.md
@@ -219,3 +219,26 @@ Serializes `object` as a YAML document.
 See [`./yaml/example`](./yaml/example) folder and [js-yaml] repository.
 
 [js-yaml]: https://github.com/nodeca/js-yaml
+
+## base32
+
+[RFC4648 base32](https://tools.ietf.org/html/rfc4648#section-6) encoder/decoder
+for Deno.
+
+### Basic usage
+
+`encode` encodes a `Uint8Array` to its RFC4648 base32 representation, and
+`decode` decodes a given RFC4648 base32 representation back to a `Uint8Array`.
+
+```ts
+import { encode, decode } from "https://deno.land/std/encoding/base32.ts";
+
+const b32Repr = "RC2E6GA=";
+
+const binaryData = decode(b32Repr);
+console.log(binaryData);
+// => Uint8Array [ 136, 180, 79, 24 ]
+
+console.log(encode(binaryData));
+// => RC2E6GA=
+```
diff --git a/encoding/base32.ts b/encoding/base32.ts
@@ -0,0 +1,218 @@
+// Modified from https://github.com/beatgammit/base64-js
+// Copyright (c) 2014 Jameson Little. MIT License.
+
+// RFC4648 base32 alphabet: a character's position is the 5-bit value it
+// encodes.
+const code = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+
+// lookup maps a 5-bit value to its character; revLookup maps a character code
+// back to its 5-bit value (entries for codes outside the alphabet stay unset).
+const lookup: string[] = [];
+const revLookup: number[] = [];
+code.split("").forEach((char, value): void => {
+  lookup[value] = char;
+  revLookup[char.charCodeAt(0)] = value;
+});
+
+// Indexed by the number of "=" placeholders in the final 8-character group;
+// the value is how many bytes of a full 5-byte group that padding stands for.
+// Only 0, 1, 3, 4 and 6 placeholders are valid, so the other slots are
+// explicitly undefined (rather than array holes).
+const placeHolderPadLookup: ReadonlyArray<number | undefined> = [
+  0,
+  1,
+  undefined,
+  2,
+  3,
+  undefined,
+  4
+];
+
+/**
+ * Returns the number of bytes missing from the final 5-byte group for the
+ * given number of "=" placeholders.
+ * @param placeHoldersLen number of "=" placeholders in the last group
+ * @throws Error("Invalid pad length") when the placeholder count has no entry
+ * in the lookup table (e.g. 2, 5, 7 or 8)
+ */
+function _getPadLen(placeHoldersLen: number): number {
+  const maybeLen = placeHolderPadLookup[placeHoldersLen];
+  if (maybeLen === undefined) {
+    throw new Error("Invalid pad length");
+  }
+  // Narrowed to `number` by the guard above; no non-null assertion needed.
+  return maybeLen;
+}
+
+/**
+ * Splits a base32 string into the count of characters before the first "="
+ * and the count of placeholders needed to fill the final 8-character group.
+ * @param b32 base32 text whose length must be a multiple of 8
+ * @returns a `[validLen, placeHoldersLen]` pair
+ * @throws Error when the length of `b32` is not a multiple of 8
+ */
+function getLens(b32: string): [number, number] {
+  const total = b32.length;
+  if (total % 8 !== 0) {
+    throw new Error("Invalid string. Length must be a multiple of 8");
+  }
+
+  const firstPad = b32.indexOf("=");
+  if (firstPad === -1) {
+    // No padding at all: every character carries data.
+    return [total, 0];
+  }
+
+  // Everything from the first "=" on is treated as padding.
+  return [firstPad, 8 - (firstPad % 8)];
+}
+
+/**
+ * Computes how many bytes the given RFC4648 base32 string decodes to,
+ * without decoding it.
+ * @param b32 base32 text whose length is a multiple of 8
+ * @returns the decoded size in bytes
+ */
+export function byteLength(b32: string): number {
+  return _byteLength(...getLens(b32));
+}
+
+/**
+ * Byte count for a base32 string described by its data and padding lengths.
+ * @param validLen number of characters before the first "="
+ * @param placeHoldersLen number of placeholders in the final group
+ */
+function _byteLength(validLen: number, placeHoldersLen: number): number {
+  const paddedChars = validLen + placeHoldersLen;
+  // Every 8 characters stand for 5 bytes; the padding then removes some.
+  const unpaddedBytes = (paddedChars * 5) / 8;
+  return unpaddedBytes - _getPadLen(placeHoldersLen);
+}
+
+/**
+ * Decodes a given RFC4648 base32 encoded string.
+ *
+ * The input must use the upper-case RFC4648 alphabet (`A-Z`, `2-7`) and be
+ * padded with "=" to a multiple of 8 characters.
+ * @param b32 base32 text to decode
+ * @returns the decoded bytes
+ * @throws Error when the length is not a multiple of 8, when the number of
+ * padding characters is invalid, or when the input contains a character
+ * outside the alphabet (including a non-"=" character after the first "=")
+ */
+export function decode(b32: string): Uint8Array {
+  let tmp: number;
+  const [validLen, placeHoldersLen] = getLens(b32);
+
+  const arr = new Uint8Array(_byteLength(validLen, placeHoldersLen));
+
+  // Reject malformed input up front. Without this check an unknown character
+  // looks up as `undefined`, and `undefined << n` is 0, so garbage would be
+  // silently decoded as zero bits; likewise anything following the first "="
+  // would be silently ignored.
+  for (let j = 0; j < b32.length; ++j) {
+    const ok =
+      j < validLen
+        ? revLookup[b32.charCodeAt(j)] !== undefined
+        : b32[j] === "=";
+    if (!ok) {
+      throw new Error("Invalid character at index " + j);
+    }
+  }
+
+  let curByte = 0;
+
+  // if there are placeholders, only get up to the last complete 8 chars
+  // (subtracting 8 makes the loop stop before the partially filled group)
+  const len = placeHoldersLen > 0 ? validLen - 8 : validLen;
+
+  let i: number;
+  for (i = 0; i < len; i += 8) {
+    // First 5 characters -> 25 bits; the top 24 become bytes 0..2.
+    tmp =
+      (revLookup[b32.charCodeAt(i)] << 20) |
+      (revLookup[b32.charCodeAt(i + 1)] << 15) |
+      (revLookup[b32.charCodeAt(i + 2)] << 10) |
+      (revLookup[b32.charCodeAt(i + 3)] << 5) |
+      revLookup[b32.charCodeAt(i + 4)];
+    arr[curByte++] = (tmp >> 17) & 0xff;
+    arr[curByte++] = (tmp >> 9) & 0xff;
+    arr[curByte++] = (tmp >> 1) & 0xff;
+
+    // The leftover bit plus the last 3 characters -> 16 bits -> bytes 3..4.
+    tmp =
+      ((tmp & 1) << 15) |
+      (revLookup[b32.charCodeAt(i + 5)] << 10) |
+      (revLookup[b32.charCodeAt(i + 6)] << 5) |
+      revLookup[b32.charCodeAt(i + 7)];
+    arr[curByte++] = (tmp >> 8) & 0xff;
+    arr[curByte++] = tmp & 0xff;
+  }
+
+  // `i` now points at the final, partially filled group (if any).
+  if (placeHoldersLen === 1) {
+    // 7 data characters -> 4 bytes (the low 3 bits of the last one are unused)
+    tmp =
+      (revLookup[b32.charCodeAt(i)] << 20) |
+      (revLookup[b32.charCodeAt(i + 1)] << 15) |
+      (revLookup[b32.charCodeAt(i + 2)] << 10) |
+      (revLookup[b32.charCodeAt(i + 3)] << 5) |
+      revLookup[b32.charCodeAt(i + 4)];
+    arr[curByte++] = (tmp >> 17) & 0xff;
+    arr[curByte++] = (tmp >> 9) & 0xff;
+    arr[curByte++] = (tmp >> 1) & 0xff;
+    tmp =
+      ((tmp & 1) << 7) |
+      (revLookup[b32.charCodeAt(i + 5)] << 2) |
+      (revLookup[b32.charCodeAt(i + 6)] >> 3);
+    arr[curByte++] = tmp & 0xff;
+  } else if (placeHoldersLen === 3) {
+    // 5 data characters -> 3 bytes (the low bit of the last one is unused)
+    tmp =
+      (revLookup[b32.charCodeAt(i)] << 19) |
+      (revLookup[b32.charCodeAt(i + 1)] << 14) |
+      (revLookup[b32.charCodeAt(i + 2)] << 9) |
+      (revLookup[b32.charCodeAt(i + 3)] << 4) |
+      (revLookup[b32.charCodeAt(i + 4)] >> 1);
+    arr[curByte++] = (tmp >> 16) & 0xff;
+    arr[curByte++] = (tmp >> 8) & 0xff;
+    arr[curByte++] = tmp & 0xff;
+  } else if (placeHoldersLen === 4) {
+    // 4 data characters -> 2 bytes (the low 4 bits of the last one are unused)
+    tmp =
+      (revLookup[b32.charCodeAt(i)] << 11) |
+      (revLookup[b32.charCodeAt(i + 1)] << 6) |
+      (revLookup[b32.charCodeAt(i + 2)] << 1) |
+      (revLookup[b32.charCodeAt(i + 3)] >> 4);
+    arr[curByte++] = (tmp >> 8) & 0xff;
+    arr[curByte++] = tmp & 0xff;
+  } else if (placeHoldersLen === 6) {
+    // 2 data characters -> 1 byte (the low 2 bits of the last one are unused)
+    tmp =
+      (revLookup[b32.charCodeAt(i)] << 3) |
+      (revLookup[b32.charCodeAt(i + 1)] >> 2);
+    arr[curByte++] = tmp & 0xff;
+  }
+
+  return arr;
+}
+
+/**
+ * Encodes the complete 5-byte groups of `uint8` in the range `[start, end)`
+ * into base32 characters, 8 characters per group.
+ * @param uint8 source bytes
+ * @param start index of the first byte to encode
+ * @param end index one past the last byte to encode
+ */
+function encodeChunk(uint8: Uint8Array, start: number, end: number): string {
+  const chars = [];
+  for (let pos = start; pos < end; pos += 5) {
+    // Bytes 0..2 of the group: 24 bits, of which the top 20 give 4 characters.
+    const hi =
+      ((uint8[pos] << 16) & 0xff0000) |
+      ((uint8[pos + 1] << 8) & 0xff00) |
+      (uint8[pos + 2] & 0xff);
+    // The 4 leftover bits plus bytes 3..4: 20 bits giving 4 more characters.
+    const lo =
+      ((hi & 0xf) << 16) |
+      ((uint8[pos + 3] << 8) & 0xff00) |
+      (uint8[pos + 4] & 0xff);
+    chars.push(
+      lookup[(hi >> 19) & 0x1f],
+      lookup[(hi >> 14) & 0x1f],
+      lookup[(hi >> 9) & 0x1f],
+      lookup[(hi >> 4) & 0x1f],
+      lookup[(lo >> 15) & 0x1f],
+      lookup[(lo >> 10) & 0x1f],
+      lookup[(lo >> 5) & 0x1f],
+      lookup[lo & 0x1f]
+    );
+  }
+  return chars.join("");
+}
+
+/**
+ * Encodes a given Uint8Array into RFC4648 base32 representation.
+ *
+ * Complete 5-byte groups become 8 characters each; the 1-4 leftover bytes are
+ * zero-extended to whole characters and padded with "=" to 8 characters.
+ * @param uint8 bytes to encode
+ * @returns the padded base32 text
+ */
+export function encode(uint8: Uint8Array): string {
+  const total = uint8.length;
+  const tailBytes = total % 5;
+  const bodyLen = total - tailBytes;
+  const maxChunkLength = 16385; // must be multiple of 5
+  const pieces: string[] = [];
+  // Maps the low 5 bits of a value to its base32 character.
+  const sym = (bits: number): string => lookup[bits & 0x1f];
+
+  // Encode every complete 5-byte group, one bounded chunk at a time.
+  for (let offset = 0; offset < bodyLen; offset += maxChunkLength) {
+    const stop = Math.min(offset + maxChunkLength, bodyLen);
+    pieces.push(encodeChunk(uint8, offset, stop));
+  }
+
+  // Encode the leftover bytes, shifting left so the bits fill whole
+  // characters, then append the matching amount of padding.
+  switch (tailBytes) {
+    case 4: {
+      // 32 bits -> 7 characters (35 bits, 3 of them zero fill)
+      const hi =
+        ((uint8[bodyLen] & 0xff) << 16) |
+        ((uint8[bodyLen + 1] & 0xff) << 8) |
+        (uint8[bodyLen + 2] & 0xff);
+      const lo = ((hi & 0xf) << 11) | (uint8[bodyLen + 3] << 3);
+      pieces.push(
+        sym(hi >> 19),
+        sym(hi >> 14),
+        sym(hi >> 9),
+        sym(hi >> 4),
+        sym(lo >> 10),
+        sym(lo >> 5),
+        sym(lo),
+        "="
+      );
+      break;
+    }
+    case 3: {
+      // 24 bits -> 5 characters (25 bits, 1 of them zero fill)
+      const bits =
+        ((uint8[bodyLen] & 0xff) << 17) |
+        ((uint8[bodyLen + 1] & 0xff) << 9) |
+        ((uint8[bodyLen + 2] & 0xff) << 1);
+      pieces.push(
+        sym(bits >> 20),
+        sym(bits >> 15),
+        sym(bits >> 10),
+        sym(bits >> 5),
+        sym(bits),
+        "==="
+      );
+      break;
+    }
+    case 2: {
+      // 16 bits -> 4 characters (20 bits, 4 of them zero fill)
+      const bits =
+        ((uint8[bodyLen] & 0xff) << 12) | ((uint8[bodyLen + 1] & 0xff) << 4);
+      pieces.push(
+        sym(bits >> 15),
+        sym(bits >> 10),
+        sym(bits >> 5),
+        sym(bits),
+        "===="
+      );
+      break;
+    }
+    case 1: {
+      // 8 bits -> 2 characters (10 bits, 2 of them zero fill)
+      const bits = (uint8[bodyLen] & 0xff) << 2;
+      pieces.push(sym(bits >> 5), sym(bits), "======");
+      break;
+    }
+  }
+
+  return pieces.join("");
+}
diff --git a/encoding/base32_test.ts b/encoding/base32_test.ts
@@ -0,0 +1,138 @@
+// Test cases copied from https://github.com/LinusU/base32-encode/blob/master/test.js
+// Copyright (c) 2016-2017 Linus Unnebäck. MIT license.
+// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
+import { test, runIfMain } from "../testing/mod.ts";
+import { assertEquals, assert } from "../testing/asserts.ts";
+import { encode, decode } from "./base32.ts";
+
+// Lifted from https://stackoverflow.com/questions/38987784
+// Parses a hex string into bytes, two hex digits per byte (a trailing single
+// digit becomes its own byte). `match` returns null when nothing matches
+// (e.g. for an empty string), so fall back to an empty list instead of
+// throwing a TypeError on `.map`.
+const fromHexString = (hexString: string): Uint8Array => {
+  const pairs: string[] = hexString.match(/.{1,2}/g) || [];
+  return new Uint8Array(pairs.map((pair): number => parseInt(pair, 16)));
+};
+// Renders bytes as lower-case hex, two digits per byte.
+const toHexString = (bytes: Uint8Array): string =>
+  Array.from(bytes, (byte): string => byte.toString(16).padStart(2, "0")).join(
+    ""
+  );
+
+// Each entry is a [hex, base32] pair: the binary input written as a hex
+// string and its expected padded base32 text. The encode test checks
+// hex -> base32 and the decode test checks base32 -> hex.
+const testCases = [
+  ["73", "OM======"],
+  ["f80c", "7AGA===="],
+  ["6450", "MRIA===="],
+  ["cc91d0", "ZSI5A==="],
+  ["6c60c0", "NRQMA==="],
+  ["4f6a23", "J5VCG==="],
+  ["88b44f18", "RC2E6GA="],
+  ["90bad04714", "SC5NARYU"],
+  ["e9ef1def8086", "5HXR334AQY======"],
+  ["83fe3f9c1e9302", "QP7D7HA6SMBA===="],
+  ["15aa1f7cafc17cb8", "CWVB67FPYF6LQ==="],
+  ["da51d4fed48b4c32dc", "3JI5J7WURNGDFXA="],
+  ["c4be14228512d7299831", "YS7BIIUFCLLSTGBR"],
+  ["2f273c5b5ef04724fab944", "F4TTYW266BDSJ6VZIQ======"],
+  ["969da1b80ec2442d2bdd4bdb", "S2O2DOAOYJCC2K65JPNQ===="],
+  ["31f5adb50792f549d3714f3f99", "GH223NIHSL2UTU3RJ47ZS==="],
+  ["6a654f7a072c29951930700c0a61", "NJSU66QHFQUZKGJQOAGAUYI="],
+  ["0fe29d6825ad999e87d9b7cac3589d", "B7RJ22BFVWMZ5B6ZW7FMGWE5"],
+  ["0f960ab44e165973a5172ccd294b3412", "B6LAVNCOCZMXHJIXFTGSSSZUCI======"],
+  ["325b9fd847a41fb0d485c207a1a5b02dcf", "GJNZ7WCHUQP3BVEFYID2DJNQFXHQ===="],
+  ["ddf80ebe21bf1b1e12a64c5cc6a74b5d92dd", "3X4A5PRBX4NR4EVGJROMNJ2LLWJN2==="],
+  [
+    "c0cae52c6f641ce04a7ee5b9a8fa8ded121bca",
+    "YDFOKLDPMQOOAST64W42R6UN5UJBXSQ="
+  ],
+  [
+    "872840a355c8c70586f462c9e669ee760cb3537e",
+    "Q4UEBI2VZDDQLBXUMLE6M2POOYGLGU36"
+  ],
+  [
+    "5773fe22662818a120c5688824c935fe018208a496",
+    "K5Z74ITGFAMKCIGFNCECJSJV7YAYECFESY======"
+  ],
+  [
+    "416e23abc524d1b85736e2bea6cfecd5192789034a28",
+    "IFXCHK6FETI3QVZW4K7KNT7M2UMSPCIDJIUA===="
+  ],
+  [
+    "83d2386ebdd7e8e818ec00e3ccd882aa933b905b7e2e44",
+    "QPJDQ3V527UOQGHMADR4ZWECVKJTXEC3PYXEI==="
+  ],
+  [
+    "a2fa8b881f3b8024f52745763c4ae08ea12bdf8bef1a72f8",
+    "UL5IXCA7HOACJ5JHIV3DYSXAR2QSXX4L54NHF6A="
+  ],
+  [
+    "b074ae8b9efde0f17f37bccadde006d039997b59c8efb05add",
+    "WB2K5C467XQPC7ZXXTFN3YAG2A4ZS62ZZDX3AWW5"
+  ],
+  [
+    "764fef941aee7e416dc204ae5ab9c5b9ce644567798e6849aea9",
+    "OZH67FA25Z7EC3OCASXFVOOFXHHGIRLHPGHGQSNOVE======"
+  ],
+  [
+    "4995d9811f37f59797d7c3b9b9e5325aa78277415f70f4accf588c",
+    "JGK5TAI7G72ZPF6XYO43TZJSLKTYE52BL5YPJLGPLCGA===="
+  ],
+  [
+    "24f0812ca8eed58374c11a7008f0b262698b72fd2792709208eaacb2",
+    "ETYICLFI53KYG5GBDJYAR4FSMJUYW4X5E6JHBEQI5KWLE==="
+  ],
+  [
+    "d70692543810d4bf50d81cf44a55801a557a388a341367c7ea077ca306",
+    "24DJEVBYCDKL6UGYDT2EUVMADJKXUOEKGQJWPR7KA56KGBQ="
+  ],
+  [
+    "6e08a89ca36b677ff8fe99e68a1241c8d8cef2570a5f60b6417d2538b30c",
+    "NYEKRHFDNNTX76H6THTIUESBZDMM54SXBJPWBNSBPUSTRMYM"
+  ],
+  [
+    "f2fc2319bd29457ccd01e8e194ee9bd7e97298b6610df4ab0f3d5baa0b2d7ccf69829edb74edef",
+    "6L6CGGN5FFCXZTIB5DQZJ3U327UXFGFWMEG7JKYPHVN2UCZNPTHWTAU63N2O33Y="
+  ]
+];
+
+// Every hex input must encode to its expected base32 text.
+test({
+  name: "[encoding.base32] encode",
+  fn(): void {
+    testCases.forEach(([hex, expected]): void => {
+      assertEquals(encode(fromHexString(hex)), expected);
+    });
+  }
+});
+
+// Every base32 text must decode back to its original hex input.
+test({
+  name: "[encoding.base32] decode",
+  fn(): void {
+    testCases.forEach(([expectedHex, encoded]): void => {
+      assertEquals(toHexString(decode(encoded)), expectedHex);
+    });
+  }
+});
+
+// Input whose length is not a multiple of 8 must be rejected.
+test({
+  name: "[encoding.base32] decode bad length",
+  fn(): void {
+    let threw = false;
+    try {
+      decode("OOOO==");
+    } catch (err) {
+      const wanted = "Invalid string. Length must be a multiple of 8";
+      assert(err.message.includes(wanted));
+      threw = true;
+    }
+    // Fails the test when decode returned normally.
+    assert(threw);
+  }
+});
+
+// A correctly sized input with an impossible padding count (2) must be
+// rejected.
+test({
+  name: "[encoding.base32] decode bad padding",
+  fn(): void {
+    let threw = false;
+    try {
+      decode("OOOOOO==");
+    } catch (err) {
+      const wanted = "Invalid pad length";
+      assert(err.message.includes(wanted));
+      threw = true;
+    }
+    // Fails the test when decode returned normally.
+    assert(threw);
+  }
+});
+
+runIfMain(import.meta);