Skip to content

Commit

Permalink
feat(std/encoding): add ascii85 module (denoland#6711)
Browse files Browse the repository at this point in the history
  • Loading branch information
oplik0 committed Jul 14, 2020
1 parent d49a021 commit e5724e6
Show file tree
Hide file tree
Showing 3 changed files with 363 additions and 0 deletions.
56 changes: 56 additions & 0 deletions std/encoding/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

Helper module for dealing with external data structures.

- [`ascii85`](#ascii85)
- [`base32`](#base32)
- [`binary`](#binary)
- [`csv`](#csv)
Expand Down Expand Up @@ -322,3 +323,58 @@ console.log(binaryData);
console.log(encode(binaryData));
// => RC2E6GA=
```

## ascii85

Ascii85/base85 encoder and decoder with support for multiple standards.

### Basic usage

`encode` encodes a `Uint8Array` to an ascii85 representation, and `decode`
decodes the given ascii85 representation to a `Uint8Array`.

```ts
import { encode, decode } from "https://deno.land/std/encoding/ascii85.ts";

const a85Repr = "LpTqp";

const binaryData = decode(a85Repr);
console.log(binaryData);
// => Uint8Array [ 136, 180, 79, 24 ]

console.log(encode(binaryData));
// => LpTqp
```

### Specifying a standard and delimiter

By default, all functions use the most popular Adobe version of ascii85 and do
not add any delimiter. However, three more standards are supported: btoa
(different delimiter and additional compression of 4 bytes equal to 32),
[Z85](https://rfc.zeromq.org/spec/32/) and
[RFC 1924](https://tools.ietf.org/html/rfc1924). It's possible to use a
different encoding by specifying it in the `options` object passed as the
second parameter.

Similarly, it's possible to make `encode` add a delimiter by setting
`delimiter: true` in `options`: `<~` and `~>` for Adobe, or `xbtoa Begin` and
`xbtoa End` (with newlines between the delimiters and the encoded data) for
btoa. Checksums for btoa are not supported. Delimiters are not supported by
other encodings.

Encoding examples:

```ts
import { encode, decode } from "https://deno.land/std/encoding/ascii85.ts";
const binaryData = new Uint8Array([136, 180, 79, 24]);
console.log(encode(binaryData));
// => LpTqp
console.log(encode(binaryData, { standard: "Adobe", delimiter: true }));
// => <~LpTqp~>
console.log(encode(binaryData, { standard: "btoa", delimiter: true }));
/* => xbtoa Begin
LpTqp
xbtoa End */
console.log(encode(binaryData, { standard: "RFC 1924" }));
// => h_p`_
console.log(encode(binaryData, { standard: "Z85" }));
// => H{P}{
```
129 changes: 129 additions & 0 deletions std/encoding/ascii85.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
/** This module is browser compatible. */

/** Names of the ascii85 variants accepted by `encode` and `decode`. */
export type Ascii85Standard = "Adobe" | "btoa" | "RFC 1924" | "Z85";
/**
* Encoding/decoding options.
* @property standard - character set and delimiter style (if supported and used). Defaults to Adobe
* @property delimiter - whether `encode` wraps its output in delimiters, if the standard supports them:
*   "<~" and "~>" for Adobe, "xbtoa Begin" and "xbtoa End" lines for btoa
*/
export interface Ascii85Options {
standard?: Ascii85Standard;
delimiter?: boolean;
}
// Alphabet used for the "RFC 1924" standard: the character at index i
// represents the base-85 digit i.
const rfc1924 =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
// Alphabet used for the "Z85" standard, indexed the same way.
const Z85 =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#";
/**
 * Encodes a given Uint8Array into ascii85, supports multiple standards.
 *
 * The input is processed in big-endian 32-bit groups, each producing five
 * symbols. Input whose length is not a multiple of 4 is zero-padded for the
 * computation and the symbols produced by the padding are dropped again.
 *
 * @param uint8 input to encode (may be a view into a larger buffer)
 * @param [options] encoding options
 * @param [options.standard=Adobe] encoding standard (Adobe, btoa, RFC 1924 or Z85)
 * @param [options.delimiter] whether to use a delimiter, if supported by encoding standard
 *   (only Adobe and btoa add one; it is ignored for the other standards)
 * @returns the ascii85 representation of `uint8`
 */
export function encode(uint8: Uint8Array, options?: Ascii85Options): string {
  const standard = options?.standard ?? "Adobe";
  const output: string[] = [];
  let v: number,
    n = 0,
    difference = 0;
  if (uint8.length % 4 !== 0) {
    // pad a copy with zero bytes up to the next multiple of 4
    const tmp = uint8;
    difference = 4 - (tmp.length % 4);
    uint8 = new Uint8Array(tmp.length + difference);
    uint8.set(tmp);
  }
  // The view must cover exactly the bytes of `uint8`: when the caller passes a
  // subarray, `uint8.buffer` is the whole backing buffer and starts earlier.
  const view = new DataView(uint8.buffer, uint8.byteOffset, uint8.byteLength);
  for (let i = 0, len = uint8.length; i < len; i += 4) {
    v = view.getUint32(i);
    // Adobe and btoa standards compress 4 zeroes to single "z" character
    // (never applied to the final group when it contains padding)
    if (
      (standard === "Adobe" || standard === "btoa") &&
      v === 0 &&
      i < len - difference - 3
    ) {
      output[n++] = "z";
      continue;
    }
    // btoa compresses 4 spaces - that is, bytes equal to 32 - into single "y" character
    if (standard === "btoa" && v === 538976288) {
      output[n++] = "y";
      continue;
    }
    // write the five base-85 digits of the group, least significant digit last
    for (let j = 4; j >= 0; j--) {
      output[n + j] = String.fromCharCode((v % 85) + 33);
      v = Math.trunc(v / 85);
    }
    n += 5;
  }
  // drop the symbols that only exist because of the zero padding
  let symbols = output.slice(0, output.length - difference);
  // the loop above emits Adobe/btoa symbols; translate them for the other alphabets
  if (standard === "RFC 1924") {
    symbols = symbols.map((val) => rfc1924[val.charCodeAt(0) - 33]);
  } else if (standard === "Z85") {
    symbols = symbols.map((val) => Z85[val.charCodeAt(0) - 33]);
  }
  const text = symbols.join("");
  if (options?.delimiter) {
    if (standard === "Adobe") {
      return `<~${text}~>`;
    }
    if (standard === "btoa") {
      return `xbtoa Begin\n${text}\nxbtoa End`;
    }
  }
  return text;
}
/**
 * Decodes a given ascii85 encoded string.
 *
 * Decoding is lenient: Adobe (`<~`, `~>`) and btoa (`xbtoa Begin`,
 * `xbtoa End`) delimiters are stripped when present, and any character that is
 * not part of the selected alphabet is dropped. `options.delimiter` is not
 * consulted. No range check is performed on the value of a 5-symbol group.
 *
 * @param ascii85 input to decode
 * @param [options] decoding options
 * @param [options.standard=Adobe] encoding standard used in the input string (Adobe, btoa, RFC 1924 or Z85)
 * @returns the decoded bytes
 */
export function decode(ascii85: string, options?: Ascii85Options): Uint8Array {
  const encoding = options?.standard ?? "Adobe";
  // translate all encodings to most basic adobe/btoa one and decompress some special characters ("z" and "y")
  // (global-regex `replace` is used instead of `replaceAll`, which needs ES2021)
  switch (encoding) {
    case "Adobe":
      ascii85 = ascii85.replace(/(<~|~>)/g, "").replace(/z/g, "!!!!!");
      break;
    case "btoa":
      ascii85 = ascii85
        .replace(/(xbtoa Begin|xbtoa End|\n)/g, "")
        .replace(/z/g, "!!!!!")
        .replace(/y/g, "+<VdL");
      break;
    case "RFC 1924":
      // characters outside the alphabet give index -1, i.e. char code 32,
      // which the filter below removes
      ascii85 = ascii85.replace(/./g, (match) =>
        String.fromCharCode(rfc1924.indexOf(match) + 33)
      );
      break;
    case "Z85":
      ascii85 = ascii85.replace(/./g, (match) =>
        String.fromCharCode(Z85.indexOf(match) + 33)
      );
      break;
  }
  //remove all invalid characters
  ascii85 = ascii85.replace(/[^!-u]/g, "");
  const len = ascii85.length;
  // every (possibly partial) group of 5 symbols is written as 4 bytes
  const output = new Uint8Array(4 * Math.ceil(len / 5));
  const view = new DataView(output.buffer);
  let v = 0,
    n = 0,
    max = 0;
  for (let i = 0; i < len; ) {
    for (max += 5; i < max; i++) {
      // a short final group is padded with "u" (char code 117), the highest digit
      v = v * 85 + (i < len ? ascii85.charCodeAt(i) : 117) - 33;
    }
    view.setUint32(n, v);
    v = 0;
    n += 4;
  }
  // 5 symbols carry 4 bytes; this also cuts off the bytes that came from padding
  return output.slice(0, Math.trunc(len * 0.8));
}
178 changes: 178 additions & 0 deletions std/encoding/ascii85_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { assertEquals } from "../testing/asserts.ts";
import { encode, decode, Ascii85Standard } from "./ascii85.ts";
// Maps a standard to its list of [plain text, expected ascii85 text] pairs;
// a standard may be left out.
type TestCases = Partial<{ [index in Ascii85Standard]: string[][] }>;
// Used to turn the plain-text side of each test case into bytes.
const utf8encoder = new TextEncoder();
/**
 * Test vectors without delimiters. Each entry is a pair of
 * [plain text, expected ascii85 text] for the given standard.
 */
const testCasesNoDelimeter: TestCases = {
  Adobe: [
    ["test", "FCfN8"],
    ["ascii85", "@<5pmBfIs"],
    ["Hello world!", "87cURD]j7BEbo80"],
    //wikipedia example
    [
      "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.",
      "9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF\"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c",
    ],
    ["", ""],
    ["\0", "!!"],
    ["\0\0", "!!!"],
    ["\0\0\0", "!!!!"],
    //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z"
    ["\0\0\0\0", "z"],
    ["\0\0\0\0\0", "z!!"],
    //4 spaces - Adobe has no special character for them
    ["    ", "+<VdL"],
  ],
  btoa: [
    ["test", "FCfN8"],
    ["ascii85", "@<5pmBfIs"],
    ["Hello world!", "87cURD]j7BEbo80"],
    //wikipedia example
    [
      "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.",
      "9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF\"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c",
    ],
    ["", ""],
    ["\0", "!!"],
    ["\0\0", "!!!"],
    ["\0\0\0", "!!!!"],
    //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z"
    ["\0\0\0\0", "z"],
    ["\0\0\0\0\0", "z!!"],
    //special btoa test case - 4 spaces should become "y"
    ["    ", "y"],
  ],
  "RFC 1924": [
    ["test", "bY*jN"],
    ["ascii85", "VRK_?X*e|"],
    ["Hello world!", "NM&qnZy<MXa%^NF"],
    //wikipedia example
    [
      "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.",
      "O<`^zX>%ZCX>)XGZfA9Ab7*B`EFf-gbRchTY<VDJc_3(Mb0BhMVRLV8EFfZabRc4RAarPHb0BkRZfA9DVR9gFVRLh7Z*CxFa&K)QZ**v7av))DX>DO_b1WctXlY|;AZc?TVIXXEb95kYW*~HEWgu;7Ze%PVbZB98AYyqSVIXj2a&u*NWpZI|V`U(3W*}r`Y-wj`bRcPNAarPDAY*TCbZKsNWn>^>Ze$>7Ze(R<VRUI{VPb4$AZKN6WpZJ3X>V>IZ)PBCZf|#NWn^b%EFfigV`XJzb0BnRWgv5CZ*p`Xc4cT~ZDnp_Wgu^6AYpEKAY);2ZeeU7aBO8^b9HiME&",
    ],
    ["", ""],
    ["\0", "00"],
    ["\0\0", "000"],
    ["\0\0\0", "0000"],
    ["\0\0\0\0", "00000"],
    ["\0\0\0\0\0", "0000000"],
    //4 spaces
    ["    ", "ARr(h"],
  ],
  Z85: [
    ["test", "By/Jn"],
    ["ascii85", "vrk{)x/E%"],
    ["Hello world!", "nm=QNzY<mxA+]nf"],
    //wikipedia example
    [
      "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.",
      "o<}]Zx(+zcx(!xgzFa9aB7/b}efF?GBrCHty<vdjC{3^mB0bHmvrlv8efFzABrC4raARphB0bKrzFa9dvr9GfvrlH7z/cXfA=k!qz//V7AV!!dx(do{B1wCTxLy%&azC)tvixxeB95Kyw/#hewGU&7zE+pvBzb98ayYQsvixJ2A=U/nwPzi%v}u^3w/$R}y?WJ}BrCpnaARpday/tcBzkSnwN(](zE:(7zE^r<vrui@vpB4:azkn6wPzj3x(v(iz!pbczF%-nwN]B+efFIGv}xjZB0bNrwGV5cz/P}xC4Ct#zdNP{wGU]6ayPekay!&2zEEu7Abo8]B9hIme=",
    ],
    ["", ""],
    ["\0", "00"],
    ["\0\0", "000"],
    ["\0\0\0", "0000"],
    ["\0\0\0\0", "00000"],
    ["\0\0\0\0\0", "0000000"],
    //4 spaces
    ["    ", "arR^H"],
  ],
};
/**
 * Test vectors with delimiters, for the standards that support them. Each
 * entry is a pair of [plain text, expected delimited ascii85 text].
 */
const testCasesDelimeter: TestCases = {
  Adobe: [
    ["test", "<~FCfN8~>"],
    ["ascii85", "<~@<5pmBfIs~>"],
    ["Hello world!", "<~87cURD]j7BEbo80~>"],
    //wikipedia example
    [
      "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.",
      "<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF\"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>",
    ],
    ["", "<~~>"],
    ["\0", "<~!!~>"],
    ["\0\0", "<~!!!~>"],
    ["\0\0\0", "<~!!!!~>"],
    //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z"
    ["\0\0\0\0", "<~z~>"],
    ["\0\0\0\0\0", "<~z!!~>"],
    //4 spaces - Adobe has no special character for them
    ["    ", "<~+<VdL~>"],
  ],
  btoa: [
    ["test", "xbtoa Begin\nFCfN8\nxbtoa End"],
    ["ascii85", "xbtoa Begin\n@<5pmBfIs\nxbtoa End"],
    ["Hello world!", "xbtoa Begin\n87cURD]j7BEbo80\nxbtoa End"],
    //wikipedia example
    [
      "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.",
      "xbtoa Begin\n9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF\"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKYi(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIal(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c\nxbtoa End",
    ],
    ["", "xbtoa Begin\n\nxbtoa End"],
    ["\0", "xbtoa Begin\n!!\nxbtoa End"],
    ["\0\0", "xbtoa Begin\n!!!\nxbtoa End"],
    ["\0\0\0", "xbtoa Begin\n!!!!\nxbtoa End"],
    //special Adobe and btoa test cases - 4 bytes equal to 0 should become a "z"
    ["\0\0\0\0", "xbtoa Begin\nz\nxbtoa End"],
    ["\0\0\0\0\0", "xbtoa Begin\nz!!\nxbtoa End"],
    //special btoa test case - 4 spaces should become "y"
    ["    ", "xbtoa Begin\ny\nxbtoa End"],
  ],
};

// Register an encode test and a decode test for every standard that has
// vectors without delimiters.
for (const [name, cases] of Object.entries(testCasesNoDelimeter)) {
  if (cases === undefined) continue;
  // Object.entries yields plain string keys; they are the standard names
  const standard = name as Ascii85Standard;

  Deno.test({
    name: `[encoding/ascii85] encode ${name}`,
    fn(): void {
      cases.forEach(([plain, encoded]) => {
        const actual = encode(utf8encoder.encode(plain), { standard });
        assertEquals(actual, encoded);
      });
    },
  });

  Deno.test({
    name: `[encoding/ascii85] decode ${name}`,
    fn(): void {
      cases.forEach(([plain, encoded]) => {
        const actual = decode(encoded, { standard });
        assertEquals(actual, utf8encoder.encode(plain));
      });
    },
  });
}
// Register an encode test and a decode test for every standard that has
// vectors with delimiters.
for (const [name, cases] of Object.entries(testCasesDelimeter)) {
  if (cases === undefined) continue;
  // Object.entries yields plain string keys; they are the standard names
  const standard = name as Ascii85Standard;

  Deno.test({
    name: `[encoding/ascii85] encode ${name} with delimeter`,
    fn(): void {
      cases.forEach(([plain, encoded]) => {
        const actual = encode(utf8encoder.encode(plain), {
          standard,
          delimiter: true,
        });
        assertEquals(actual, encoded);
      });
    },
  });

  Deno.test({
    name: `[encoding/ascii85] decode ${name} with delimeter`,
    fn(): void {
      cases.forEach(([plain, encoded]) => {
        const actual = decode(encoded, { standard, delimiter: true });
        assertEquals(actual, utf8encoder.encode(plain));
      });
    },
  });
}

0 comments on commit e5724e6

Please sign in to comment.