Skip to content

Commit

Permalink
Use alternate TextEncoder/TextDecoder implementation (denoland#1281)
Browse files Browse the repository at this point in the history
This is faster and smaller.
  • Loading branch information
kitsonk authored and ry committed Dec 6, 2018
1 parent 60c008d commit 6cc89b9
Show file tree
Hide file tree
Showing 9 changed files with 366 additions and 42 deletions.
1 change: 1 addition & 0 deletions js/blob.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright 2018 the Deno authors. All rights reserved. MIT license.
import * as domTypes from "./dom_types";
import { containsOnlyASCII } from "./util";
import { TextEncoder } from "./text_encoding";

const bytesSymbol = Symbol("bytes");

Expand Down
2 changes: 2 additions & 0 deletions js/dom_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ See the Apache Version 2.0 License for specific language governing permissions
and limitations under the License.
*******************************************************************************/

/** Binary input given either as an `ArrayBufferView` or as a raw `ArrayBuffer`. */
export type BufferSource = ArrayBufferView | ArrayBuffer;

export type HeadersInit =
| Headers
| Array<[string, string]>
Expand Down
2 changes: 1 addition & 1 deletion js/fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as flatbuffers from "./flatbuffers";
import { sendAsync } from "./dispatch";
import * as msg from "gen/msg_generated";
import * as domTypes from "./dom_types";
import { TextDecoder } from "./text_encoding";
import { TextDecoder, TextEncoder } from "./text_encoding";
import { DenoBlob } from "./blob";
import { Headers } from "./headers";
import * as io from "./io";
Expand Down
6 changes: 2 additions & 4 deletions js/globals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ import { libdeno } from "./libdeno";
// Ambient declarations that make the names below type-check as globals.
declare global {
const console: consoleTypes.Console;
const setTimeout: typeof timers.setTimeout;
// The PascalCase constant name below needs the variable-name lint rule
// switched off for that one line.
// tslint:disable-next-line:variable-name
const TextEncoder: typeof textEncoding.TextEncoder;
}

// A reference to the global object.
Expand Down Expand Up @@ -69,7 +67,7 @@ export type Headers = domTypes.Headers;
// Assign the FormData implementation onto `window` and re-export its type.
window.FormData = formData.FormData as domTypes.FormDataConstructor;
export type FormData = domTypes.FormData;

// NOTE(review): the two-line comment below says the types need not be
// included here, yet each assignment is followed by an `export type` line.
// It looks like leftover text from an earlier revision - confirm and drop.
// While these are classes, they have their global instance types created in
// other type definitions, therefore we do not have to include them here.
window.TextEncoder = textEncoding.TextEncoder;
export type TextEncoder = textEncoding.TextEncoder;
window.TextDecoder = textEncoding.TextDecoder;
export type TextDecoder = textEncoding.TextDecoder;
330 changes: 312 additions & 18 deletions js/text_encoding.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
// Copyright 2018 the Deno authors. All rights reserved. MIT license.
// The following code is based off of text-encoding at:
// https://github.com/inexorabletash/text-encoding
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

import * as base64 from "base64-js";
import * as domTypes from "./dom_types";
import { DenoError, ErrorKind } from "./errors";

/** Decodes a string of data which has been encoded using base-64. */
Expand Down Expand Up @@ -43,29 +67,299 @@ export function btoa(s: string): string {
return result;
}

// @types/text-encoding relies on lib.dom.d.ts for some interfaces. We do not
// want to include lib.dom.d.ts (due to size) into deno's global type scope.
// Therefore this hack: add a few of the missing interfaces in
// @types/text-encoding to the global scope before importing.
/** Contract for a decoder that is driven one byte at a time. */
interface Decoder {
// Receives the source stream and the byte just read from it. The UTF-8
// implementation in this file returns a decoded code point, `CONTINUE` when
// it needs more bytes, or `FINISHED` at end of stream; it may also push a
// byte back onto `stream`. The `number[]` variant is permitted by the type
// but not produced by that implementation.
handler(stream: Stream, byte: number): number | number[] | null;
}

/** Contract for an encoder that is driven one code point at a time. */
interface Encoder {
// Receives a single code point (or `END_OF_STREAM`). The UTF-8
// implementation in this file returns one byte, an array of bytes, or
// `FINISHED` once the end of the input has been reached.
handler(codePoint: number): number | number[];
}

// Returned by a decoder handler when it consumed a byte but has no code point
// to emit yet.
const CONTINUE = null;
// Returned by `Stream.read()` when no tokens remain.
const END_OF_STREAM = -1;
// Returned by encoder/decoder handlers to tell the driving loop to stop.
// Note that it shares the value -1 with END_OF_STREAM.
const FINISHED = -1;

/**
 * Builds a string from a list of Unicode code points.
 *
 * Each code point is converted with `String.fromCodePoint` and the pieces are
 * joined in order; an empty list yields the empty string.
 */
function codePointsToString(codePoints: number[]): string {
  const pieces = Array.from(codePoints, value => String.fromCodePoint(value));
  return pieces.join("");
}

/**
 * Reports a decoding failure according to the decoder's error mode.
 *
 * @param fatal When `true` the failure is raised instead of being replaced.
 * @returns 0xFFFD, the substitute code point, when `fatal` is `false`.
 * @throws TypeError when `fatal` is `true`.
 */
function decoderError(fatal: boolean): number | never {
  if (!fatal) {
    return 0xfffd; // default code point
  }
  throw new TypeError("Decoder error.");
}

/** Returns `true` when `a` lies inside the closed interval [`min`, `max`]. */
function inRange(a: number, min: number, max: number) {
  return a >= min && max >= a;
}

/**
 * Converts a string into the sequence of Unicode scalar values it contains.
 *
 * Iterating a string yields one element per code point, so a well-formed
 * surrogate pair arrives as a single value of 0x10000 or above. Any value that
 * still falls in the surrogate range (0xD800-0xDFFF) is therefore an unpaired
 * surrogate; it is replaced with U+FFFD so that the UTF-8 encoder never emits
 * an ill-formed three-byte surrogate sequence.
 *
 * @param input The string to convert.
 * @returns The scalar values of `input`, in order.
 */
function stringToCodePoints(input: string): number[] {
  const u: number[] = [];
  for (const c of input) {
    const cp = c.codePointAt(0)!;
    // Unpaired surrogates are not scalar values: substitute U+FFFD.
    u.push(cp >= 0xd800 && cp <= 0xdfff ? 0xfffd : cp);
  }
  return u;
}

/**
 * A consumable queue of numeric tokens (bytes or code points).
 *
 * Tokens are stored in reverse order so that the next token to be read sits
 * at the end of the backing array.
 */
class Stream {
  private _tokens: number[];

  constructor(tokens: number[] | Uint8Array) {
    // Copy into a plain array, then flip it so reads can pop from the end.
    const copy: number[] = Array.prototype.slice.call(tokens);
    copy.reverse();
    this._tokens = copy;
  }

  /** Returns `true` once every token has been consumed. */
  endOfStream(): boolean {
    return this._tokens.length === 0;
  }

  /** Removes and returns the next token, or `END_OF_STREAM` if none remain. */
  read(): number {
    if (this._tokens.length === 0) {
      return END_OF_STREAM;
    }
    return this._tokens.pop()!;
  }

  /**
   * Puts one or more tokens at the front of the stream so they are read next.
   * An array argument is emptied in the process; its first element is the
   * first one read back.
   */
  prepend(token: number | number[]): void {
    if (!Array.isArray(token)) {
      this._tokens.push(token);
      return;
    }
    while (token.length > 0) {
      const last = token.pop()!;
      this._tokens.push(last);
    }
  }

  /**
   * Adds one or more tokens at the back of the stream so they are read after
   * everything already queued. An array argument is emptied in the process;
   * its elements are read back in their original order.
   */
  push(token: number | number[]): void {
    if (!Array.isArray(token)) {
      this._tokens.unshift(token);
      return;
    }
    while (token.length > 0) {
      const first = token.shift()!;
      this._tokens.unshift(first);
    }
  }
}

/**
 * Incremental UTF-8 decoder: fed one byte at a time, it emits one code point
 * per complete sequence.
 *
 * State kept between calls:
 *  - `_codePoint`: bits accumulated so far for the sequence in progress.
 *  - `_bytesNeeded` / `_bytesSeen`: continuation bytes expected / consumed.
 *  - `_lowerBoundary` / `_upperBoundary`: inclusive range allowed for the
 *    next continuation byte. Normally 0x80-0xBF, narrowed after certain lead
 *    bytes to reject overlong forms, surrogates and values above U+10FFFF.
 */
class UTF8Decoder implements Decoder {
  private _codePoint = 0;
  private _bytesSeen = 0;
  private _bytesNeeded = 0;
  private _fatal: boolean;
  private _lowerBoundary = 0x80;
  private _upperBoundary = 0xbf;

  /**
   * @param options `fatal: true` makes malformed input throw instead of
   *   producing U+FFFD.
   */
  constructor(options = { fatal: false }) {
    this._fatal = options.fatal;
  }

  /**
   * Processes a single byte.
   *
   * @param stream Source stream; an offending byte is pushed back onto it so
   *   that it is re-examined as the start of a new sequence.
   * @param byte The byte just read, or `END_OF_STREAM`.
   * @returns A decoded code point, `CONTINUE` when more bytes are required,
   *   `FINISHED` at a clean end of stream, or U+FFFD for malformed input in
   *   non-fatal mode.
   * @throws TypeError on malformed input when the decoder is fatal.
   */
  handler(stream: Stream, byte: number): number | null {
    // Input ended in the middle of a multi-byte sequence.
    if (byte === END_OF_STREAM && this._bytesNeeded !== 0) {
      this._bytesNeeded = 0;
      return decoderError(this._fatal);
    }

    if (byte === END_OF_STREAM) {
      return FINISHED;
    }

    if (this._bytesNeeded === 0) {
      if (inRange(byte, 0x00, 0x7f)) {
        // Single byte code point
        return byte;
      } else if (inRange(byte, 0xc2, 0xdf)) {
        // Two byte code point
        this._bytesNeeded = 1;
        this._codePoint = byte & 0x1f;
      } else if (inRange(byte, 0xe0, 0xef)) {
        // Three byte code point
        if (byte === 0xe0) {
          // Rejects overlong encodings of values below U+0800.
          this._lowerBoundary = 0xa0;
        } else if (byte === 0xed) {
          // Rejects the surrogate range U+D800-U+DFFF.
          this._upperBoundary = 0x9f;
        }
        this._bytesNeeded = 2;
        this._codePoint = byte & 0xf;
      } else if (inRange(byte, 0xf0, 0xf4)) {
        // Four byte code point
        if (byte === 0xf0) {
          // Rejects overlong encodings of values below U+10000.
          this._lowerBoundary = 0x90;
        } else if (byte === 0xf4) {
          // Rejects values above U+10FFFF.
          this._upperBoundary = 0x8f;
        }
        this._bytesNeeded = 3;
        this._codePoint = byte & 0x7;
      } else {
        // Stray continuation byte or an invalid lead byte.
        return decoderError(this._fatal);
      }
      return CONTINUE;
    }

    if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) {
      // Byte out of range, so encoding error. Reset ALL sequence state,
      // including the boundaries: a range narrowed by the aborted lead byte
      // must not leak into the next sequence and reject valid input.
      this._codePoint = 0;
      this._bytesNeeded = 0;
      this._bytesSeen = 0;
      this._lowerBoundary = 0x80;
      this._upperBoundary = 0xbf;
      // Re-examine the offending byte as the start of a new sequence.
      stream.prepend(byte);
      return decoderError(this._fatal);
    }

    // Only the first continuation byte is subject to a narrowed range.
    this._lowerBoundary = 0x80;
    this._upperBoundary = 0xbf;

    // Append the six payload bits of the continuation byte.
    this._codePoint = (this._codePoint << 6) | (byte & 0x3f);

    this._bytesSeen++;

    if (this._bytesSeen !== this._bytesNeeded) {
      return CONTINUE;
    }

    // Sequence complete: emit the code point and reset for the next one.
    const codePoint = this._codePoint;

    this._codePoint = 0;
    this._bytesNeeded = 0;
    this._bytesSeen = 0;

    return codePoint;
  }
}

/**
 * UTF-8 encoder: turns one code point at a time into one byte (ASCII) or an
 * array holding a lead byte followed by continuation bytes.
 */
class UTF8Encoder implements Encoder {
// Encodes a single code point.
// Returns FINISHED for END_OF_STREAM, the code point itself for values up to
// 0x7f, otherwise an array of bytes. Throws a TypeError for values that match
// none of the ranges checked below.
handler(codePoint: number): number | number[] {
if (codePoint === END_OF_STREAM) {
return FINISHED;
}

// NOTE(review): the next two lines do not belong to this method; they look
// like removed-side diff residue from the page this text was taken from -
// confirm against the real file.
declare global {
type BufferSource = ArrayBufferView | ArrayBuffer;
if (inRange(codePoint, 0x00, 0x7f)) {
return codePoint;
}

// count: how many continuation bytes follow the lead byte.
// offset: the marker bits added to the lead byte for that sequence length.
let count: number;
let offset: number;
if (inRange(codePoint, 0x0080, 0x07ff)) {
count = 1;
offset = 0xc0;
} else if (inRange(codePoint, 0x0800, 0xffff)) {
count = 2;
offset = 0xe0;
} else if (inRange(codePoint, 0x10000, 0x10ffff)) {
count = 3;
offset = 0xf0;
} else {
throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`);
}

// Lead byte: the bits above the continuation payload plus the marker.
const bytes = [(codePoint >> (6 * count)) + offset];

// Continuation bytes: six payload bits each, most significant group first,
// each tagged with 0x80.
while (count > 0) {
const temp = codePoint >> (6 * (count - 1));
bytes.push(0x80 | (temp & 0x3f));
count--;
}

// NOTE(review): the next two lines also look like removed-side diff residue
// rather than part of this method - confirm against the real file.
interface TextDecodeOptions {
stream?: boolean;
return bytes;
}
}

interface TextDecoderOptions {
fatal?: boolean;
ignoreBOM?: boolean;
/** Options accepted by `TextDecoder.decode()`. */
export interface TextDecodeOptions {
// Typed as `false` only: `decode()` throws a TypeError when this is truthy.
stream?: false;
}

/** Options accepted by the `TextDecoder` constructor. */
export interface TextDecoderOptions {
// When truthy, the constructor sets the decoder's `fatal` flag to true.
fatal?: boolean;
// Typed as `false` only: the constructor throws a TypeError when this is
// truthy.
ignoreBOM?: false;
}

/**
 * UTF-8-only text decoder. The constructor rejects any other label and the
 * `ignoreBOM` option; `decode()` rejects the `stream` option.
 */
export class TextDecoder {
/** Returns encoding's name, lowercased. */
readonly encoding = "utf-8";
/** Returns `true` if error mode is "fatal", and `false` otherwise. */
readonly fatal: boolean = false;
/** Returns `true` if ignore BOM flag is set, and `false` otherwise. */
readonly ignoreBOM = false;

// Throws a TypeError when `label` is not "utf-8" or when `options.ignoreBOM`
// is truthy; sets `fatal` to true when `options.fatal` is truthy.
constructor(
label: "utf-8" = "utf-8",
options: TextDecoderOptions = { fatal: false }
) {
if (label !== "utf-8") {
throw new TypeError("Only UTF8 decoding supported.");
}
if (options.ignoreBOM) {
throw new TypeError("Ignoring the BOM not supported.");
}
if (options.fatal) {
this.fatal = true;
}
}

// NOTE(review): the next five lines do not belong to this class body; they
// look like removed-side diff residue from the page this text was taken
// from - confirm against the real file.
interface TextDecoder {
readonly encoding: string;
readonly fatal: boolean;
readonly ignoreBOM: boolean;
decode(input?: BufferSource, options?: TextDecodeOptions): string;
/** Returns the result of running encoding's decoder. */
decode(
input?: domTypes.BufferSource,
options: TextDecodeOptions = { stream: false }
): string {
if (options.stream) {
throw new TypeError("Stream not supported.");
}

// Normalise the input to a Uint8Array: wrap an ArrayBuffer directly, view the
// same bytes for an ArrayBuffer-backed view, and treat anything else
// (including a missing input) as empty.
// NOTE(review): a `null` input passes the `typeof ... === "object"` test and
// would make the `in` operator below throw - confirm whether null should be
// treated as empty instead.
let bytes: Uint8Array;
if (typeof input === "object" && input instanceof ArrayBuffer) {
bytes = new Uint8Array(input);
} else if (
typeof input === "object" &&
"buffer" in input &&
input.buffer instanceof ArrayBuffer
) {
bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
} else {
bytes = new Uint8Array(0);
}

const decoder = new UTF8Decoder({ fatal: this.fatal });
const inputStream = new Stream(bytes);
const output: number[] = [];

// Feed the decoder one byte per iteration until it reports FINISHED;
// CONTINUE results carry no code point and are skipped.
while (true) {
const result = decoder.handler(inputStream, inputStream.read());
if (result === FINISHED) {
break;
}

if (result !== CONTINUE) {
output.push(result);
}
}

// Drop a leading U+FEFF (byte order mark) from the decoded output.
if (output.length > 0 && output[0] === 0xfeff) {
output.shift();
}

return codePointsToString(output);
}
}

export { TextEncoder, TextDecoder } from "text-encoding";
/** Text encoder that always produces UTF-8. */
export class TextEncoder {
  /** Returns "utf-8". */
  readonly encoding = "utf-8";

  /**
   * Returns the result of running UTF-8's encoder.
   *
   * The input is split into code points, each code point is handed to the
   * encoder in turn, and the bytes it emits are collected into a new
   * `Uint8Array`. A missing input is treated as the empty string.
   */
  encode(input = ""): Uint8Array {
    const utf8 = new UTF8Encoder();
    const source = new Stream(stringToCodePoints(input));
    const collected: number[] = [];

    // Pull code points until the encoder signals that the stream is done.
    for (
      let emitted = utf8.handler(source.read());
      emitted !== FINISHED;
      emitted = utf8.handler(source.read())
    ) {
      if (Array.isArray(emitted)) {
        collected.push(...emitted);
      } else {
        collected.push(emitted);
      }
    }

    return new Uint8Array(collected);
  }
}
Loading

0 comments on commit 6cc89b9

Please sign in to comment.