Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: export chunks in std/io #3497

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions std/io/bufio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,93 @@ export class BufWriter implements Writer {
return nn;
}
}

/**
 * Generate the "longest proper prefix which is also a suffix" (LPS) table
 * for `pat`, as used by KMP matching.
 *
 * `lps[i]` is the length of the longest proper prefix of `pat[0..i]` that is
 * also a suffix of `pat[0..i]`.
 *
 * NOTE: the table is a Uint8Array, so a border length above 255 would wrap;
 * patterns with borders that long are not represented correctly.
 *
 * @param pat Pattern bytes. May be empty, in which case an empty table is
 *   returned.
 * @returns A table with the same length as `pat`.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays are zero-initialised, so lps[0] and every "no border" slot
  // are already 0 without explicit stores.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < pat.length) {
    if (pat[i] === pat[prefixEnd]) {
      // Current border extends by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No border to fall back to; lps[i] stays 0.
      i++;
    } else {
      // Fall back to the next shorter border. This must come from the LPS
      // table, not from the pattern bytes: indexing `pat` here yields a byte
      // value (or undefined), which corrupts prefixEnd and can loop forever.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}

/**
 * Read from `reader` until EOF and emit string chunks separated by the
 * given delimiter.
 *
 * The delimiter is matched on its encoded bytes and is not included in the
 * yielded chunks. When the reader reports EOF, whatever is still buffered is
 * yielded as the final chunk (this may be an empty string). If a read
 * returns a negative count, the generator stops without yielding the
 * remaining buffered data.
 *
 * @param reader Source to read bytes from.
 * @param delim Delimiter that separates chunks.
 */
export async function* chunks(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const textEncoder = new TextEncoder();
  const textDecoder = new TextDecoder();
  // Compare encoded bytes rather than characters to avoid unicode problems.
  const needle = textEncoder.encode(delim);
  const needleLen = needle.length;
  const needleLPS = createLPS(needle);

  // Bytes received so far that have not been emitted yet.
  let pending = new Deno.Buffer();
  const readBuf = new Uint8Array(Math.max(1024, needleLen + 1));

  // Modified KMP: both cursors survive across reads so a delimiter that is
  // split over two reads is still recognised.
  let scanPos = 0;
  let matched = 0;
  while (true) {
    const nread = await reader.read(readBuf);
    if (nread === Deno.EOF) {
      // Whatever is left is the last chunk.
      yield pending.toString();
      return;
    }
    if ((nread as number) < 0) {
      // Discard all remaining and silently fail.
      return;
    }
    await Deno.writeAll(pending, readBuf.subarray(0, nread as number));

    let view = pending.bytes();
    while (scanPos < view.length) {
      if (view[scanPos] !== needle[matched]) {
        if (matched === 0) {
          scanPos++;
        } else {
          // Retry the same input byte against a shorter border.
          matched = needleLPS[matched - 1];
        }
        continue;
      }

      scanPos++;
      matched++;
      if (matched !== needleLen) {
        continue;
      }

      // Full delimiter seen: everything before it is a finished chunk.
      const head = view.subarray(0, scanPos - needleLen);
      // Copy the tail so it no longer aliases the old buffer.
      const tail = view.slice(scanPos);
      yield textDecoder.decode(head);
      // Unlike plain KMP, restart matching from scratch on the tail.
      view = tail;
      scanPos = 0;
      matched = 0;
    }
    // scanPos and matched are intentionally kept for the next read.
    pending = new Deno.Buffer(view);
  }
}

/**
 * Read from `reader` until EOF and emit its content line by line.
 *
 * This is `chunks()` with "\n" as the delimiter; the newline itself is not
 * part of the yielded strings.
 */
export async function* lines(reader: Reader): AsyncIterableIterator<string> {
  const lineDelimiter = "\n";
  yield* chunks(reader, lineDelimiter);
}
27 changes: 27 additions & 0 deletions std/io/bufio_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import {
import {
BufReader,
BufWriter,
chunks,
lines,
BufferFullError,
UnexpectedEOFError
} from "./bufio.ts";
Expand Down Expand Up @@ -383,4 +385,29 @@ test(async function bufReaderReadFull(): Promise<void> {
}
});

// Checks that chunks() splits on a custom delimiter and that lines() splits
// on "\n", including the final piece that has no trailing delimiter.
test(async function chunksAndLines(): Promise<void> {
  const encoder = new TextEncoder();

  // chunks(): tab-separated input.
  const tabbedInput = new Buffer(
    encoder.encode("Hello World\tHello World 2\tHello World 3")
  );
  const gotChunks = [];
  for await (const piece of chunks(tabbedInput, "\t")) {
    gotChunks.push(piece);
  }
  assertEquals(gotChunks.length, 3);
  assertEquals(gotChunks, ["Hello World", "Hello World 2", "Hello World 3"]);

  // lines(): newline-separated digits.
  const numberedInput = new Buffer(
    encoder.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9")
  );
  const gotLines = [];
  for await (const line of lines(numberedInput)) {
    gotLines.push(line);
  }
  assertEquals(gotLines.length, 10);
  assertEquals(gotLines, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
});

// NOTE(review): presumably runs the tests registered above only when this
// module is the program entry point — confirm against runIfMain's definition.
runIfMain(import.meta);
91 changes: 2 additions & 89 deletions std/xeval/mod.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { parse } from "../flags/mod.ts";
const { Buffer, EOF, args, exit, stdin, writeAll } = Deno;
import { chunks } from "../io/bufio.ts";
const { args, exit, stdin } = Deno;
type Reader = Deno.Reader;

/* eslint-disable-next-line max-len */
Expand Down Expand Up @@ -27,94 +28,6 @@ export interface XevalOptions {

// A single newline. Presumably the chunk delimiter used when the caller does
// not supply one — confirm against xeval's option handling.
const DEFAULT_DELIMITER = "\n";

// Generate the "longest proper prefix which is also a suffix" (LPS) table
// for `pat`, as used by KMP matching.
//
// lps[i] is the length of the longest proper prefix of pat[0..i] that is
// also a suffix of pat[0..i]. The returned table has the same length as
// `pat`; an empty pattern yields an empty table.
//
// NOTE: the table is a Uint8Array, so a border length above 255 would wrap.
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays are zero-initialised, so lps[0] and every "no border" slot
  // are already 0 without explicit stores.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < pat.length) {
    if (pat[i] === pat[prefixEnd]) {
      // Current border extends by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No border to fall back to; lps[i] stays 0.
      i++;
    } else {
      // Fall back to the next shorter border. This must come from the LPS
      // table, not from the pattern bytes: indexing `pat` here yields a byte
      // value (or undefined), which corrupts prefixEnd and can loop forever.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}

// TODO(kevinkassimo): Move this utility somewhere public in deno_std.
// Import from there once doable.
//
// Read from `reader` until EOF and emit string chunks separated by the given
// delimiter. The delimiter is matched on its encoded bytes and is not part
// of the yielded chunks. At EOF the remaining buffered data is yielded as
// the final chunk (possibly an empty string); a negative read count stops
// the generator without yielding the remaining data.
async function* chunks(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const textEncoder = new TextEncoder();
  const textDecoder = new TextDecoder();
  // Compare encoded bytes rather than characters to avoid unicode problems.
  const needle = textEncoder.encode(delim);
  const needleLen = needle.length;
  const needleLPS = createLPS(needle);

  // Bytes received so far that have not been emitted yet.
  let pending = new Buffer();
  const readBuf = new Uint8Array(Math.max(1024, needleLen + 1));

  // Modified KMP: both cursors survive across reads so a delimiter that is
  // split over two reads is still recognised.
  let scanPos = 0;
  let matched = 0;
  while (true) {
    const nread = await reader.read(readBuf);
    if (nread === EOF) {
      // Whatever is left is the last chunk.
      yield pending.toString();
      return;
    }
    if ((nread as number) < 0) {
      // Discard all remaining and silently fail.
      return;
    }
    await writeAll(pending, readBuf.subarray(0, nread as number));

    let view = pending.bytes();
    while (scanPos < view.length) {
      if (view[scanPos] !== needle[matched]) {
        if (matched === 0) {
          scanPos++;
        } else {
          // Retry the same input byte against a shorter border.
          matched = needleLPS[matched - 1];
        }
        continue;
      }

      scanPos++;
      matched++;
      if (matched !== needleLen) {
        continue;
      }

      // Full delimiter seen: everything before it is a finished chunk.
      const head = view.subarray(0, scanPos - needleLen);
      // Copy the tail so it no longer aliases the old buffer.
      const tail = view.slice(scanPos);
      yield textDecoder.decode(head);
      // Unlike plain KMP, restart matching from scratch on the tail.
      view = tail;
      scanPos = 0;
      matched = 0;
    }
    // scanPos and matched are intentionally kept for the next read.
    pending = new Buffer(view);
  }
}

export async function xeval(
reader: Reader,
xevalFunc: XevalFunc,
Expand Down