From dda58f7f9ff89400ad6ca5a4abd02199aba62593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartek=20Iwa=C5=84czuk?= Date: Sat, 14 Dec 2019 12:29:59 +0100 Subject: [PATCH 1/5] move chunks to std/io --- std/io/bufio.ts | 90 +++++++++++++++++++++++++++++++++++++++++++ std/io/bufio_test.ts | 27 +++++++++++++ std/xeval/mod.ts | 91 +------------------------------------------- 3 files changed, 119 insertions(+), 89 deletions(-) diff --git a/std/io/bufio.ts b/std/io/bufio.ts index 5f0d53eb8534e2..88a38f075cd529 100644 --- a/std/io/bufio.ts +++ b/std/io/bufio.ts @@ -508,3 +508,93 @@ export class BufWriter implements Writer { return nn; } } + +/** Generate longest proper prefix which is also suffix array. */ +function createLPS(pat: Uint8Array): Uint8Array { + const lps = new Uint8Array(pat.length); + lps[0] = 0; + let prefixEnd = 0; + let i = 1; + while (i < lps.length) { + if (pat[i] == pat[prefixEnd]) { + prefixEnd++; + lps[i] = prefixEnd; + i++; + } else if (prefixEnd === 0) { + lps[i] = 0; + i++; + } else { + prefixEnd = pat[prefixEnd - 1]; + } + } + return lps; +} + +/** Read from reader until EOF and emit string chunks separated */ +export async function* chunks( + reader: Reader, + delim: string +): AsyncIterableIterator { + const encoder = new TextEncoder(); + const decoder = new TextDecoder(); + // Avoid unicode problems + const delimArr = encoder.encode(delim); + const delimLen = delimArr.length; + const delimLPS = createLPS(delimArr); + + let inputBuffer = new Deno.Buffer(); + const inspectArr = new Uint8Array(Math.max(1024, delimLen + 1)); + + // Modified KMP + let inspectIndex = 0; + let matchIndex = 0; + while (true) { + const result = await reader.read(inspectArr); + if (result === Deno.EOF) { + // Yield last chunk. + const lastChunk = inputBuffer.toString(); + yield lastChunk; + return; + } + if ((result as number) < 0) { + // Discard all remaining and silently fail. + return; + } + const sliceRead = inspectArr.subarray(0, result as number); + await Deno.writeAll(inputBuffer, sliceRead); + + let sliceToProcess = inputBuffer.bytes(); + while (inspectIndex < sliceToProcess.length) { + if (sliceToProcess[inspectIndex] === delimArr[matchIndex]) { + inspectIndex++; + matchIndex++; + if (matchIndex === delimLen) { + // Full match + const matchEnd = inspectIndex - delimLen; + const readyBytes = sliceToProcess.subarray(0, matchEnd); + // Copy + const pendingBytes = sliceToProcess.slice(inspectIndex); + const readyChunk = decoder.decode(readyBytes); + yield readyChunk; + // Reset match, different from KMP. + sliceToProcess = pendingBytes; + inspectIndex = 0; + matchIndex = 0; + } + } else { + if (matchIndex === 0) { + inspectIndex++; + } else { + matchIndex = delimLPS[matchIndex - 1]; + } + } + } + // Keep inspectIndex and matchIndex. + inputBuffer = new Deno.Buffer(sliceToProcess); + } +} + +/** Read from reader until EOF and emit lines */ +export async function* lines(reader: Reader): AsyncIterableIterator { + return chunks(reader, "\n"); +} diff --git a/std/io/bufio_test.ts b/std/io/bufio_test.ts index 780dfd3db542c8..8d139bc6ef658e 100644 --- a/std/io/bufio_test.ts +++ b/std/io/bufio_test.ts @@ -15,6 +15,8 @@ import { import { BufReader, BufWriter, + chunks, + lines, BufferFullError, UnexpectedEOFError } from "./bufio.ts"; @@ -383,4 +385,29 @@ test(async function bufReaderReadFull(): Promise { } }); +test(async function chunksAndLines(): Promise { + const enc = new TextEncoder(); + const data = new Buffer( + enc.encode("Hello World\tHello World 2\tHello World 3") + ); + const chunks_ = []; + + for await (const c of chunks(data, "\t")) { + chunks_.push(c); + } + + assertEquals(chunks_.length, 3); + assertEquals(chunks_, ["World World", "Hello World 2", "Hello World 3"]); + + const linesData = new Buffer(enc.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9")); + const lines_ = []; + + for await (const l of lines(linesData)) { + lines_.push(l); + } + + assertEquals(lines_.length, 3); + assertEquals(lines_, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]); +}); + runIfMain(import.meta); diff --git a/std/xeval/mod.ts b/std/xeval/mod.ts index 5843c8650a985d..74fa228a0c6992 100644 --- a/std/xeval/mod.ts +++ b/std/xeval/mod.ts @@ -1,5 +1,6 @@ import { parse } from "../flags/mod.ts"; -const { Buffer, EOF, args, exit, stdin, writeAll } = Deno; +import { chunks } from "../io/bufio.ts"; +const { args, exit, stdin } = Deno; type Reader = Deno.Reader; /* eslint-disable-next-line max-len */ @@ -27,94 +28,6 @@ export interface XevalOptions { const DEFAULT_DELIMITER = "\n"; -// Generate longest proper prefix which is also suffix array. -function createLPS(pat: Uint8Array): Uint8Array { - const lps = new Uint8Array(pat.length); - lps[0] = 0; - let prefixEnd = 0; - let i = 1; - while (i < lps.length) { - if (pat[i] == pat[prefixEnd]) { - prefixEnd++; - lps[i] = prefixEnd; - i++; - } else if (prefixEnd === 0) { - lps[i] = 0; - i++; - } else { - prefixEnd = pat[prefixEnd - 1]; - } - } - return lps; -} - -// TODO(kevinkassimo): Move this utility somewhere public in deno_std. -// Import from there once doable. -// Read from reader until EOF and emit string chunks separated -// by the given delimiter. -async function* chunks( - reader: Reader, - delim: string -): AsyncIterableIterator { - const encoder = new TextEncoder(); - const decoder = new TextDecoder(); - // Avoid unicode problems - const delimArr = encoder.encode(delim); - const delimLen = delimArr.length; - const delimLPS = createLPS(delimArr); - - let inputBuffer = new Buffer(); - const inspectArr = new Uint8Array(Math.max(1024, delimLen + 1)); - - // Modified KMP - let inspectIndex = 0; - let matchIndex = 0; - while (true) { - const result = await reader.read(inspectArr); - if (result === EOF) { - // Yield last chunk. - const lastChunk = inputBuffer.toString(); - yield lastChunk; - return; - } - if ((result as number) < 0) { - // Discard all remaining and silently fail. - return; - } - const sliceRead = inspectArr.subarray(0, result as number); - await writeAll(inputBuffer, sliceRead); - - let sliceToProcess = inputBuffer.bytes(); - while (inspectIndex < sliceToProcess.length) { - if (sliceToProcess[inspectIndex] === delimArr[matchIndex]) { - inspectIndex++; - matchIndex++; - if (matchIndex === delimLen) { - // Full match - const matchEnd = inspectIndex - delimLen; - const readyBytes = sliceToProcess.subarray(0, matchEnd); - // Copy - const pendingBytes = sliceToProcess.slice(inspectIndex); - const readyChunk = decoder.decode(readyBytes); - yield readyChunk; - // Reset match, different from KMP. - sliceToProcess = pendingBytes; - inspectIndex = 0; - matchIndex = 0; - } - } else { - if (matchIndex === 0) { - inspectIndex++; - } else { - matchIndex = delimLPS[matchIndex - 1]; - } - } - } - // Keep inspectIndex and matchIndex. - inputBuffer = new Buffer(sliceToProcess); - } -} - export async function xeval( reader: Reader, xevalFunc: XevalFunc, From 13b0182b72f7bbdcb70bc01310c3dd23ff606c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartek=20Iwa=C5=84czuk?= Date: Sat, 14 Dec 2019 13:09:22 +0100 Subject: [PATCH 2/5] fix typo --- std/io/bufio_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/std/io/bufio_test.ts b/std/io/bufio_test.ts index 8d139bc6ef658e..62c71857553275 100644 --- a/std/io/bufio_test.ts +++ b/std/io/bufio_test.ts @@ -397,7 +397,7 @@ test(async function chunksAndLines(): Promise { } assertEquals(chunks_.length, 3); - assertEquals(chunks_, ["World World", "Hello World 2", "Hello World 3"]); + assertEquals(chunks_, ["Hello World", "Hello World 2", "Hello World 3"]); const linesData = new Buffer(enc.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9")); const lines_ = []; From 1061d1449da8440c64fca5f380f5b224b5fbb883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartek=20Iwa=C5=84czuk?= Date: Sat, 14 Dec 2019 14:06:45 +0100 Subject: [PATCH 3/5] fix --- std/io/bufio.ts | 4 +++- std/io/bufio_test.ts | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/std/io/bufio.ts b/std/io/bufio.ts index 88a38f075cd529..0a853032fa59c4 100644 --- a/std/io/bufio.ts +++ b/std/io/bufio.ts @@ -596,5 +596,7 @@ export async function* chunks( /** Read from reader until EOF and emit lines */ export async function* lines(reader: Reader): AsyncIterableIterator { - return chunks(reader, "\n"); + for await (const line of chunks(reader, "\n")) { + yield line; + } } diff --git a/std/io/bufio_test.ts b/std/io/bufio_test.ts index 62c71857553275..b7493a598d3e89 100644 --- a/std/io/bufio_test.ts +++ b/std/io/bufio_test.ts @@ -406,7 +406,7 @@ test(async function chunksAndLines(): Promise { lines_.push(l); } - assertEquals(lines_.length, 3); + assertEquals(lines_.length, 10); assertEquals(lines_, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]); }); From c0c576787c685b5162eeb763f00870fd9a9f38d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartek=20Iwa=C5=84czuk?= Date: Mon, 16 Dec 2019 18:05:20 +0100 Subject: [PATCH 4/5] short-hand --- std/io/bufio.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/std/io/bufio.ts b/std/io/bufio.ts index 0a853032fa59c4..77bcd90daf43b5 100644 --- a/std/io/bufio.ts +++ b/std/io/bufio.ts @@ -596,7 +596,5 @@ export async function* chunks( /** Read from reader until EOF and emit lines */ export async function* lines(reader: Reader): AsyncIterableIterator { - for await (const line of chunks(reader, "\n")) { - yield line; - } + yield* chunks(reader, "\n"); } From 88849ba1b675d522b983a0c0225cb5332148aff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartek=20Iwa=C5=84czuk?= Date: Sat, 28 Dec 2019 13:01:04 +0100 Subject: [PATCH 5/5] reset CI