From 1234524bd630599cc77e7044598c36595c1df53a Mon Sep 17 00:00:00 2001 From: Juanjo Diaz Date: Tue, 7 Nov 2023 18:36:33 +0100 Subject: [PATCH] feat: allow emitting partial tokens & values --- packages/node/README.md | 29 +- packages/node/dist/deno/README.md | 29 +- packages/node/src/utils.ts | 16 - packages/node/test/emitPartial.ts | 647 ++++++++++++++++++ packages/node/test/types/numbers.ts | 2 +- packages/node/test/types/strings.ts | 18 +- packages/node/test/utils/testRunner.ts | 2 +- packages/plainjs/README.md | 80 +++ packages/plainjs/dist/deno/README.md | 80 +++ packages/plainjs/dist/deno/tokenizer.ts | 65 +- packages/plainjs/dist/deno/tokenparser.ts | 42 +- .../deno/utils/types/parsedElementInfo.ts | 3 +- .../dist/deno/utils/types/parsedTokenInfo.ts | 1 + .../dist/deno/utils/types/tokenType.ts | 17 - packages/plainjs/src/tokenizer.ts | 65 +- packages/plainjs/src/tokenparser.ts | 42 +- .../src/utils/types/parsedElementInfo.ts | 3 +- .../src/utils/types/parsedTokenInfo.ts | 1 + packages/plainjs/src/utils/types/tokenType.ts | 17 - packages/plainjs/test/callbacks.ts | 5 +- packages/plainjs/test/emitPartial.ts | 646 +++++++++++++++++ packages/plainjs/test/types/numbers.ts | 2 +- packages/plainjs/test/types/strings.ts | 18 +- packages/plainjs/test/utils/testRunner.ts | 2 +- packages/whatwg/README.md | 28 +- packages/whatwg/dist/deno/README.md | 28 +- packages/whatwg/dist/deno/utils.ts | 4 +- packages/whatwg/src/utils.ts | 4 +- packages/whatwg/test/emitPartial.ts | 647 ++++++++++++++++++ packages/whatwg/test/types/numbers.ts | 2 +- packages/whatwg/test/types/strings.ts | 2 +- packages/whatwg/test/utils/testRunner.ts | 2 +- 32 files changed, 2465 insertions(+), 84 deletions(-) delete mode 100644 packages/node/src/utils.ts create mode 100644 packages/node/test/emitPartial.ts create mode 100644 packages/plainjs/test/emitPartial.ts create mode 100644 packages/whatwg/test/emitPartial.ts diff --git a/packages/node/README.md b/packages/node/README.md index f0a188d..6ec2475 
100644 --- a/packages/node/README.md +++ b/packages/node/README.md @@ -52,6 +52,7 @@ The available options are: stringBufferSize: , // set to 0 to don't buffer. Min valid value is 4. numberBufferSize: , // set to 0 to don't buffer. separator: , // separator between object. For example `\n` for nd-js. + emitPartialTokens: // whether to emit tokens mid-parsing. } ``` @@ -82,6 +83,7 @@ The available options are: paths: , keepStack: , // whether to keep all the properties in the stack separator: , // separator between object. For example `\n` for nd-js. If left empty or set to undefined, the token parser will end after parsing the first object. To parse multiple object without any delimiter just set it to the empty string `''`. + emitPartialValues: , // whether to emit values mid-parsing. } ``` @@ -108,7 +110,6 @@ const tokenParser = new TokenParser(); const jsonParser = tokenizer.pipeTrough(tokenParser); ``` - You can subscribe to the resulting data using the ```javascript @@ -138,7 +139,7 @@ Imagine an endpoint that send a large amount of JSON objects one after the other const response = await fetch('http://example.com/'); const reader = response.body.pipe(parser); - reader.on('data', value => /* process element */) + reader.on('data', value => /* process element */); ``` ### Stream-parsing a fetch request returning a JSON array @@ -152,11 +153,33 @@ Imagine an endpoint that send a large amount of JSON objects one after the other const response = await fetch('http://example.com/'); - const reader = response.body.pipe(parse)getReader(); + const reader = response.body.pipe(parse).getReader(); reader.on('data', ({ value, key, parent, stack }) => /* process element */) ``` +### Stream-parsing a fetch request returning a very long string getting previews of the string + +Imagine an endpoint that send a large amount of JSON objects one after the other (`"Once upon a midnight <...>"`). 
+ +```js + import { JSONParser } from '@streamparser/json-node'; + + const parser = new JSONParser({ stringBufferSize: undefined, paths: ['$.*'], keepStack: false }); + + const response = await fetch('http://example.com/'); + + const reader = response.body.pipe(parse).getReader(); + + reader.on('data', ({ value, key, parent, stack, partial }) => { + if (partial) { + console.log(`Parsing value: ${value}... (still parsing)`); + } else { + console.log(`Value parsed: ${value}`); + } + }); +``` + ## License See [LICENSE.md]. diff --git a/packages/node/dist/deno/README.md b/packages/node/dist/deno/README.md index f0a188d..6ec2475 100644 --- a/packages/node/dist/deno/README.md +++ b/packages/node/dist/deno/README.md @@ -52,6 +52,7 @@ The available options are: stringBufferSize: , // set to 0 to don't buffer. Min valid value is 4. numberBufferSize: , // set to 0 to don't buffer. separator: , // separator between object. For example `\n` for nd-js. + emitPartialTokens: // whether to emit tokens mid-parsing. } ``` @@ -82,6 +83,7 @@ The available options are: paths: , keepStack: , // whether to keep all the properties in the stack separator: , // separator between object. For example `\n` for nd-js. If left empty or set to undefined, the token parser will end after parsing the first object. To parse multiple object without any delimiter just set it to the empty string `''`. + emitPartialValues: , // whether to emit values mid-parsing. 
} ``` @@ -108,7 +110,6 @@ const tokenParser = new TokenParser(); const jsonParser = tokenizer.pipeTrough(tokenParser); ``` - You can subscribe to the resulting data using the ```javascript @@ -138,7 +139,7 @@ Imagine an endpoint that send a large amount of JSON objects one after the other const response = await fetch('http://example.com/'); const reader = response.body.pipe(parser); - reader.on('data', value => /* process element */) + reader.on('data', value => /* process element */); ``` ### Stream-parsing a fetch request returning a JSON array @@ -152,11 +153,33 @@ Imagine an endpoint that send a large amount of JSON objects one after the other const response = await fetch('http://example.com/'); - const reader = response.body.pipe(parse)getReader(); + const reader = response.body.pipe(parse).getReader(); reader.on('data', ({ value, key, parent, stack }) => /* process element */) ``` +### Stream-parsing a fetch request returning a very long string getting previews of the string + +Imagine an endpoint that send a large amount of JSON objects one after the other (`"Once upon a midnight <...>"`). + +```js + import { JSONParser } from '@streamparser/json-node'; + + const parser = new JSONParser({ stringBufferSize: undefined, paths: ['$.*'], keepStack: false }); + + const response = await fetch('http://example.com/'); + + const reader = response.body.pipe(parse).getReader(); + + reader.on('data', ({ value, key, parent, stack, partial }) => { + if (partial) { + console.log(`Parsing value: ${value}... (still parsing)`); + } else { + console.log(`Value parsed: ${value}`); + } + }); +``` + ## License See [LICENSE.md]. 
diff --git a/packages/node/src/utils.ts b/packages/node/src/utils.ts deleted file mode 100644 index cb797ea..0000000 --- a/packages/node/src/utils.ts +++ /dev/null @@ -1,16 +0,0 @@ -import type { ParsedElementInfo } from "@streamparser/json/utils/types/parsedElementInfo.js"; - -export function cloneParsedElementInfo( - parsedElementInfo: ParsedElementInfo, -): ParsedElementInfo { - const { value, key, parent, stack } = parsedElementInfo; - return { value, key, parent: clone(parent), stack: clone(stack) }; -} - -function clone(obj: T): T { - // Only objects are passed by reference and must be cloned - if (typeof obj !== "object") return obj; - // Solve arrays with empty positions - if (Array.isArray(obj) && obj.filter((i) => i).length === 0) return obj; - return JSON.parse(JSON.stringify(obj)); -} diff --git a/packages/node/test/emitPartial.ts b/packages/node/test/emitPartial.ts new file mode 100644 index 0000000..49df7cd --- /dev/null +++ b/packages/node/test/emitPartial.ts @@ -0,0 +1,647 @@ +import TokenType from "@streamparser/json/utils/types/tokenType.js"; +import JSONParser from "../src/jsonparser.js"; +import Tokenizer from "../src/tokenizer.js"; +import { + TestData, + runJSONParserTest, + runTokenizerTest, +} from "./utils/testRunner.js"; + +describe("Emit Partial", () => { + describe("Tokenizer emit partial tokens", () => { + const emitPartialTokenTestData: TestData[] = [ + { + value: ["tr", "ue"], + expected: [ + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: false }, + ], + }, + { + value: ["t", "ru", "e"], + expected: [ + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: false }, + ], + }, + { + value: ["f", "al", "se"], + expected: [ + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: 
false, partial: false }, + ], + }, + { + value: ["fal", "se"], + expected: [ + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: false }, + ], + }, + { + value: ["0", ".", "123"], + expected: [ + { token: TokenType.NUMBER, value: 0, partial: true }, + { token: TokenType.NUMBER, value: 0.123, partial: true }, + { token: TokenType.NUMBER, value: 0.123, partial: false }, + ], + }, + { + value: ["n", "u", "l", "l"], + expected: [ + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: false }, + ], + }, + { + value: ["n", "u", "l", "l"], + expected: [ + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: false }, + ], + }, + { + value: "{", + expected: [{ token: TokenType.LEFT_BRACE, value: "{", partial: false }], + }, + { + value: ['{ "fo', "o", '"', ': "', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "fo", partial: true }, + { token: TokenType.STRING, value: "foo", partial: true }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "", partial: true }, + { token: TokenType.STRING, value: "", partial: false }, + ], + }, + { + value: ['{ "foo": "ba', "r", '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "ba", partial: true }, + { token: TokenType.STRING, value: "bar", partial: true }, + { token: TokenType.STRING, value: "bar", 
partial: false }, + ], + }, + { + value: ['{ "foo": "bar"', "}"], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.RIGHT_BRACE, value: "}", partial: false }, + ], + }, + { + value: '{ "foo": "bar" }', + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.RIGHT_BRACE, value: "}", partial: false }, + ], + }, + { + value: [ + '{ "foo": "bar", "ba', + "z", + '": [', + '{ "foo": "bar", "baz": [', + '{ "foo": "bar", "baz": [1', + "2", + "3, ", + ], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "ba", partial: true }, + { token: TokenType.STRING, value: "baz", partial: true }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false 
}, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: true }, + { token: TokenType.NUMBER, value: 12, partial: true }, + { token: TokenType.NUMBER, value: 123, partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + ], + }, + { + value: '{ "foo": "bar", "baz": [1]', + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: false }, + { token: TokenType.RIGHT_BRACKET, value: "]", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", ', ' "baz": [1,'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: 
false }, + { token: TokenType.COMMA, value: ",", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [1,2', "3, 4", "5", "6] }"], + expected: [ + { + type: "complete", + token: TokenType.LEFT_BRACE, + value: "{", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "foo", + partial: false, + }, + { + type: "complete", + token: TokenType.COLON, + value: ":", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "bar", + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "baz", + partial: false, + }, + { + type: "complete", + token: TokenType.COLON, + value: ":", + partial: false, + }, + { + type: "complete", + token: TokenType.LEFT_BRACKET, + value: "[", + partial: false, + }, + { + type: "complete", + token: TokenType.NUMBER, + value: 1, + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { token: TokenType.NUMBER, value: 2, partial: true }, + { + type: "complete", + token: TokenType.NUMBER, + value: 23, + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { token: TokenType.NUMBER, value: 4, partial: true }, + { token: TokenType.NUMBER, value: 45, partial: true }, + { + type: "complete", + token: TokenType.NUMBER, + value: 456, + partial: false, + }, + { + type: "complete", + token: TokenType.RIGHT_BRACKET, + value: "]", + partial: false, + }, + { + type: "complete", + token: TokenType.RIGHT_BRACE, + value: "}", + partial: false, + }, + ], + }, + { + value: ['{ "foo": "bar", "baz"', ": [{"], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: 
TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [{ "a', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "a", partial: true }, + { token: TokenType.STRING, value: "a", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [{ "a": "b', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "a", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "b", partial: true }, + { token: TokenType.STRING, value: "b", partial: false }, + ], + }, + ]; + + emitPartialTokenTestData.forEach(({ value, expected }) => { + test(`Tokenizer emit 
partial tokens: ${value}`, async () => { + let i = 0; + await runTokenizerTest( + new Tokenizer({ emitPartialTokens: true }), + value, + ({ token, value, partial }) => { + const expectedData = expected[i]; + expect(token).toEqual(expectedData.token); + expect(value).toEqual(expectedData.value); + expect(partial ?? false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); + }); + }); + + describe("TokenParser emit partial values", () => { + const emitPartialValuesTestData: TestData[] = [ + { + value: ['"a', "bc", '"'], + expected: [ + { value: "a", key: undefined, parent: undefined, partial: true }, + { value: "abc", key: undefined, parent: undefined, partial: true }, + { value: "abc", key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["12", ".34"], + expected: [ + { value: 12, key: undefined, parent: undefined, partial: true }, + { value: 12.34, key: undefined, parent: undefined, partial: true }, + { value: 12.34, key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["[", "]"], + expected: [ + { value: undefined, key: 0, parent: [], partial: true }, + { value: [], key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["[", '"a', "bc", '"', ",", '"def"', "]"], + expected: [ + { value: undefined, key: 0, parent: [], partial: true }, + { value: "a", key: 0, parent: [], partial: true }, + { value: "abc", key: 0, parent: [], partial: true }, + { value: "abc", key: 0, parent: ["abc"], partial: false }, + { value: "def", key: 1, parent: ["abc", "def"], partial: false }, + { + value: ["abc", "def"], + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + { + value: [ + "{", + '"a', + "bc", + '"', + ":", + '"def"', + ",", + '"ghi":', + '"jkl"', + "}", + ], + expected: [ + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "a", parent: {}, partial: true }, + { value: undefined, key: "abc", parent: {}, 
partial: true }, + { value: undefined, key: "abc", parent: {}, partial: true }, + { value: "def", key: "abc", parent: { abc: "def" }, partial: false }, + { + value: undefined, + key: "ghi", + parent: { abc: "def" }, + partial: true, + }, + { + value: "jkl", + key: "ghi", + parent: { abc: "def", ghi: "jkl" }, + partial: false, + }, + { + value: { abc: "def", ghi: "jkl" }, + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + { + value: [ + '{ "foo"', + ": ", + '{ "foo1": "ba', + "r", + '" , "baz', + '": [', + '{ "foo2": "bar2", "baz2": [', + '{ "foo3": "bar3", "baz3": [1', + "2", + "3, ", + "3, 4", + "5", + "6] }", + "] }] }}", + ], + expected: [ + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo", parent: {}, partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo1", parent: {}, partial: true }, + { value: "ba", key: "foo1", parent: {}, partial: true }, + { value: "bar", key: "foo1", parent: {}, partial: true }, + { + value: "bar", + key: "foo1", + parent: { foo1: "bar" }, + partial: false, + }, + { + value: undefined, + key: "baz", + parent: { foo1: "bar" }, + partial: true, + }, + { + value: undefined, + key: "baz", + parent: { foo1: "bar" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo2", parent: {}, partial: true }, + { + value: "bar2", + key: "foo2", + parent: { foo2: "bar2" }, + partial: false, + }, + { + value: undefined, + key: "baz2", + parent: { foo2: "bar2" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo3", parent: {}, partial: true }, + { + value: "bar3", + key: "foo3", + parent: { foo3: "bar3" }, + partial: false, + }, + { + value: undefined, + key: "baz3", + 
parent: { foo3: "bar3" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: 1, key: 0, parent: [], partial: true }, + { value: 12, key: 0, parent: [], partial: true }, + { value: 123, key: 0, parent: [123], partial: false }, + { value: 3, key: 1, parent: [123, 3], partial: false }, + { value: 4, key: 2, parent: [123, 3], partial: true }, + { value: 45, key: 2, parent: [123, 3], partial: true }, + { value: 456, key: 2, parent: [123, 3, 456], partial: false }, + { + value: [123, 3, 456], + key: "baz3", + parent: { foo3: "bar3", baz3: [123, 3, 456] }, + partial: false, + }, + { + value: { foo3: "bar3", baz3: [123, 3, 456] }, + key: 0, + parent: [{ foo3: "bar3", baz3: [123, 3, 456] }], + partial: false, + }, + { + value: [{ foo3: "bar3", baz3: [123, 3, 456] }], + key: "baz2", + parent: { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + partial: false, + }, + { + value: { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + key: 0, + parent: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + partial: false, + }, + { + value: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + key: "baz", + parent: { + foo1: "bar", + baz: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + }, + partial: false, + }, + { + value: { + foo1: "bar", + baz: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + }, + key: "foo", + parent: { + foo: { + foo1: "bar", + baz: [ + { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + ], + }, + }, + partial: false, + }, + { + value: { + foo: { + foo1: "bar", + baz: [ + { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + ], + }, + }, + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + ]; + + emitPartialValuesTestData.forEach(({ value, expected }) => { + test(`TokenParser emit partial values: ${value}`, async () => 
{ + let i = 0; + await runJSONParserTest( + new JSONParser({ emitPartialTokens: true, emitPartialValues: true }), + value, + ({ value, key, parent, partial }) => { + const expectedData = expected[i]; + expect(value).toEqual(expectedData.value); + expect(key).toEqual(expectedData.key); + expect(parent).toEqual(expectedData.parent); + expect(partial ?? false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); + }); + }); + + test("TokenParser emit partial values only if matching paths when paths is present", async () => { + const value = ['{ "a"', ": 1,", '"b":', '{ "c":', "1 } }"]; + const expected = [ + { value: undefined, key: "c", parent: {}, partial: true }, + { value: 1, key: "c", parent: { c: 1 }, partial: false }, + ]; + let i = 0; + await runJSONParserTest( + new JSONParser({ + paths: ["$.b.c"], + emitPartialTokens: true, + emitPartialValues: true, + }), + value, + ({ value, key, parent, partial }) => { + const expectedData = expected[i]; + expect(value).toEqual(expectedData.value); + expect(key).toEqual(expectedData.key); + expect(parent).toEqual(expectedData.parent); + expect(partial ?? 
false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); +}); diff --git a/packages/node/test/types/numbers.ts b/packages/node/test/types/numbers.ts index 9156932..20f208e 100644 --- a/packages/node/test/types/numbers.ts +++ b/packages/node/test/types/numbers.ts @@ -51,7 +51,7 @@ describe("number", () => { "21e999", ]; - const bufferSizes = [0, 64 * 1024]; + const bufferSizes = [0, 1, 64 * 1024]; bufferSizes.forEach((numberBufferSize) => { values.forEach((stringValue) => { diff --git a/packages/node/test/types/strings.ts b/packages/node/test/types/strings.ts index 4430c1d..bfde420 100644 --- a/packages/node/test/types/strings.ts +++ b/packages/node/test/types/strings.ts @@ -15,7 +15,7 @@ describe("string", () => { "õ", ]; - const bufferSizes = [0, 64 * 1024]; + const bufferSizes = [0, 1, 64 * 1024]; bufferSizes.forEach((stringBufferSize) => { values.forEach((stringValue) => { @@ -159,12 +159,22 @@ describe("string", () => { ); }); - const invalidValues = ["\n", "\\j", "\\ua", "\\u1*", "\\u12*", "\\u123*"]; + const invalidValues = [ + '"\n"', + '"\\j"', + '"\\ua"', + '"\\u1*"', + '"\\u12*"', + "\\u123*", + '"\0"', + '"\\uG"', + '"\\u000G"', + ]; invalidValues.forEach((value) => { - test("fail on invalid values", async () => { + test(`fail on invalid values ${value}`, async () => { try { - await runJSONParserTest(new JSONParser(), [value]); + await runJSONParserTest(new JSONParser(), value); fail(`Expected to fail on value "${value}"`); } catch (e) { // Expected error diff --git a/packages/node/test/utils/testRunner.ts b/packages/node/test/utils/testRunner.ts index 66f1ba2..54593d5 100644 --- a/packages/node/test/utils/testRunner.ts +++ b/packages/node/test/utils/testRunner.ts @@ -6,7 +6,7 @@ import type { ParsedTokenInfo } from "@streamparser/json/utils/types/parsedToken import type { ParsedElementInfo } from "@streamparser/json/utils/types/parsedElementInfo.js"; export type TestData = { - value: string | Iterable; + 
value: string | string[] | Iterable; paths?: string[]; expected: any[]; }; diff --git a/packages/plainjs/README.md b/packages/plainjs/README.md index ac55a04..7c29e58 100644 --- a/packages/plainjs/README.md +++ b/packages/plainjs/README.md @@ -64,6 +64,7 @@ The available options are: stringBufferSize: , // set to 0 to don't buffer. Min valid value is 4. numberBufferSize: , // set to 0 to don't buffer. separator: , // separator between object. For example `\n` for nd-js. + emitPartialTokens: // whether to emit tokens mid-parsing. } ``` @@ -127,6 +128,7 @@ The available options are: paths: , keepStack: , // whether to keep all the properties in the stack separator: , // separator between object. For example `\n` for nd-js. If left empty or set to undefined, the token parser will end after parsing the first object. To parse multiple object without any delimiter just set it to the empty string `''`. + emitPartialValues: , // whether to emit values mid-parsing. } ``` @@ -265,6 +267,59 @@ parser.onError = console.error; parser.write('"""'); ``` +## Optimistic parsing + +Optimistic parsing can be useful when incrementally building a JSON value that +you expect to be "eventually valid". When parsing optimistically, the parser +will always make accessible its "best guess" at what the eventually-correct +parsed value will look like. 
For example: + +``` +import { OptimisticJSONParser } from "@streamparser/json" + +const parser = new OptimisticJSONParser() + +parser.write('{') +console.log(parser.value) // {} + +parser.write('"') +console.log(parser.value) // {} + +parser.write('a"') +console.log(parser.value) // { a: undefined } + +parser.write(': "b') +console.log(parser.value) // { a: "b" } + +parser.write('ar", ') +console.log(parser.value) // { a: "bar" } + +parser.write('"c":') +console.log(parser.value) // { a: "bar", c: undefined } + +parser.write('[{') +console.log(parser.value) // { a: "bar", c: [ {} ] } + +parser.write('"d') +console.log(parser.value) // { a: "bar", c: [ { d: undefined } ] } + +parser.write('": 1') +console.log(parser.value) // { a: "bar", c: [ { d: 1 } ] } + +parser.write('23') +console.log(parser.value) // { a: "bar", c: [ { d: 123 } ] } + +parser.write('}') +console.log(parser.value) // { a: "bar", c: [ { d: 123 } ] } +``` + +and so on. An optimistic parser will attempt to present incomplete null, +boolean, string and number literals. It will also optimistically insert keys +with as-yet-undefined values, and close opened objects and arrays. Under +the hood, an optimistic tokenizer will emit "incomplete" tokens when it +thinks that it will eventually reach a state where a token can be definitively +produced. + ## Examples ### Stream-parsing a fetch request returning a JSONstream @@ -310,6 +365,31 @@ Imagine an endpoint that send a large amount of JSON objects one after the other } ``` +### Stream-parsing a fetch request returning a very long string getting previews of the string + +Imagine an endpoint that sends a very long JSON string in small chunks (`"Once upon a midnight <...>"`). 
+ +```js + import { JSONParser } from '@streamparser/json'; + + const jsonparser = new JSONParser({ emitPartialTokens: true, emitPartialValues: true }); + jsonparser.onValue = ({ value, key, parent, stack, partial }) => { + if (partial) { + console.log(`Parsing value: ${value}... (still parsing)`); + } else { + console.log(`Value parsed: ${value}`); + } + }; + + const response = await fetch('http://example.com/'); + const reader = response.body.getReader(); + while(true) { + const { done, value } = await reader.read(); + if (done) break; + jsonparser.write(value); + } +``` + ## Migration guide ### Upgrading from 0.10 to 0.11 diff --git a/packages/plainjs/dist/deno/README.md b/packages/plainjs/dist/deno/README.md index edbbc6b..e9ee012 100644 --- a/packages/plainjs/dist/deno/README.md +++ b/packages/plainjs/dist/deno/README.md @@ -64,6 +64,7 @@ The available options are: stringBufferSize: , // set to 0 to don't buffer. Min valid value is 4. numberBufferSize: , // set to 0 to don't buffer. separator: , // separator between object. For example `\n` for nd-js. + emitPartialTokens: // whether to emit tokens mid-parsing. } ``` @@ -127,6 +128,7 @@ The available options are: paths: , keepStack: , // whether to keep all the properties in the stack separator: , // separator between object. For example `\n` for nd-js. If left empty or set to undefined, the token parser will end after parsing the first object. To parse multiple object without any delimiter just set it to the empty string `''`. + emitPartialValues: , // whether to emit values mid-parsing. } ``` @@ -265,6 +267,59 @@ parser.onError = console.error; parser.write('"""'); ``` +## Optimistic parsing + +Optimistic parsing can be useful when incrementally building a JSON value that +you expect to be "eventually valid". When parsing optimistically, the parser +will always make accessible its "best guess" at what the eventually-correct +parsed value will look like. 
For example:
+
+```
+import { OptimisticJSONParser } from "@streamparser/json"
+
+const parser = new OptimisticJSONParser()
+
+parser.write('{')
+console.log(parser.value) // {}
+
+parser.write('"')
+console.log(parser.value) // {}
+
+parser.write('a"')
+console.log(parser.value) // { a: undefined }
+
+parser.write(': "b')
+console.log(parser.value) // { a: "b" }
+
+parser.write('ar", ')
+console.log(parser.value) // { a: "bar" }
+
+parser.write('"c":')
+console.log(parser.value) // { a: "bar", c: undefined }
+
+parser.write('[{')
+console.log(parser.value) // { a: "bar", c: [ {} ] }
+
+parser.write('"d')
+console.log(parser.value) // { a: "bar", c: [ { d: undefined } ] }
+
+parser.write('": 1')
+console.log(parser.value) // { a: "bar", c: [ { d: 1 } ] }
+
+parser.write('23')
+console.log(parser.value) // { a: "bar", c: [ { d: 123 } ] }
+
+parser.write('}')
+console.log(parser.value) // { a: "bar", c: [ { d: 123 } ] }
+```
+
+and so on. An optimistic parser will attempt to present incomplete null,
+boolean, string and number literals. It will also optimistically insert keys
+with as-yet-undefined values, and close opened objects and arrays. Under
+the hood, an optimistic tokenizer will emit "incomplete" tokens when it
+thinks that it will eventually reach a state where a token can be definitively
+produced.
+
 ## Examples
 
 ### Stream-parsing a fetch request returning a JSONstream
@@ -310,6 +365,31 @@ Imagine an endpoint that send a large amount of JSON objects one after the other
 }
 ```
 
+### Stream-parsing a fetch request returning a very long string getting previews of the string
+
+Imagine an endpoint that sends a very long string (`"Once upon a midnight <...>"`).
+ +```js + import { JSONParser } from "https://deno.land/x/streamparser_json@v0.0.17/index.ts";/ + + const jsonparser = new JSONParser({ emitPartialTokens: true, emitPartialValues: true }); + jsonparser.onValue = ({ value, key, parent, stack, partial }) => { + if (partial) { + console.log(`Parsing value: ${value}... (still parsing)`); + } else { + console.log(`Value parsed: ${value}`); + } + }; + + const response = await fetch('http://example.com/'); + const reader = response.body.getReader(); + while(true) { + const { done, value } = await reader.read(); + if (done) break; + jsonparser.write(value); + } +``` + ## Migration guide ### Upgrading from 0.10 to 0.11 diff --git a/packages/plainjs/dist/deno/tokenizer.ts b/packages/plainjs/dist/deno/tokenizer.ts index adea59b..6e8d2f8 100644 --- a/packages/plainjs/dist/deno/tokenizer.ts +++ b/packages/plainjs/dist/deno/tokenizer.ts @@ -78,12 +78,14 @@ export interface TokenizerOptions { stringBufferSize?: number; numberBufferSize?: number; separator?: string; + emitPartialTokens?: boolean; } const defaultOpts: TokenizerOptions = { stringBufferSize: 0, numberBufferSize: 0, separator: undefined, + emitPartialTokens: false, }; export class TokenizerError extends Error { @@ -97,6 +99,7 @@ export class TokenizerError extends Error { export default class Tokenizer { private state = TokenizerStates.START; + private emitPartialTokens: boolean; private separator?: string; private separatorBytes?: Uint8Array; private separatorIndex = 0; @@ -114,6 +117,7 @@ export default class Tokenizer { constructor(opts?: TokenizerOptions) { opts = { ...defaultOpts, ...opts }; + this.emitPartialTokens = opts.emitPartialTokens === true; this.bufferedString = opts.stringBufferSize && opts.stringBufferSize > 4 ? 
new BufferedString(opts.stringBufferSize) @@ -645,7 +649,66 @@ export default class Tokenizer { )}`, ); } - // eslint-disable-next-line @typescript-eslint/no-explicit-any + + if (this.emitPartialTokens) { + switch (this.state) { + case TokenizerStates.TRUE1: + case TokenizerStates.TRUE2: + case TokenizerStates.TRUE3: + this.onToken({ + token: TokenType.TRUE, + value: true, + offset: this.offset, + partial: true, + }); + break; + case TokenizerStates.FALSE1: + case TokenizerStates.FALSE2: + case TokenizerStates.FALSE3: + case TokenizerStates.FALSE4: + this.onToken({ + token: TokenType.FALSE, + value: false, + offset: this.offset, + partial: true, + }); + break; + case TokenizerStates.NULL1: + case TokenizerStates.NULL2: + case TokenizerStates.NULL3: + this.onToken({ + token: TokenType.NULL, + value: null, + offset: this.offset, + partial: true, + }); + break; + case TokenizerStates.STRING_DEFAULT: { + const string = this.bufferedString.toString(); + this.onToken({ + token: TokenType.STRING, + value: string, + offset: this.offset, + partial: true, + }); + break; + } + case TokenizerStates.NUMBER_AFTER_INITIAL_ZERO: + case TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO: + case TokenizerStates.NUMBER_AFTER_DECIMAL: + case TokenizerStates.NUMBER_AFTER_E_AND_DIGIT: + try { + this.onToken({ + token: TokenType.NUMBER, + value: this.parseNumber(this.bufferedNumber.toString()), + offset: this.offset, + partial: true, + }); + } catch (err: unknown) { + // Number couldn't be parsed. Do nothing. 
+ } + } + } } catch (err: any) { this.error(err); } diff --git a/packages/plainjs/dist/deno/tokenparser.ts b/packages/plainjs/dist/deno/tokenparser.ts index b4e8132..cf6dfbc 100644 --- a/packages/plainjs/dist/deno/tokenparser.ts +++ b/packages/plainjs/dist/deno/tokenparser.ts @@ -34,12 +34,14 @@ export interface TokenParserOptions { paths?: string[]; keepStack?: boolean; separator?: string; + emitPartialValues?: boolean; } const defaultOpts: TokenParserOptions = { paths: undefined, keepStack: true, separator: undefined, + emitPartialValues: false, }; export class TokenParserError extends Error { @@ -82,6 +84,9 @@ export default class TokenParser { this.keepStack = opts.keepStack || false; this.separator = opts.separator; + if (!opts.emitPartialValues) { + this.emitPartial = () => {}; + } } private shouldEmit(): boolean { @@ -159,12 +164,44 @@ export default class TokenParser { } } + private emitPartial(value?: JsonPrimitive): void { + if (!this.shouldEmit()) return; + + if (this.state === TokenParserState.KEY) { + this.onValue({ + value: undefined, + key: value as JsonKey, + parent: this.value, + stack: this.stack, + partial: true, + }); + return; + } + + this.onValue({ + value: value, + key: this.key, + parent: this.value, + stack: this.stack, + partial: true, + }); + } + public get isEnded(): boolean { return this.state === TokenParserState.ENDED; } - public write({ token, value }: Omit): void { + public write({ + token, + value, + partial, + }: Omit): void { try { + if (partial) { + this.emitPartial(value); + return; + } + if (this.state === TokenParserState.VALUE) { if ( token === TokenType.STRING || @@ -199,6 +236,7 @@ export default class TokenParser { this.mode = TokenParserMode.OBJECT; this.state = TokenParserState.KEY; this.key = undefined; + this.emitPartial(); return; } @@ -216,6 +254,7 @@ export default class TokenParser { this.mode = TokenParserMode.ARRAY; this.state = TokenParserState.VALUE; this.key = 0; + this.emitPartial(); return; } @@ -233,6 
+272,7 @@ export default class TokenParser { if (token === TokenType.STRING) { this.key = value as string; this.state = TokenParserState.COLON; + this.emitPartial(); return; } diff --git a/packages/plainjs/dist/deno/utils/types/parsedElementInfo.ts b/packages/plainjs/dist/deno/utils/types/parsedElementInfo.ts index 74b34d9..5969259 100644 --- a/packages/plainjs/dist/deno/utils/types/parsedElementInfo.ts +++ b/packages/plainjs/dist/deno/utils/types/parsedElementInfo.ts @@ -8,10 +8,11 @@ import type { } from "./jsonTypes.ts"; export interface ParsedElementInfo { - value: JsonPrimitive | JsonStruct; + value?: JsonPrimitive | JsonStruct; parent?: JsonStruct; key?: JsonKey; stack: StackElement[]; + partial?: boolean; } export interface ParsedArrayElement extends ParsedElementInfo { diff --git a/packages/plainjs/dist/deno/utils/types/parsedTokenInfo.ts b/packages/plainjs/dist/deno/utils/types/parsedTokenInfo.ts index c8108e5..b7aef4c 100644 --- a/packages/plainjs/dist/deno/utils/types/parsedTokenInfo.ts +++ b/packages/plainjs/dist/deno/utils/types/parsedTokenInfo.ts @@ -5,6 +5,7 @@ export interface ParsedTokenInfo { token: TokenType; value: JsonPrimitive; offset: number; + partial?: boolean; } export interface ParsedLeftBraceTokenInfo extends ParsedTokenInfo { diff --git a/packages/plainjs/dist/deno/utils/types/tokenType.ts b/packages/plainjs/dist/deno/utils/types/tokenType.ts index 88e89d6..4b724de 100644 --- a/packages/plainjs/dist/deno/utils/types/tokenType.ts +++ b/packages/plainjs/dist/deno/utils/types/tokenType.ts @@ -13,21 +13,4 @@ enum TokenType { SEPARATOR, } -export function TokenTypeToString(tokenType: TokenType): string { - return [ - "LEFT_BRACE", - "RIGHT_BRACE", - "LEFT_BRACKET", - "RIGHT_BRACKET", - "COLON", - "COMMA", - "TRUE", - "FALSE", - "NULL", - "STRING", - "NUMBER", - "SEPARATOR", - ][tokenType]; -} - export default TokenType; diff --git a/packages/plainjs/src/tokenizer.ts b/packages/plainjs/src/tokenizer.ts index 708fdd1..0880240 100644 --- 
a/packages/plainjs/src/tokenizer.ts +++ b/packages/plainjs/src/tokenizer.ts @@ -78,12 +78,14 @@ export interface TokenizerOptions { stringBufferSize?: number; numberBufferSize?: number; separator?: string; + emitPartialTokens?: boolean; } const defaultOpts: TokenizerOptions = { stringBufferSize: 0, numberBufferSize: 0, separator: undefined, + emitPartialTokens: false, }; export class TokenizerError extends Error { @@ -97,6 +99,7 @@ export class TokenizerError extends Error { export default class Tokenizer { private state = TokenizerStates.START; + private emitPartialTokens: boolean; private separator?: string; private separatorBytes?: Uint8Array; private separatorIndex = 0; @@ -114,6 +117,7 @@ export default class Tokenizer { constructor(opts?: TokenizerOptions) { opts = { ...defaultOpts, ...opts }; + this.emitPartialTokens = opts.emitPartialTokens === true; this.bufferedString = opts.stringBufferSize && opts.stringBufferSize > 4 ? new BufferedString(opts.stringBufferSize) @@ -645,7 +649,66 @@ export default class Tokenizer { )}`, ); } - // eslint-disable-next-line @typescript-eslint/no-explicit-any + + if (this.emitPartialTokens) { + switch (this.state) { + case TokenizerStates.TRUE1: + case TokenizerStates.TRUE2: + case TokenizerStates.TRUE3: + this.onToken({ + token: TokenType.TRUE, + value: true, + offset: this.offset, + partial: true, + }); + break; + case TokenizerStates.FALSE1: + case TokenizerStates.FALSE2: + case TokenizerStates.FALSE3: + case TokenizerStates.FALSE4: + this.onToken({ + token: TokenType.FALSE, + value: false, + offset: this.offset, + partial: true, + }); + break; + case TokenizerStates.NULL1: + case TokenizerStates.NULL2: + case TokenizerStates.NULL3: + this.onToken({ + token: TokenType.NULL, + value: null, + offset: this.offset, + partial: true, + }); + break; + case TokenizerStates.STRING_DEFAULT: { + const string = this.bufferedString.toString(); + this.onToken({ + token: TokenType.STRING, + value: string, + offset: this.offset, + 
partial: true, + }); + break; + } + case TokenizerStates.NUMBER_AFTER_INITIAL_ZERO: + case TokenizerStates.NUMBER_AFTER_INITIAL_NON_ZERO: + case TokenizerStates.NUMBER_AFTER_DECIMAL: + case TokenizerStates.NUMBER_AFTER_E_AND_DIGIT: + try { + this.onToken({ + token: TokenType.NUMBER, + value: this.parseNumber(this.bufferedNumber.toString()), + offset: this.offset, + partial: true, + }); + } catch (err: unknown) { + // Number couldn't be parsed. Do nothing. + } + } + } } catch (err: any) { this.error(err); } diff --git a/packages/plainjs/src/tokenparser.ts b/packages/plainjs/src/tokenparser.ts index 77bef8e..04331e2 100644 --- a/packages/plainjs/src/tokenparser.ts +++ b/packages/plainjs/src/tokenparser.ts @@ -34,12 +34,14 @@ export interface TokenParserOptions { paths?: string[]; keepStack?: boolean; separator?: string; + emitPartialValues?: boolean; } const defaultOpts: TokenParserOptions = { paths: undefined, keepStack: true, separator: undefined, + emitPartialValues: false, }; export class TokenParserError extends Error { @@ -82,6 +84,9 @@ export default class TokenParser { this.keepStack = opts.keepStack || false; this.separator = opts.separator; + if (!opts.emitPartialValues) { + this.emitPartial = () => {}; + } } private shouldEmit(): boolean { @@ -159,12 +164,44 @@ export default class TokenParser { } } + private emitPartial(value?: JsonPrimitive): void { + if (!this.shouldEmit()) return; + + if (this.state === TokenParserState.KEY) { + this.onValue({ + value: undefined, + key: value as JsonKey, + parent: this.value, + stack: this.stack, + partial: true, + }); + return; + } + + this.onValue({ + value: value, + key: this.key, + parent: this.value, + stack: this.stack, + partial: true, + }); + } + public get isEnded(): boolean { return this.state === TokenParserState.ENDED; } - public write({ token, value }: Omit): void { + public write({ + token, + value, + partial, + }: Omit): void { try { + if (partial) { + this.emitPartial(value); + return; + } + if 
(this.state === TokenParserState.VALUE) { if ( token === TokenType.STRING || @@ -199,6 +236,7 @@ export default class TokenParser { this.mode = TokenParserMode.OBJECT; this.state = TokenParserState.KEY; this.key = undefined; + this.emitPartial(); return; } @@ -216,6 +254,7 @@ export default class TokenParser { this.mode = TokenParserMode.ARRAY; this.state = TokenParserState.VALUE; this.key = 0; + this.emitPartial(); return; } @@ -233,6 +272,7 @@ export default class TokenParser { if (token === TokenType.STRING) { this.key = value as string; this.state = TokenParserState.COLON; + this.emitPartial(); return; } diff --git a/packages/plainjs/src/utils/types/parsedElementInfo.ts b/packages/plainjs/src/utils/types/parsedElementInfo.ts index 359c62f..72d8203 100644 --- a/packages/plainjs/src/utils/types/parsedElementInfo.ts +++ b/packages/plainjs/src/utils/types/parsedElementInfo.ts @@ -8,10 +8,11 @@ import type { } from "./jsonTypes.js"; export interface ParsedElementInfo { - value: JsonPrimitive | JsonStruct; + value?: JsonPrimitive | JsonStruct; parent?: JsonStruct; key?: JsonKey; stack: StackElement[]; + partial?: boolean; } export interface ParsedArrayElement extends ParsedElementInfo { diff --git a/packages/plainjs/src/utils/types/parsedTokenInfo.ts b/packages/plainjs/src/utils/types/parsedTokenInfo.ts index f60f7b9..d20a21f 100644 --- a/packages/plainjs/src/utils/types/parsedTokenInfo.ts +++ b/packages/plainjs/src/utils/types/parsedTokenInfo.ts @@ -5,6 +5,7 @@ export interface ParsedTokenInfo { token: TokenType; value: JsonPrimitive; offset: number; + partial?: boolean; } export interface ParsedLeftBraceTokenInfo extends ParsedTokenInfo { diff --git a/packages/plainjs/src/utils/types/tokenType.ts b/packages/plainjs/src/utils/types/tokenType.ts index 88e89d6..4b724de 100644 --- a/packages/plainjs/src/utils/types/tokenType.ts +++ b/packages/plainjs/src/utils/types/tokenType.ts @@ -13,21 +13,4 @@ enum TokenType { SEPARATOR, } -export function 
TokenTypeToString(tokenType: TokenType): string { - return [ - "LEFT_BRACE", - "RIGHT_BRACE", - "LEFT_BRACKET", - "RIGHT_BRACKET", - "COLON", - "COMMA", - "TRUE", - "FALSE", - "NULL", - "STRING", - "NUMBER", - "SEPARATOR", - ][tokenType]; -} - export default TokenType; diff --git a/packages/plainjs/test/callbacks.ts b/packages/plainjs/test/callbacks.ts index 587695e..9a1c659 100644 --- a/packages/plainjs/test/callbacks.ts +++ b/packages/plainjs/test/callbacks.ts @@ -15,7 +15,7 @@ describe("callback", () => { } }); - test("should throw if missing onError callback", () => { + test("should error if missing onError callback", () => { const p = new TokenParser(); p.end(); @@ -45,6 +45,9 @@ describe("callback", () => { /* Do nothing */ }); p.onValue = onValueCb; + p.onToken = () => { + /* Do nothing */ + }; p.write('"test"'); expect(onValueCb.mock.calls).toHaveLength(1); diff --git a/packages/plainjs/test/emitPartial.ts b/packages/plainjs/test/emitPartial.ts new file mode 100644 index 0000000..50143fc --- /dev/null +++ b/packages/plainjs/test/emitPartial.ts @@ -0,0 +1,646 @@ +import TokenType from "../src/utils/types/tokenType.js"; +import JSONParser from "../src/jsonparser.js"; +import Tokenizer from "../src/tokenizer.js"; +import { + TestData, + runJSONParserTest, + runTokenizerTest, +} from "./utils/testRunner.js"; + +describe("Emit Partial", () => { + describe("Tokenizer emit partial tokens", () => { + const emitPartialTokenTestData: TestData[] = [ + { + value: ["tr", "ue"], + expected: [ + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: false }, + ], + }, + { + value: ["t", "ru", "e"], + expected: [ + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: false }, + ], + }, + { + value: ["f", "al", "se"], + expected: [ + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, 
value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: false }, + ], + }, + { + value: ["fal", "se"], + expected: [ + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: false }, + ], + }, + { + value: ["0", ".", "123"], + expected: [ + { token: TokenType.NUMBER, value: 0, partial: true }, + { token: TokenType.NUMBER, value: 0.123, partial: true }, + { token: TokenType.NUMBER, value: 0.123, partial: false }, + ], + }, + { + value: ["n", "u", "l", "l"], + expected: [ + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: false }, + ], + }, + { + value: ["n", "u", "l", "l"], + expected: [ + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: false }, + ], + }, + { + value: "{", + expected: [{ token: TokenType.LEFT_BRACE, value: "{", partial: false }], + }, + { + value: ['{ "fo', "o", '"', ': "', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "fo", partial: true }, + { token: TokenType.STRING, value: "foo", partial: true }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "", partial: true }, + { token: TokenType.STRING, value: "", partial: false }, + ], + }, + { + value: ['{ "foo": "ba', "r", '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "ba", partial: true }, + { token: TokenType.STRING, value: 
"bar", partial: true }, + { token: TokenType.STRING, value: "bar", partial: false }, + ], + }, + { + value: ['{ "foo": "bar"', "}"], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.RIGHT_BRACE, value: "}", partial: false }, + ], + }, + { + value: '{ "foo": "bar" }', + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.RIGHT_BRACE, value: "}", partial: false }, + ], + }, + { + value: [ + '{ "foo": "bar", "ba', + "z", + '": [', + '{ "foo": "bar", "baz": [', + '{ "foo": "bar", "baz": [1', + "2", + "3, ", + ], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "ba", partial: true }, + { token: TokenType.STRING, value: "baz", partial: true }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: 
false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: true }, + { token: TokenType.NUMBER, value: 12, partial: true }, + { token: TokenType.NUMBER, value: 123, partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + ], + }, + { + value: '{ "foo": "bar", "baz": [1]', + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: false }, + { token: TokenType.RIGHT_BRACKET, value: "]", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", ', ' "baz": [1,'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: 
"[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [1,2', "3, 4", "5", "6] }"], + expected: [ + { + type: "complete", + token: TokenType.LEFT_BRACE, + value: "{", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "foo", + partial: false, + }, + { + type: "complete", + token: TokenType.COLON, + value: ":", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "bar", + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "baz", + partial: false, + }, + { + type: "complete", + token: TokenType.COLON, + value: ":", + partial: false, + }, + { + type: "complete", + token: TokenType.LEFT_BRACKET, + value: "[", + partial: false, + }, + { + type: "complete", + token: TokenType.NUMBER, + value: 1, + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { token: TokenType.NUMBER, value: 2, partial: true }, + { + type: "complete", + token: TokenType.NUMBER, + value: 23, + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { token: TokenType.NUMBER, value: 4, partial: true }, + { token: TokenType.NUMBER, value: 45, partial: true }, + { + type: "complete", + token: TokenType.NUMBER, + value: 456, + partial: false, + }, + { + type: "complete", + token: TokenType.RIGHT_BRACKET, + value: "]", + partial: false, + }, + { + type: "complete", + token: TokenType.RIGHT_BRACE, + value: "}", + partial: false, + }, + ], + }, + { + value: ['{ "foo": "bar", "baz"', ": [{"], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { 
token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [{ "a', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "a", partial: true }, + { token: TokenType.STRING, value: "a", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [{ "a": "b', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "a", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "b", partial: true }, + { token: TokenType.STRING, value: "b", partial: false }, + ], + }, + ]; + + 
emitPartialTokenTestData.forEach(({ value, expected }) => { + test(`Tokenizer emit partial tokens: ${value}`, () => { + let i = 0; + runTokenizerTest( + new Tokenizer({ emitPartialTokens: true }), + value, + ({ token, value, partial }) => { + const expectedData = expected[i]; + expect(token).toEqual(expectedData.token); + expect(value).toEqual(expectedData.value); + expect(partial ?? false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); + }); + }); + + describe("TokenParser emit partial values", () => { + const emitPartialValuesTestData: TestData[] = [ + { + value: ['"a', "bc", '"'], + expected: [ + { value: "a", key: undefined, parent: undefined, partial: true }, + { value: "abc", key: undefined, parent: undefined, partial: true }, + { value: "abc", key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["12", ".34"], + expected: [ + { value: 12, key: undefined, parent: undefined, partial: true }, + { value: 12.34, key: undefined, parent: undefined, partial: true }, + ], + }, + { + value: ["[", "]"], + expected: [ + { value: undefined, key: 0, parent: [], partial: true }, + { value: [], key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["[", '"a', "bc", '"', ",", '"def"', "]"], + expected: [ + { value: undefined, key: 0, parent: [], partial: true }, + { value: "a", key: 0, parent: [], partial: true }, + { value: "abc", key: 0, parent: [], partial: true }, + { value: "abc", key: 0, parent: ["abc"], partial: false }, + { value: "def", key: 1, parent: ["abc", "def"], partial: false }, + { + value: ["abc", "def"], + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + { + value: [ + "{", + '"a', + "bc", + '"', + ":", + '"def"', + ",", + '"ghi":', + '"jkl"', + "}", + ], + expected: [ + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "a", parent: {}, partial: true }, + { value: undefined, key: "abc", parent: {}, 
partial: true }, + { value: undefined, key: "abc", parent: {}, partial: true }, + { value: "def", key: "abc", parent: { abc: "def" }, partial: false }, + { + value: undefined, + key: "ghi", + parent: { abc: "def" }, + partial: true, + }, + { + value: "jkl", + key: "ghi", + parent: { abc: "def", ghi: "jkl" }, + partial: false, + }, + { + value: { abc: "def", ghi: "jkl" }, + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + { + value: [ + '{ "foo"', + ": ", + '{ "foo1": "ba', + "r", + '" , "baz', + '": [', + '{ "foo2": "bar2", "baz2": [', + '{ "foo3": "bar3", "baz3": [1', + "2", + "3, ", + "3, 4", + "5", + "6] }", + "] }] }}", + ], + expected: [ + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo", parent: {}, partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo1", parent: {}, partial: true }, + { value: "ba", key: "foo1", parent: {}, partial: true }, + { value: "bar", key: "foo1", parent: {}, partial: true }, + { + value: "bar", + key: "foo1", + parent: { foo1: "bar" }, + partial: false, + }, + { + value: undefined, + key: "baz", + parent: { foo1: "bar" }, + partial: true, + }, + { + value: undefined, + key: "baz", + parent: { foo1: "bar" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo2", parent: {}, partial: true }, + { + value: "bar2", + key: "foo2", + parent: { foo2: "bar2" }, + partial: false, + }, + { + value: undefined, + key: "baz2", + parent: { foo2: "bar2" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo3", parent: {}, partial: true }, + { + value: "bar3", + key: "foo3", + parent: { foo3: "bar3" }, + partial: false, + }, + { + value: undefined, + key: "baz3", + 
parent: { foo3: "bar3" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: 1, key: 0, parent: [], partial: true }, + { value: 12, key: 0, parent: [], partial: true }, + { value: 123, key: 0, parent: [123], partial: false }, + { value: 3, key: 1, parent: [123, 3], partial: false }, + { value: 4, key: 2, parent: [123, 3], partial: true }, + { value: 45, key: 2, parent: [123, 3], partial: true }, + { value: 456, key: 2, parent: [123, 3, 456], partial: false }, + { + value: [123, 3, 456], + key: "baz3", + parent: { foo3: "bar3", baz3: [123, 3, 456] }, + partial: false, + }, + { + value: { foo3: "bar3", baz3: [123, 3, 456] }, + key: 0, + parent: [{ foo3: "bar3", baz3: [123, 3, 456] }], + partial: false, + }, + { + value: [{ foo3: "bar3", baz3: [123, 3, 456] }], + key: "baz2", + parent: { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + partial: false, + }, + { + value: { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + key: 0, + parent: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + partial: false, + }, + { + value: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + key: "baz", + parent: { + foo1: "bar", + baz: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + }, + partial: false, + }, + { + value: { + foo1: "bar", + baz: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + }, + key: "foo", + parent: { + foo: { + foo1: "bar", + baz: [ + { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + ], + }, + }, + partial: false, + }, + { + value: { + foo: { + foo1: "bar", + baz: [ + { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + ], + }, + }, + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + ]; + + emitPartialValuesTestData.forEach(({ value, expected }) => { + test(`TokenParser emit partial values: ${value}`, () => { + 
let i = 0; + runJSONParserTest( + new JSONParser({ emitPartialTokens: true, emitPartialValues: true }), + value, + ({ value, key, parent, partial }) => { + const expectedData = expected[i]; + expect(value).toEqual(expectedData.value); + expect(key).toEqual(expectedData.key); + expect(parent).toEqual(expectedData.parent); + expect(partial ?? false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); + }); + }); + + test("TokenParser emit partial values only if matching paths when paths is present", () => { + const value = ['{ "a"', ": 1,", '"b":', '{ "c":', "1 } }"]; + const expected = [ + { value: undefined, key: "c", parent: {}, partial: true }, + { value: 1, key: "c", parent: { c: 1 }, partial: false }, + ]; + let i = 0; + runJSONParserTest( + new JSONParser({ + paths: ["$.b.c"], + emitPartialTokens: true, + emitPartialValues: true, + }), + value, + ({ value, key, parent, partial }) => { + const expectedData = expected[i]; + expect(value).toEqual(expectedData.value); + expect(key).toEqual(expectedData.key); + expect(parent).toEqual(expectedData.parent); + expect(partial ?? 
false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); +}); diff --git a/packages/plainjs/test/types/numbers.ts b/packages/plainjs/test/types/numbers.ts index 9156932..20f208e 100644 --- a/packages/plainjs/test/types/numbers.ts +++ b/packages/plainjs/test/types/numbers.ts @@ -51,7 +51,7 @@ describe("number", () => { "21e999", ]; - const bufferSizes = [0, 64 * 1024]; + const bufferSizes = [0, 1, 64 * 1024]; bufferSizes.forEach((numberBufferSize) => { values.forEach((stringValue) => { diff --git a/packages/plainjs/test/types/strings.ts b/packages/plainjs/test/types/strings.ts index f3ff0fb..7a40175 100644 --- a/packages/plainjs/test/types/strings.ts +++ b/packages/plainjs/test/types/strings.ts @@ -15,7 +15,7 @@ describe("string", () => { "õ", ]; - const bufferSizes = [0, 64 * 1024]; + const bufferSizes = [0, 1, 64 * 1024]; bufferSizes.forEach((stringBufferSize) => { values.forEach((stringValue) => { @@ -159,12 +159,22 @@ describe("string", () => { ); }); - const invalidValues = ["\n", "\\j", "\\ua", "\\u1*", "\\u12*", "\\u123*"]; + const invalidValues = [ + '"\n"', + '"\\j"', + '"\\ua"', + '"\\u1*"', + '"\\u12*"', + "\\u123*", + '"\0"', + '"\\uG"', + '"\\u000G"', + ]; invalidValues.forEach((value) => { - test("fail on invalid values", async () => { + test(`fail on invalid values ${value}`, async () => { try { - await runJSONParserTest(new JSONParser(), [value]); + await runJSONParserTest(new JSONParser(), value); fail(`Expected to fail on value "${value}"`); } catch (e) { // Expected error diff --git a/packages/plainjs/test/utils/testRunner.ts b/packages/plainjs/test/utils/testRunner.ts index 54c99ff..dee7c01 100644 --- a/packages/plainjs/test/utils/testRunner.ts +++ b/packages/plainjs/test/utils/testRunner.ts @@ -5,7 +5,7 @@ import { ParsedTokenInfo } from "../../src/utils/types/parsedTokenInfo.js"; import { ParsedElementInfo } from "../../src/utils/types/parsedElementInfo.js"; export type TestData = { - value: string | 
Iterable; + value: string | string[] | Iterable; paths?: string[]; expected: any[]; }; diff --git a/packages/whatwg/README.md b/packages/whatwg/README.md index a99759e..40f8628 100644 --- a/packages/whatwg/README.md +++ b/packages/whatwg/README.md @@ -76,6 +76,7 @@ The available options are: stringBufferSize: , // set to 0 to don't buffer. Min valid value is 4. numberBufferSize: , // set to 0 to don't buffer. separator: , // separator between object. For example `\n` for nd-js. + emitPartialTokens: // whether to emit tokens mid-parsing. } ``` @@ -106,6 +107,7 @@ The available options are: paths: , keepStack: , // whether to keep all the properties in the stack separator: , // separator between object. For example `\n` for nd-js. If left empty or set to undefined, the token parser will end after parsing the first object. To parse multiple object without any delimiter just set it to the empty string `''`. + emitPartialValues: , // whether to emit values mid-parsing. } ``` @@ -132,7 +134,6 @@ const tokenParser = new TokenParser(); const jsonParser = tokenizer.pipeTrough(tokenParser); ``` - You can subscribe to the resulting data using the ```javascript @@ -226,6 +227,31 @@ Imagine an endpoint that send a large amount of JSON objects one after the other } ``` +### Stream-parsing a fetch request returning a very long string getting previews of the string +Imagine an endpoint that sends a very long JSON string (`"Once upon a midnight <...>"`). 
+ +```js + import { JSONParser } from '@streamparser/json-whatwg'; + + const parser = new JSONParser({ stringBufferSize: undefined, paths: ['$.*'], keepStack: false, emitPartialValues: true }); + + const response = await fetch('http://example.com/'); + + const reader = response.body.pipeThrough(parser).getReader(); + while(true) { + const { done, value: parsedElementInfo } = await reader.read(); + if (done) break; + + const { value, key, parent, stack, partial } = parsedElementInfo; + if (partial) { + console.log(`Parsing value: ${value}... (still parsing)`); + } else { + console.log(`Value parsed: ${value}`); + } + } +``` + ## License See [LICENSE.md]. diff --git a/packages/whatwg/dist/deno/README.md b/packages/whatwg/dist/deno/README.md index a99759e..40f8628 100644 --- a/packages/whatwg/dist/deno/README.md +++ b/packages/whatwg/dist/deno/README.md @@ -76,6 +76,7 @@ The available options are: stringBufferSize: , // set to 0 to don't buffer. Min valid value is 4. numberBufferSize: , // set to 0 to don't buffer. separator: , // separator between object. For example `\n` for nd-js. + emitPartialTokens: // whether to emit tokens mid-parsing. } ``` @@ -106,6 +107,7 @@ The available options are: paths: , keepStack: , // whether to keep all the properties in the stack separator: , // separator between object. For example `\n` for nd-js. If left empty or set to undefined, the token parser will end after parsing the first object. To parse multiple object without any delimiter just set it to the empty string `''`. + emitPartialValues: , // whether to emit values mid-parsing. 
} ``` @@ -132,7 +134,6 @@ const tokenParser = new TokenParser(); const jsonParser = tokenizer.pipeTrough(tokenParser); ``` - You can subscribe to the resulting data using the ```javascript @@ -226,6 +227,31 @@ Imagine an endpoint that send a large amount of JSON objects one after the other } ``` +### Stream-parsing a fetch request returning a very long string getting previews of the string + +Imagine an endpoint that sends a very long JSON string (`"Once upon a midnight <...>"`). + +```js + import { JSONParser } from '@streamparser/json-whatwg'; + + const parser = new JSONParser({ stringBufferSize: undefined, paths: ['$.*'], keepStack: false, emitPartialValues: true }); + + const response = await fetch('http://example.com/'); + + const reader = response.body.pipeThrough(parser).getReader(); + while(true) { + const { done, value: parsedElementInfo } = await reader.read(); + if (done) break; + + const { value, key, parent, stack, partial } = parsedElementInfo; + if (partial) { + console.log(`Parsing value: ${value}... (still parsing)`); + } else { + console.log(`Value parsed: ${value}`); + } + } +``` + ## License See [LICENSE.md]. 
diff --git a/packages/whatwg/dist/deno/utils.ts b/packages/whatwg/dist/deno/utils.ts index c614650..14395c7 100644 --- a/packages/whatwg/dist/deno/utils.ts +++ b/packages/whatwg/dist/deno/utils.ts @@ -3,8 +3,8 @@ import type { ParsedElementInfo } from "https://deno.land/x/streamparser_json@v0 export function cloneParsedElementInfo( parsedElementInfo: ParsedElementInfo, ): ParsedElementInfo { - const { value, key, parent, stack } = parsedElementInfo; - return { value, key, parent: clone(parent), stack: clone(stack) }; + const { value, key, parent, stack, partial } = parsedElementInfo; + return { value, key, parent: clone(parent), stack: clone(stack), partial }; } function clone(obj: T): T { diff --git a/packages/whatwg/src/utils.ts b/packages/whatwg/src/utils.ts index cb797ea..4a21864 100644 --- a/packages/whatwg/src/utils.ts +++ b/packages/whatwg/src/utils.ts @@ -3,8 +3,8 @@ import type { ParsedElementInfo } from "@streamparser/json/utils/types/parsedEle export function cloneParsedElementInfo( parsedElementInfo: ParsedElementInfo, ): ParsedElementInfo { - const { value, key, parent, stack } = parsedElementInfo; - return { value, key, parent: clone(parent), stack: clone(stack) }; + const { value, key, parent, stack, partial } = parsedElementInfo; + return { value, key, parent: clone(parent), stack: clone(stack), partial }; } function clone(obj: T): T { diff --git a/packages/whatwg/test/emitPartial.ts b/packages/whatwg/test/emitPartial.ts new file mode 100644 index 0000000..49df7cd --- /dev/null +++ b/packages/whatwg/test/emitPartial.ts @@ -0,0 +1,647 @@ +import TokenType from "@streamparser/json/utils/types/tokenType.js"; +import JSONParser from "../src/jsonparser.js"; +import Tokenizer from "../src/tokenizer.js"; +import { + TestData, + runJSONParserTest, + runTokenizerTest, +} from "./utils/testRunner.js"; + +describe("Emit Partial", () => { + describe("Tokenizer emit partial tokens", () => { + const emitPartialTokenTestData: TestData[] = [ + { + value: ["tr", 
"ue"], + expected: [ + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: false }, + ], + }, + { + value: ["t", "ru", "e"], + expected: [ + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: true }, + { token: TokenType.TRUE, value: true, partial: false }, + ], + }, + { + value: ["f", "al", "se"], + expected: [ + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: false }, + ], + }, + { + value: ["fal", "se"], + expected: [ + { token: TokenType.FALSE, value: false, partial: true }, + { token: TokenType.FALSE, value: false, partial: false }, + ], + }, + { + value: ["0", ".", "123"], + expected: [ + { token: TokenType.NUMBER, value: 0, partial: true }, + { token: TokenType.NUMBER, value: 0.123, partial: true }, + { token: TokenType.NUMBER, value: 0.123, partial: false }, + ], + }, + { + value: ["n", "u", "l", "l"], + expected: [ + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: false }, + ], + }, + { + value: ["n", "u", "l", "l"], + expected: [ + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: true }, + { token: TokenType.NULL, value: null, partial: false }, + ], + }, + { + value: "{", + expected: [{ token: TokenType.LEFT_BRACE, value: "{", partial: false }], + }, + { + value: ['{ "fo', "o", '"', ': "', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "fo", partial: true }, + { token: TokenType.STRING, value: "foo", partial: true }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: 
TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "", partial: true }, + { token: TokenType.STRING, value: "", partial: false }, + ], + }, + { + value: ['{ "foo": "ba', "r", '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "ba", partial: true }, + { token: TokenType.STRING, value: "bar", partial: true }, + { token: TokenType.STRING, value: "bar", partial: false }, + ], + }, + { + value: ['{ "foo": "bar"', "}"], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.RIGHT_BRACE, value: "}", partial: false }, + ], + }, + { + value: '{ "foo": "bar" }', + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.RIGHT_BRACE, value: "}", partial: false }, + ], + }, + { + value: [ + '{ "foo": "bar", "ba', + "z", + '": [', + '{ "foo": "bar", "baz": [', + '{ "foo": "bar", "baz": [1', + "2", + "3, ", + ], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "ba", partial: true }, + { token: TokenType.STRING, value: "baz", partial: true }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, 
value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: true }, + { token: TokenType.NUMBER, value: 12, partial: true }, + { token: TokenType.NUMBER, value: 123, partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + ], + }, + { + value: '{ "foo": "bar", "baz": [1]', + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: false }, + { token: TokenType.RIGHT_BRACKET, value: "]", partial: false }, + ], + }, + { + value: ['{ 
"foo": "bar", ', ' "baz": [1,'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.NUMBER, value: 1, partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [1,2', "3, 4", "5", "6] }"], + expected: [ + { + type: "complete", + token: TokenType.LEFT_BRACE, + value: "{", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "foo", + partial: false, + }, + { + type: "complete", + token: TokenType.COLON, + value: ":", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "bar", + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { + type: "complete", + token: TokenType.STRING, + value: "baz", + partial: false, + }, + { + type: "complete", + token: TokenType.COLON, + value: ":", + partial: false, + }, + { + type: "complete", + token: TokenType.LEFT_BRACKET, + value: "[", + partial: false, + }, + { + type: "complete", + token: TokenType.NUMBER, + value: 1, + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { token: TokenType.NUMBER, value: 2, partial: true }, + { + type: "complete", + token: TokenType.NUMBER, + value: 23, + partial: false, + }, + { + type: "complete", + token: TokenType.COMMA, + value: ",", + partial: false, + }, + { token: TokenType.NUMBER, value: 4, partial: true }, + { token: TokenType.NUMBER, value: 45, partial: true }, + { + type: "complete", + 
token: TokenType.NUMBER, + value: 456, + partial: false, + }, + { + type: "complete", + token: TokenType.RIGHT_BRACKET, + value: "]", + partial: false, + }, + { + type: "complete", + token: TokenType.RIGHT_BRACE, + value: "}", + partial: false, + }, + ], + }, + { + value: ['{ "foo": "bar", "baz"', ": [{"], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [{ "a', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "a", partial: true }, + { token: TokenType.STRING, value: "a", partial: false }, + ], + }, + { + value: ['{ "foo": "bar", "baz": [{ "a": "b', '"'], + expected: [ + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "foo", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "bar", partial: false }, + { token: TokenType.COMMA, value: ",", partial: false }, + { 
token: TokenType.STRING, value: "baz", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.LEFT_BRACKET, value: "[", partial: false }, + { token: TokenType.LEFT_BRACE, value: "{", partial: false }, + { token: TokenType.STRING, value: "a", partial: false }, + { token: TokenType.COLON, value: ":", partial: false }, + { token: TokenType.STRING, value: "b", partial: true }, + { token: TokenType.STRING, value: "b", partial: false }, + ], + }, + ]; + + emitPartialTokenTestData.forEach(({ value, expected }) => { + test(`Tokenizer emit partial tokens: ${value}`, async () => { + let i = 0; + await runTokenizerTest( + new Tokenizer({ emitPartialTokens: true }), + value, + ({ token, value, partial }) => { + const expectedData = expected[i]; + expect(token).toEqual(expectedData.token); + expect(value).toEqual(expectedData.value); + expect(partial ?? false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); + }); + }); + + describe("TokenParser emit partial values", () => { + const emitPartialValuesTestData: TestData[] = [ + { + value: ['"a', "bc", '"'], + expected: [ + { value: "a", key: undefined, parent: undefined, partial: true }, + { value: "abc", key: undefined, parent: undefined, partial: true }, + { value: "abc", key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["12", ".34"], + expected: [ + { value: 12, key: undefined, parent: undefined, partial: true }, + { value: 12.34, key: undefined, parent: undefined, partial: true }, + { value: 12.34, key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["[", "]"], + expected: [ + { value: undefined, key: 0, parent: [], partial: true }, + { value: [], key: undefined, parent: undefined, partial: false }, + ], + }, + { + value: ["[", '"a', "bc", '"', ",", '"def"', "]"], + expected: [ + { value: undefined, key: 0, parent: [], partial: true }, + { value: "a", key: 0, parent: [], partial: true }, + 
{ value: "abc", key: 0, parent: [], partial: true }, + { value: "abc", key: 0, parent: ["abc"], partial: false }, + { value: "def", key: 1, parent: ["abc", "def"], partial: false }, + { + value: ["abc", "def"], + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + { + value: [ + "{", + '"a', + "bc", + '"', + ":", + '"def"', + ",", + '"ghi":', + '"jkl"', + "}", + ], + expected: [ + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "a", parent: {}, partial: true }, + { value: undefined, key: "abc", parent: {}, partial: true }, + { value: undefined, key: "abc", parent: {}, partial: true }, + { value: "def", key: "abc", parent: { abc: "def" }, partial: false }, + { + value: undefined, + key: "ghi", + parent: { abc: "def" }, + partial: true, + }, + { + value: "jkl", + key: "ghi", + parent: { abc: "def", ghi: "jkl" }, + partial: false, + }, + { + value: { abc: "def", ghi: "jkl" }, + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + { + value: [ + '{ "foo"', + ": ", + '{ "foo1": "ba', + "r", + '" , "baz', + '": [', + '{ "foo2": "bar2", "baz2": [', + '{ "foo3": "bar3", "baz3": [1', + "2", + "3, ", + "3, 4", + "5", + "6] }", + "] }] }}", + ], + expected: [ + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo", parent: {}, partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo1", parent: {}, partial: true }, + { value: "ba", key: "foo1", parent: {}, partial: true }, + { value: "bar", key: "foo1", parent: {}, partial: true }, + { + value: "bar", + key: "foo1", + parent: { foo1: "bar" }, + partial: false, + }, + { + value: undefined, + key: "baz", + parent: { foo1: "bar" }, + partial: true, + }, + { + value: undefined, + key: "baz", + parent: { foo1: "bar" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: undefined, key: undefined, parent: {}, 
partial: true }, + { value: undefined, key: "foo2", parent: {}, partial: true }, + { + value: "bar2", + key: "foo2", + parent: { foo2: "bar2" }, + partial: false, + }, + { + value: undefined, + key: "baz2", + parent: { foo2: "bar2" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: undefined, key: undefined, parent: {}, partial: true }, + { value: undefined, key: "foo3", parent: {}, partial: true }, + { + value: "bar3", + key: "foo3", + parent: { foo3: "bar3" }, + partial: false, + }, + { + value: undefined, + key: "baz3", + parent: { foo3: "bar3" }, + partial: true, + }, + { value: undefined, key: 0, parent: [], partial: true }, + { value: 1, key: 0, parent: [], partial: true }, + { value: 12, key: 0, parent: [], partial: true }, + { value: 123, key: 0, parent: [123], partial: false }, + { value: 3, key: 1, parent: [123, 3], partial: false }, + { value: 4, key: 2, parent: [123, 3], partial: true }, + { value: 45, key: 2, parent: [123, 3], partial: true }, + { value: 456, key: 2, parent: [123, 3, 456], partial: false }, + { + value: [123, 3, 456], + key: "baz3", + parent: { foo3: "bar3", baz3: [123, 3, 456] }, + partial: false, + }, + { + value: { foo3: "bar3", baz3: [123, 3, 456] }, + key: 0, + parent: [{ foo3: "bar3", baz3: [123, 3, 456] }], + partial: false, + }, + { + value: [{ foo3: "bar3", baz3: [123, 3, 456] }], + key: "baz2", + parent: { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + partial: false, + }, + { + value: { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + key: 0, + parent: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + partial: false, + }, + { + value: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + key: "baz", + parent: { + foo1: "bar", + baz: [ + { foo2: "bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + }, + partial: false, + }, + { + value: { + foo1: "bar", + baz: [ + { foo2: 
"bar2", baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }] }, + ], + }, + key: "foo", + parent: { + foo: { + foo1: "bar", + baz: [ + { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + ], + }, + }, + partial: false, + }, + { + value: { + foo: { + foo1: "bar", + baz: [ + { + foo2: "bar2", + baz2: [{ foo3: "bar3", baz3: [123, 3, 456] }], + }, + ], + }, + }, + key: undefined, + parent: undefined, + partial: false, + }, + ], + }, + ]; + + emitPartialValuesTestData.forEach(({ value, expected }) => { + test(`TokenParser emit partial values: ${value}`, async () => { + let i = 0; + await runJSONParserTest( + new JSONParser({ emitPartialTokens: true, emitPartialValues: true }), + value, + ({ value, key, parent, partial }) => { + const expectedData = expected[i]; + expect(value).toEqual(expectedData.value); + expect(key).toEqual(expectedData.key); + expect(parent).toEqual(expectedData.parent); + expect(partial ?? false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); + }); + }); + + test("TokenParser emit partial values only if matching paths when paths is present", async () => { + const value = ['{ "a"', ": 1,", '"b":', '{ "c":', "1 } }"]; + const expected = [ + { value: undefined, key: "c", parent: {}, partial: true }, + { value: 1, key: "c", parent: { c: 1 }, partial: false }, + ]; + let i = 0; + await runJSONParserTest( + new JSONParser({ + paths: ["$.b.c"], + emitPartialTokens: true, + emitPartialValues: true, + }), + value, + ({ value, key, parent, partial }) => { + const expectedData = expected[i]; + expect(value).toEqual(expectedData.value); + expect(key).toEqual(expectedData.key); + expect(parent).toEqual(expectedData.parent); + expect(partial ?? 
false).toEqual(expectedData.partial); + i += 1; + }, + ); + expect(i).toEqual(expected.length); + }); +}); diff --git a/packages/whatwg/test/types/numbers.ts b/packages/whatwg/test/types/numbers.ts index 9156932..20f208e 100644 --- a/packages/whatwg/test/types/numbers.ts +++ b/packages/whatwg/test/types/numbers.ts @@ -51,7 +51,7 @@ describe("number", () => { "21e999", ]; - const bufferSizes = [0, 64 * 1024]; + const bufferSizes = [0, 1, 64 * 1024]; bufferSizes.forEach((numberBufferSize) => { values.forEach((stringValue) => { diff --git a/packages/whatwg/test/types/strings.ts b/packages/whatwg/test/types/strings.ts index 4430c1d..c349a60 100644 --- a/packages/whatwg/test/types/strings.ts +++ b/packages/whatwg/test/types/strings.ts @@ -15,7 +15,7 @@ describe("string", () => { "õ", ]; - const bufferSizes = [0, 64 * 1024]; + const bufferSizes = [0, 1, 64 * 1024]; bufferSizes.forEach((stringBufferSize) => { values.forEach((stringValue) => { diff --git a/packages/whatwg/test/utils/testRunner.ts b/packages/whatwg/test/utils/testRunner.ts index a1c552b..47d5802 100644 --- a/packages/whatwg/test/utils/testRunner.ts +++ b/packages/whatwg/test/utils/testRunner.ts @@ -5,7 +5,7 @@ import type { ParsedTokenInfo } from "@streamparser/json/utils/types/parsedToken import type { ParsedElementInfo } from "@streamparser/json/utils/types/parsedElementInfo.js"; export type TestData = { - value: string | Iterable; + value: string | string[] | Iterable; paths?: string[]; expected: any[]; };