From d95f86cb360e1b40d999cb1edbd73f00d0fb5846 Mon Sep 17 00:00:00 2001 From: Vincent LE GOFF Date: Mon, 27 May 2019 09:17:48 +0200 Subject: [PATCH 01/11] add csv parse --- encoding/csv.ts | 103 +++++++++++++++++++++++++++++++++++++++ encoding/csv_test.ts | 112 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 214 insertions(+), 1 deletion(-) diff --git a/encoding/csv.ts b/encoding/csv.ts index aa2ceb1cf053..9baa1b8492a1 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -4,6 +4,7 @@ import { BufReader, EOF } from "../io/bufio.ts"; import { TextProtoReader } from "../textproto/mod.ts"; +import { StringReader } from "../io/readers.ts"; const INVALID_RUNE = ["\r", "\n", '"']; @@ -138,3 +139,105 @@ export async function readAll( } return result; } + +/** + * HeaderOption provides the column definition + * and the parse function for each entry of the + * column. + */ +export interface HeaderOption { + name: string; + parse?: (input: string) => unknown; +} + +export interface ParseOption { + header: boolean | string[] | HeaderOption[]; + parse?: (input: unknown) => unknown; +} + +/** + * Csv parse helper to manipulate data. + * Provides an auto/custom mapper for columns and parse function + * for columns and rows. + * @param input Input to parse. Can be a string or BufReader. + * @param opt options of the parser. + * @param [opt.header=false] HeaderOptions + * @param [opt.parse=null] Parse function for rows. 
+ * Example: + * const r = await parseFile('a,b,c\ne,f,g\n', { + * header: ["this", "is", "sparta"], + * parse: (e: Record) => { + * return { super: e.this, street: e.is, fighter: e.sparta }; + * } + * }); + * // output + * [ + * { super: "a", street: "b", fighter: "c" }, + * { super: "e", street: "f", fighter: "g" } + * ] + */ +export async function parse( + input: string | BufReader, + opt: ParseOption = { header: false } +): Promise { + let r: string[][]; + let err: BufState; + if (input instanceof BufReader) { + [r, err] = await readAll(input); + } else { + [r, err] = await readAll(new BufReader(new StringReader(input))); + } + if (err) throw err; + if (opt.header) { + let headers: HeaderOption[] = []; + let i = 0; + if (Array.isArray(opt.header)) { + if (typeof opt.header[0] !== "string") { + headers = opt.header as HeaderOption[]; + } else { + const h = opt.header as string[]; + headers = h.map( + (e): HeaderOption => { + return { + name: e + }; + } + ); + } + } else { + headers = r.shift()!.map( + (e): HeaderOption => { + return { + name: e + }; + } + ); + i++; + } + return r.map( + (e): unknown => { + if (e.length !== headers.length) { + throw `Error number of fields line:${i}`; + } + i++; + let out: Record = {}; + for (let j = 0; j < e.length; j++) { + const h = headers[j]; + if (h.parse) { + out[h.name] = h.parse(e[j]); + } else { + out[h.name] = e[j]; + } + } + if (opt.parse) { + return opt.parse(out); + } + return out; + } + ); + } + if (opt.parse) { + return r.map((e: string[]): unknown => opt.parse!(e)); + } + return r; +} diff --git a/encoding/csv_test.ts b/encoding/csv_test.ts index 0cf95b473a7b..9ad2e9477d8e 100644 --- a/encoding/csv_test.ts +++ b/encoding/csv_test.ts @@ -2,7 +2,7 @@ // https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go import { test, runIfMain } from "../testing/mod.ts"; import { assertEquals, assert } from "../testing/asserts.ts"; -import { readAll } from "./csv.ts"; +import { readAll, parse } from 
"./csv.ts"; import { StringReader } from "../io/readers.ts"; import { BufReader } from "../io/bufio.ts"; @@ -468,4 +468,114 @@ for (const t of testCases) { }); } +const parseTestCases = [ + { + name: "simple", + in: "a,b,c", + header: false, + result: [["a", "b", "c"]] + }, + { + name: "simple Bufreader", + in: new BufReader(new StringReader("a,b,c")), + header: false, + result: [["a", "b", "c"]] + }, + { + name: "multiline", + in: "a,b,c\ne,f,g\n", + header: false, + result: [["a", "b", "c"], ["e", "f", "g"]] + }, + { + name: "header mapping boolean", + in: "a,b,c\ne,f,g\n", + header: true, + result: [{ a: "e", b: "f", c: "g" }] + }, + { + name: "header mapping array", + in: "a,b,c\ne,f,g\n", + header: ["this", "is", "sparta"], + result: [ + { this: "a", is: "b", sparta: "c" }, + { this: "e", is: "f", sparta: "g" } + ] + }, + { + name: "header mapping object", + in: "a,b,c\ne,f,g\n", + header: [{ name: "this" }, { name: "is" }, { name: "sparta" }], + result: [ + { this: "a", is: "b", sparta: "c" }, + { this: "e", is: "f", sparta: "g" } + ] + }, + { + name: "header mapping parse entry", + in: "a,b,c\ne,f,g\n", + header: [ + { + name: "this", + parse: (e: string): string => { + return `b${e}$$`; + } + }, + { + name: "is", + parse: (e: string): number => { + return e.length; + } + }, + { + name: "sparta", + parse: (e: string): unknown => { + return { bim: `boom-${e}` }; + } + } + ], + result: [ + { this: "ba$$", is: 1, sparta: { bim: `boom-c` } }, + { this: "be$$", is: 1, sparta: { bim: `boom-g` } } + ] + }, + { + name: "multiline parse", + in: "a,b,c\ne,f,g\n", + parse: (e: string[]): unknown => { + return { super: e[0], street: e[1], fighter: e[2] }; + }, + header: false, + result: [ + { super: "a", street: "b", fighter: "c" }, + { super: "e", street: "f", fighter: "g" } + ] + }, + { + name: "header mapping object parseline", + in: "a,b,c\ne,f,g\n", + header: [{ name: "this" }, { name: "is" }, { name: "sparta" }], + parse: (e: Record): unknown => { + return { 
super: e.this, street: e.is, fighter: e.sparta }; + }, + result: [ + { super: "a", street: "b", fighter: "c" }, + { super: "e", street: "f", fighter: "g" } + ] + } +]; + +for (const testCase of parseTestCases) { + test({ + name: `[CSV] Parse ${testCase.name}`, + async fn(): Promise { + const r = await parse(testCase.in, { + header: testCase.header, + parse: testCase.parse + }); + assertEquals(r, testCase.result); + } + }); +} + runIfMain(import.meta); From bb29e4aa2815457e8321e27b0c760d296ea613b7 Mon Sep 17 00:00:00 2001 From: Vincent LE GOFF Date: Mon, 27 May 2019 14:10:55 +0200 Subject: [PATCH 02/11] fix parsing options --- encoding/csv.ts | 14 +++++++++----- encoding/csv_test.ts | 4 +++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/encoding/csv.ts b/encoding/csv.ts index 9baa1b8492a1..9718e24f307f 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -29,7 +29,7 @@ export interface ParseOptions { function chkOptions(opt: ParseOptions): void { if ( INVALID_RUNE.includes(opt.comma) || - (opt.comment && INVALID_RUNE.includes(opt.comment)) || + INVALID_RUNE.includes(opt.comment!) 
|| opt.comma === opt.comment ) { throw new Error("Invalid Delimiter"); @@ -150,7 +150,7 @@ export interface HeaderOption { parse?: (input: string) => unknown; } -export interface ParseOption { +export interface ExtendedParseOptions extends ParseOptions { header: boolean | string[] | HeaderOption[]; parse?: (input: unknown) => unknown; } @@ -178,14 +178,18 @@ export interface ParseOption { */ export async function parse( input: string | BufReader, - opt: ParseOption = { header: false } + opt: ExtendedParseOptions = { + header: false, + comma: ",", + trimLeadingSpace: false + } ): Promise { let r: string[][]; let err: BufState; if (input instanceof BufReader) { - [r, err] = await readAll(input); + [r, err] = await readAll(input, opt); } else { - [r, err] = await readAll(new BufReader(new StringReader(input))); + [r, err] = await readAll(new BufReader(new StringReader(input)), opt); } if (err) throw err; if (opt.header) { diff --git a/encoding/csv_test.ts b/encoding/csv_test.ts index 9ad2e9477d8e..3a54011aabbd 100644 --- a/encoding/csv_test.ts +++ b/encoding/csv_test.ts @@ -571,7 +571,9 @@ for (const testCase of parseTestCases) { async fn(): Promise { const r = await parse(testCase.in, { header: testCase.header, - parse: testCase.parse + parse: testCase.parse, + comma: ",", + trimLeadingSpace: false }); assertEquals(r, testCase.result); } From e05684a9dec9297e50122ab725ff4babadeb4cca Mon Sep 17 00:00:00 2001 From: Vincent LE GOFF Date: Mon, 27 May 2019 14:54:39 +0200 Subject: [PATCH 03/11] fix parameters --- encoding/csv.ts | 12 +++++------- encoding/csv_test.ts | 4 +--- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/encoding/csv.ts b/encoding/csv.ts index 9718e24f307f..4d354c5ed3ef 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -19,16 +19,16 @@ export class ParseError extends Error { } export interface ParseOptions { - comma: string; + comma?: string; comment?: string; - trimLeadingSpace: boolean; + trimLeadingSpace?: boolean; lazyQuotes?: 
boolean; fieldsPerRecord?: number; } function chkOptions(opt: ParseOptions): void { if ( - INVALID_RUNE.includes(opt.comma) || + INVALID_RUNE.includes(opt.comma!) || INVALID_RUNE.includes(opt.comment!) || opt.comma === opt.comment ) { @@ -69,7 +69,7 @@ export async function read( return []; } - result = line.split(opt.comma); + result = line.split(opt.comma!); let quoteError = false; result = result.map( @@ -179,9 +179,7 @@ export interface ExtendedParseOptions extends ParseOptions { export async function parse( input: string | BufReader, opt: ExtendedParseOptions = { - header: false, - comma: ",", - trimLeadingSpace: false + header: false } ): Promise { let r: string[][]; diff --git a/encoding/csv_test.ts b/encoding/csv_test.ts index 3a54011aabbd..9ad2e9477d8e 100644 --- a/encoding/csv_test.ts +++ b/encoding/csv_test.ts @@ -571,9 +571,7 @@ for (const testCase of parseTestCases) { async fn(): Promise { const r = await parse(testCase.in, { header: testCase.header, - parse: testCase.parse, - comma: ",", - trimLeadingSpace: false + parse: testCase.parse }); assertEquals(r, testCase.result); } From d5dfafe9818ce2e4f91dda05731403adb6f1a33f Mon Sep 17 00:00:00 2001 From: Vincent LE GOFF Date: Mon, 27 May 2019 15:08:23 +0200 Subject: [PATCH 04/11] doc --- encoding/csv.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/encoding/csv.ts b/encoding/csv.ts index 4d354c5ed3ef..378a2687256f 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -18,6 +18,15 @@ export class ParseError extends Error { } } +/** + * @property comma - Character which separates values. Default: ',' + * @property comment - Character to start a comment. Default: '#' + * @property trimLeadingSpace - Flag to trim the leading space of the value. Default: 'false' + * @property lazyQuotes - Allow unquoted quote in a quoted field or non double + * quoted quotes in quoted field Default: 'false' + * @property fieldsPerRecord - Enabling the check of fields for each row. 
If == 0 + * first row is used as reference for the number of fields. + */ export interface ParseOptions { comma?: string; comment?: string; From cdffd2347a886e40b9234bf68c2ec7a3fd43c6af Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Mon, 27 May 2019 20:11:10 +0200 Subject: [PATCH 05/11] no need to export read --- encoding/csv.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encoding/csv.ts b/encoding/csv.ts index 378a2687256f..462356af5137 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -45,7 +45,7 @@ function chkOptions(opt: ParseOptions): void { } } -export async function read( +async function read( Startline: number, reader: BufReader, opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false } From 09435625f9ae70a0d4dcc16a9465f1ca8c7060be Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Mon, 27 May 2019 20:11:21 +0200 Subject: [PATCH 06/11] readme update --- encoding/README.md | 131 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 116 insertions(+), 15 deletions(-) diff --git a/encoding/README.md b/encoding/README.md index e30d972f3f91..f03e80ba2515 100644 --- a/encoding/README.md +++ b/encoding/README.md @@ -1,11 +1,112 @@ -# TOML +# Encoding + +## CSV + +- **`readAll(reader: BufReader, opt: ParseOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false } ): Promise<[string[][], BufState]>`**: + Read the whole buffer and output the structured CSV data. +- **`parse(csvString: string, opt: ParseOption): Promise`**: + See [parse](#parse) + +### Parse + +Parse the CSV string with the options provided. + +#### Options + +##### ParseOption + +- **`header: boolean | string[] | HeaderOption[];`**: If a boolean is provided, + the first line will be used as Header definitions. If `string[]` or + `HeaderOption[]` + those names will be used for header definition. +- **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which + will be executed after parsing of all columns. 
Therefore if you don't provide + header and parse function with headers, input will be `string[]`. + +##### HeaderOption + +- **`name: string;`**: Name of the header to be used as property. +- **`parse?: (input: string) => unknown;`**: Parse function for the column. + This is executed on each entry of the header. This can be combined with the + Parse function of the rows. + +#### Usage + +```ts +// input: +// a,b,c +// e,f,g + +const r = await parse(input, { + header: false +}); +// output: +// [["a", "b", "c"], ["e", "f", "g"]] + +const r = await parse(input, { + header: true +}); +// output: +// [{ a: "e", b: "f", c: "g" }] + +const r = await parse(input, { + header: ["this", "is", "sparta"] +}); +// output: +// [ +// { this: "a", is: "b", sparta: "c" }, +// { this: "e", is: "f", sparta: "g" } +// ] + +const r = await parse(input, { + header: [ + { + name: "this", + parse: (e: string): string => { + return `b${e}$$`; + } + }, + { + name: "is", + parse: (e: string): number => { + return e.length; + } + }, + { + name: "sparta", + parse: (e: string): unknown => { + return { bim: `boom-${e}` }; + } + } + ] +}); +// output: +// [ +// { this: "ba$$", is: 1, sparta: { bim: `boom-c` } }, +// { this: "be$$", is: 1, sparta: { bim: `boom-g` } } +// ] + +const r = await parse(input, { + header: ["this", "is", "sparta"], + parse: (e: Record) => { + return { super: e.this, street: e.is, fighter: e.sparta }; + } +}); +// output: +// [ +// { super: "a", street: "b", fighter: "c" }, +// { super: "e", street: "f", fighter: "g" } +// ] +``` + +## TOML This module parse TOML files. It follows as much as possible the [TOML specs](https://github.com/toml-lang/toml). Be sure to read the supported types as not every specs is supported at the moment and the handling in TypeScript side is a bit different. 
-## Supported types and handling +### Supported types and handling - :heavy_check_mark: [Keys](https://github.com/toml-lang/toml#string) - :exclamation: [String](https://github.com/toml-lang/toml#string) @@ -27,39 +128,39 @@ TypeScript side is a bit different. :exclamation: _Supported with warnings see [Warning](#Warning)._ -### :warning: Warning +#### :warning: Warning -#### String +##### String - Regex : Due to the spec, there is no flag to detect regex properly in a TOML declaration. So the regex is stored as string. -#### Integer +##### Integer For **Binary** / **Octal** / **Hexadecimal** numbers, they are stored as string to be not interpreted as Decimal. -#### Local Time +##### Local Time Because local time does not exist in JavaScript, the local time is stored as a string. -#### Inline Table +##### Inline Table Inline tables are supported. See below: ```toml animal = { type = { name = "pug" } } -# Output +## Output animal = { type.name = "pug" } -# Output { animal : { type : { name : "pug" } } +## Output { animal : { type : { name : "pug" } } animal.as.leaders = "tosin" -# Output { animal: { as: { leaders: "tosin" } } } +## Output { animal: { as: { leaders: "tosin" } } } "tosin.abasi" = "guitarist" -# Output +## Output "tosin.abasi" : "guitarist" ``` -#### Array of Tables +##### Array of Tables At the moment only simple declarations like below are supported: @@ -89,9 +190,9 @@ will output: } ``` -## Usage +### Usage -### Parse +#### Parse ```ts import { parse } from "./parser.ts"; @@ -103,7 +204,7 @@ const tomlString = 'foo.bar = "Deno"'; const tomlObject22 = parse(tomlString); ``` -### Stringify +#### Stringify ```ts import { stringify } from "./parser.ts"; From 766d481f1bd35f12beff8e43b2f93dda054a29e7 Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Mon, 27 May 2019 20:11:37 +0200 Subject: [PATCH 07/11] update main readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 
a1e7bc3955f9..ac9969627a58 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Here are the dedicated documentations of modules: - [colors](colors/README.md) - [datetime](datetime/README.md) +- [encoding](encoding/README.md) - [examples](examples/README.md) - [flags](flags/README.md) - [fs](fs/README.md) @@ -33,7 +34,6 @@ Here are the dedicated documentations of modules: - [prettier](prettier/README.md) - [strings](strings/README.md) - [testing](testing/README.md) -- [toml](encoding/toml/README.md) - [ws](ws/README.md) ## Contributing From 42215c3a51c053fe726a83c12ed8b862472b82c9 Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Thu, 30 May 2019 09:10:50 +0200 Subject: [PATCH 08/11] rebase --- encoding/csv.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/encoding/csv.ts b/encoding/csv.ts index 462356af5137..1c4ae546b012 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -36,6 +36,8 @@ export interface ParseOptions { } function chkOptions(opt: ParseOptions): void { + if (!opt.comma) opt.comma = ","; + if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false; if ( INVALID_RUNE.includes(opt.comma!) || INVALID_RUNE.includes(opt.comment!) 
|| @@ -48,7 +50,7 @@ function chkOptions(opt: ParseOptions): void { async function read( Startline: number, reader: BufReader, - opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false } + opt: ParseOptions = { comma: ",", trimLeadingSpace: false } ): Promise { const tp = new TextProtoReader(reader); let line: string; @@ -192,13 +194,11 @@ export async function parse( } ): Promise { let r: string[][]; - let err: BufState; if (input instanceof BufReader) { - [r, err] = await readAll(input, opt); + r = await readAll(input, opt); } else { - [r, err] = await readAll(new BufReader(new StringReader(input)), opt); + r = await readAll(new BufReader(new StringReader(input)), opt); } - if (err) throw err; if (opt.header) { let headers: HeaderOption[] = []; let i = 0; From bee8b0630f08256a1e484b0bb78228c924572a6a Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Thu, 30 May 2019 15:12:42 +0200 Subject: [PATCH 09/11] rebase and strict --- encoding/csv_test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encoding/csv_test.ts b/encoding/csv_test.ts index 9ad2e9477d8e..a68b81dc8a2e 100644 --- a/encoding/csv_test.ts +++ b/encoding/csv_test.ts @@ -571,7 +571,7 @@ for (const testCase of parseTestCases) { async fn(): Promise { const r = await parse(testCase.in, { header: testCase.header, - parse: testCase.parse + parse: testCase.parse as (input: unknown) => unknown }); assertEquals(r, testCase.result); } From 84517955f5d90a66471b34b6d897bdc9a85bce2b Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Thu, 30 May 2019 15:20:26 +0200 Subject: [PATCH 10/11] deactivate assertion --- fs/copy_test.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/copy_test.ts b/fs/copy_test.ts index 03d439ec18f1..38c27e15b700 100644 --- a/fs/copy_test.ts +++ b/fs/copy_test.ts @@ -346,8 +346,10 @@ testCopySync( assert(typeof destStatInfo.accessed === "number"); assert(typeof destStatInfo.modified === "number"); - 
assertEquals(destStatInfo.accessed, srcStatInfo.accessed); - assertEquals(destStatInfo.modified, srcStatInfo.modified); + // TODO: Activate test when https://github.com/denoland/deno/issues/2411 + // is fixed + // assertEquals(destStatInfo.accessed, srcStatInfo.accessed); + // assertEquals(destStatInfo.modified, srcStatInfo.modified); } ); From 7f90ed8d605b7e8fe6732198161ae6250b9f8010 Mon Sep 17 00:00:00 2001 From: LE GOFF Vincent Date: Thu, 30 May 2019 15:31:15 +0200 Subject: [PATCH 11/11] review --- fs/copy_test.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/copy_test.ts b/fs/copy_test.ts index 38c27e15b700..03d439ec18f1 100644 --- a/fs/copy_test.ts +++ b/fs/copy_test.ts @@ -346,10 +346,8 @@ testCopySync( assert(typeof destStatInfo.accessed === "number"); assert(typeof destStatInfo.modified === "number"); - // TODO: Activate test when https://github.com/denoland/deno/issues/2411 - // is fixed - // assertEquals(destStatInfo.accessed, srcStatInfo.accessed); - // assertEquals(destStatInfo.modified, srcStatInfo.modified); + assertEquals(destStatInfo.accessed, srcStatInfo.accessed); + assertEquals(destStatInfo.modified, srcStatInfo.modified); } );