From 35944e130209ccffea038ca4d41bbb792df2a056 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Thu, 1 Aug 2024 13:28:39 +0900 Subject: [PATCH 01/12] wrap the result in a code block --- csv/mod.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csv/mod.ts b/csv/mod.ts index d24467dee631..5afc62b948fc 100644 --- a/csv/mod.ts +++ b/csv/mod.ts @@ -56,7 +56,9 @@ * * results in * + * ```ts no-assert * [`the "word" is true`, `a "quoted-field"`] + * ``` * * Newlines and commas may be included in a quoted-field * From f75240e756a8cecde9647fc481ca01350775a0ac Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Thu, 1 Aug 2024 16:35:07 +0900 Subject: [PATCH 02/12] fix the wrong default value --- csv/parse.ts | 2 +- csv/parse_stream.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csv/parse.ts b/csv/parse.ts index 857a0703ad39..881a56b37e7f 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -261,7 +261,7 @@ export interface ParseOptions { * are ignored. With leading whitespace the comment character becomes part of * the field, even you provide `trimLeadingSpace: true`. * - * @default {"#"} + * By default, no character is considered to be a start of a comment. */ comment?: string; /** Flag to trim the leading space of the value. diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index 40748b128222..618d1f0d4b86 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -23,7 +23,7 @@ export interface CsvParseStreamOptions { * are ignored. With leading whitespace the comment character becomes part of * the field, even you provide `trimLeadingSpace: true`. * - * @default {"#"} + * By default, no character is considered to be a start of a comment. */ comment?: string; /** Flag to trim the leading space of the value. 
From df4b7497ad4d45ea90b7d2b0c362d363ecda8451 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Thu, 1 Aug 2024 16:35:27 +0900 Subject: [PATCH 03/12] more examples for parse --- csv/parse.ts | 116 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 9 deletions(-) diff --git a/csv/parse.ts b/csv/parse.ts index 881a56b37e7f..571fb6a6266d 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -313,9 +313,23 @@ export interface ParseOptions { * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert"; * - * const string = "a,b,c\nd,e,f"; + * const string = "a,b,c\n#d,e,f"; + * + * assertEquals(parse(string), [["a", "b", "c"], ["#d", "e", "f"]]); + * ``` + * + * @example Quoted fields + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * + * const string = `"a ""word""","comma,","newline\n"\nfoo,bar,baz`; + * const result = parse(string); * - * assertEquals(parse(string), [["a", "b", "c"], ["d", "e", "f"]]); + * assertEquals(result, [ + * ['a "word"', "comma,", "newline\n"], + * ["foo", "bar", "baz"] + * ]); * ``` * * @param input The input to parse. @@ -325,26 +339,110 @@ export function parse(input: string): string[][]; /** * Parses CSV string into an array of objects or an array of arrays of strings. * - * If `column` or `skipFirstRow` option is provided, it returns an array of + * If `columns` or `skipFirstRow` option is provided, it returns an array of * objects, otherwise it returns an array of arrays of string. 
* - * @example Usage + * @example skipFirstRow: false * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert"; + * import { assertType, IsExact } from "@std/testing/types" * * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { skipFirstRow: false }); + * + * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); + * assertType>(true); + * ``` + * + * @example skipFirstRow: true + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { skipFirstRow: true }); + * + * assertEquals(result, [{ a: "d", b: "e", c: "f" }]); + * assertType[]>>(true); + * ``` + * + * @example specify columns + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { columns: ["x", "y", "z"] }); + * + * assertEquals(result, [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]); + * assertType[]>>(true); + * ``` + * + * @example specify columns with skipFirstRow + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { columns: ["x", "y", "z"], skipFirstRow: true }); + * + * assertEquals(result, [{ x: "d", y: "e", z: "f" }]); + * assertType[]>>(true); + * ``` + * + * @example TSV (tab-separated values) + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * + * const string = "a\tb\tc\nd\te\tf"; + * const result = parse(string, { separator: "\t" }); + * + * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); + * ``` + * + * @example trimLeadingSpace: 
true + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * + * const string = " a, b, c\n"; + * const result = parse(string, { trimLeadingSpace: true }); + * + * assertEquals(result, [["a", "b", "c"]]); + * ``` + * + * @example lazyQuotes: true + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * + * const string = `a "word","1"2",a","b`; + * const result = parse(string, { lazyQuotes: true }); + * + * assertEquals(result, [['a "word"', '1"2', 'a"', 'b']]); + * ``` + * + * @example comment + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert"; + * + * const string = "a,b,c\n# THIS IS A COMMENT LINE\nd,e,f"; + * const result = parse(string, { comment: "#" }); * - * assertEquals(parse(string, { skipFirstRow: false }), [["a", "b", "c"], ["d", "e", "f"]]); - * assertEquals(parse(string, { skipFirstRow: true }), [{ a: "d", b: "e", c: "f" }]); - * assertEquals(parse(string, { columns: ["x", "y", "z"] }), [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]); + * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); * ``` * * @typeParam T The options' type for parsing. * @param input The input to parse. * @param options The options for parsing. - * @returns If you don't provide `options.skipFirstRow` and `options.columns`, it returns `string[][]`. - * If you provide `options.skipFirstRow` or `options.columns`, it returns `Record[]`. + * @returns If you don't provide `options.skipFirstRow` or `options.columns`, it returns `string[][]`. + * If you provide `options.skipFirstRow` or `options.columns`, it returns `Record[]`. 
*/ export function parse( input: string, From bfe0837e91961856df54e22238eba1d16a1987a5 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Thu, 1 Aug 2024 17:50:53 +0900 Subject: [PATCH 04/12] link to MDN for SyntaxError --- csv/_io.ts | 3 ++- csv/parse.ts | 4 ++-- csv/parse_stream.ts | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/csv/_io.ts b/csv/_io.ts index 921e9c0d3066..6a7b4c578c56 100644 --- a/csv/_io.ts +++ b/csv/_io.ts @@ -45,7 +45,8 @@ export interface ReadOptions { * If negative, no check is made and records may have a variable number of * fields. * - * If the wrong number of fields is in a row, a `ParseError` is thrown. + * If the wrong number of fields is in a row, a {@linkcode https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SyntaxError | SyntaxError} + * is thrown. */ fieldsPerRecord?: number; } diff --git a/csv/parse.ts b/csv/parse.ts index 571fb6a6266d..b77ee4202334 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -287,8 +287,8 @@ export interface ParseOptions { * If negative, no check is made and records may have a variable number of * fields. * - * If the wrong number of fields is in a row, a {@linkcode SyntaxError} is - * thrown. + * If the wrong number of fields is in a row, a {@linkcode https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SyntaxError | SyntaxError} + * is thrown. */ fieldsPerRecord?: number; /** diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index 618d1f0d4b86..aa7ca6cb812f 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -49,8 +49,8 @@ export interface CsvParseStreamOptions { * If negative, no check is made and records may have a variable number of * fields. * - * If the wrong number of fields is in a row, a {@linkcode ParseError} is - * thrown. + * If the wrong number of fields is in a row, a {@linkcode https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SyntaxError | SyntaxError} + * is thrown. 
*/ fieldsPerRecord?: number; /** From 39a3d1742db43828c61722e162897bdcc3e14c86 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Thu, 1 Aug 2024 17:52:01 +0900 Subject: [PATCH 05/12] more specific import --- csv/parse.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/csv/parse.ts b/csv/parse.ts index b77ee4202334..9a570efb0077 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -311,7 +311,7 @@ export interface ParseOptions { * @example Usage * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * * const string = "a,b,c\n#d,e,f"; * @@ -321,7 +321,7 @@ export interface ParseOptions { * @example Quoted fields * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * * const string = `"a ""word""","comma,","newline\n"\nfoo,bar,baz`; * const result = parse(string); @@ -345,7 +345,7 @@ export function parse(input: string): string[][]; * @example skipFirstRow: false * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * import { assertType, IsExact } from "@std/testing/types" * * const string = "a,b,c\nd,e,f"; @@ -358,7 +358,7 @@ export function parse(input: string): string[][]; * @example skipFirstRow: true * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * import { assertType, IsExact } from "@std/testing/types" * * const string = "a,b,c\nd,e,f"; @@ -371,7 +371,7 @@ export function parse(input: string): string[][]; * @example specify columns * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * import { assertType, IsExact } from 
"@std/testing/types" * * const string = "a,b,c\nd,e,f"; @@ -384,7 +384,7 @@ export function parse(input: string): string[][]; * @example specify columns with skipFirstRow * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * import { assertType, IsExact } from "@std/testing/types" * * const string = "a,b,c\nd,e,f"; @@ -397,7 +397,7 @@ export function parse(input: string): string[][]; * @example TSV (tab-separated values) * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * * const string = "a\tb\tc\nd\te\tf"; * const result = parse(string, { separator: "\t" }); @@ -408,7 +408,7 @@ export function parse(input: string): string[][]; * @example trimLeadingSpace: true * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * * const string = " a, b, c\n"; * const result = parse(string, { trimLeadingSpace: true }); @@ -419,7 +419,7 @@ export function parse(input: string): string[][]; * @example lazyQuotes: true * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * * const string = `a "word","1"2",a","b`; * const result = parse(string, { lazyQuotes: true }); @@ -430,7 +430,7 @@ export function parse(input: string): string[][]; * @example comment * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * * const string = "a,b,c\n# THIS IS A COMMENT LINE\nd,e,f"; * const result = parse(string, { comment: "#" }); From 9aa83cc3e10c7939bf4cef52f285d858e54cff77 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Thu, 1 Aug 2024 18:21:29 +0900 Subject: [PATCH 06/12] wip --- 
csv/parse_stream.ts | 206 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 199 insertions(+), 7 deletions(-) diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index aa7ca6cb812f..5c604f2ffa13 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -108,17 +108,209 @@ export type RowType = T extends undefined ? string[] * A `CsvParseStream` expects input conforming to * {@link https://www.rfc-editor.org/rfc/rfc4180.html | RFC 4180}. * - * @example Usage - * ```ts no-assert + * @example default options + * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" * * const source = ReadableStream.from([ - * "name,age", - * "Alice,34", - * "Bob,24", - * "Charlie,45", + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream()); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["name", "age"], + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * assertType>(true); + * ``` + * + * @example skipFirstRow: true + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const source = ReadableStream.from([ + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ skipFirstRow: true })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); + * assertType[]>>(true); + * ``` + * + * @example specify columns + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const source = ReadableStream.from([ + * 
"Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * columns: ["name", "age"] + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); + * assertType[]>>(true); + * ``` + * + * @example specify columns with skipFirstRow + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * columns: ["name", "age"], + * skipFirstRow: true, + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [{ name: "Bob", age: "24" }]); + * assertType[]>>(true); + * ``` + * + * @example TSV (tab-separated values) + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * "Alice\t34\n", + * "Bob\t24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * separator: "\t", + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * ``` + * + * @example trimLeadingSpace: true + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * " Alice,34\n ", + * "Bob, 24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * trimLeadingSpace: true, + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * ``` + * + * @example Quoted fields + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { 
assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * `"a ""word""","com`, + * `ma,","newline`, + * `\n"\nfoo,bar,b`, + * `az\n`, + * ]); + * const stream = source.pipeThrough(new CsvParseStream()); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ['a "word"', "comma,", "newline\n"], + * ["foo", "bar", "baz"] + * ]); + * ``` + * + * @example lazyQuotes: true + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * `a "word","1"`, + * `2",a","b`, + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * lazyQuotes: true, + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [['a "word"', '1"2', 'a"', 'b']]); + * ``` + * + * @example comment + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "# THIS IS A COMMENT\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * comment: "#", + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * ``` + * + * @example fieldsPerRecord: 0 (infer the number of fields from the first row) + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "Bob,24,CA\n", // Note that this row has more fields than the first row + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * fieldsPerRecord: 0, + * })); + * for await (const row of stream) { + * } + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], * ]); - * const parts = 
source.pipeThrough(new CsvParseStream()); * ``` * * @typeParam T The type of options for the stream. From 1257b55d4dec8a095ba76bab63ceeaac5ed1cb49 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Fri, 2 Aug 2024 00:23:58 +0900 Subject: [PATCH 07/12] fieldsPerRecord example --- csv/parse.ts | 29 ++++++++++++++++++++++++++++- csv/parse_stream.ts | 34 ++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/csv/parse.ts b/csv/parse.ts index 0468c66303db..53f48ab02666 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -246,7 +246,7 @@ class Parser { if (lineResult.length > 0) { if (_nbFields && _nbFields !== lineResult.length) { throw new SyntaxError( - `record on line ${lineIndex}: wrong number of fields`, + `record on line ${lineIndex}: expected ${_nbFields} fields but got ${lineResult.length}`, ); } result.push(lineResult); @@ -446,6 +446,33 @@ export function parse(input: string): string[][]; * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); * ``` * + * @example fieldsPerRecord: 0 (infer the number of fields from the first row) + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertThrows } from "@std/assert/throws"; + * + * // Note that the second row has more fields than the first row + * const string = "a,b\nc,d,e"; + * assertThrows( + * () => parse(string, { fieldsPerRecord: 0 }), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); + * ``` + * + * @example fieldsPerRecord: 2 (enforce the number of fields for each row) + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertThrows } from "@std/assert/throws"; + * + * const string = "a,b\nc,d,e"; + * assertThrows( + * () => parse(string, { fieldsPerRecord: 2 }), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); + * ``` + * * @typeParam T The options' type for parsing. * @param input The input to parse. * @param options The options for parsing. 
diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index b4bd91143a5b..bd7427bc9bbe 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -295,6 +295,7 @@ export type RowType = T extends undefined ? string[] * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; + * import { assertRejects } from "@std/assert/rejects"; * * const source = ReadableStream.from([ * "Alice,34\n", @@ -303,14 +304,35 @@ export type RowType = T extends undefined ? string[] * const stream = source.pipeThrough(new CsvParseStream({ * fieldsPerRecord: 0, * })); - * for await (const row of stream) { - * } - * const result = await Array.fromAsync(stream); + * const reader = stream.getReader(); + * assertEquals(await reader.read(), { done: false, value: ["Alice", "34"] }); + * await assertRejects( + * () => reader.read(), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); + * ``` * - * assertEquals(result, [ - * ["Alice", "34"], - * ["Bob", "24"], + * @example fieldsPerRecord: 2 (enforce the number of fields for each row) + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertRejects } from "@std/assert/rejects"; + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "Bob,24,CA\n", * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * fieldsPerRecord: 2, + * })); + * const reader = stream.getReader(); + * assertEquals(await reader.read(), { done: false, value: ["Alice", "34"] }); + * await assertRejects( + * () => reader.read(), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); * ``` * * @typeParam T The type of options for the stream. 
From 23309d742c558a715c672d3da298ed1f242e2bc5 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Fri, 2 Aug 2024 00:35:44 +0900 Subject: [PATCH 08/12] fix --- csv/parse_test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csv/parse_test.ts b/csv/parse_test.ts index e2b3c74edb13..7c8874e00291 100644 --- a/csv/parse_test.ts +++ b/csv/parse_test.ts @@ -257,7 +257,7 @@ Deno.test({ assertThrows( () => parse(input, { fieldsPerRecord: 0 }), SyntaxError, - "record on line 2: wrong number of fields", + "record on line 2: expected 3 fields but got 2", ); }, }); @@ -268,7 +268,7 @@ Deno.test({ assertThrows( () => parse(input, { fieldsPerRecord: 2 }), SyntaxError, - "record on line 1: wrong number of fields", + "record on line 1: expected 2 fields but got 3", ); }, }); From 02030a4e23f5ce5eb48898fa671e07a94e961b7a Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Fri, 2 Aug 2024 01:10:45 +0900 Subject: [PATCH 09/12] example --- csv/parse_stream.ts | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index bd7427bc9bbe..b69a200bf44d 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -472,20 +472,21 @@ export class CsvParseStream< * The instance's {@linkcode ReadableStream}. 
* * @example Usage - * ```ts no-assert + * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; * * const source = ReadableStream.from([ - * "name,age", - * "Alice,34", - * "Bob,24", - * "Charlie,45", + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", * ]); - * const parseStream = new CsvParseStream(); + * const parseStream = new CsvParseStream({ skipFirstRow: true }); * const parts = source.pipeTo(parseStream.writable); - * for await (const part of parseStream.readable) { - * console.log(part); - * } + * assertEquals(await Array.fromAsync(parseStream.readable), [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); * ``` * * @returns The instance's {@linkcode ReadableStream}. @@ -498,20 +499,21 @@ export class CsvParseStream< * The instance's {@linkcode WritableStream}. * * @example Usage - * ```ts no-assert + * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; * * const source = ReadableStream.from([ - * "name,age", - * "Alice,34", - * "Bob,24", - * "Charlie,45", + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", * ]); - * const parseStream = new CsvParseStream(); + * const parseStream = new CsvParseStream({ skipFirstRow: true }); * const parts = source.pipeTo(parseStream.writable); - * for await (const part of parseStream.readable) { - * console.log(part); - * } + * assertEquals(await Array.fromAsync(parseStream.readable), [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); * ``` * * @returns The instance's {@linkcode WritableStream}. 
From dcdd83fe8f7e80b42cf7d6abc9e11543478e5ff9 Mon Sep 17 00:00:00 2001 From: Asher Gomez Date: Fri, 2 Aug 2024 09:48:45 +1000 Subject: [PATCH 10/12] tweaks --- csv/_io.ts | 4 ++-- csv/parse.ts | 30 ++++++++++++++++-------------- csv/parse_stream.ts | 22 ++++++++++++---------- 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/csv/_io.ts b/csv/_io.ts index adf221d40490..bf3b0c407a82 100644 --- a/csv/_io.ts +++ b/csv/_io.ts @@ -45,8 +45,8 @@ export interface ReadOptions { * If negative, no check is made and records may have a variable number of * fields. * - * If the wrong number of fields is in a row, a {@linkcode https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SyntaxError | SyntaxError} - * is thrown. + * If the wrong number of fields is in a row, a {@linkcode SyntaxError} is + * thrown. */ fieldsPerRecord?: number; } diff --git a/csv/parse.ts b/csv/parse.ts index 53f48ab02666..489ecaff2b46 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -295,8 +295,8 @@ export interface ParseOptions { * If negative, no check is made and records may have a variable number of * fields. * - * If the wrong number of fields is in a row, a {@linkcode https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SyntaxError | SyntaxError} - * is thrown. + * If the wrong number of fields is in a row, a {@linkcode SyntaxError} is + * thrown. */ fieldsPerRecord?: number; /** @@ -350,7 +350,7 @@ export function parse(input: string): string[][]; * If `columns` or `skipFirstRow` option is provided, it returns an array of * objects, otherwise it returns an array of arrays of string. 
* - * @example skipFirstRow: false + * @example Don't skip first row with `skipFirstRow: false` * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -363,7 +363,7 @@ export function parse(input: string): string[][]; * assertType>(true); * ``` * - * @example skipFirstRow: true + * @example Skip first row with `skipFirstRow: true` * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -376,7 +376,7 @@ export function parse(input: string): string[][]; * assertType[]>>(true); * ``` * - * @example specify columns + * @example Specify columns with `columns` option * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -389,7 +389,8 @@ export function parse(input: string): string[][]; * assertType[]>>(true); * ``` * - * @example specify columns with skipFirstRow + * @example Specify columns with `columns` option and skip first row with + * `skipFirstRow: true` * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -402,7 +403,7 @@ export function parse(input: string): string[][]; * assertType[]>>(true); * ``` * - * @example TSV (tab-separated values) + * @example TSV (tab-separated values) with `separator: "\t"` * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -413,7 +414,7 @@ export function parse(input: string): string[][]; * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); * ``` * - * @example trimLeadingSpace: true + * @example Trim leading space with `trimLeadingSpace: true` * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -424,7 +425,7 @@ export function parse(input: string): string[][]; * assertEquals(result, [["a", "b", "c"]]); * ``` * - * @example lazyQuotes: true + * @example Lazy quotes with `lazyQuotes: true` * ```ts * import { parse } 
from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -435,7 +436,7 @@ export function parse(input: string): string[][]; * assertEquals(result, [['a "word"', '1"2', 'a"', 'b']]); * ``` * - * @example comment + * @example Set comment prefix with `comment` option * ```ts * import { parse } from "@std/csv/parse"; * import { assertEquals } from "@std/assert/equals"; @@ -446,7 +447,7 @@ export function parse(input: string): string[][]; * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); * ``` * - * @example fieldsPerRecord: 0 (infer the number of fields from the first row) + * @example Infer the number of fields from the first row with `fieldsPerRecord: 0` * ```ts * import { parse } from "@std/csv/parse"; * import { assertThrows } from "@std/assert/throws"; @@ -460,7 +461,7 @@ export function parse(input: string): string[][]; * ); * ``` * - * @example fieldsPerRecord: 2 (enforce the number of fields for each row) + * @example Enforce the number of fields for each row with `fieldsPerRecord: 2` * ```ts * import { parse } from "@std/csv/parse"; * import { assertThrows } from "@std/assert/throws"; @@ -476,8 +477,9 @@ export function parse(input: string): string[][]; * @typeParam T The options' type for parsing. * @param input The input to parse. * @param options The options for parsing. - * @returns If you don't provide `options.skipFirstRow` or `options.columns`, it returns `string[][]`. - * If you provide `options.skipFirstRow` or `options.columns`, it returns `Record[]`. + * @returns If you don't provide `options.skipFirstRow` or `options.columns`, it + * returns `string[][]`. If you provide `options.skipFirstRow` or + * `options.columns`, it returns `Record[]`. */ export function parse( input: string, diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index b69a200bf44d..5a9b9525081b 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -108,7 +108,7 @@ export type RowType = T extends undefined ? 
string[] * A `CsvParseStream` expects input conforming to * {@link https://www.rfc-editor.org/rfc/rfc4180.html | RFC 4180}. * - * @example default options + * @example Usage with default options * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -130,7 +130,7 @@ export type RowType = T extends undefined ? string[] * assertType>(true); * ``` * - * @example skipFirstRow: true + * @example Skip first row with `skipFirstRow: true` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -151,7 +151,7 @@ export type RowType = T extends undefined ? string[] * assertType[]>>(true); * ``` * - * @example specify columns + * @example Specify columns with `columns` option * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -173,7 +173,8 @@ export type RowType = T extends undefined ? string[] * assertType[]>>(true); * ``` * - * @example specify columns with skipFirstRow + * @example Specify columns with `columns` option and skip first row with + * `skipFirstRow: true` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -193,7 +194,7 @@ export type RowType = T extends undefined ? string[] * assertType[]>>(true); * ``` * - * @example TSV (tab-separated values) + * @example TSV (tab-separated values) with `separator: "\t"` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -213,7 +214,7 @@ export type RowType = T extends undefined ? string[] * ]); * ``` * - * @example trimLeadingSpace: true + * @example Trim leading space with `trimLeadingSpace: true` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -253,7 +254,7 @@ export type RowType = T extends undefined ? 
string[] * ]); * ``` * - * @example lazyQuotes: true + * @example Allow lazy quotes with `lazyQuotes: true` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -270,7 +271,7 @@ export type RowType = T extends undefined ? string[] * assertEquals(result, [['a "word"', '1"2', 'a"', 'b']]); * ``` * - * @example comment + * @example Define comment prefix with `comment` option * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -291,7 +292,8 @@ export type RowType = T extends undefined ? string[] * ]); * ``` * - * @example fieldsPerRecord: 0 (infer the number of fields from the first row) + * @example Infer the number of fields from the first row with + * `fieldsPerRecord: 0` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; @@ -313,7 +315,7 @@ export type RowType = T extends undefined ? 
string[] * ); * ``` * - * @example fieldsPerRecord: 2 (enforce the number of fields for each row) + * @example Enforce the number of fields for each row with `fieldsPerRecord: 2` * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; * import { assertEquals } from "@std/assert/equals"; From a91806b17c0574589b4a7adccfaa8f1e340ead80 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Fri, 2 Aug 2024 12:35:50 +0900 Subject: [PATCH 11/12] show 1-based line number in header and record length mismatch --- csv/_io.ts | 6 +++-- csv/parse.ts | 4 +-- csv/parse_stream_test.ts | 15 ++++++++--- csv/parse_test.ts | 56 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/csv/_io.ts b/csv/_io.ts index bf3b0c407a82..346818ab72a3 100644 --- a/csv/_io.ts +++ b/csv/_io.ts @@ -228,11 +228,13 @@ export function createQuoteErrorMessage( export function convertRowToObject( row: string[], headers: readonly string[], - index: number, + zeroBasedLine: number, ) { if (row.length !== headers.length) { throw new Error( - `Error number of fields line: ${index}\nNumber of fields found: ${headers.length}\nExpected number of fields: ${row.length}`, + `record on line ${ + zeroBasedLine + 1 + } has ${row.length} fields, but the header has ${headers.length} fields`, ); } const out: Record = {}; diff --git a/csv/parse.ts b/csv/parse.ts index 489ecaff2b46..b34912967ddc 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -505,9 +505,9 @@ export function parse( headers = options.columns; } - const firstLineIndex = options.skipFirstRow ? 1 : 0; + const zeroBasedFirstLineIndex = options.skipFirstRow ? 
1 : 0; return r.map((row, i) => { - return convertRowToObject(row, headers, firstLineIndex + i); + return convertRowToObject(row, headers, zeroBasedFirstLineIndex + i); }) as ParseResult; } return r as ParseResult; diff --git a/csv/parse_stream_test.ts b/csv/parse_stream_test.ts index 62fa5f742f9e..6288ec4ff546 100644 --- a/csv/parse_stream_test.ts +++ b/csv/parse_stream_test.ts @@ -350,14 +350,23 @@ x,,, columns: ["foo", "bar", "baz"], }, { - name: "mismatching number of headers and fields", + name: "mismatching number of headers and fields 1", input: "a,b,c\nd,e", skipFirstRow: true, columns: ["foo", "bar", "baz"], error: { klass: Error, - msg: - "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2", + msg: "record on line 2 has 2 fields, but the header has 3 fields", + }, + }, + { + name: "mismatching number of headers and fields 2", + input: "a,b,c\nd,e,,g", + skipFirstRow: true, + columns: ["foo", "bar", "baz"], + error: { + klass: Error, + msg: "record on line 2 has 4 fields, but the header has 3 fields", }, }, { diff --git a/csv/parse_test.ts b/csv/parse_test.ts index 7c8874e00291..ec32de4e1a0f 100644 --- a/csv/parse_test.ts +++ b/csv/parse_test.ts @@ -23,6 +23,7 @@ Deno.test({ ); }, }); + await t.step({ name: "CRLF", fn() { @@ -97,6 +98,42 @@ Deno.test({ }, }); + await t.step({ + name: "BlankField", + fn() { + const input = "a,b,c\nd,,f"; + assertEquals( + parse(input), + [["a", "b", "c"], ["d", "", "f"]], + ); + }, + }); + + await t.step({ + name: "BlankField2", + fn() { + const input = "a,b,c\nd,,f"; + assertEquals( + parse(input, { skipFirstRow: true }), + [{ a: "d", b: "", c: "f" }], + ); + }, + }); + + await t.step({ + name: "BlankField3", + fn() { + const input = "a,b,c\nd,,f"; + assertEquals( + parse(input, { columns: ["one", "two", "three"] }), + [ + { one: "a", two: "b", three: "c" }, + { one: "d", two: "", three: "f" }, + ], + ); + }, + }); + await t.step({ name: "BlankLine", fn() { @@ -783,7 +820,7 @@ c"d,e`; 
}, }); await t.step({ - name: "mismatching number of headers and fields", + name: "mismatching number of headers and fields 1", fn() { const input = "a,b,c\nd,e"; assertThrows( @@ -793,7 +830,22 @@ c"d,e`; columns: ["foo", "bar", "baz"], }), Error, - "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2", + "record on line 2 has 2 fields, but the header has 3 fields", + ); + }, + }); + await t.step({ + name: "mismatching number of headers and fields 2", + fn() { + const input = "a,b,c\nd,e,,g"; + assertThrows( + () => + parse(input, { + skipFirstRow: true, + columns: ["foo", "bar", "baz"], + }), + Error, + "record on line 2 has 4 fields, but the header has 3 fields", ); }, }); From cd43e7836c365a4723ea50be0dd489e00e744d32 Mon Sep 17 00:00:00 2001 From: Yusuke Tanaka Date: Fri, 2 Aug 2024 13:19:51 +0900 Subject: [PATCH 12/12] fix negative fieldsPerRecord in parse --- csv/parse.ts | 31 +++++++++++++++++++++++-------- csv/parse_test.ts | 11 +++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/csv/parse.ts b/csv/parse.ts index b34912967ddc..bc2bed875261 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -208,7 +208,7 @@ class Parser { this.#input = input.startsWith(BYTE_ORDER_MARK) ? input.slice(1) : input; this.#cursor = 0; const result: string[][] = []; - let _nbFields: number | undefined; + let lineResult: string[]; let first = true; let lineIndex = 0; @@ -225,6 +225,25 @@ class Parser { throw new Error("Invalid Delimiter"); } + // The number of fields per record that is either inferred from the first + // row (when options.fieldsPerRecord = 0), or set by the caller (when + // options.fieldsPerRecord > 0). + // + // Each possible variant means the following: + // "ANY": Variable number of fields is allowed. + // "UNINITIALIZED": The first row has not been read yet. Once it's read, the + // number of fields will be set. + // : The number of fields per record that every record must follow. 
+ let _nbFields: "ANY" | "UNINITIALIZED" | number; + if (options.fieldsPerRecord === undefined || options.fieldsPerRecord < 0) { + _nbFields = "ANY"; + } else if (options.fieldsPerRecord === 0) { + _nbFields = "UNINITIALIZED"; + } else { + // TODO: Should we check if it's a valid integer? + _nbFields = options.fieldsPerRecord; + } + while (true) { const r = this.#parseRecord(lineIndex); if (r === null) break; @@ -234,17 +253,13 @@ class Parser { // the number of fields in the first record if (first) { first = false; - if (options.fieldsPerRecord !== undefined) { - if (options.fieldsPerRecord === 0) { - _nbFields = lineResult.length; - } else { - _nbFields = options.fieldsPerRecord; - } + if (_nbFields === "UNINITIALIZED") { + _nbFields = lineResult.length; } } if (lineResult.length > 0) { - if (_nbFields && _nbFields !== lineResult.length) { + if (typeof _nbFields === "number" && _nbFields !== lineResult.length) { throw new SyntaxError( `record on line ${lineIndex}: expected ${_nbFields} fields but got ${lineResult.length}`, ); diff --git a/csv/parse_test.ts b/csv/parse_test.ts index ec32de4e1a0f..f5dae7abca5b 100644 --- a/csv/parse_test.ts +++ b/csv/parse_test.ts @@ -309,6 +309,17 @@ Deno.test({ ); }, }); + await t.step({ + name: "NegativeFieldsPerRecord", + fn() { + const input = `a,b,c\nd,e`; + const output = [ + ["a", "b", "c"], + ["d", "e"], + ]; + assertEquals(parse(input, { fieldsPerRecord: -1 }), output); + }, + }); await t.step({ name: "FieldCount", fn() {