Skip to main content
Home
This release is 32 versions behind 1.0.6 — the latest version of @std/csv. Jump to latest

Reading and writing of comma-separated values (CSV) files

This package works with Cloudflare Workers, Node.js, Deno, Bun, Browsers
This package works with Cloudflare Workers
This package works with Node.js
This package works with Deno
This package works with Bun
This package works with Browsers
JSR Score
100%
Published
2 years ago (0.206.0)
Package root > csv_parse_stream_test.ts
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.

// Tests for CsvParseStream: a TransformStream that turns a stream of CSV text
// chunks into parsed records — string[] rows by default, or objects when the
// skipFirstRow / columns header options are used.
import { CsvParseStream } from "./csv_parse_stream.ts";
import type { CsvParseStreamOptions } from "./csv_parse_stream.ts";
import { ERR_QUOTE, ParseError } from "./_io.ts";
import {
  assert,
  assertEquals,
  assertRejects,
  assertStringIncludes,
} from "jsr:@std/assert@^0.206.0";
import type { AssertTrue, IsExact } from "jsr:/@std/testing@^0.206.0/types";
import { fromFileUrl, join } from "jsr:@std/path@^0.206.0";
import { delay } from "jsr:/@std/async@^0.206.0/delay";

// Directory containing the CSV fixture files used by the file-based tests.
const testdataDir = join(fromFileUrl(import.meta.url), "../testdata");
const encoder = new TextEncoder();

// End-to-end test: pipe a real file's byte stream through a text decoder and
// CsvParseStream, and check every parsed record.
Deno.test({
  name: "[csv/csv_parse_stream] CsvParseStream should work with Deno.File",
  permissions: {
    read: [testdataDir],
  },
  fn: async () => {
    const file = await Deno.open(join(testdataDir, "simple.csv"));
    const readable = file.readable
      .pipeThrough(new TextDecoderStream())
      .pipeThrough(new CsvParseStream());
    const records = [] as Array<Array<string>>;
    for await (const record of readable) {
      records.push(record);
    }
    assertEquals(records, [
      ["id", "name"],
      ["1", "foobar"],
      ["2", "barbaz"],
    ]);
  },
});

// Malformed input: valid rows are emitted first, then reading the row with an
// unterminated quote (`2,"baz`) rejects with a ParseError carrying position
// info (startLine/line/column) and the ERR_QUOTE message.
Deno.test({
  name: "[csv/csv_parse_stream] CsvParseStream with invalid csv",
  fn: async () => {
    const readable = ReadableStream.from([
      encoder.encode("id,name\n"),
      encoder.encode("\n"),
      encoder.encode("1,foo\n"),
      encoder.encode('2,"baz\n'),
    ]).pipeThrough(new TextDecoderStream()).pipeThrough(
      new CsvParseStream(),
    );
    const reader = readable.getReader();
    assertEquals(await reader.read(), { done: false, value: ["id", "name"] });
    assertEquals(await reader.read(), { done: false, value: ["1", "foo"] });
    const error = await assertRejects(() => reader.read());
    assert(error instanceof ParseError);
    assertEquals(error.startLine, 4);
    assertEquals(error.line, 5);
    assertEquals(error.column, 0);
    assertStringIncludes(error.message, ERR_QUOTE);
  },
});

// Table-driven parsing tests. Each case feeds `input` through CsvParseStream
// (with optional separator/comment/skipFirstRow/columns options) and asserts
// either the full list of parsed records (`output`) or a rejection
// (`errorMessage`).
Deno.test({
  name: "[csv/csv_parse_stream] CsvParseStream with various inputs",
  permissions: "none",
  fn: async (t) => {
    // These test cases were originally ported from Go:
    // https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
    // Copyright 2011 The Go Authors. All rights reserved. BSD license.
    // https://github.com/golang/go/blob/master/LICENSE
    const testCases = [
      {
        name: "CRLF",
        input: "a,b\r\nc,d\r\n",
        output: [["a", "b"], ["c", "d"]],
      },
      {
        // A bare \r (not followed by \n) is kept as field data.
        name: "BareCR",
        input: "a,b\rc,d\r\n",
        output: [["a", "b\rc", "d"]],
      },
      {
        name: "NoEOLTest",
        input: "a,b,c",
        output: [["a", "b", "c"]],
      },
      {
        name: "Semicolon",
        input: "a;b;c\n",
        output: [["a", "b", "c"]],
        separator: ";",
      },
      {
        // Quoted fields may contain literal newlines.
        name: "MultiLine",
        input: `"two
line","one line","three
line
field"`,
        output: [["two\nline", "one line", "three\nline\nfield"]],
      },
      {
        // Blank lines between records are skipped, not emitted as rows.
        name: "BlankLine",
        input: "a,b,c\n\nd,e,f\n\n",
        output: [
          ["a", "b", "c"],
          ["d", "e", "f"],
        ],
      },
      {
        // Leading whitespace is preserved in fields.
        name: "LeadingSpace",
        input: " a, b, c\n",
        output: [[" a", " b", " c"]],
      },
      {
        name: "Comment",
        input: "#1,2,3\na,b,c\n#comment",
        output: [["a", "b", "c"]],
        comment: "#",
      },
      {
        // Without the comment option, '#' lines are ordinary data.
        name: "NoComment",
        input: "#1,2,3\na,b,c",
        output: [
          ["#1", "2", "3"],
          ["a", "b", "c"],
        ],
      },
      {
        // Rows with differing field counts are both emitted as-is.
        name: "FieldCount",
        input: "a,b,c\nd,e",
        output: [
          ["a", "b", "c"],
          ["d", "e"],
        ],
      },
      {
        // A trailing comma yields a final empty field.
        name: "TrailingCommaEOF",
        input: "a,b,c,",
        output: [["a", "b", "c", ""]],
      },
      {
        name: "TrailingCommaEOL",
        input: "a,b,c,\n",
        output: [["a", "b", "c", ""]],
      },
      {
        name: "NotTrailingComma3",
        input: "a,b,c, \n",
        output: [["a", "b", "c", " "]],
      },
      {
        // Quoted and unquoted empty fields parse identically.
        name: "CommaFieldTest",
        input: `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`,
        output: [
          ["x", "y", "z", "w"],
          ["x", "y", "z", ""],
          ["x", "y", "", ""],
          ["x", "", "", ""],
          ["", "", "", ""],
          ["x", "y", "z", "w"],
          ["x", "y", "z", ""],
          ["x", "y", "", ""],
          ["x", "", "", ""],
          ["", "", "", ""],
        ],
      },
      {
        // \r\n inside a quoted field is normalized to \n.
        name: "CRLFInQuotedField", // Issue 21201
        input: 'A,"Hello\r\nHi",B\r\n',
        output: [["A", "Hello\nHi", "B"]],
      },
      {
        // Non-UTF-8 bytes pass through untouched.
        name: "BinaryBlobField", // Issue 19410
        input: "x09\x41\xb4\x1c,aktau",
        output: [["x09A\xb4\x1c", "aktau"]],
      },
      {
        // A single trailing \r before EOF is treated as end of record.
        name: "TrailingCR",
        input: "field1,field2\r",
        output: [["field1", "field2"]],
      },
      {
        name: "QuotedTrailingCR",
        input: '"field"\r',
        output: [["field"]],
      },
      {
        name: "FieldCR",
        input: "field\rfield\r",
        output: [["field\rfield"]],
      },
      {
        name: "FieldCRCR",
        input: "field\r\rfield\r\r",
        output: [["field\r\rfield\r"]],
      },
      {
        name: "FieldCRCRLF",
        input: "field\r\r\nfield\r\r\n",
        output: [["field\r"], ["field\r"]],
      },
      {
        name: "FieldCRCRLFCR",
        input: "field\r\r\n\rfield\r\r\n\r",
        output: [["field\r"], ["\rfield\r"]],
      },
      {
        name: "MultiFieldCRCRLFCRCR",
        input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
        output: [
          ["field1", "field2\r"],
          ["\r\rfield1", "field2\r"],
          ["\r\r", ""],
        ],
      },
      {
        // Multi-byte separator ('€') and comment ('λ') characters.
        name: "NonASCIICommaAndCommentWithQuotes",
        input: 'a€" b,"€ c\nλ comment\n',
        output: [["a", " b,", " c"]],
        separator: "€",
        comment: "λ",
      },
      {
        // λ and θ start with the same byte.
        // This tests that the parser doesn't confuse such characters.
        name: "NonASCIICommaConfusion",
        input: '"abθcd"λefθgh',
        output: [["abθcd", "efθgh"]],
        separator: "λ",
        comment: "€",
      },
      {
        name: "NonASCIICommentConfusion",
        input: "λ\nλ\nθ\nλ\n",
        output: [["λ"], ["λ"], ["λ"]],
        comment: "θ",
      },
      {
        name: "QuotedFieldMultipleLF",
        input: '"\n\n\n\n"',
        output: [["\n\n\n\n"]],
      },
      {
        // Only line terminators: no records at all.
        name: "MultipleCRLF",
        input: "\r\n\r\n\r\n\r\n",
        output: [],
      },
      {
        // "" inside a quoted field is an escaped double quote.
        name: "DoubleQuoteWithTrailingCRLF",
        input: '"foo""bar"\r\n',
        output: [[`foo"bar`]],
      },
      {
        name: "EvenQuotes",
        input: `""""""""`,
        output: [[`"""`]],
      },
      {
        name: "simple",
        input: "a,b,c",
        output: [["a", "b", "c"]],
        skipFirstRow: false,
      },
      {
        name: "multiline",
        input: "a,b,c\ne,f,g\n",
        output: [
          ["a", "b", "c"],
          ["e", "f", "g"],
        ],
        skipFirstRow: false,
      },
      {
        // skipFirstRow: true uses the first row as object keys.
        name: "header mapping boolean",
        input: "a,b,c\ne,f,g\n",
        output: [{ a: "e", b: "f", c: "g" }],
        skipFirstRow: true,
      },
      {
        // columns supplies explicit keys; the first row is still data.
        name: "header mapping array",
        input: "a,b,c\ne,f,g\n",
        output: [
          { this: "a", is: "b", sparta: "c" },
          { this: "e", is: "f", sparta: "g" },
        ],
        columns: ["this", "is", "sparta"],
      },
      {
        // Both options together: skip the first row AND use explicit keys.
        name: "provides both opts.skipFirstRow and opts.columns",
        input: "a,b,1\nc,d,2\ne,f,3",
        output: [
          { foo: "c", bar: "d", baz: "2" },
          { foo: "e", bar: "f", baz: "3" },
        ],
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
      },
      {
        // Header row has 3 fields but a data row has only 2 → rejection.
        name: "mismatching number of headers and fields",
        input: "a,b,c\nd,e",
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
        errorMessage:
          "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2",
      },
    ];
    for (const testCase of testCases) {
      await t.step(testCase.name, async () => {
        // Build options only from the fields each case actually sets.
        const options: CsvParseStreamOptions = {};
        if (testCase.separator) {
          options.separator = testCase.separator;
        }
        if (testCase.comment) {
          options.comment = testCase.comment;
        }
        if (testCase.skipFirstRow) {
          options.skipFirstRow = testCase.skipFirstRow;
        }
        if (testCase.columns) {
          options.columns = testCase.columns;
        }
        const readable = ReadableStream.from(testCase.input)
          .pipeThrough(new CsvParseStream(options));
        if (testCase.output) {
          const actual = [];
          for await (const record of readable) {
            actual.push(record);
          }
          assertEquals(actual, testCase.output);
        } else {
          // No expected output ⇒ the case must reject while iterating.
          await assertRejects(async () => {
            for await (const _ of readable);
          }, testCase.errorMessage);
        }
      });
    }
  },
});

// Work around resource leak error with TextDecoderStream:
// https://github.com/denoland/deno/issues/13142
export const MyTextDecoderStream = () => {
  const textDecoder = new TextDecoder();
  return new TransformStream({
    transform(chunk: Uint8Array, controller: TransformStreamDefaultController) {
      controller.enqueue(textDecoder.decode(chunk));
    },
    flush(controller: TransformStreamDefaultController) {
      // Flush any bytes the streaming decoder is still buffering.
      controller.enqueue(textDecoder.decode());
    },
  });
};

// Breaking out of the async iteration should cancel the pipeline and close
// the underlying file, so the sanitizer reports no leaked resource.
Deno.test({
  name:
    "[csv/csv_parse_stream] cancel CsvParseStream during iteration does not leak file",
  permissions: { read: [testdataDir] },
  fn: async () => {
    const file = await Deno.open(join(testdataDir, "large.csv"));
    const readable = file.readable.pipeThrough(MyTextDecoderStream())
      .pipeThrough(new CsvParseStream());
    for await (const _record of readable) {
      break;
    }
    // FIXME(kt3k): Remove this delay.
    await delay(100);
  },
});

// Compile-time tests: each block pins the exact element type of `readable`
// for a given option combination, via AssertTrue<IsExact<...>>.
Deno.test({
  name: "[csv/csv_parse_stream] correct typing",
  fn() {
    // If no option is passed, defaults to ReadableStream<string[]>.
    {
      const { readable } = new CsvParseStream();
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream(undefined);
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      // `skipFirstRow` may be `true` or `false`.
      // `columns` may be `undefined` or `string[]`.
      // If you don't know exactly what the value of the option is,
      // the return type is ReadableStream<string[] | Record<string, string | undefined>>
      const options: CsvParseStreamOptions = {};
      const { readable } = new CsvParseStream(options);
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<string[] | Record<string, string | undefined>>
        >
      >;
    }
    {
      const { readable } = new CsvParseStream({});
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    // skipFirstRow option
    {
      const { readable } = new CsvParseStream({ skipFirstRow: undefined });
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({ skipFirstRow: false });
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({ skipFirstRow: true });
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<Record<string, string | undefined>>
        >
      >;
    }
    // columns option
    {
      const { readable } = new CsvParseStream({ columns: undefined });
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      // A literal tuple of column names narrows the record keys.
      const { readable } = new CsvParseStream({ columns: ["aaa", "bbb"] });
      type _ = AssertTrue<
        IsExact<typeof readable, ReadableStream<Record<"aaa" | "bbb", string>>>
      >;
    }
    {
      // Widened string[] columns fall back to a generic string record.
      const { readable } = new CsvParseStream({ columns: ["aaa"] as string[] });
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<Record<string, string | undefined>>
        >
      >;
    }
    // skipFirstRow option + columns option
    {
      const { readable } = new CsvParseStream({
        skipFirstRow: false,
        columns: undefined,
      });
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({
        skipFirstRow: true,
        columns: undefined,
      });
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<Record<string, string | undefined>>
        >
      >;
    }
    {
      const { readable } = new CsvParseStream({
        skipFirstRow: false,
        columns: ["aaa"],
      });
      type _ = AssertTrue<
        IsExact<typeof readable, ReadableStream<Record<"aaa", string>>>
      >;
    }
    {
      const { readable } = new CsvParseStream({
        skipFirstRow: true,
        columns: ["aaa"],
      });
      type _ = AssertTrue<
        IsExact<typeof readable, ReadableStream<Record<"aaa", string>>>
      >;
    }
  },
});