This release is 37 versions behind 1.0.6 — the latest version of @std/csv. Jump to latest
@std/csv@0.201.0

denoland/std
Works with
•JSR Score100%•
Downloads19,446/wk
•
Published2 years ago (0.201.0)
Reading and writing of comma-separated values (CSV) files
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

import {
  convertRowToObject,
  ERR_BARE_QUOTE,
  ERR_FIELD_COUNT,
  ERR_INVALID_DELIM,
  ERR_QUOTE,
  ParseError,
  type ParseResult,
  type ReadOptions,
} from "./_io.ts";
import { assert } from "jsr:/@std/assert@^0.201.0/assert";

export {
  ERR_BARE_QUOTE,
  ERR_FIELD_COUNT,
  ERR_INVALID_DELIM,
  ERR_QUOTE,
  ParseError,
  ReadOptions,
};

const BYTE_ORDER_MARK = "\ufeff";

class Parser {
  #input = "";
  #cursor = 0;
  #options: {
    separator: string;
    trimLeadingSpace: boolean;
    comment?: string;
    lazyQuotes?: boolean;
    fieldsPerRecord?: number;
  };
  constructor({
    separator = ",",
    trimLeadingSpace = false,
    comment,
    lazyQuotes,
    fieldsPerRecord,
  }: ReadOptions = {}) {
    this.#options = {
      separator,
      trimLeadingSpace,
      comment,
      lazyQuotes,
      fieldsPerRecord,
    };
  }
  #readLine(): string | null {
    if (this.#isEOF()) return null;

    if (
      !this.#input.startsWith("\r\n", this.#cursor) ||
      !this.#input.startsWith("\n", this.#cursor)
    ) {
      let buffer = "";
      let hadNewline = false;
      while (this.#cursor < this.#input.length) {
        if (this.#input.startsWith("\r\n", this.#cursor)) {
          hadNewline = true;
          this.#cursor += 2;
          break;
        }
        if (
          this.#input.startsWith("\n", this.#cursor)
        ) {
          hadNewline = true;
          this.#cursor += 1;
          break;
        }
        buffer += this.#input[this.#cursor];
        this.#cursor += 1;
      }
      if (!hadNewline && buffer.endsWith("\r")) {
        buffer = buffer.slice(0, -1);
      }

      return buffer;
    }
    return null;
  }
  #isEOF(): boolean {
    return this.#cursor >= this.#input.length;
  }
  #parseRecord(startLine: number): string[] | null {
    let line = this.#readLine();
    if (line === null) return null;
    if (line.length === 0) {
      return [];
    }

    function runeCount(s: string): number {
      // Array.from considers the surrogate pair.
      return Array.from(s).length;
    }

    let lineIndex = startLine + 1;

    // line starting with comment character is ignored
    if (this.#options.comment && line[0] === this.#options.comment) {
      return [];
    }

    let fullLine = line;
    let quoteError: ParseError | null = null;
    const quote = '"';
    const quoteLen = quote.length;
    const separatorLen = this.#options.separator.length;
    let recordBuffer = "";
    const fieldIndexes = [] as number[];
    parseField:
    for (;;) {
      if (this.#options.trimLeadingSpace) {
        line = line.trimStart();
      }

      if (line.length === 0 || !line.startsWith(quote)) {
        // Non-quoted string field
        const i = line.indexOf(this.#options.separator);
        let field = line;
        if (i >= 0) {
          field = field.substring(0, i);
        }
        // Check to make sure a quote does not appear in field.
        if (!this.#options.lazyQuotes) {
          const j = field.indexOf(quote);
          if (j >= 0) {
            const col = runeCount(
              fullLine.slice(0, fullLine.length - line.slice(j).length),
            );
            quoteError = new ParseError(
              startLine + 1,
              lineIndex,
              col,
              ERR_BARE_QUOTE,
            );
            break parseField;
          }
        }
        recordBuffer += field;
        fieldIndexes.push(recordBuffer.length);
        if (i >= 0) {
          line = line.substring(i + separatorLen);
          continue parseField;
        }
        break parseField;
      } else {
        // Quoted string field
        line = line.substring(quoteLen);
        for (;;) {
          const i = line.indexOf(quote);
          if (i >= 0) {
            // Hit next quote.
            recordBuffer += line.substring(0, i);
            line = line.substring(i + quoteLen);
            if (line.startsWith(quote)) {
              // `""` sequence (append quote).
              recordBuffer += quote;
              line = line.substring(quoteLen);
            } else if (line.startsWith(this.#options.separator)) {
              // `","` sequence (end of field).
              line = line.substring(separatorLen);
              fieldIndexes.push(recordBuffer.length);
              continue parseField;
            } else if (0 === line.length) {
              // `"\n` sequence (end of line).
              fieldIndexes.push(recordBuffer.length);
              break parseField;
            } else if (this.#options.lazyQuotes) {
              // `"` sequence (bare quote).
              recordBuffer += quote;
            } else {
              // `"*` sequence (invalid non-escaped quote).
              const col = runeCount(
                fullLine.slice(0, fullLine.length - line.length - quoteLen),
              );
              quoteError = new ParseError(
                startLine + 1,
                lineIndex,
                col,
                ERR_QUOTE,
              );
              break parseField;
            }
          } else if (line.length > 0 || !(this.#isEOF())) {
            // Hit end of line (copy all data so far).
            recordBuffer += line;
            const r = this.#readLine();
            lineIndex++;
            line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
            fullLine = line;
            if (r === null) {
              // Abrupt end of file (EOF or error).
              if (!this.#options.lazyQuotes) {
                const col = runeCount(fullLine);
                quoteError = new ParseError(
                  startLine + 1,
                  lineIndex,
                  col,
                  ERR_QUOTE,
                );
                break parseField;
              }
              fieldIndexes.push(recordBuffer.length);
              break parseField;
            }
            recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
          } else {
            // Abrupt end of file (EOF on error).
            if (!this.#options.lazyQuotes) {
              const col = runeCount(fullLine);
              quoteError = new ParseError(
                startLine + 1,
                lineIndex,
                col,
                ERR_QUOTE,
              );
              break parseField;
            }
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          }
        }
      }
    }
    if (quoteError) {
      throw quoteError;
    }
    const result = [] as string[];
    let preIdx = 0;
    for (const i of fieldIndexes) {
      result.push(recordBuffer.slice(preIdx, i));
      preIdx = i;
    }
    return result;
  }
  parse(input: string): string[][] {
    this.#input = input.startsWith(BYTE_ORDER_MARK) ? input.slice(1) : input;
    this.#cursor = 0;
    const result: string[][] = [];
    let _nbFields: number | undefined;
    let lineResult: string[];
    let first = true;
    let lineIndex = 0;

    const INVALID_RUNE = ["\r", "\n", '"'];

    const options = this.#options;
    if (
      INVALID_RUNE.includes(options.separator) ||
      (typeof options.comment === "string" &&
        INVALID_RUNE.includes(options.comment)) ||
      options.separator === options.comment
    ) {
      throw new Error(ERR_INVALID_DELIM);
    }

    for (;;) {
      const r = this.#parseRecord(lineIndex);
      if (r === null) break;
      lineResult = r;
      lineIndex++;
      // If fieldsPerRecord is 0, Read sets it to
      // the number of fields in the first record
      if (first) {
        first = false;
        if (options.fieldsPerRecord !== undefined) {
          if (options.fieldsPerRecord === 0) {
            _nbFields = lineResult.length;
          } else {
            _nbFields = options.fieldsPerRecord;
          }
        }
      }

      if (lineResult.length > 0) {
        if (_nbFields && _nbFields !== lineResult.length) {
          throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
        }
        result.push(lineResult);
      }
    }
    return result;
  }
}

export interface ParseOptions extends ReadOptions {
  /**
   * If you provide `skipFirstRow: true` and `columns`, the first line will be
   * skipped.
   * If you provide `skipFirstRow: true` but not `columns`, the first line will
   * be skipped and used as header definitions.
   */
  skipFirstRow?: boolean;

  /** List of names used for header definition. */
  columns?: readonly string[];
}

/**
 * Csv parse helper to manipulate data.
 * Provides an auto/custom mapper for columns.
 *
 * @example
 * ```ts
 * import { parse } from "@std/csv/parse";
 * const string = "a,b,c\nd,e,f";
 *
 * console.log(
 *   await parse(string, {
 *     skipFirstRow: false,
 *   }),
 * );
 * // output:
 * // [["a", "b", "c"], ["d", "e", "f"]]
 * ```
 *
 * @param input Input to parse.
 * @param opt options of the parser.
 * @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`.
 *   If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record<string, unknown>[]`.
 */
export function parse(input: string, opt?: undefined): string[][];
export function parse<const T extends ParseOptions>(
  input: string,
  opt: T,
): ParseResult<ParseOptions, T>;
export function parse<const T extends ParseOptions>(
  input: string,
  opt: T = { skipFirstRow: false } as T,
): ParseResult<ParseOptions, T> {
  const parser = new Parser(opt);
  const r = parser.parse(input);

  if (opt.skipFirstRow || opt.columns) {
    let headers: readonly string[] = [];

    if (opt.skipFirstRow) {
      const head = r.shift();
      assert(head !== undefined);
      headers = head;
    }

    if (opt.columns) {
      headers = opt.columns;
    }

    const firstLineIndex = opt.skipFirstRow ? 1 : 0;
    return r.map((row, i) => {
      return convertRowToObject(row, headers, firstLineIndex + i);
    }) as ParseResult<ParseOptions, T>;
  }
  return r as ParseResult<ParseOptions, T>;
}