├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── pull-request.md └── workflows │ ├── lint.yml │ ├── sast.yml │ └── tests.yml ├── .gitignore ├── .husky ├── commit-msg └── pre-commit ├── .prettierrc.json ├── LICENSE ├── README.md ├── benchmark.js ├── bin └── esbuild ├── commitlint.config.cjs ├── docs ├── .nojekyll ├── CNAME ├── README.md ├── docs │ ├── format.md │ ├── migrate │ │ ├── csv.md │ │ └── papaparse.md │ └── parse.md ├── index.html ├── sidebar.md ├── sidebar.png ├── sidebar.sketch └── t-rex.png ├── format.js ├── format.test.js ├── index.js ├── lint-staged.config.js ├── package-lock.json ├── package.json ├── parse-mini.js ├── parse.bench.js ├── parse.js └── parse.test.js /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 
56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 
100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 127 | at [https://www.contributor-covenant.org/translations][translations]. 
128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | In the spirit of Open Source Software, everyone is very welcome to contribute to this repository. Feel free to [raise issues](https://github.com/willfarrell/csv-rex/issues) or to [submit Pull Requests](https://github.com/willfarrell/csv-rex/pulls). 4 | 5 | Before contributing to the project, make sure to have a look at our [Code of Conduct](/.github/CODE_OF_CONDUCT.md). 6 | 7 | 8 | ## Licence 9 | 10 | Licensed under [MIT Licence](LICENSE). Copyright (c) 2022 [will Farrell](https://github.com/willfarrell), and the [csv-rex team](https://github.com/willfarrell/csv-rex/graphs/contributors). 
11 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [willfarrell] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | How to reproduce the behaviour: 15 | 1. Sample code '...' 16 | 2. Input '....' 17 | 3. Unit test '....' 18 | 4. Thrown error 19 | 20 | **Expected behaviour** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Environment (please complete the following information):** 24 | - Node.js: [e.g. 18] 25 | - csv-rex: [e.g. 0.0.0] 26 | - Browser: [e.g. Firefox 100] 27 | 28 | **Additional context** 29 | Add any other context about the problem here. 
30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature request 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/pull-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Pull request 3 | about: Pull request 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | **What does this implement/fix? Explain your changes.** 13 | 14 | **Does this close any currently open issues?** 15 | 16 | **Any relevant logs, error output, etc?** 17 | 18 | **Environment:** 19 | - Node.js: [e.g. 18] 20 | - csv-rex: [e.g. 0.0.0] 21 | - Browser: [e.g. 
Firefox 100] 22 | 23 | **Any other comments?** 24 | 25 | **Todo List:** 26 | - [ ] Feature/Fix fully implemented 27 | - [ ] Added tests 28 | - [ ] Unit tests 29 | - [ ] Benchmark tests (if applicable) 30 | - [ ] Updated relevant documentation 31 | - [ ] Updated relevant examples 32 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '*.*.*' 9 | pull_request: 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | node-version: [18.x] 18 | 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v2 22 | - name: Use Node.js ${{ matrix.node-version }} 23 | uses: actions/setup-node@v2 24 | with: 25 | node-version: ${{ matrix.node-version }} 26 | - name: Use npm 8 27 | run: | 28 | npm install -g npm@8 29 | - name: Install dependencies 30 | run: | 31 | npm install 32 | env: 33 | CI: true 34 | - name: Linting 35 | run: npm run lint 36 | env: 37 | CI: true 38 | -------------------------------------------------------------------------------- /.github/workflows/sast.yml: -------------------------------------------------------------------------------- 1 | name: SAST 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '*.*.*' 9 | pull_request: 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | 15 | permissions: 16 | # required for all workflows 17 | security-events: write 18 | 19 | # only required for workflows in private repositories 20 | actions: read 21 | contents: read 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v3 26 | - name: Initialize CodeQL 27 | uses: github/codeql-action/init@v2 28 | - name: Perform CodeQL Analysis 29 | uses: github/codeql-action/analyze@v2 30 | 
-------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | tags-ignore: 8 | - '*.*.*' 9 | pull_request: 10 | 11 | jobs: 12 | tests: 13 | name: Tests 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | node-version: [18.x] 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v2 23 | - name: Use Node.js ${{ matrix.node-version }} 24 | uses: actions/setup-node@v2 25 | with: 26 | node-version: ${{ matrix.node-version }} 27 | - name: Use npm 8 28 | run: | 29 | npm install --location=global npm@8 30 | - name: Install dependencies 31 | run: | 32 | npm install 33 | env: 34 | CI: true 35 | 36 | - name: Build for Node.js ${{ matrix.node-version }} 37 | run: | 38 | npm run build 39 | 40 | - name: Unit tests 41 | run: | 42 | npm run test 43 | env: 44 | CI: true 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.cjs 2 | *.mjs 3 | *.mjs.map 4 | coverage 5 | *.0x 6 | *.br 7 | 8 | !commitlint.config.cjs 9 | 10 | # Logs 11 | logs 12 | *.log 13 | npm-debug.log* 14 | yarn-debug.log* 15 | yarn-error.log* 16 | lerna-debug.log* 17 | 18 | # Diagnostic reports (https://nodejs.org/api/report.html) 19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 20 | 21 | # Runtime data 22 | pids 23 | *.pid 24 | *.seed 25 | *.pid.lock 26 | 27 | # Directory for instrumented libs generated by jscoverage/JSCover 28 | lib-cov 29 | 30 | # Coverage directory used by tools like istanbul 31 | coverage 32 | *.lcov 33 | 34 | # nyc test coverage 35 | .nyc_output 36 | 37 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 38 | .grunt 39 | 40 | # Bower dependency directory (https://bower.io/) 41 | 
bower_components 42 | 43 | # node-waf configuration 44 | .lock-wscript 45 | 46 | # Compiled binary addons (https://nodejs.org/api/addons.html) 47 | build/Release 48 | 49 | # Dependency directories 50 | node_modules/ 51 | jspm_packages/ 52 | 53 | # TypeScript v1 declaration files 54 | typings/ 55 | 56 | # TypeScript cache 57 | *.tsbuildinfo 58 | 59 | # Optional npm cache directory 60 | .npm 61 | 62 | # Optional eslint cache 63 | .eslintcache 64 | 65 | # Microbundle cache 66 | .rpt2_cache/ 67 | .rts2_cache_cjs/ 68 | .rts2_cache_es/ 69 | .rts2_cache_umd/ 70 | 71 | # Optional REPL history 72 | .node_repl_history 73 | 74 | # Output of 'npm pack' 75 | *.tgz 76 | 77 | # Yarn Integrity file 78 | .yarn-integrity 79 | 80 | # dotenv environment variables file 81 | .env 82 | .env.test 83 | 84 | # parcel-bundler cache (https://parceljs.org/) 85 | .cache 86 | 87 | # Next.js build output 88 | .next 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # Serverless directories 104 | .serverless/ 105 | 106 | # FuseBox cache 107 | .fusebox/ 108 | 109 | # DynamoDB Local files 110 | .dynamodb/ 111 | 112 | # TernJS port file 113 | .tern-port 114 | 115 | # IDE 116 | .idea 117 | *.iml 118 | .nova 119 | 120 | # OS 121 | .DS_Store -------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . "$(dirname "$0")/_/husky.sh" 3 | 4 | npm run commit-msg 5 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . 
"$(dirname "$0")/_/husky.sh" 3 | 4 | npm run pre-commit 5 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "semi": false, 4 | "trailingComma": "none" 5 | } 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 will Farrell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |






3 | 🦖 4 |






5 |

CSV-Rex

6 |

A tiny and fast CSV parser & formatter for JavaScript.

7 |
8 |

9 | 10 | npm version 11 | 12 | 13 | npm install size 14 | 15 | 16 | GitHub Actions CI status badge 17 | 18 |
19 | 20 | Standard Code Style 21 | 22 | 23 | Known Vulnerabilities 24 | 25 | 26 | SAST 27 | 28 | 29 | Core Infrastructure Initiative (CII) Best Practices 30 | 31 |

32 |

33 | See full documentation at https://csv-rex.js.org 34 |

35 |
36 | -------------------------------------------------------------------------------- /benchmark.js: -------------------------------------------------------------------------------- 1 | import parseBenchmarks from './parse.bench.js' 2 | // import formatBenchmarks from './parse.bench.js' 3 | 4 | await parseBenchmarks() 5 | // await formatBenchmarks() 6 | -------------------------------------------------------------------------------- /bin/esbuild: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs index.js --minify --allow-overwrite --outfile=index.cjs 4 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse.js --bundle --minify --allow-overwrite --outfile=parse.cjs 5 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse-mini.js --bundle --minify --allow-overwrite --outfile=parse-mini.cjs 6 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs format.js --bundle --minify --allow-overwrite --outfile=format.cjs 7 | 8 | node_modules/.bin/esbuild --platform=node --format=esm index.js --minify --sourcemap=external --allow-overwrite --outfile=index.mjs 9 | node_modules/.bin/esbuild --platform=node --format=esm parse.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse.mjs 10 | node_modules/.bin/esbuild --platform=node --format=esm parse-mini.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse-mini.mjs 11 | node_modules/.bin/esbuild --platform=node --format=esm format.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=format.mjs 12 | 13 | -------------------------------------------------------------------------------- /commitlint.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: ['@commitlint/config-conventional'] 3 | } 4 | 
-------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/.nojekyll -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | csv-rex.js.org -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 |
2 |






3 | 🦖 4 |






5 |

CSV-Rex

6 |

A tiny and fast CSV parser & formatter for JavaScript.

7 |
8 |

9 | 10 | npm version 11 | 12 | 13 | npm install size 14 | 15 | 16 | GitHub Actions CI status badge 17 | 18 |
19 | 20 | Standard Code Style 21 | 22 | 23 | Known Vulnerabilities 24 | 25 | 26 | SAST 27 | 28 | 29 | Core Infrastructure Initiative (CII) Best Practices 30 | 31 |

32 |
33 | 34 | ## Features 35 | 36 | - Free to use under MIT licence 37 | - Comma-Separated Values (CSV) Files specification compliant ([RFC-4180](https://tools.ietf.org/html/rfc4180)) 38 | - Small bundle size (~1KB compressed = esbuild + minify + br) 39 | - Zero dependencies 40 | - ESM & CJS modules with `.map` files 41 | - NodeJS and WebStream API support via [@datastream/csv](https://github.com/willfarrell/datastream) 42 | - It's just fast. See the [benchmarks](https://github.com/willfarrell/csv-benchmarks). 43 | 44 | ## Why not use `papaparse` or `csv-parse`? 45 | 46 | Both are great libraries, we've used them both in many projects over the years. 47 | 48 | - [`csv-parse`](https://csv.js.org/parse/): Built on top of NodeJS native APIs giving it great stream support. If you want to run it in the browser however, you're going to have to ship a very large polyfill. 49 | - [`papaparse`](https://www.papaparse.com/): Built to be more friendly for browser with an option to run in node as well. Faster than `csv-parse`, but its dadbod and lack of native stream support leaves room for improvement. 50 | 51 | The goal with `csv-rex` is to have a CSV parser and formatter that is as fast as others, reduced bundle size, and have cross-environment stream support. We think we've achieved our goal and hope you enjoy. 
52 | 53 | ## Setup 54 | 55 | ```bash 56 | npm install csv-rex 57 | ``` 58 | 59 | ```javascript 60 | import { parse, format } from 'csv-rex' 61 | 62 | // parse 63 | const linesArray = parse(inputString, {}) 64 | 65 | // format 66 | const csv = format(linesArray, {}) 67 | ``` 68 | -------------------------------------------------------------------------------- /docs/docs/format.md: -------------------------------------------------------------------------------- 1 | # format 2 | 3 | ## Options 4 | 5 | - `header` (`true`): Keys to be used in JSON object for the parsed row 6 | - `true`: Will include header, will use `Object.keys()` for columns 7 | - `[...]`: What columns to include and in what order 8 | - `false`: Will exclude a header line. 9 | - `newlineChar` (`\r\n`): What `newline` character(s) to be used. 10 | - `delimiterChar` (`,`): Characters used to separate fields. 11 | - `quoteChar` (`"`): Character used to wrap fields that need to have special characters within them. 12 | - `escapeChar` (`${quoteChar}`): Character used to escape the `quoteChar`. 13 | - `quoteColumn`: (`undefined`): Array that maps to the headers to indicate what columns need to have quotes. Used to improve performance. 14 | - `true`: Always quote column 15 | - `false`: Never quote column 16 | - `undefined`/`null`/``: Detect if quotes are needed based on contents 17 | - `enqueue` (`(string) => {}`): Function to run on formatted row data. 18 | - `enableReturn` (`true`): Will concat rows into a single string. Set to `false` if handling data within enqueue for performance improvements. 
19 | 20 | ### Array chunk 21 | - `header:[...]` required 22 | 23 | ### Object chunk 24 | 25 | 26 | 27 | ## Examples 28 | 29 | ### Formatting an array of objects to CSV string 30 | 31 | ```javascript 32 | import { format } from 'csv-rex' 33 | 34 | export default (arrayOfObjects) => format(arrayOfObjects, { newlineChar: '\n' }) 35 | ``` 36 | 37 | ### NodeJS Stream 38 | 39 | ```javascript 40 | import { createReadStream } from 'node:fs' 41 | import { pipeline, createReadableStream } from '@datastream/core' 42 | import { csvFormatStream } from '@datastream/csv' 43 | 44 | export default async (filePath, opts = {}) => { 45 | const streams = [ 46 | createReadableStream([ 47 | /*...*/ 48 | ]), 49 | csvFormatStream(opts) 50 | // ... 51 | ] 52 | 53 | const result = await pipeline(streams) 54 | console.log(result.csvErrors) 55 | } 56 | ``` 57 | 58 | ### Web Stream API 59 | 60 | Requires: Chrome v71 , Edge v79, Firefox v102, Safari v14.5, NodeJS v18 (v16 with import). If you want to use WebStreams with node you need to pass `--conditions=webstream` in the cli to force its use. 61 | 62 | ```javascript 63 | import { pipeline } from '@datastream/core' 64 | import { stringReadableStream } from '@datastream/string' 65 | import { csvParseStream } from '@datastream/csv' 66 | 67 | export default async (blob, opts = {}) => { 68 | const streams = [ 69 | stringReadableStream(blob), 70 | csvParseStream() 71 | // ... 72 | ] 73 | 74 | const result = await pipeline(streams) 75 | console.log(result.csvErrors) 76 | } 77 | ``` 78 | 79 | ### WebWorker using a file 80 | 81 | To prevent blocking the main thread it is recommended that CSV parsing is done in a WebWorker, SharedWebWorker, or ServiceWorker instead of the main thread. This example doesn't use streams due to the lack of Firefox stream support mentioned above. 
82 | 83 | ```javascript 84 | /* eslint-env worker */ 85 | import format from 'csv-rex/format' 86 | 87 | const enqueue = ({ data, idx, err }) => { 88 | if (err) { 89 | // handler err 90 | return 91 | } 92 | // handle data 93 | } 94 | 95 | onmessage = async (event) => { 96 | const { file } = event.data 97 | const options = { enqueue } 98 | file.length = file.size // polyfill length 99 | await parse(file, options) 100 | // ... 101 | postMessageEncode() 102 | } 103 | 104 | const postMessageEncode = (str) => { 105 | if (typeof str !== 'string') str = JSON.stringify(str) 106 | const buffer = new TextEncoder().encode(str).buffer 107 | postMessage(buffer, [buffer]) 108 | } 109 | ``` 110 | -------------------------------------------------------------------------------- /docs/docs/migrate/csv.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/docs/migrate/csv.md -------------------------------------------------------------------------------- /docs/docs/migrate/papaparse.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/docs/migrate/papaparse.md -------------------------------------------------------------------------------- /docs/docs/parse.md: -------------------------------------------------------------------------------- 1 | # parse 2 | 3 | ## Options 4 | 5 | - `header` (`true`): Keys to be used in JSON object for the parsed row 6 | - `true`: First row of the `input` contains columns and will need to be parsed out. output object of column value pairs. 7 | - `[...]`: Input doesn't contain columns, pre-assign columns and output object of column value pairs. 8 | - `false`: Input doesn't contain columns, output array of values 9 | - `newlineChar` (`''`): What `newline` character(s) to be used. 
By default will guess from `\r\n`, `\n`, `\r` 10 | - `delimiterChar` (`''`): Characters used to separate fields. Must be length of 1. By default will guess from `,`, `\t`, `|`, `;`, `\x1E`, `\x1F` 11 | - `quoteChar` (`"`): Character used to wrap fields that need to have special characters within them. Must be length of 1 12 | - `escapeChar` (`${quoteChar}`): Character used to escape the `quoteChar`. Must be length of 1 13 | - `enqueue` (`({data, idx, err}) => {}`): Function to run on parsed row data. 14 | - `emptyFieldValue` (`''`): Value to be used instead of an empty string. Can be set to `undefined` to have empty fields not be included. 15 | - `coerceField` (`(field, idx) => field`): Function to apply type/value coercion. 16 | - `commentPrefixValue` (`false`): Lines starting with this value will be ignored (i.e. `#`, `//`). Can be set to `false` if files will never have comments. 17 | - `errorOnEmptyLine` (`true`): When an empty line is encountered. Push row with error when occurs, row ignored otherwise. 18 | - `errorOnComment` (`true`): When a comment is encountered. Push row with error when occurs, row ignored otherwise. 19 | - `errorOnExtraFields` (`true`): When number of headers is less than the number of fields in a row. Push row with error when occurs, row ignored. Set to `false` to have what headers exist be mapped, and passed on. 20 | - `errorOnMissingFields` (`true`): When number of headers is more than the number of fields in a row. Push row with error when occurs, row ignored. Set to `false` to have what fields exist be mapped, and passed on. 21 | - `errorOnFieldMalformed` (`true`): When no closing `quoteChar` is found. Throws parsing error. 22 | - `chunkSize` (`64MB`): Size of chunks to process at once. 23 | - `enableReturn` (`true`): Will concat rows into a single array. Set to `false` if handling data within enqueue for performance improvements. 
24 | 25 | ## Examples 26 | 27 | ### Parsing a CSV formatted string to JSON (`[{...},{...},...]`) 28 | 29 | ```javascript 30 | import { parse } from 'csv-rex' 31 | 32 | const enqueue = ({ idx, data, err }) => { 33 | if (err) { 34 | // handler err 35 | return 36 | } 37 | // modify and/or handle data 38 | } 39 | 40 | export default (csvString) => parse(csvString, { enqueue }) 41 | ``` 42 | 43 | ### NodeJS Stream 44 | 45 | ```javascript 46 | import { createReadStream } from 'node:fs' 47 | import { pipeline } from '@datastream/core' 48 | import { csvParseStream } from '@datastream/csv' 49 | 50 | export default async (filePath, opts = {}) => { 51 | const streams = [ 52 | createReadStream(filePath), 53 | csvParseStream() 54 | // ... 55 | ] 56 | 57 | const result = await pipeline(streams) 58 | console.log(result.csvErrors) 59 | } 60 | ``` 61 | 62 | ### Web Stream API 63 | 64 | Requires: Chrome v71 , Edge v79, Firefox v102, Safari v14.5, NodeJS v18 (v16 with import). If you want to use WebStreams with node you need to pass `--conditions=webstream` in the cli to force its use. 65 | 66 | ```javascript 67 | import { pipeline } from '@datastream/core' 68 | import { stringReadableStream } from '@datastream/string' 69 | import { csvParseStream } from '@datastream/csv' 70 | 71 | export default async (blob, opts = {}) => { 72 | const streams = [ 73 | stringReadableStream(blob), 74 | csvParseStream() 75 | // ... 76 | ] 77 | 78 | const result = await pipeline(streams) 79 | console.log(result.csvErrors) 80 | } 81 | ``` 82 | 83 | ### File from input form in a Browser 84 | 85 | To prevent blocking the main thread it is recommended that CSV parsing is done in a WebWorker, SharedWorker, or ServiceWorker instead of the main thread. 
86 | 87 | ```javascript 88 | /* eslint-env worker */ 89 | import parse from 'csv-rex/parse' 90 | 91 | const enqueue = ({ data, idx, err }) => { 92 | if (err) { 93 | // handler err 94 | return 95 | } 96 | // handle data 97 | } 98 | 99 | onmessage = async (event) => { 100 | const { file } = event.data 101 | const options = { enqueue } 102 | file.length = file.size // polyfill length 103 | await parse(file, options) 104 | // ... 105 | postMessageEncode() 106 | } 107 | 108 | const postMessageEncode = (str) => { 109 | if (typeof str !== 'string') str = JSON.stringify(str) 110 | const buffer = new TextEncoder().encode(str).buffer 111 | postMessage(buffer, [buffer]) 112 | } 113 | ``` 114 | 115 | ```html 116 | 117 | 118 |
123 | 124 |
125 | 149 | 150 | 151 | ``` 152 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | csv-rex - A tiny and fast CSV parser & formatter for JavaScript. 7 | 8 | 12 | 16 | 20 | 21 | 42 | 43 | 44 |
45 | 46 | 55 | 56 | 57 | 59 | 60 | -------------------------------------------------------------------------------- /docs/sidebar.md: -------------------------------------------------------------------------------- 1 | - Getting started 2 | - [Setup](/) 3 | - [Parse](/docs/parse.md) 4 | - [Options](/docs/parse.md) 5 | - [Basic Example](/docs/parse.md#start) 6 | - [Fast Mode Example](/docs/parse.md#start) 7 | - [Stream Example](/docs/parse.md) 8 | - [WebWorker Example](/docs/parse.md) 9 | - [Format](/docs/format.md) 10 | - [Options](/docs/format.md) 11 | - [Basic Example](/docs/format.md) 12 | - [NodeJS Stream Example](/docs/parse.md#nodestream) 13 | - [Web Stream API Example](/docs/parse.md#webstream) 14 | - [WebWorker Example](/docs/parse.md#browser) 15 | - Migrate 16 | - [papaparse](/docs/migrate/papaparse.md) 17 | - [csv](/docs/migrate/csv.md) 18 | -------------------------------------------------------------------------------- /docs/sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/sidebar.png -------------------------------------------------------------------------------- /docs/sidebar.sketch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/sidebar.sketch -------------------------------------------------------------------------------- /docs/t-rex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/t-rex.png -------------------------------------------------------------------------------- /format.js: -------------------------------------------------------------------------------- 1 | export const defaultOptions = { 2 | header: true, // false: don't log out 
header; true: log out header 3 | newlineChar: '\r\n', // undefined: detect newline from file; '\r\n': Windows; '\n': Linux/Mac 4 | delimiterChar: ',', // TODO add in auto detect or function 5 | quoteChar: '"' 6 | // escapeChar: '"' 7 | 8 | // quoteColumn: undefined 9 | } 10 | 11 | export const format = (input, opts = {}) => { 12 | const options = { ...defaultOptions, enqueue: () => {}, ...opts } 13 | options.escapeChar ??= options.quoteChar 14 | const { enableReturn, enqueue } = options 15 | 16 | const isArrayData = Array.isArray(input[0]) 17 | const format = isArrayData ? formatArray : formatObject 18 | if (!isArrayData && options.header === true) { 19 | options.header = Object.keys(input[0]) 20 | } 21 | 22 | let res = options.header !== false ? formatArray(options.header, options) : '' 23 | 24 | for (let i = 0, l = input.length; i < l; i++) { 25 | const data = format(input[i], options) 26 | enqueue(data) 27 | if (enableReturn) { 28 | res += data 29 | } 30 | } 31 | 32 | return enableReturn && res 33 | } 34 | 35 | export const formatArray = (arr, options) => { 36 | let csv = '' 37 | for (let i = 0, l = arr.length; i < l; i++) { 38 | csv += (i ? options.delimiterChar : '') + formatField(arr[i], null, options) 39 | } 40 | return csv + options.newlineChar 41 | } 42 | 43 | export const formatObject = (data, options) => { 44 | let csv = '' 45 | const columns = options.header || Object.keys(data) 46 | for (let i = 0, l = columns.length; i < l; i++) { 47 | csv += 48 | (i ? options.delimiterChar : '') + 49 | formatField(data[columns[i]], options.quoteColumn?.[i], options) 50 | } 51 | return csv + options.newlineChar 52 | } 53 | 54 | export const formatField = ( 55 | field, 56 | needsQuotes, 57 | { quoteChar, escapeChar, delimiterChar, newlineChar } 58 | ) => { 59 | if (field === undefined || field === null || field === '') { 60 | return '' 61 | } 62 | 63 | if (field.constructor === Date) { 64 | return field.toISOString() // JSON.stringify(str).slice(1, 25) faster?? 
65 | } 66 | 67 | field = field.toString() 68 | 69 | // Developer override using options.quotes 70 | if (needsQuotes === false) { 71 | return field 72 | } 73 | 74 | // Test if needs quote 75 | needsQuotes = 76 | needsQuotes || 77 | hasAnyDelimiters(field, [ 78 | delimiterChar, 79 | newlineChar, 80 | quoteChar, 81 | '\ufeff' 82 | ]) || 83 | field[0] === ' ' || 84 | field[field.length - 1] === ' ' 85 | 86 | return needsQuotes 87 | ? quoteChar + 88 | field.replaceAll(quoteChar, escapeChar + quoteChar) + 89 | quoteChar 90 | : field 91 | } 92 | 93 | const hasAnyDelimiters = (field, delimiters) => { 94 | for (const delimiter of delimiters) { 95 | if (field.indexOf(delimiter) > -1) { 96 | return true 97 | } 98 | } 99 | } 100 | 101 | export default format 102 | -------------------------------------------------------------------------------- /format.test.js: -------------------------------------------------------------------------------- 1 | import test from 'node:test' 2 | import { equal } from 'node:assert' 3 | import { format, formatArray, formatObject, formatField } from './format.js' 4 | 5 | const defaultOptions = { 6 | header: true, 7 | escapeChar: '"', 8 | quoteChar: '"', 9 | delimiterChar: ',', 10 | newlineChar: '/n', 11 | 12 | enableReturn: true, 13 | enqueue: () => {} 14 | } 15 | 16 | // *** format() *** // 17 | 18 | test('Should format array of objects w/ header == true', async (t) => { 19 | const field = format([{ a: '1', b: '2' }], { 20 | ...defaultOptions, 21 | header: true 22 | }) 23 | equal(field, 'a,b/n1,2/n') 24 | }) 25 | 26 | test('Should format array of objects w/ header == [...]', async (t) => { 27 | const field = format([{ a: '1', b: '2', c: '3' }], { 28 | ...defaultOptions, 29 | header: ['b', 'a'] 30 | }) 31 | equal(field, 'b,a/n2,1/n') 32 | }) 33 | 34 | test('Should format array of objects w/ header === false', async (t) => { 35 | const field = format([{ a: '1', b: '2' }], { 36 | ...defaultOptions, 37 | header: false 38 | }) 39 | equal(field, '1,2/n') 
40 | }) 41 | 42 | test('Should format array of arrays w/ header == [...]', async (t) => { 43 | const field = format([['1', '2']], { 44 | ...defaultOptions, 45 | header: ['a', 'b'] 46 | }) 47 | equal(field, 'a,b/n1,2/n') 48 | }) 49 | 50 | test('Should format array of arrays w/ header === false', async (t) => { 51 | const field = format([['1', '2']], { 52 | ...defaultOptions, 53 | header: false 54 | }) 55 | equal(field, '1,2/n') 56 | }) 57 | 58 | // *** formatHeader() *** // 59 | test('Should format header', async (t) => { 60 | const field = formatArray(['b', 'a'], { 61 | ...defaultOptions, 62 | header: ['b', 'a'] 63 | }) 64 | equal(field, 'b,a/n') 65 | }) 66 | 67 | // *** formatArray() *** // 68 | test('Should format row array', async (t) => { 69 | const field = formatArray(['1', '2'], { ...defaultOptions, header: false }) 70 | equal(field, '1,2/n') 71 | }) 72 | 73 | // *** formatObject() *** // 74 | test('Should format row object', async (t) => { 75 | const field = formatObject( 76 | { a: '1', b: '2' }, 77 | { ...defaultOptions, header: ['b', 'a'] } 78 | ) 79 | equal(field, '2,1/n') 80 | }) 81 | test('Should format row object w/ quotes', async (t) => { 82 | const field = formatObject( 83 | { a: '1', b: '2' }, 84 | { ...defaultOptions, header: ['b', 'a'], quoteColumn: [true, true] } 85 | ) 86 | equal(field, '"2","1"/n') 87 | }) 88 | test('Should format row object w/o quotes', async (t) => { 89 | const field = formatObject( 90 | { a: '1', b: '2' }, 91 | { ...defaultOptions, header: ['b', 'a'], quoteColumn: [false, false] } 92 | ) 93 | equal(field, '2,1/n') 94 | }) 95 | 96 | // *** formatField() *** // 97 | test('Should format undefined', async (t) => { 98 | const field = formatField(undefined, undefined, defaultOptions) 99 | equal(field, '') 100 | }) 101 | test('Should format null', async (t) => { 102 | const field = formatField(null, undefined, defaultOptions) 103 | equal(field, '') 104 | }) 105 | test('Should format empty string', async (t) => { 106 | const field = 
formatField('', undefined, defaultOptions) 107 | equal(field, '') 108 | }) 109 | test('Should format date', async (t) => { 110 | const field = formatField( 111 | new Date('2000-01-01T00:00:00.000Z'), 112 | undefined, 113 | defaultOptions 114 | ) 115 | equal(field, '2000-01-01T00:00:00.000Z') 116 | }) 117 | test('Should format number', async (t) => { 118 | const field = formatField(0, undefined, defaultOptions) 119 | equal(field, '0') 120 | }) 121 | test('Should format string', async (t) => { 122 | const field = formatField('column', undefined, defaultOptions) 123 | equal(field, 'column') 124 | }) 125 | test('Should format string with delimiter', async (t) => { 126 | const field = formatField('_"_', undefined, defaultOptions) 127 | equal(field, '"_""_"') 128 | }) 129 | test('Should format string with leading space', async (t) => { 130 | const field = formatField(' space', undefined, defaultOptions) 131 | equal(field, '" space"') 132 | }) 133 | test('Should format string with trailing space', async (t) => { 134 | const field = formatField('space ', undefined, defaultOptions) 135 | equal(field, '"space "') 136 | }) 137 | test('Should format w/ quotes', async (t) => { 138 | const field = formatField('column', true, defaultOptions) 139 | equal(field, '"column"') 140 | }) 141 | test('Should format w/o quotes', async (t) => { 142 | const field = formatField('column', false, defaultOptions) 143 | equal(field, 'column') 144 | }) 145 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | // import {TextDecoder} from 'node:util' 2 | // import {defaultOptions, optionDetectNewlineValue} from './options.js' 3 | import csvParse from 'csv-rex/parse' 4 | import csvParseMini from 'csv-rex/parse-mini' 5 | import csvFormat from 'csv-rex/format' 6 | 7 | export const parse = csvParse 8 | export const parseMini = csvParseMini 9 | export const format = csvFormat 10 | 
11 | export default { 12 | parse: csvParse, 13 | parseMini: csvParseMini, 14 | format: csvFormat 15 | } 16 | -------------------------------------------------------------------------------- /lint-staged.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | '**/*.{json,yml}': ['prettier --write'], 3 | '**/*.js': ['prettier --write', 'standard --fix'] 4 | } 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "csv-rex", 3 | "version": "0.7.0", 4 | "description": "A tiny and fast CSV parser for JavaScript.", 5 | "type": "module", 6 | "files": [ 7 | "*.mjs", 8 | "*.cjs", 9 | "*.map" 10 | ], 11 | "main": "index.mjs", 12 | "exports": { 13 | ".": { 14 | "import": { 15 | "default": "./index.mjs" 16 | }, 17 | "require": { 18 | "default": "./index.cjs" 19 | } 20 | }, 21 | "./parse": { 22 | "import": { 23 | "default": "./parse.mjs" 24 | }, 25 | "require": { 26 | "default": "./parse.cjs" 27 | } 28 | }, 29 | "./parse-mini": { 30 | "import": { 31 | "default": "./parse-mini.mjs" 32 | }, 33 | "require": { 34 | "default": "./parse-mini.cjs" 35 | } 36 | }, 37 | "./format": { 38 | "import": { 39 | "default": "./format.mjs" 40 | }, 41 | "require": { 42 | "default": "./format.cjs" 43 | } 44 | } 45 | }, 46 | "scripts": { 47 | "prepare": "husky install", 48 | "commit-msg": "commitlint --config commitlint.config.cjs --edit", 49 | "pre-commit": "lint-staged", 50 | "start": "docsify serve docs", 51 | "lint": "prettier --write *.{js,json} && standard --fix *.js", 52 | "test": "npm run build && c8 node --test", 53 | "prepublishOnly": "npm test", 54 | "build": "./bin/esbuild", 55 | "bench": "npm run build && node parse.bench.js" 56 | }, 57 | "repository": { 58 | "type": "git", 59 | "url": "git+https://willfarrell@github.com/willfarrell/csv-rex.git" 60 | }, 61 | "keywords": [ 62 | "csv", 
63 | "parse", 64 | "format", 65 | "json" 66 | ], 67 | "author": { 68 | "name": "willfarrell", 69 | "url": "https://github.com/willfarrell" 70 | }, 71 | "license": "MIT", 72 | "bugs": { 73 | "url": "https://github.com/willfarrell/csv-rex/issues" 74 | }, 75 | "homepage": "https://github.com/willfarrell/csv-rex#readme", 76 | "dependencies": { 77 | "csv-rex": "0.7.0" 78 | }, 79 | "devDependencies": { 80 | "@commitlint/cli": "^17.0.0", 81 | "@commitlint/config-conventional": "^17.0.0", 82 | "benny": "^3.7.1", 83 | "c8": "^7.11.0", 84 | "docsify-cli": "^4.4.4", 85 | "esbuild": "^0.17.0", 86 | "husky": "^8.0.0", 87 | "lint-staged": "^13.0.0", 88 | "prettier": "^2.0.0", 89 | "sinon": "^15.0.0", 90 | "standard": "^17.0.0" 91 | }, 92 | "funding": { 93 | "type": "github", 94 | "url": "https://github.com/sponsors/willfarrell" 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /parse-mini.js: -------------------------------------------------------------------------------- 1 | // chunkSize >> largest expected row 2 | const defaultOptions = { 3 | header: true, // false: return array; true: detect headers and return json; [...]: use defined headers and return json 4 | newlineChar: '\r\n', // '': detect newline from chunk; '\r\n': Windows; '\n': Linux/Mac 5 | delimiterChar: ',', // '': detect delimiter from chunk 6 | // quoteChar: '"', 7 | // escapeChar: '"', // default: `quoteChar` 8 | 9 | // Parse 10 | emptyFieldValue: '', 11 | coerceField: (field) => field, // TODO tests 12 | // commentPrefixValue: false, // falsy: disable, '//': enabled 13 | // errorOnComment: true, 14 | // errorOnEmptyLine: true, 15 | errorOnFieldsMismatch: true 16 | // errorOnFieldMalformed: true 17 | } 18 | 19 | const length = (value) => value.length 20 | 21 | export const parse = (opts = {}) => { 22 | const options = { ...defaultOptions, ...opts } 23 | options.escapeChar ??= options.quoteChar 24 | 25 | let { header, newlineChar, delimiterChar } = options 26 | 
let headerLength = length(header) 27 | const { 28 | // quoteChar, 29 | // escapeChar, 30 | // commentPrefixValue, 31 | emptyFieldValue, 32 | coerceField, 33 | // errorOnEmptyLine, 34 | // errorOnComment, 35 | errorOnFieldsMismatch 36 | // errorOnFieldMalformed 37 | } = options 38 | 39 | let chunk, enqueue 40 | let partialLine = '' 41 | let idx = 0 42 | const enqueueRow = (row) => { 43 | let data = row 44 | idx += 1 45 | if (headerLength) { 46 | const rowLength = length(row) 47 | 48 | if (headerLength !== rowLength) { 49 | if (errorOnFieldsMismatch) { 50 | enqueueError( 51 | 'FieldsMismatch', 52 | `Incorrect number of fields parsed, expected ${headerLength}.` 53 | ) 54 | } 55 | return 56 | } else { 57 | data = {} 58 | for (let i = 0; i < rowLength; i++) { 59 | data[header[i]] = row[i] 60 | } 61 | } 62 | } 63 | enqueue({ idx, data }) 64 | } 65 | 66 | const enqueueError = (code, message) => { 67 | enqueue({ idx, err: { code, message } }) 68 | } 69 | 70 | const transformField = (field, idx) => { 71 | return coerceField(field || emptyFieldValue, idx) 72 | } 73 | 74 | const chunkParse = (string, controller) => { 75 | chunk = string 76 | enqueue = controller.enqueue 77 | const lines = chunk.split(newlineChar) // TODO use cursor pattern 78 | let linesLength = length(lines) 79 | if (linesLength > 1) { 80 | partialLine = lines.pop() 81 | linesLength -= 1 82 | } 83 | 84 | let i = 0 85 | if (header === true) { 86 | header = lines[i].split(delimiterChar) 87 | headerLength = length(header) 88 | i += 1 89 | } 90 | 91 | for (; i < linesLength; i++) { 92 | const line = lines[i] 93 | const row = [] 94 | let cursor = 0 95 | while (cursor < line.length) { 96 | const delimiterIndex = line.indexOf(delimiterChar, cursor) 97 | if (delimiterIndex === -1) { 98 | row.push(transformField(line.substring(cursor), row.length)) 99 | break 100 | } 101 | row.push( 102 | transformField(line.substring(cursor, delimiterIndex), row.length) 103 | ) 104 | cursor = delimiterIndex + 1 105 | } 106 | 
enqueueRow(row) 107 | } 108 | } 109 | 110 | return { 111 | chunkParse, 112 | header: () => header, 113 | previousChunk: () => partialLine 114 | } 115 | } 116 | 117 | export default (input, opts) => { 118 | const options = { 119 | ...defaultOptions, 120 | ...{ 121 | enableReturn: true, 122 | chunkSize: 64 * 1024 * 1024, 123 | enqueue: () => {} 124 | }, 125 | ...opts 126 | } 127 | const { chunkSize, enableReturn, enqueue } = options 128 | const { chunkParse, previousChunk } = parse(options) 129 | 130 | const res = [] 131 | const controller = { enqueue } 132 | 133 | if (enableReturn) { 134 | controller.enqueue = (row) => { 135 | enqueue(row) 136 | res.push(row.data) 137 | } 138 | } 139 | 140 | let position = 0 141 | while (position < input.length) { 142 | const chunk = 143 | previousChunk() + input.substring(position, position + chunkSize) 144 | 145 | // Checking if you can use fastParse slows it down more than checking for quoteChar on ever field. 146 | chunkParse(chunk, controller) 147 | position += chunkSize 148 | } 149 | // flush 150 | const chunk = previousChunk() 151 | chunkParse(chunk, controller, true) 152 | 153 | return enableReturn && res 154 | } 155 | -------------------------------------------------------------------------------- /parse.bench.js: -------------------------------------------------------------------------------- 1 | import { add, cycle, /* save, */ suite } from 'benny' 2 | import parse from './parse.js' // 'csv-rex/parse' 3 | 4 | const inputs = {} 5 | const configs = [] 6 | const baseline = { 7 | columns: 10, 8 | rows: 1_000, 9 | quotes: false, 10 | newlineChar: '\r\n', 11 | delimiterChar: ',', 12 | header: false, 13 | commentPrefixValue: false 14 | } 15 | configs.push({ ...baseline }) 16 | // expected to be slower, compare against each other 17 | configs.push({ ...baseline, columns: 100 }) // input has move columns 18 | configs.push({ ...baseline, rows: 10_000 }) // input has more rows 19 | // Options 20 | configs.push({ 21 | ...baseline, 22 
| header: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 23 | }) // pre-defined headers to make object 24 | configs.push({ ...baseline, header: true }) // use header to make object 25 | configs.push({ ...baseline, newlineChar: '\n' }) // shorter newline ** should be fastest 26 | configs.push({ ...baseline, newlineChar: '' }) // detect newline 27 | configs.push({ ...baseline, delimiterChar: '\t' }) // detect delimiter 28 | configs.push({ ...baseline, delimiterChar: '' }) // detect delimiter 29 | configs.push({ ...baseline, commentPrefixValue: '//' }) // detect comments 30 | configs.push({ ...baseline, quotes: true }) // input has quoted fields 31 | 32 | configs.push({ ...baseline, newlineChar: '\n', delimiterChar: '\t' }) // TSV 33 | configs.push({ ...baseline }) 34 | 35 | const baselineDiff = (config) => { 36 | const diff = {} 37 | for (const key in config) { 38 | if (config[key] !== baseline[key]) { 39 | diff[key] = config[key] 40 | } 41 | } 42 | return diff 43 | } 44 | 45 | const testBatch = (configs) => { 46 | return configs.map((config) => { 47 | const { columns, rows, quotes, ...options } = config 48 | const delimiterChar = options.delimiterChar || baseline.delimiterChar 49 | const newlineChar = options.newlineChar || baseline.newlineChar 50 | const input = `${columns}x${rows} w/${ 51 | quotes ? '' : 'o' 52 | } quotes and {newlineChar:${newlineChar},delimiterChar:${delimiterChar}}` 53 | if (!inputs[input]) { 54 | const wrapper = quotes ? '"' : '' 55 | const delimiter = quotes ? 
`"${delimiterChar}"` : `${delimiterChar}` 56 | let csv = 57 | wrapper + 58 | Array.from({ length: columns + 1 }, (_, x) => `__${x}__`).join( 59 | delimiter 60 | ) + 61 | wrapper + 62 | newlineChar 63 | for (let y = 0; y < rows; y++) { 64 | csv += 65 | wrapper + 66 | Array.from({ length: columns + 1 }, (_, x) => `${x}x${y}`).join( 67 | delimiter 68 | ) + 69 | wrapper + 70 | newlineChar 71 | } 72 | inputs[input] = csv 73 | } 74 | return add( 75 | `parse(${JSON.stringify({ columns, rows, quotes })}, ${JSON.stringify( 76 | options 77 | )}) :: ${JSON.stringify(baselineDiff(config))}`, 78 | () => { 79 | parse(inputs[input], options) 80 | } 81 | ) 82 | }) 83 | } 84 | 85 | const parseSuite = suite( 86 | 'parse', 87 | ...testBatch(configs), 88 | cycle() 89 | // save({file: 'parse.bench.csv', format: 'csv'}) 90 | ) 91 | 92 | export default () => parseSuite 93 | -------------------------------------------------------------------------------- /parse.js: -------------------------------------------------------------------------------- 1 | // chunkSize >> largest expected row 2 | const defaultOptions = { 3 | header: true, // false: return array; true: detect headers and return json; [...]: use defined headers and return json 4 | newlineChar: '', // '': detect newline from chunk; '\r\n': Windows; '\n': Linux/Mac 5 | delimiterChar: '', // '': detect delimiter from chunk 6 | quoteChar: '"', 7 | // escapeChar: '"', // default: `quoteChar` 8 | detectCharLength: 1024, 9 | 10 | // Parse 11 | emptyFieldValue: '', 12 | // TODO option to remove empty fields from object 13 | coerceField: (field) => field, // TODO tests 14 | commentPrefixValue: false, // falsy: disable, '//': enabled 15 | errorOnComment: true, 16 | errorOnEmptyLine: true, 17 | errorOnExtraFields: true, 18 | errorOnMissingFields: true 19 | // errorOnFieldMalformed: true 20 | } 21 | 22 | const length = (value) => value.length 23 | const escapeRegExp = (string) => string.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&') // 
https://github.com/tc39/proposal-regex-escaping 24 | 25 | export const parse = (opts = {}) => { 26 | const options = { ...defaultOptions, ...opts } 27 | options.escapeChar ??= options.quoteChar 28 | 29 | let { header, newlineChar, delimiterChar } = options 30 | const { 31 | detectCharLength, 32 | quoteChar, 33 | escapeChar, 34 | commentPrefixValue, 35 | emptyFieldValue, 36 | coerceField, 37 | errorOnEmptyLine, 38 | errorOnComment, 39 | errorOnExtraFields, 40 | errorOnMissingFields 41 | // errorOnFieldMalformed 42 | } = options 43 | let headerLength = length(header) 44 | const detectDelimiterCharRegExp = /,|\t|\||;|\x1E|\x1F/g // eslint-disable-line no-control-regex 45 | const detectNewlineCharRegExp = /\r\n|\n|\r/g 46 | 47 | const escapedQuoteChar = escapeChar + quoteChar 48 | const escapedQuoteCharRegExp = new RegExp( 49 | `${escapeRegExp(escapedQuoteChar)}`, 50 | 'g' 51 | ) 52 | 53 | const escapedQuoteEqual = escapeChar === quoteChar 54 | const escapedQuoteNotEqual = escapeChar !== quoteChar 55 | 56 | let newlineCharLength = length(newlineChar) 57 | const delimiterCharLength = 1 // length(delimiterChar) 58 | const quoteCharLength = 1 // length(quoteChar) 59 | const escapeCharLength = 1 // length(escapeChar) 60 | const escapedQuoteCharLength = 2 // length(escapedQuoteChar) 61 | // const commentPrefixValueLength = length(commentPrefixValue) 62 | 63 | let chunk, chunkLength, cursor, row, enqueue 64 | let partialLine = '' 65 | let idx = 0 66 | const enqueueRow = (row) => { 67 | idx += 1 68 | if (header === true) { 69 | header = row 70 | headerLength = length(header) 71 | return 72 | } 73 | let data = row 74 | if (headerLength) { 75 | let rowLength = length(row) 76 | 77 | // enqueueError('FieldsMismatch', `Parsed ${rowLength} fields, expected ${headerLength}.`) 78 | if (errorOnMissingFields && rowLength < headerLength) { 79 | enqueueError( 80 | 'MissingFields', 81 | `Too few fields were parsed, expected ${headerLength}.` 82 | ) 83 | return 84 | } else if (headerLength 
< rowLength) { 85 | if (errorOnExtraFields) { 86 | enqueueError( 87 | 'ExtraFields', 88 | `Too many fields were parsed, expected ${headerLength}.` 89 | ) 90 | return 91 | } 92 | // only map fields that have headers 93 | rowLength = headerLength 94 | } 95 | 96 | data = {} 97 | for (let i = 0; i < rowLength; i++) { 98 | data[header[i]] = row[i] 99 | } 100 | } 101 | enqueue({ idx, data }) 102 | } 103 | 104 | const enqueueError = (code, message) => { 105 | enqueue({ idx, err: { code, message } }) 106 | } 107 | 108 | const findNext = (searchValue, start = cursor) => { 109 | return chunk.indexOf(searchValue, start) 110 | } 111 | 112 | const parseField = (end) => { 113 | return chunk.substring(cursor, end) 114 | } 115 | const transformField = (field, idx) => { 116 | return coerceField(field || emptyFieldValue, idx) 117 | } 118 | 119 | // TODO idea: when header == true/array using a different addFieldToRow function to allow faster key:value mapping 120 | // const resetRow = () => { 121 | // row = [] 122 | // } 123 | const addFieldToRow = (field, idx) => { 124 | row.push(transformField(field, idx)) 125 | } 126 | 127 | const checkForEmptyLine = () => { 128 | if (findNext(newlineChar) === cursor) { 129 | idx += 1 130 | cursor += newlineCharLength 131 | if (errorOnEmptyLine) { 132 | enqueueError('EmptyLineExists', 'Empty line detected.') 133 | } 134 | return checkForEmptyLine() 135 | } else if (commentPrefixValue && findNext(commentPrefixValue) === cursor) { 136 | idx += 1 137 | cursor = findNext(newlineChar) + newlineCharLength 138 | if (errorOnComment) { 139 | enqueueError('CommentExists', 'Comment detected.') 140 | } 141 | return checkForEmptyLine() 142 | } 143 | } 144 | 145 | const chunkParse = (string, controller, flush = false) => { 146 | chunk = string 147 | chunkLength = length(chunk) 148 | enqueue = controller.enqueue 149 | partialLine = '' 150 | cursor = 0 151 | row = [] // resetRow() 152 | 153 | // auto-detect 154 | if (!newlineChar) { 155 | newlineChar = 
detectChar( 156 | chunk.substring(0, detectCharLength), 157 | detectNewlineCharRegExp 158 | ) 159 | 160 | if (!newlineChar) { 161 | if (chunk.length < detectCharLength && !flush) { 162 | // First chunk is too short 163 | partialLine = chunk 164 | return 165 | } else { 166 | throw new Error('newlineCharUnknown') 167 | } 168 | } 169 | newlineCharLength = length(newlineChar) 170 | } 171 | delimiterChar ||= detectChar( 172 | chunk.substring(0, detectCharLength), 173 | detectDelimiterCharRegExp 174 | ) 175 | 176 | checkForEmptyLine() 177 | let lineStart = 0 178 | for (;;) { 179 | let quoted 180 | let nextCursor = cursor 181 | let nextCursorLength 182 | let atNewline 183 | if (chunk[cursor] === quoteChar) { 184 | cursor += quoteCharLength 185 | quoted = true 186 | nextCursor = cursor 187 | for (;;) { 188 | nextCursor = findNext(quoteChar, nextCursor) 189 | if (nextCursor < 0) { 190 | partialLine = chunk.substring(lineStart, chunkLength) + partialLine 191 | if (flush) { 192 | throw new Error('QuotedFieldMalformed', { cause: idx }) 193 | } 194 | return 195 | } 196 | if ( 197 | escapedQuoteEqual && 198 | chunk[nextCursor + quoteCharLength] === quoteChar 199 | ) { 200 | nextCursor += escapedQuoteCharLength 201 | continue 202 | } 203 | if ( 204 | escapedQuoteNotEqual && 205 | chunk[nextCursor - escapeCharLength] === escapeChar 206 | ) { 207 | nextCursor += quoteCharLength 208 | continue 209 | } 210 | break 211 | } 212 | } 213 | 214 | // fallback 215 | const nextDelimiterChar = findNext(delimiterChar, nextCursor) 216 | let nextNewlineChar = findNext(newlineChar, nextCursor) 217 | if (nextNewlineChar < 0) { 218 | if (!flush) { 219 | partialLine = chunk.substring(lineStart, chunkLength) + partialLine 220 | return 221 | } 222 | nextNewlineChar = chunkLength 223 | } 224 | if (nextDelimiterChar > -1 && nextDelimiterChar < nextNewlineChar) { 225 | nextCursor = nextDelimiterChar 226 | nextCursorLength = delimiterCharLength 227 | } else { 228 | nextCursor = nextNewlineChar 229 | 
nextCursorLength = newlineCharLength 230 | atNewline = true 231 | } 232 | 233 | if (nextCursor < 0 || !nextCursor) { 234 | break 235 | } 236 | 237 | let field 238 | if (quoted) { 239 | field = parseField(nextCursor - 1).replace( 240 | escapedQuoteCharRegExp, 241 | quoteChar 242 | ) 243 | } else { 244 | field = parseField(nextCursor) 245 | } 246 | addFieldToRow(field, row.length) 247 | 248 | cursor = nextCursor + nextCursorLength 249 | 250 | if (atNewline) { 251 | enqueueRow(row) 252 | row = [] // resetRow() 253 | checkForEmptyLine() 254 | lineStart = cursor 255 | } 256 | // `row.length === 0` required for when a csv ends with just `,` and no newline 257 | if (chunkLength <= cursor && row.length === 0) { 258 | break 259 | } 260 | } 261 | } 262 | 263 | return { 264 | chunkParse, 265 | header: () => header, 266 | previousChunk: () => partialLine 267 | } 268 | } 269 | 270 | export const detectChar = (chunk, pattern) => { 271 | let match 272 | const chars = {} 273 | while ((match = pattern.exec(chunk))) { 274 | const char = match[0] 275 | chars[char] ??= 0 276 | chars[char] += 1 277 | if (chars[char] > 5) return char 278 | } 279 | // pattern.lastIndex = 0 // not reused again 280 | const { key } = 281 | Object.keys(chars) 282 | .map((key) => ({ key, value: chars[key] })) 283 | .sort((a, b) => a.value - b.value)?.[0] ?? {} 284 | if (!key) { 285 | return 286 | } 287 | return key 288 | } 289 | 290 | export const coerceTo = { 291 | string: (field) => field, 292 | boolean: (field) => { 293 | const boolean = coerceTo.true(field) 294 | return typeof boolean === 'boolean' ? boolean : coerceTo.false(field) 295 | }, 296 | true: (field) => (field.toLowerCase() === 'true' ? true : field), 297 | false: (field) => (field.toLowerCase() === 'false' ? false : field), 298 | number: (field) => { 299 | const decimal = coerceTo.decimal(field) 300 | return Number.isInteger(decimal) ? 
coerceTo.integer(field) : decimal 301 | }, 302 | integer: (field) => Number.parseInt(field, 10) || field, 303 | decimal: (field) => Number.parseFloat(field) || field, 304 | json: (field) => { 305 | try { 306 | return JSON.parse(field) 307 | } catch (e) { 308 | return field 309 | } 310 | }, 311 | timestamp: (field) => { 312 | const date = new Date(field) 313 | return date.toString() !== 'Invalid Date' ? date : field 314 | }, 315 | null: (field) => (field.toLowerCase() === 'null' ? null : field), 316 | any: (field) => { 317 | const types = ['boolean', 'number', 'null', 'json'] 318 | for (let i = 0, l = types.length; i < l; i++) { 319 | field = coerceTo[types[i]](field) 320 | 321 | if (typeof field !== 'string') { 322 | break 323 | } 324 | } 325 | 326 | return field 327 | } 328 | } 329 | 330 | export default (input, opts) => { 331 | const options = { 332 | ...defaultOptions, 333 | ...{ 334 | enableReturn: true, 335 | chunkSize: 64 * 1024 * 1024, 336 | enqueue: () => {} 337 | }, 338 | ...opts 339 | } 340 | const { chunkSize, enableReturn, enqueue } = options 341 | const { chunkParse, previousChunk } = parse(options) 342 | 343 | const res = [] 344 | const controller = { enqueue } 345 | 346 | if (enableReturn) { 347 | controller.enqueue = (row) => { 348 | enqueue(row) 349 | res.push(row.data) 350 | } 351 | } 352 | 353 | let position = 0 354 | while (position < input.length) { 355 | const chunk = 356 | previousChunk() + input.substring(position, position + chunkSize) 357 | 358 | // Checking if you can use fastParse slows it down more than checking for quoteChar on ever field. 
/**
 * Parse a whole CSV string in fixed-size chunks.
 *
 * @param {string} input - Complete CSV text.
 * @param {Object} [opts] - Parser options; merged over the built-in defaults.
 * @returns {Array|false} Array of parsed row data when `enableReturn` is
 *   truthy, otherwise `false` (rows are still delivered via `enqueue`).
 */
export default (input, opts) => {
  // Merge order: library defaults, then whole-string defaults, then caller opts.
  const options = {
    ...defaultOptions,
    enableReturn: true,
    chunkSize: 64 * 1024 * 1024,
    enqueue: () => {},
    ...opts
  }
  const { chunkSize, enableReturn, enqueue } = options
  const { chunkParse, previousChunk } = parse(options)

  const rows = []
  const controller = { enqueue }

  if (enableReturn) {
    // Wrap the caller's enqueue so results are also collected for the return.
    controller.enqueue = (row) => {
      enqueue(row)
      rows.push(row.data)
    }
  }

  let position = 0
  while (position < input.length) {
    // Prepend any partial line left over from the previous chunk.
    // Note: checking whether a fast path applies costs more than checking
    // for quoteChar on every field, so chunkParse is always used.
    const chunk =
      previousChunk() + input.substring(position, position + chunkSize)
    chunkParse(chunk, controller)
    position += chunkSize
  }
  // Flush whatever partial line remains at end of input.
  chunkParse(previousChunk(), controller, true)

  return enableReturn && rows
}
column', async (t) => { 55 | const options = { 56 | enqueue: sinon.spy(), 57 | delimiterChar: ',' 58 | } 59 | const input = 'a,b,c,d\r\n1,2,3,4\r\n4,5,6,\r\n9,10,11,\r\n' 60 | const res = csvParse(input, options) 61 | deepEqual(res, [ 62 | { a: '1', b: '2', c: '3', d: '4' }, 63 | { a: '4', b: '5', c: '6', d: '' }, 64 | { a: '9', b: '10', c: '11', d: '' } 65 | ]) 66 | equal(options.enqueue.callCount, 3) 67 | }) 68 | 69 | test('Should parse csv string with empty first and last columns', async (t) => { 70 | const options = { 71 | enqueue: sinon.spy(), 72 | delimiterChar: ',' 73 | } 74 | const input = 'a,b,c,d\r\n1,2,3,4\r\n,5,6,\r\n,10,11,\r\n' 75 | const res = csvParse(input, options) 76 | deepEqual(res, [ 77 | { a: '1', b: '2', c: '3', d: '4' }, 78 | { a: '', b: '5', c: '6', d: '' }, 79 | { a: '', b: '10', c: '11', d: '' } 80 | ]) 81 | equal(options.enqueue.callCount, 3) 82 | }) 83 | 84 | test('Should parse csv string with empty last followed by empty first column', async (t) => { 85 | const options = { 86 | enqueue: sinon.spy(), 87 | delimiterChar: ',' 88 | } 89 | const input = 'a,b,c,d\r\n1,2,3,4\r\n4,5,6,\r\n,10,11,12\r\n' 90 | const res = csvParse(input, options) 91 | deepEqual(res, [ 92 | { a: '1', b: '2', c: '3', d: '4' }, 93 | { a: '4', b: '5', c: '6', d: '' }, 94 | { a: '', b: '10', c: '11', d: '12' } 95 | ]) 96 | equal(options.enqueue.callCount, 3) 97 | }) 98 | 99 | test('Should parse csv string w/ quotes', async (t) => { 100 | const options = { 101 | enqueue: sinon.spy(), 102 | chunkSize: 14 103 | } 104 | const input = 'a,b,c\r\n1,"2",3\r\n4,"5",6\r\n' 105 | const res = csvParse(input, options) 106 | deepEqual(res, [ 107 | { a: '1', b: '2', c: '3' }, 108 | { a: '4', b: '5', c: '6' } 109 | ]) 110 | equal(options.enqueue.callCount, 2) 111 | }) 112 | 113 | // *** General *** // 114 | for (const method of allMethods) { 115 | test(`${method}: Should parse single row with { }`, async (t) => { 116 | const options = {} 117 | const enqueue = sinon.spy() 118 | const 
chunk = 'a,b,c\r\n1,2,3\r\n' 119 | const parser = parse(options) 120 | parser[method](chunk, { enqueue }) 121 | equal(enqueue.callCount, 1) 122 | deepEqual(enqueue.firstCall.args, [ 123 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 124 | ]) 125 | }) 126 | 127 | test(`${method}: Should parse multiple rows with { }`, async (t) => { 128 | const options = {} 129 | const enqueue = sinon.spy() 130 | 131 | const chunk = 'a,b,c\r\n1,2,3\r\n4,5,6\r\n7,8,9' 132 | const parser = parse(options) 133 | parser[method](chunk, { enqueue }) 134 | parser[method](parser.previousChunk(), { enqueue }, true) 135 | equal(enqueue.callCount, 3) 136 | deepEqual(enqueue.firstCall.args, [ 137 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 138 | ]) 139 | deepEqual(enqueue.secondCall.args, [ 140 | { data: { a: '4', b: '5', c: '6' }, idx: 3 } 141 | ]) 142 | deepEqual(enqueue.thirdCall.args, [ 143 | { data: { a: '7', b: '8', c: '9' }, idx: 4 } 144 | ]) 145 | }) 146 | } 147 | 148 | // *** Chunking *** // 149 | for (const method of allMethods) { 150 | test(`${method}: Should parse with chunking`, async (t) => { 151 | const options = {} 152 | const enqueue = sinon.spy() 153 | let chunk = 'a,b,c\r\n1,2,' 154 | const parser = parse(options) 155 | parser[method](chunk, { enqueue }) 156 | equal(parser.previousChunk(), '1,2,') 157 | chunk = parser.previousChunk() + '3\r\n4' 158 | parser[method](chunk, { enqueue }) 159 | equal(parser.previousChunk(), '4') 160 | chunk = parser.previousChunk() + ',5,6' 161 | parser[method](chunk, { enqueue }, true) 162 | equal(enqueue.callCount, 2) 163 | deepEqual(enqueue.firstCall.args, [ 164 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 165 | ]) 166 | deepEqual(enqueue.secondCall.args, [ 167 | { data: { a: '4', b: '5', c: '6' }, idx: 3 } 168 | ]) 169 | }) 170 | } 171 | 172 | for (const method of quoteMethods) { 173 | test(`${method}: Should parse with chunking and quotes`, async (t) => { 174 | const options = {} 175 | const enqueue = sinon.spy() 176 | let chunk = 
'a,b,c\r\n"1","2","' 177 | const parser = parse(options) 178 | parser[method](chunk, { enqueue }) 179 | equal(parser.previousChunk(), '"1","2","') 180 | chunk = parser.previousChunk() + '3"\r\n' 181 | parser[method](chunk, { enqueue }) 182 | equal(parser.previousChunk(), '') 183 | chunk = parser.previousChunk() + '"4","5","6"' 184 | parser[method](chunk, { enqueue }, true) 185 | equal(enqueue.callCount, 2) 186 | deepEqual(enqueue.firstCall.args, [ 187 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 188 | ]) 189 | deepEqual(enqueue.secondCall.args, [ 190 | { data: { a: '4', b: '5', c: '6' }, idx: 3 } 191 | ]) 192 | }) 193 | } 194 | 195 | // *** Option: header *** // 196 | for (const method of allMethods) { 197 | test(`${method}: Should parse with { header: [...] }`, async (t) => { 198 | const options = { header: ['a', 'b', 'c'] } 199 | const enqueue = sinon.spy() 200 | const chunk = '1,2,3\r\n' 201 | const parser = parse(options) 202 | parser[method](chunk, { enqueue }) 203 | equal(enqueue.callCount, 1) 204 | deepEqual(enqueue.firstCall.args, [ 205 | { data: { a: '1', b: '2', c: '3' }, idx: 1 } 206 | ]) 207 | }) 208 | 209 | test(`${method}: Should parse with { header: true }`, async (t) => { 210 | const options = { header: true } 211 | const enqueue = sinon.spy() 212 | const chunk = 'a,b,c\r\n1,2,3\r\n' 213 | const parser = parse(options) 214 | parser[method](chunk, { enqueue }) 215 | equal(enqueue.callCount, 1) 216 | deepEqual(enqueue.firstCall.args, [ 217 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 218 | ]) 219 | }) 220 | 221 | test(`${method}: Should parse with { header: false }`, async (t) => { 222 | const options = { header: false } 223 | const enqueue = sinon.spy() 224 | const chunk = '1,2,3\r\n' 225 | const parser = parse(options) 226 | parser[method](chunk, { enqueue }) 227 | equal(enqueue.callCount, 1) 228 | deepEqual(enqueue.firstCall.args, [{ data: ['1', '2', '3'], idx: 1 }]) 229 | }) 230 | } 231 | 232 | // *** Option: newline *** // 233 | for (const 
method of allMethods) { 234 | test(`${method}: Should parse with { newlineChar: "" } (auto detect)`, async (t) => { 235 | const options = { newlineChar: '' } 236 | const enqueue = sinon.spy() 237 | const chunk = 'a,b,c\r1,2,3\r' 238 | const parser = parse(options) 239 | parser[method](chunk, { enqueue }) 240 | deepEqual(enqueue.firstCall.args, [ 241 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 242 | ]) 243 | }) 244 | 245 | test(`${method}: Should parse with { newlineChar: "\\r\\n" }`, async (t) => { 246 | const options = { newlineChar: '\r\n' } 247 | const enqueue = sinon.spy() 248 | const chunk = 'a,b,c\r\n1,2,3\r\n' 249 | const parser = parse(options) 250 | parser[method](chunk, { enqueue }) 251 | deepEqual(enqueue.firstCall.args, [ 252 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 253 | ]) 254 | }) 255 | test(`${method}: Should parse with { newlineChar: "\\n" }`, async (t) => { 256 | const options = { newlineChar: '\n' } 257 | const enqueue = sinon.spy() 258 | const chunk = 'a,b,c\n1,2,3\n' 259 | const parser = parse(options) 260 | parser[method](chunk, { enqueue }) 261 | deepEqual(enqueue.firstCall.args, [ 262 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 263 | ]) 264 | }) 265 | test(`${method}: Should parse first chunk is shorter than the headers with { newlineChar: "" }`, async (t) => { 266 | const options = { newlineChar: '' } 267 | const enqueue = sinon.spy() 268 | const chunk0 = 'a,b,' 269 | const chunk1 = 'c\n1,2,3\n1,2,3' 270 | const parser = parse(options) 271 | console.log('parser', parser.previousChunk() + chunk0) 272 | parser[method](chunk0, { enqueue }) 273 | console.log('parser', parser.previousChunk() + chunk1) 274 | parser[method](parser.previousChunk() + chunk1, { enqueue }, true) 275 | console.log(enqueue.firstCall) 276 | deepEqual(enqueue.firstCall.args, [ 277 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 278 | ]) 279 | deepEqual(enqueue.secondCall.args, [ 280 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 281 | ]) 282 | }) 283 | 
test(`${method}: Should parse when no newline at end of file`, async (t) => { 284 | const options = { newlineChar: '' } 285 | const enqueue = sinon.spy() 286 | const chunk = 'a,b,c\n1,2,3\n1,2,3' 287 | const parser = parse(options) 288 | parser[method](chunk, { enqueue }) 289 | parser[method](parser.previousChunk(), { enqueue }, true) 290 | deepEqual(enqueue.firstCall.args, [ 291 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 292 | ]) 293 | deepEqual(enqueue.secondCall.args, [ 294 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 295 | ]) 296 | }) 297 | test(`${method}: Should parse when no field and newline at end of file`, async (t) => { 298 | const options = { newlineChar: '' } 299 | const enqueue = sinon.spy() 300 | const chunk = 'a,b,c\n1,2,3\n1,2,' 301 | const parser = parse(options) 302 | parser[method](chunk, { enqueue }) 303 | parser[method](parser.previousChunk(), { enqueue }, true) 304 | deepEqual(enqueue.firstCall.args, [ 305 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 306 | ]) 307 | deepEqual(enqueue.secondCall.args, [ 308 | { data: { a: '1', b: '2', c: '' }, idx: 3 } 309 | ]) 310 | }) 311 | } 312 | 313 | // *** Option: delimiter *** // 314 | for (const method of allMethods) { 315 | test(`${method}: Should parse with { delimiterValue: "" }`, async (t) => { 316 | const options = { delimiterChar: '' } 317 | const enqueue = sinon.spy() 318 | const chunk = 'a\x1Fb\x1Fc\r\n1\x1F2\x1F3\r\n' 319 | const parser = parse(options) 320 | parser[method](chunk, { enqueue }) 321 | equal(enqueue.callCount, 1) 322 | deepEqual(enqueue.firstCall.args, [ 323 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 324 | ]) 325 | }) 326 | test(`${method}: Should parse with { delimiterValue: "," }`, async (t) => { 327 | const options = { delimiterChar: ',' } 328 | const enqueue = sinon.spy() 329 | const chunk = 'a,b,c\r\n1,2,3\r\n' 330 | const parser = parse(options) 331 | parser[method](chunk, { enqueue }) 332 | equal(enqueue.callCount, 1) 333 | deepEqual(enqueue.firstCall.args, [ 
334 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 335 | ]) 336 | }) 337 | test(`${method}: Should parse with { delimiterValue: "|" }`, async (t) => { 338 | const options = { delimiterChar: '|' } 339 | const enqueue = sinon.spy() 340 | const chunk = 'a|b|c\r\n1|2|3\r\n' 341 | const parser = parse(options) 342 | parser[method](chunk, { enqueue }) 343 | equal(enqueue.callCount, 1) 344 | deepEqual(enqueue.firstCall.args, [ 345 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 346 | ]) 347 | }) 348 | 349 | test(`${method}: Should parse with { delimiterValue: "\\t" }`, async (t) => { 350 | const options = { delimiterChar: '\t' } 351 | const enqueue = sinon.spy() 352 | const chunk = 'a\tb\tc\r\n1\t2\t3\r\n' 353 | const parser = parse(options) 354 | parser[method](chunk, { enqueue }) 355 | equal(enqueue.callCount, 1) 356 | deepEqual(enqueue.firstCall.args, [ 357 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 358 | ]) 359 | }) 360 | } 361 | 362 | // *** Option: quoteChar *** // 363 | for (const method of quoteMethods) { 364 | test(`${method}: Should parse with { quoteChar: '"' }`, async (t) => { 365 | const options = { quoteChar: '"' } 366 | const enqueue = sinon.spy() 367 | const chunk = '"a","b","c"\r\n"1","2","3"\r\n' 368 | const parser = parse(options) 369 | parser[method](chunk, { enqueue }) 370 | deepEqual(enqueue.firstCall.args, [ 371 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 372 | ]) 373 | }) 374 | 375 | test(`${method}: Should parse with { quoteChar: '\`' }`, async (t) => { 376 | const options = { quoteChar: '`' } 377 | const enqueue = sinon.spy() 378 | const chunk = '`a`,`b`,`c`\r\n`1`,`2`,`3`\r\n' 379 | const parser = parse(options) 380 | parser[method](chunk, { enqueue }) 381 | deepEqual(enqueue.firstCall.args, [ 382 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 383 | ]) 384 | }) 385 | } 386 | 387 | // *** Option: escapeChar *** // 388 | for (const method of quoteMethods) { 389 | test(`${method}: Should parse with { quoteChar: '"', escapeChar: '"' }`, async 
(t) => { 390 | const options = { quoteChar: '"', escapeChar: '"' } 391 | const enqueue = sinon.spy() 392 | const chunk = '"a","b""","c"\r\n"1","2""","3"\r\n' 393 | const parser = parse(options) 394 | parser[method](chunk, { enqueue }) 395 | deepEqual(enqueue.firstCall.args, [ 396 | { data: { a: '1', 'b"': '2"', c: '3' }, idx: 2 } 397 | ]) 398 | }) 399 | 400 | test(`${method}: Should parse with { quoteChar: '"', escapeChar: '\\' }`, async (t) => { 401 | const options = { quoteChar: '"', escapeChar: '\\' } 402 | const enqueue = sinon.spy() 403 | const chunk = '"a","b\\"","c"\r\n"1","2\\"","3"\r\n' 404 | const parser = parse(options) 405 | parser[method](chunk, { enqueue }) 406 | deepEqual(enqueue.firstCall.args, [ 407 | { data: { a: '1', 'b"': '2"', c: '3' }, idx: 2 } 408 | ]) 409 | }) 410 | 411 | test(`${method}: Should parse with { quoteChar: '"' } and field containing newline`, async (t) => { 412 | const options = { quoteChar: '"' } 413 | const enqueue = sinon.spy() 414 | const chunk = '"a","b\r\nb","c"\r\n"1","2\r\n2","3"' 415 | const parser = parse(options) 416 | parser[method](chunk, { enqueue }) 417 | parser[method](parser.previousChunk(), { enqueue }, true) 418 | deepEqual(enqueue.firstCall.args, [ 419 | { data: { a: '1', 'b\r\nb': '2\r\n2', c: '3' }, idx: 2 } 420 | ]) 421 | }) 422 | 423 | test(`${method}: Should parse with { quoteChar: '"' } and field containing delimiter`, async (t) => { 424 | const options = { delimiterValue: ',', quoteChar: '"' } 425 | const enqueue = sinon.spy() 426 | const chunk = '"a","b,b","c"\r\n"1","2,2","3"\r\n' 427 | const parser = parse(options) 428 | parser[method](chunk, { enqueue }) 429 | deepEqual(enqueue.firstCall.args, [ 430 | { data: { a: '1', 'b,b': '2,2', c: '3' }, idx: 2 } 431 | ]) 432 | }) 433 | } 434 | 435 | // *** coerceFields *** // 436 | for (const method of quoteMethods) { 437 | test(`${method}: Should parse with { coerceField: (field) => ... 
}`, async (t) => { 438 | const coerceField = (field, idx) => { 439 | return Object.values(coerceTo)[idx](field) 440 | } 441 | const options = { header: true, quoteChar: "'", coerceField } 442 | const enqueue = sinon.spy() 443 | const chunk = 444 | 'string,boolean,true,false,number,integer,decimal,json,timestamp,null\r\nstring,true,true,false,0,-1,-1.1,\'{"a":"b"}\',2022-07-30T04:46:24.466Z,null\r\n' 445 | const parser = parse(options) 446 | parser[method](chunk, { enqueue }) 447 | deepEqual(enqueue.firstCall.args, [ 448 | { 449 | data: { 450 | boolean: true, 451 | decimal: -1.1, 452 | false: false, 453 | number: 0, 454 | integer: -1, 455 | json: { 456 | a: 'b' 457 | }, 458 | null: null, 459 | string: 'string', 460 | timestamp: new Date('2022-07-30T04:46:24.466Z'), 461 | true: true 462 | }, 463 | idx: 2 464 | } 465 | ]) 466 | }) 467 | } 468 | 469 | test('Should coerceTo boolean', async (t) => { 470 | equal(coerceTo.true('true'), true) 471 | equal(coerceTo.true('TRUE'), true) 472 | 473 | equal(coerceTo.false('false'), false) 474 | equal(coerceTo.false('FALSE'), false) 475 | 476 | equal(coerceTo.boolean('true'), true) 477 | equal(coerceTo.boolean('TRUE'), true) 478 | equal(coerceTo.boolean('false'), false) 479 | equal(coerceTo.boolean('FALSE'), false) 480 | 481 | equal(coerceTo.any('true'), true) 482 | equal(coerceTo.any('TRUE'), true) 483 | equal(coerceTo.any('false'), false) 484 | equal(coerceTo.any('FALSE'), false) 485 | }) 486 | 487 | test('Should not coerceTo boolean', async (t) => { 488 | equal(coerceTo.null('1'), '1') 489 | equal(coerceTo.null('0'), '0') 490 | }) 491 | 492 | test('Should coerceTo number', async (t) => { 493 | equal(coerceTo.integer('1.1'), 1) 494 | equal(coerceTo.integer('1'), 1) 495 | equal(coerceTo.integer('0'), 0) 496 | equal(coerceTo.integer('-1'), -1) 497 | equal(coerceTo.integer('-1'), -1) 498 | 499 | equal(coerceTo.decimal('1.1'), 1.1) 500 | equal(coerceTo.decimal('1'), 1) 501 | equal(coerceTo.decimal('0'), 0) 502 | 
equal(coerceTo.decimal('-1'), -1) 503 | equal(coerceTo.decimal('-1.1'), -1.1) 504 | 505 | equal(coerceTo.number('1.1'), 1.1) 506 | equal(coerceTo.number('1'), 1) 507 | equal(coerceTo.number('0'), 0) 508 | equal(coerceTo.number('-1'), -1) 509 | equal(coerceTo.number('-1.1'), -1.1) 510 | 511 | equal(coerceTo.any('1.1'), 1.1) 512 | equal(coerceTo.any('1'), 1) 513 | equal(coerceTo.any('0'), 0) 514 | equal(coerceTo.any('-1'), -1) 515 | equal(coerceTo.any('-1.1'), -1.1) 516 | }) 517 | 518 | test('Should not coerceTo number', async (t) => { 519 | equal(coerceTo.null('a'), 'a') 520 | }) 521 | 522 | test('Should coerceTo null', async (t) => { 523 | equal(coerceTo.null('null'), null) 524 | equal(coerceTo.null('NULL'), null) 525 | 526 | equal(coerceTo.any('null'), null) 527 | equal(coerceTo.any('NULL'), null) 528 | }) 529 | 530 | test('Should not coerceTo null', async (t) => { 531 | equal(coerceTo.null('Nil'), 'Nil') 532 | }) 533 | 534 | test('Should coerceTo timestamp', async (t) => { 535 | deepEqual(coerceTo.timestamp('2000-01-01'), new Date('2000-01-01')) 536 | deepEqual( 537 | coerceTo.timestamp('2000-01-01T00:00:00Z'), 538 | new Date('2000-01-01T00:00:00Z') 539 | ) 540 | 541 | // `any` doesn't support `date` due conflict with `number` 542 | // deepEqual(coerceTo.any('2000-01-01'), new Date('2000-01-01')) 543 | // deepEqual(coerceTo.any('2000-01-01T00:00:00Z'), new Date('2000-01-01T00:00:00Z')) 544 | }) 545 | 546 | test('Should not coerceTo timestamp', async (t) => { 547 | equal(coerceTo.timestamp('not a timestamp'), 'not a timestamp') 548 | }) 549 | 550 | test('Should coerceTo json', async (t) => { 551 | deepEqual(coerceTo.json('["a"]'), ['a']) 552 | deepEqual(coerceTo.json('{"a":1}'), { a: 1 }) 553 | 554 | deepEqual(coerceTo.any('["a"]'), ['a']) 555 | deepEqual(coerceTo.any('{"a":1}'), { a: 1 }) 556 | }) 557 | 558 | test('Should not coerceTo json', async (t) => { 559 | equal(coerceTo.json('not json'), 'not json') 560 | }) 561 | 562 | // *** empty fields *** // 563 | for 
(const method of allMethods) { 564 | test(`${method}: Should parse with { emptyFieldValue: "" }`, async (t) => { 565 | const options = { emptyFieldValue: '' } 566 | const enqueue = sinon.spy() 567 | const chunk = 'a,b,c\r\n,,\r\n' 568 | const parser = parse(options) 569 | parser[method](chunk, { enqueue }) 570 | equal(enqueue.callCount, 1) 571 | deepEqual(enqueue.firstCall.args, [ 572 | { data: { a: '', b: '', c: '' }, idx: 2 } 573 | ]) 574 | }) 575 | test(`${method}: Should parse with { emptyFieldValue: null }`, async (t) => { 576 | const options = { emptyFieldValue: null } 577 | const enqueue = sinon.spy() 578 | const chunk = 'a,b,c\r\n,,\r\n' 579 | const parser = parse(options) 580 | parser[method](chunk, { enqueue }) 581 | equal(enqueue.callCount, 1) 582 | deepEqual(enqueue.firstCall.args, [ 583 | { data: { a: null, b: null, c: null }, idx: 2 } 584 | ]) 585 | }) 586 | test(`${method}: Should parse with { emptyFieldValue: undefined }`, async (t) => { 587 | const options = { emptyFieldValue: undefined } 588 | const enqueue = sinon.spy() 589 | const chunk = 'a,b,c\r\n,,\r\n' 590 | const parser = parse(options) 591 | parser[method](chunk, { enqueue }) 592 | equal(enqueue.callCount, 1) 593 | deepEqual(enqueue.firstCall.args, [ 594 | { data: { a: undefined, b: undefined, c: undefined }, idx: 2 } 595 | ]) 596 | }) 597 | test(`${method}: Should parse with { emptyFieldValue: "" } and first field`, async (t) => { 598 | const options = { emptyFieldValue: '' } 599 | const enqueue = sinon.spy() 600 | const chunk = 'a,b,c\r\n1,,\r\n' 601 | const parser = parse(options) 602 | parser[method](chunk, { enqueue }) 603 | equal(enqueue.callCount, 1) 604 | deepEqual(enqueue.firstCall.args, [ 605 | { data: { a: '1', b: '', c: '' }, idx: 2 } 606 | ]) 607 | }) 608 | test(`${method}: Should parse with { emptyFieldValue: "" } and middle field`, async (t) => { 609 | const options = { emptyFieldValue: '' } 610 | const enqueue = sinon.spy() 611 | const chunk = 'a,b,c\r\n,2,\r\n' 612 | const 
parser = parse(options) 613 | parser[method](chunk, { enqueue }) 614 | equal(enqueue.callCount, 1) 615 | deepEqual(enqueue.firstCall.args, [ 616 | { data: { a: '', b: '2', c: '' }, idx: 2 } 617 | ]) 618 | }) 619 | test(`${method}: Should parse with { emptyFieldValue: "" } and last field`, async (t) => { 620 | const options = { emptyFieldValue: '' } 621 | const enqueue = sinon.spy() 622 | const chunk = 'a,b,c\r\n,,3\r\n' 623 | const parser = parse(options) 624 | parser[method](chunk, { enqueue }) 625 | equal(enqueue.callCount, 1) 626 | deepEqual(enqueue.firstCall.args, [ 627 | { data: { a: '', b: '', c: '3' }, idx: 2 } 628 | ]) 629 | }) 630 | } 631 | 632 | // *** Option: errorOnEmptyLines *** // 633 | for (const method of allMethods) { 634 | test(`${method}: Should parse with { errorOnEmptyLine: false }`, async (t) => { 635 | const options = { errorOnEmptyLine: false } 636 | const enqueue = sinon.spy() 637 | const chunk = '\r\na,b,c\r\n\r\n1,2,3\r\n' 638 | const parser = parse(options) 639 | parser[method](chunk, { enqueue }) 640 | equal(enqueue.callCount, 1) 641 | deepEqual(enqueue.firstCall.args, [ 642 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 643 | ]) 644 | }) 645 | 646 | test(`${method}: Should parse with { errorOnEmptyLine: true }`, async (t) => { 647 | const options = { errorOnEmptyLine: true } 648 | const enqueue = sinon.spy() 649 | const chunk = '\r\na,b,c\r\n\r\n1,2,3\r\n' 650 | const parser = parse(options) 651 | parser[method](chunk, { enqueue }) 652 | equal(enqueue.callCount, 3) 653 | deepEqual(enqueue.firstCall.args, [ 654 | { 655 | err: { code: 'EmptyLineExists', message: 'Empty line detected.' }, 656 | idx: 1 657 | } 658 | ]) 659 | deepEqual(enqueue.secondCall.args, [ 660 | { 661 | err: { code: 'EmptyLineExists', message: 'Empty line detected.' 
}, 662 | idx: 3 663 | } 664 | ]) 665 | deepEqual(enqueue.thirdCall.args, [ 666 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 667 | ]) 668 | }) 669 | } 670 | 671 | // *** Option: commentPrefixValue && errorOnComment *** // 672 | for (const method of allMethods) { 673 | test(`${method}: Should parse with { commentPrefixValue: "//", errorOnComment: false }`, async (t) => { 674 | const options = { commentPrefixValue: '//', errorOnComment: false } 675 | const enqueue = sinon.spy() 676 | const chunk = '// header\r\na,b,c\r\n// data\r\n1,2,3\r\n' 677 | const parser = parse(options) 678 | parser[method](chunk, { enqueue }) 679 | equal(enqueue.callCount, 1) 680 | deepEqual(enqueue.firstCall.args, [ 681 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 682 | ]) 683 | }) 684 | 685 | test(`${method}: Should parse with { commentPrefixValue: "//", errorOnComment: true }`, async (t) => { 686 | const options = { commentPrefixValue: '//', errorOnComment: true } 687 | const enqueue = sinon.spy() 688 | const chunk = '// header\r\na,b,c\r\n// data\r\n1,2,3\r\n' 689 | const parser = parse(options) 690 | parser[method](chunk, { enqueue }) 691 | equal(enqueue.callCount, 3) 692 | deepEqual(enqueue.firstCall.args, [ 693 | { err: { code: 'CommentExists', message: 'Comment detected.' }, idx: 1 } 694 | ]) 695 | deepEqual(enqueue.secondCall.args, [ 696 | { err: { code: 'CommentExists', message: 'Comment detected.' 
}, idx: 3 } 697 | ]) 698 | deepEqual(enqueue.thirdCall.args, [ 699 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 700 | ]) 701 | }) 702 | } 703 | 704 | // *** Option: errorOnMissingFields *** // 705 | for (const method of allMethods) { 706 | test(`${method}: Should parse with { errorOnMissingFields: false }`, async (t) => { 707 | const options = { errorOnMissingFields: false } 708 | const enqueue = sinon.spy() 709 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3\r\n1,2,3\r\n' 710 | const parser = parse(options) 711 | parser[method](chunk, { enqueue }) 712 | deepEqual(enqueue.firstCall.args, [{ data: { a: '1', b: '2' }, idx: 2 }]) 713 | deepEqual(enqueue.secondCall.args, [ 714 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 715 | ]) 716 | deepEqual(enqueue.thirdCall.args, [ 717 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 718 | ]) 719 | equal(enqueue.callCount, 3) 720 | }) 721 | 722 | test(`${method}: Should parse with { errorOnMissingFields: true }`, async (t) => { 723 | const options = { errorOnMissingFields: true } 724 | const enqueue = sinon.spy() 725 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3\r\n1,2,3\r\n' 726 | const parser = parse(options) 727 | parser[method](chunk, { enqueue }) 728 | deepEqual(enqueue.firstCall.args, [ 729 | { 730 | err: { 731 | code: 'MissingFields', 732 | message: 'Too few fields were parsed, expected 3.' 
733 | }, 734 | idx: 2 735 | } 736 | ]) 737 | deepEqual(enqueue.secondCall.args, [ 738 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 739 | ]) 740 | deepEqual(enqueue.thirdCall.args, [ 741 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 742 | ]) 743 | equal(enqueue.callCount, 3) 744 | }) 745 | } 746 | 747 | // *** Option: errorOnExtraFields *** // 748 | for (const method of allMethods) { 749 | test(`${method}: Should parse with { errorOnExtraFields: false }`, async (t) => { 750 | const options = { errorOnExtraFields: false } 751 | const enqueue = sinon.spy() 752 | const chunk = 'a,b,c\r\n1,2,3,4\r\n1,2,3\r\n1,2,3\r\n' 753 | const parser = parse(options) 754 | parser[method](chunk, { enqueue }) 755 | deepEqual(enqueue.firstCall.args, [ 756 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 757 | ]) 758 | deepEqual(enqueue.secondCall.args, [ 759 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 760 | ]) 761 | deepEqual(enqueue.thirdCall.args, [ 762 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 763 | ]) 764 | equal(enqueue.callCount, 3) 765 | }) 766 | 767 | test(`${method}: Should parse with { errorOnExtraFields: true }`, async (t) => { 768 | const options = { errorOnExtraFields: true } 769 | const enqueue = sinon.spy() 770 | const chunk = 'a,b,c\r\n1,2,3,4\r\n1,2,3\r\n1,2,3\r\n' 771 | const parser = parse(options) 772 | parser[method](chunk, { enqueue }) 773 | deepEqual(enqueue.firstCall.args, [ 774 | { 775 | err: { 776 | code: 'ExtraFields', 777 | message: 'Too many fields were parsed, expected 3.' 
778 | }, 779 | idx: 2 780 | } 781 | ]) 782 | deepEqual(enqueue.secondCall.args, [ 783 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 784 | ]) 785 | deepEqual(enqueue.thirdCall.args, [ 786 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 787 | ]) 788 | equal(enqueue.callCount, 3) 789 | }) 790 | } 791 | 792 | // *** Option: errorOnFieldsMismatch *** // 793 | /* for (const method of allMethods) { 794 | test(`${method}: Should parse with { errorOnFieldsMismatch: false }`, async (t) => { 795 | const options = { errorOnFieldsMismatch: false } 796 | const enqueue = sinon.spy() 797 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3,4\r\n1,2,3\r\n' 798 | const parser = parse(options) 799 | parser[method](chunk, { enqueue }) 800 | deepEqual(enqueue.firstCall.args, [ 801 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 802 | ]) 803 | equal(enqueue.callCount, 1) 804 | }) 805 | 806 | test(`${method}: Should parse with { errorOnFieldsMismatch: true }`, async (t) => { 807 | const options = { errorOnFieldsMismatch: true } 808 | const enqueue = sinon.spy() 809 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3,4\r\n1,2,3\r\n' 810 | const parser = parse(options) 811 | parser[method](chunk, { enqueue }) 812 | deepEqual(enqueue.firstCall.args, [ 813 | { 814 | err: { 815 | code: 'FieldsMismatchTooFew', 816 | message: 'Too few fields were parsed, expected 3.' 817 | }, 818 | idx: 2 819 | } 820 | ]) 821 | deepEqual(enqueue.secondCall.args, [ 822 | { 823 | err: { 824 | code: 'FieldsMismatchTooMany', 825 | message: 'Too many fields were parsed, expected 3.' 
826 | }, 827 | idx: 3 828 | } 829 | ]) 830 | deepEqual(enqueue.thirdCall.args, [ 831 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 832 | ]) 833 | equal(enqueue.callCount, 3) 834 | }) 835 | } */ 836 | 837 | // *** Option: errorOnFieldMalformed *** // 838 | for (const method of quoteMethods) { 839 | test(`${method}: Should parse with { errorOnFieldMalformed }`, async (t) => { 840 | const options = { errorOnFieldMalformed: true } 841 | const enqueue = sinon.spy() 842 | const chunk = 'a,b,c\r\n"1","2","3"\r\n"4' 843 | const parser = parse(options) 844 | try { 845 | parser[method](chunk, { enqueue }) 846 | parser[method](parser.previousChunk(), { enqueue }, true) 847 | } catch (e) { 848 | console.log('catch') 849 | equal(e.message, 'QuotedFieldMalformed') 850 | } 851 | deepEqual(enqueue.firstCall.args, [ 852 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 853 | ]) 854 | equal(enqueue.callCount, 1) 855 | }) 856 | } 857 | 858 | // *** extra spaces *** // 859 | /* test(`${method}: Should parse with space padding`, async (t) => { 860 | const options = { } 861 | const enqueue = sinon.spy() 862 | let chunk = 'a,b,c\r\n"1" ,"2" ,"3" \r\n' 863 | const parser = parse(options) 864 | parser[method](chunk, { enqueue }) 865 | equal(enqueue.callCount, 1) 866 | deepEqual(enqueue.firstCall.args, [{data:{ a: '1', b: '2', c: '3' },idx:2}]) 867 | }) */ 868 | --------------------------------------------------------------------------------