├── .github
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── feature_request.md
│ └── pull-request.md
└── workflows
│ ├── lint.yml
│ ├── sast.yml
│ └── tests.yml
├── .gitignore
├── .husky
├── commit-msg
└── pre-commit
├── .prettierrc.json
├── LICENSE
├── README.md
├── benchmark.js
├── bin
└── esbuild
├── commitlint.config.cjs
├── docs
├── .nojekyll
├── CNAME
├── README.md
├── docs
│ ├── format.md
│ ├── migrate
│ │ ├── csv.md
│ │ └── papaparse.md
│ └── parse.md
├── index.html
├── sidebar.md
├── sidebar.png
├── sidebar.sketch
└── t-rex.png
├── format.js
├── format.test.js
├── index.js
├── lint-staged.config.js
├── package-lock.json
├── package.json
├── parse-mini.js
├── parse.bench.js
├── parse.js
└── parse.test.js
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributor Covenant Code of Conduct
3 |
4 | ## Our Pledge
5 |
6 | We as members, contributors, and leaders pledge to make participation in our
7 | community a harassment-free experience for everyone, regardless of age, body
8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
9 | identity and expression, level of experience, education, socio-economic status,
10 | nationality, personal appearance, race, religion, or sexual identity
11 | and orientation.
12 |
13 | We pledge to act and interact in ways that contribute to an open, welcoming,
14 | diverse, inclusive, and healthy community.
15 |
16 | ## Our Standards
17 |
18 | Examples of behavior that contributes to a positive environment for our
19 | community include:
20 |
21 | * Demonstrating empathy and kindness toward other people
22 | * Being respectful of differing opinions, viewpoints, and experiences
23 | * Giving and gracefully accepting constructive feedback
24 | * Accepting responsibility and apologizing to those affected by our mistakes,
25 | and learning from the experience
26 | * Focusing on what is best not just for us as individuals, but for the
27 | overall community
28 |
29 | Examples of unacceptable behavior include:
30 |
31 | * The use of sexualized language or imagery, and sexual attention or
32 | advances of any kind
33 | * Trolling, insulting or derogatory comments, and personal or political attacks
34 | * Public or private harassment
35 | * Publishing others' private information, such as a physical or email
36 | address, without their explicit permission
37 | * Other conduct which could reasonably be considered inappropriate in a
38 | professional setting
39 |
40 | ## Enforcement Responsibilities
41 |
42 | Community leaders are responsible for clarifying and enforcing our standards of
43 | acceptable behavior and will take appropriate and fair corrective action in
44 | response to any behavior that they deem inappropriate, threatening, offensive,
45 | or harmful.
46 |
47 | Community leaders have the right and responsibility to remove, edit, or reject
48 | comments, commits, code, wiki edits, issues, and other contributions that are
49 | not aligned to this Code of Conduct, and will communicate reasons for moderation
50 | decisions when appropriate.
51 |
52 | ## Scope
53 |
54 | This Code of Conduct applies within all community spaces, and also applies when
55 | an individual is officially representing the community in public spaces.
56 | Examples of representing our community include using an official e-mail address,
57 | posting via an official social media account, or acting as an appointed
58 | representative at an online or offline event.
59 |
60 | ## Enforcement
61 |
62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
63 | reported to the community leaders responsible for enforcement at
64 | [INSERT CONTACT METHOD].
65 | All complaints will be reviewed and investigated promptly and fairly.
66 |
67 | All community leaders are obligated to respect the privacy and security of the
68 | reporter of any incident.
69 |
70 | ## Enforcement Guidelines
71 |
72 | Community leaders will follow these Community Impact Guidelines in determining
73 | the consequences for any action they deem in violation of this Code of Conduct:
74 |
75 | ### 1. Correction
76 |
77 | **Community Impact**: Use of inappropriate language or other behavior deemed
78 | unprofessional or unwelcome in the community.
79 |
80 | **Consequence**: A private, written warning from community leaders, providing
81 | clarity around the nature of the violation and an explanation of why the
82 | behavior was inappropriate. A public apology may be requested.
83 |
84 | ### 2. Warning
85 |
86 | **Community Impact**: A violation through a single incident or series
87 | of actions.
88 |
89 | **Consequence**: A warning with consequences for continued behavior. No
90 | interaction with the people involved, including unsolicited interaction with
91 | those enforcing the Code of Conduct, for a specified period of time. This
92 | includes avoiding interactions in community spaces as well as external channels
93 | like social media. Violating these terms may lead to a temporary or
94 | permanent ban.
95 |
96 | ### 3. Temporary Ban
97 |
98 | **Community Impact**: A serious violation of community standards, including
99 | sustained inappropriate behavior.
100 |
101 | **Consequence**: A temporary ban from any sort of interaction or public
102 | communication with the community for a specified period of time. No public or
103 | private interaction with the people involved, including unsolicited interaction
104 | with those enforcing the Code of Conduct, is allowed during this period.
105 | Violating these terms may lead to a permanent ban.
106 |
107 | ### 4. Permanent Ban
108 |
109 | **Community Impact**: Demonstrating a pattern of violation of community
110 | standards, including sustained inappropriate behavior, harassment of an
111 | individual, or aggression toward or disparagement of classes of individuals.
112 |
113 | **Consequence**: A permanent ban from any sort of public interaction within
114 | the community.
115 |
116 | ## Attribution
117 |
118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119 | version 2.0, available at
120 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
121 |
122 | Community Impact Guidelines were inspired by
123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124 |
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available
127 | at [https://www.contributor-covenant.org/translations][translations].
128 |
129 | [homepage]: https://www.contributor-covenant.org
130 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
131 | [Mozilla CoC]: https://github.com/mozilla/diversity
132 | [FAQ]: https://www.contributor-covenant.org/faq
133 | [translations]: https://www.contributor-covenant.org/translations
134 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | In the spirit of Open Source Software, everyone is very welcome to contribute to this repository. Feel free to [raise issues](https://github.com/willfarrell/csv-rex/issues) or to [submit Pull Requests](https://github.com/willfarrell/csv-rex/pulls).
4 |
5 | Before contributing to the project, make sure to have a look at our [Code of Conduct](/.github/CODE_OF_CONDUCT.md).
6 |
7 |
8 | ## Licence
9 |
10 | Licensed under [MIT Licence](LICENSE). Copyright (c) 2022 [will Farrell](https://github.com/willfarrell), and the [csv-rex team](https://github.com/willfarrell/csv-rex/graphs/contributors).
11 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [willfarrell]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | How to reproduce the behaviour:
15 | 1. Sample code '...'
16 | 2. Input '....'
17 | 3. Unit test '....'
18 | 4. Thrown error
19 |
20 | **Expected behaviour**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Environment (please complete the following information):**
24 | - Node.js: [e.g. 18]
25 | - csv-rex: [e.g. 0.0.0]
26 | - Browser: [e.g. Firefox 100]
27 |
28 | **Additional context**
29 | Add any other context about the problem here.
30 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: feature request
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/pull-request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Pull request
3 | about: Pull request
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
12 | **What does this implement/fix? Explain your changes.**
13 |
14 | **Does this close any currently open issues?**
15 |
16 | **Any relevant logs, error output, etc?**
17 |
18 | **Environment:**
19 | - Node.js: [e.g. 18]
20 | - csv-rex: [e.g. 0.0.0]
21 | - Browser: [e.g. Firefox 100]
22 |
23 | **Any other comments?**
24 |
25 | **Todo List:**
26 | - [ ] Feature/Fix fully implemented
27 | - [ ] Added tests
28 | - [ ] Unit tests
29 | - [ ] Benchmark tests (if applicable)
30 | - [ ] Updated relevant documentation
31 | - [ ] Updated relevant examples
32 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Lint
2 |
3 | on:
4 | push:
5 | branches:
6 | - '**'
7 | tags-ignore:
8 | - '*.*.*'
9 | pull_request:
10 |
11 | jobs:
12 | lint:
13 | runs-on: ubuntu-latest
14 |
15 | strategy:
16 | matrix:
17 | node-version: [18.x]
18 |
19 | steps:
20 | - name: Checkout repository
21 | uses: actions/checkout@v2
22 | - name: Use Node.js ${{ matrix.node-version }}
23 | uses: actions/setup-node@v2
24 | with:
25 | node-version: ${{ matrix.node-version }}
26 | - name: Use npm 8
27 | run: |
28 | npm install -g npm@8
29 | - name: Install dependencies
30 | run: |
31 | npm install
32 | env:
33 | CI: true
34 | - name: Linting
35 | run: npm run lint
36 | env:
37 | CI: true
38 |
--------------------------------------------------------------------------------
/.github/workflows/sast.yml:
--------------------------------------------------------------------------------
1 | name: SAST
2 |
3 | on:
4 | push:
5 | branches:
6 | - '**'
7 | tags-ignore:
8 | - '*.*.*'
9 | pull_request:
10 |
11 | jobs:
12 | lint:
13 | runs-on: ubuntu-latest
14 |
15 | permissions:
16 | # required for all workflows
17 | security-events: write
18 |
19 | # only required for workflows in private repositories
20 | actions: read
21 | contents: read
22 |
23 | steps:
24 | - name: Checkout repository
25 | uses: actions/checkout@v3
26 | - name: Initialize CodeQL
27 | uses: github/codeql-action/init@v2
28 | - name: Perform CodeQL Analysis
29 | uses: github/codeql-action/analyze@v2
30 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Unit Tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - 'main'
7 | tags-ignore:
8 | - '*.*.*'
9 | pull_request:
10 |
11 | jobs:
12 | tests:
13 | name: Tests
14 | runs-on: ubuntu-latest
15 |
16 | strategy:
17 | matrix:
18 | node-version: [18.x]
19 |
20 | steps:
21 | - name: Checkout repository
22 | uses: actions/checkout@v2
23 | - name: Use Node.js ${{ matrix.node-version }}
24 | uses: actions/setup-node@v2
25 | with:
26 | node-version: ${{ matrix.node-version }}
27 | - name: Use npm 8
28 | run: |
29 | npm install --location=global npm@8
30 | - name: Install dependencies
31 | run: |
32 | npm install
33 | env:
34 | CI: true
35 |
36 | - name: Build for Node.js ${{ matrix.node-version }}
37 | run: |
38 | npm run build
39 |
40 | - name: Unit tests
41 | run: |
42 | npm run test
43 | env:
44 | CI: true
45 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.cjs
2 | *.mjs
3 | *.mjs.map
4 | coverage
5 | *.0x
6 | *.br
7 |
8 | !commitlint.config.cjs
9 |
10 | # Logs
11 | logs
12 | *.log
13 | npm-debug.log*
14 | yarn-debug.log*
15 | yarn-error.log*
16 | lerna-debug.log*
17 |
18 | # Diagnostic reports (https://nodejs.org/api/report.html)
19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
20 |
21 | # Runtime data
22 | pids
23 | *.pid
24 | *.seed
25 | *.pid.lock
26 |
27 | # Directory for instrumented libs generated by jscoverage/JSCover
28 | lib-cov
29 |
30 | # Coverage directory used by tools like istanbul
31 | coverage
32 | *.lcov
33 |
34 | # nyc test coverage
35 | .nyc_output
36 |
37 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
38 | .grunt
39 |
40 | # Bower dependency directory (https://bower.io/)
41 | bower_components
42 |
43 | # node-waf configuration
44 | .lock-wscript
45 |
46 | # Compiled binary addons (https://nodejs.org/api/addons.html)
47 | build/Release
48 |
49 | # Dependency directories
50 | node_modules/
51 | jspm_packages/
52 |
53 | # TypeScript v1 declaration files
54 | typings/
55 |
56 | # TypeScript cache
57 | *.tsbuildinfo
58 |
59 | # Optional npm cache directory
60 | .npm
61 |
62 | # Optional eslint cache
63 | .eslintcache
64 |
65 | # Microbundle cache
66 | .rpt2_cache/
67 | .rts2_cache_cjs/
68 | .rts2_cache_es/
69 | .rts2_cache_umd/
70 |
71 | # Optional REPL history
72 | .node_repl_history
73 |
74 | # Output of 'npm pack'
75 | *.tgz
76 |
77 | # Yarn Integrity file
78 | .yarn-integrity
79 |
80 | # dotenv environment variables file
81 | .env
82 | .env.test
83 |
84 | # parcel-bundler cache (https://parceljs.org/)
85 | .cache
86 |
87 | # Next.js build output
88 | .next
89 |
90 | # Nuxt.js build / generate output
91 | .nuxt
92 | dist
93 |
94 | # Gatsby files
95 | .cache/
96 | # Comment in the public line in if your project uses Gatsby and *not* Next.js
97 | # https://nextjs.org/blog/next-9-1#public-directory-support
98 | # public
99 |
100 | # vuepress build output
101 | .vuepress/dist
102 |
103 | # Serverless directories
104 | .serverless/
105 |
106 | # FuseBox cache
107 | .fusebox/
108 |
109 | # DynamoDB Local files
110 | .dynamodb/
111 |
112 | # TernJS port file
113 | .tern-port
114 |
115 | # IDE
116 | .idea
117 | *.iml
118 | .nova
119 |
120 | # OS
121 | .DS_Store
--------------------------------------------------------------------------------
/.husky/commit-msg:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh"
3 |
4 | npm run commit-msg
5 |
--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh"
3 |
4 | npm run pre-commit
5 |
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "singleQuote": true,
3 | "semi": false,
4 | "trailingComma": "none"
5 | }
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 will Farrell
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
36 |
--------------------------------------------------------------------------------
/benchmark.js:
--------------------------------------------------------------------------------
1 | import parseBenchmarks from './parse.bench.js'
2 | // import formatBenchmarks from './parse.bench.js'
3 |
4 | await parseBenchmarks()
5 | // await formatBenchmarks()
6 |
--------------------------------------------------------------------------------
/bin/esbuild:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 |
3 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs index.js --minify --allow-overwrite --outfile=index.cjs
4 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse.js --bundle --minify --allow-overwrite --outfile=parse.cjs
5 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse-mini.js --bundle --minify --allow-overwrite --outfile=parse-mini.cjs
6 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs format.js --bundle --minify --allow-overwrite --outfile=format.cjs
7 |
8 | node_modules/.bin/esbuild --platform=node --format=esm index.js --minify --sourcemap=external --allow-overwrite --outfile=index.mjs
9 | node_modules/.bin/esbuild --platform=node --format=esm parse.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse.mjs
10 | node_modules/.bin/esbuild --platform=node --format=esm parse-mini.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse-mini.mjs
11 | node_modules/.bin/esbuild --platform=node --format=esm format.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=format.mjs
12 |
13 |
--------------------------------------------------------------------------------
/commitlint.config.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | extends: ['@commitlint/config-conventional']
3 | }
4 |
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/CNAME:
--------------------------------------------------------------------------------
1 | csv-rex.js.org
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
33 |
34 | ## Features
35 |
36 | - Free to use under MIT licence
37 | - Comma-Separated Values (CSV) Files specification compliant ([RFC-4180](https://tools.ietf.org/html/rfc4180))
38 | - Small bundle size (~1KB compressed = esbuild + minify + br)
39 | - Zero dependencies
40 | - ESM & CJS modules with `.map` files
41 | - NodeJS and WebStream API support via [@datastream/csv](https://github.com/willfarrell/datastream)
42 | - It's just fast. See the [benchmarks](https://github.com/willfarrell/csv-benchmarks).
43 |
44 | ## Why not use `papaparse` or `csv-parse`?
45 |
46 | Both are great libraries, we've used them both in many projects over the years.
47 |
48 | - [`csv-parse`](https://csv.js.org/parse/): Built on top of NodeJS native APIs giving it great stream support. If you want to run it in the browser however, you're going to have to ship a very large polyfill.
49 | - [`papaparse`](https://www.papaparse.com/): Built to be more friendly for browser with an option to run in node as well. Faster than `csv-parse`, but, its dadbod and lack of native stream support leaves room for improvement.
50 |
51 | The goal with `csv-rex` is to have a CSV parser and formatter that is as fast as others, reduced bundle size, and have cross-environment stream support. We think we've achieved our goal and hope you enjoy.
52 |
53 | ## Setup
54 |
55 | ```bash
56 | npm install csv-rex
57 | ```
58 |
59 | ```javascript
60 | import { parse, format } from 'csv-rex'
61 |
62 | // parse
63 | const linesArray = parse(inputString, {})
64 |
65 | // format
66 | const csv = format(linesArray, {})
67 | ```
68 |
--------------------------------------------------------------------------------
/docs/docs/format.md:
--------------------------------------------------------------------------------
1 | # format
2 |
3 | ## Options
4 |
5 | - `header` (`true`): Keys to be used in JSON object for the parsed row
6 | - `true`: Will include header, will use `Object.keys()` for columns
7 | - `[...]`: What columns to included and in what order
8 | - `false`: Will exclude a header line.
9 | - `newlineChar` (`\r\n`): What `newline` character(s) to be used.
10 | - `delimiterChar` (`,`): Characters used to separate fields.
11 | - `quoteChar` (`"`): Character used to wrap fields that need to have special characters within them.
12 | - `escapeChar` (`${quoteChar}`): Character used to escape the `quoteChar`.
13 | - `quoteColumn`: (`undefined`): Array that maps to the headers to indicate what columns need to have quotes. Used to improve performance.
14 | - `true`: Always quote column
15 | - `false`: Never quote column
16 | - `undefined`/`null`/``: Detect if quotes are needed based on contents
17 | - `enqueue` (`(string) => {}`): Function to run on formatted row data.
18 | - `enableReturn` (`true`): Will concat rows into a single string. Set to `false` if handling data within enqueue for performance improvements.
19 |
20 | ### Array chunk
21 | - `header:[...]` required
22 |
23 | ### Object chunk
24 |
25 |
26 |
27 | ## Examples
28 |
29 | ### Formatting an array of objects to CSV string
30 |
31 | ```javascript
32 | import { format } from 'csv-rex'
33 |
34 | export default (arrayOfObjects) => format(arrayOfObjects, { newlineChar: '\n' })
35 | ```
36 |
37 | ### NodeJS Stream
38 |
39 | ```javascript
40 | import { createReadStream } from 'node:fs'
41 | import { pipeline, createReadableStream } from '@datastream/core'
42 | import { csvFormatStream } from '@datastream/csv'
43 |
44 | export default async (filePath, opts = {}) => {
45 | const streams = [
46 | createReadableStream([
47 | /*...*/
48 | ]),
49 | csvFormatStream(opts)
50 | // ...
51 | ]
52 |
53 | const result = await pipeline(streams)
54 | console.log(result.csvErrors)
55 | }
56 | ```
57 |
58 | ### Web Stream API
59 |
60 | Requires: Chrome v71, Edge v79, Firefox v102, Safari v14.5, NodeJS v18 (v16 with import). If you want to use WebStreams with node you need to pass `--conditions=webstream` in the cli to force its use.
61 |
62 | ```javascript
63 | import { pipeline } from '@datastream/core'
64 | import { stringReadableStream } from '@datastream/string'
65 | import { csvParseStream } from '@datastream/csv'
66 |
67 | export default async (blob, opts = {}) => {
68 | const streams = [
69 | stringReadableStream(blob),
70 | csvParseStream()
71 | // ...
72 | ]
73 |
74 | const result = await pipeline(streams)
75 | console.log(result.csvErrors)
76 | }
77 | ```
78 |
79 | ### WebWorker using a file
80 |
81 | To prevent blocking the main thread it is recommended that CSV parsing is done in a WebWorker, SharedWorker, or ServiceWorker instead of the main thread. This example doesn't use streams due to the lack of Firefox stream support mentioned above.
82 |
83 | ```javascript
84 | /* eslint-env worker */
85 | import format from 'csv-rex/format'
86 |
87 | const enqueue = ({ data, idx, err }) => {
88 | if (err) {
89 | // handle err
90 | return
91 | }
92 | // handle data
93 | }
94 |
95 | onmessage = async (event) => {
96 | const { file } = event.data
97 | const options = { enqueue }
98 | file.length = file.size // polyfill length
99 | await format(file, options)
100 | // ...
101 | postMessageEncode()
102 | }
103 |
104 | const postMessageEncode = (str) => {
105 | if (typeof str !== 'string') str = JSON.stringify(str)
106 | const buffer = new TextEncoder().encode(str).buffer
107 | postMessage(buffer, [buffer])
108 | }
109 | ```
110 |
--------------------------------------------------------------------------------
/docs/docs/migrate/csv.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/docs/migrate/csv.md
--------------------------------------------------------------------------------
/docs/docs/migrate/papaparse.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/docs/migrate/papaparse.md
--------------------------------------------------------------------------------
/docs/docs/parse.md:
--------------------------------------------------------------------------------
1 | # parse
2 |
3 | ## Options
4 |
5 | - `header` (`true`): Keys to be used in JSON object for the parsed row
6 | - `true`: First row of the `input` contains columns and will need to be parsed out. output object of column value pairs.
7 | - `[...]`: Input doesn't contain columns, pre-assign columns and output object of column value pairs.
8 | - `false`: Input doesn't contain columns, output array of values
9 | - `newlineChar` (`''`): What `newline` character(s) to be used. By default will guess from `\r\n`, `\n`, `\r`
10 | - `delimiterChar` (`''`): Characters used to separate fields. Must be length of 1. By default will guess from `,`, `\t`, `|`, `;`, `\x1E`, `\x1F`
11 | - `quoteChar` (`"`): Character used to wrap fields that need to have special characters within them. Must be length of 1
12 | - `escapeChar` (`${quoteChar}`): Character used to escape the `quoteChar`. Must be length of 1
13 | - `enqueue` (`({data, idx, err}) => {}`): Function to run on parsed row data.
14 | - `emptyFieldValue` (`''`): Value to be used instead of an empty string. Can be set to `undefined` to have empty fields not be included.
15 | - `coerceField` (`(field, idx) => field`): Function to apply type/value coercion.
16 | - `commentPrefixValue` (`false`): Lines starting with this value will be ignored (i.e. `#`, `//`). Can be set to `false` if files will never have comments.
17 | - `errorOnEmptyLine` (`true`): When an empty line is encountered. Push row with error when occurs, row ignored otherwise.
18 | - `errorOnComment` (`true`): When a comment is encountered. Push row with error when occurs, row ignored otherwise.
19 | - `errorOnExtraFields` (`true`): When number of headers is less than the number of fields in a row. Push row with error when occurs, row ignored. Set to `false` to have what headers exist be mapped, and passed on.
20 | - `errorOnMissingFields` (`true`): When number of headers is more than the number of fields in a row. Push row with error when occurs, row ignored. Set to `false` to have what fields exist be mapped, and passed on.
21 | - `errorOnFieldMalformed` (`true`): When no closing `quoteChar` is found. Throws parsing error.
22 | - `chunkSize` (`64MB`): Size of chunks to process at once.
23 | - `enableReturn` (`true`): Will concat rows into a single array. Set to `false` if handling data within enqueue for performance improvements.
24 |
25 | ## Examples
26 |
27 | ### Parsing a CSV formatted string to JSON (`[{...},{...},...]`)
28 |
29 | ```javascript
30 | import { parse } from 'csv-rex'
31 |
32 | const enqueue = ({ idx, data, err }) => {
33 | if (err) {
34 | // handle err
35 | return
36 | }
37 | // modify and/or handle data
38 | }
39 |
40 | export default (csvString) => parse(csvString, { enqueue })
41 | ```
42 |
43 | ### NodeJS Stream
44 |
45 | ```javascript
46 | import { createReadStream } from 'node:fs'
47 | import { pipeline } from '@datastream/core'
48 | import { csvParseStream } from '@datastream/csv'
49 |
50 | export default async (filePath, opts = {}) => {
51 | const streams = [
52 | createReadStream(filePath),
53 | csvParseStream()
54 | // ...
55 | ]
56 |
57 | const result = await pipeline(streams)
58 | console.log(result.csvErrors)
59 | }
60 | ```
61 |
62 | ### Web Stream API
63 |
64 | Requires: Chrome v71, Edge v79, Firefox v102, Safari v14.5, NodeJS v18 (v16 with import). If you want to use WebStreams with node you need to pass `--conditions=webstream` in the cli to force its use.
65 |
66 | ```javascript
67 | import { pipeline } from '@datastream/core'
68 | import { stringReadableStream } from '@datastream/string'
69 | import { csvParseStream } from '@datastream/csv'
70 |
71 | export default async (blob, opts = {}) => {
72 | const streams = [
73 | stringReadableStream(blob),
74 | csvParseStream()
75 | // ...
76 | ]
77 |
78 | const result = await pipeline(streams)
79 | console.log(result.csvErrors)
80 | }
81 | ```
82 |
83 | ### File from input form in a Browser
84 |
85 | To prevent blocking the main thread it is recommended that CSV parsing is done in a WebWorker, SharedWorker, or ServiceWorker instead of the main thread.
86 |
87 | ```javascript
88 | /* eslint-env worker */
89 | import parse from 'csv-rex/parse'
90 |
91 | const enqueue = ({ data, idx, err }) => {
92 | if (err) {
    // handle err
94 | return
95 | }
96 | // handle data
97 | }
98 |
99 | onmessage = async (event) => {
100 | const { file } = event.data
101 | const options = { enqueue }
102 | file.length = file.size // polyfill length
103 | await parse(file, options)
104 | // ...
105 | postMessageEncode()
106 | }
107 |
108 | const postMessageEncode = (str) => {
109 | if (typeof str !== 'string') str = JSON.stringify(str)
110 | const buffer = new TextEncoder().encode(str).buffer
111 | postMessage(buffer, [buffer])
112 | }
113 | ```
114 |
115 | ```html
116 |
117 |
118 |
125 |
149 |
150 |
151 | ```
152 |
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | csv-rex - A tiny and fast CSV parser & formatter for JavaScript.
7 |
8 |
12 |
16 |
20 |
21 |
42 |
43 |
44 |
45 |
46 |
55 |
56 |
57 |
59 |
60 |
--------------------------------------------------------------------------------
/docs/sidebar.md:
--------------------------------------------------------------------------------
1 | - Getting started
2 | - [Setup](/)
3 | - [Parse](/docs/parse.md)
4 | - [Options](/docs/parse.md)
5 | - [Basic Example](/docs/parse.md#start)
6 | - [Fast Mode Example](/docs/parse.md#start)
7 | - [Stream Example](/docs/parse.md)
8 | - [WebWorker Example](/docs/parse.md)
9 | - [Format](/docs/format.md)
10 | - [Options](/docs/format.md)
11 | - [Basic Example](/docs/format.md)
12 | - [NodeJS Stream Example](/docs/parse.md#nodestream)
13 | - [Web Stream API Example](/docs/parse.md#webstream)
14 | - [WebWorker Example](/docs/parse.md#browser)
15 | - Migrate
16 | - [papaparse](/docs/migrate/papaparse.md)
17 | - [csv](/docs/migrate/csv.md)
18 |
--------------------------------------------------------------------------------
/docs/sidebar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/sidebar.png
--------------------------------------------------------------------------------
/docs/sidebar.sketch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/sidebar.sketch
--------------------------------------------------------------------------------
/docs/t-rex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/t-rex.png
--------------------------------------------------------------------------------
/format.js:
--------------------------------------------------------------------------------
1 | export const defaultOptions = {
2 | header: true, // false: don't log out header; true: log out header
3 | newlineChar: '\r\n', // undefined: detect newline from file; '\r\n': Windows; '\n': Linux/Mac
4 | delimiterChar: ',', // TODO add in auto detect or function
5 | quoteChar: '"'
6 | // escapeChar: '"'
7 |
8 | // quoteColumn: undefined
9 | }
10 |
/**
 * Format an array of rows (objects or arrays) into a CSV string.
 *
 * @param {Array<object>|Array<Array>} input - rows to serialize
 * @param {object} [opts] - overrides for `defaultOptions`; also supports
 *   `enqueue(row)` (called with each formatted line) and `enableReturn`
 *   (default `true`: accumulate and return the CSV string)
 * @returns {string|false} the CSV string, or `false` when `enableReturn` is falsy
 */
export const format = (input, opts = {}) => {
  const options = {
    ...defaultOptions,
    // Fix: `enableReturn` previously had no default, so `format(data)`
    // returned `undefined`; documented default is `true` (matches parse()).
    enableReturn: true,
    enqueue: () => {},
    ...opts
  }
  options.escapeChar ??= options.quoteChar
  const { enableReturn, enqueue } = options

  const isArrayData = Array.isArray(input[0])
  const formatRow = isArrayData ? formatArray : formatObject
  if (!isArrayData && options.header === true) {
    // Derive header columns from the first record's keys
    options.header = Object.keys(input[0])
  }

  // Fix: only emit a header row when column names exist; previously
  // `header: true` with array rows formatted the literal `true`,
  // producing a stray blank line at the top of the output.
  let res = Array.isArray(options.header)
    ? formatArray(options.header, options)
    : ''

  for (let i = 0, l = input.length; i < l; i++) {
    const data = formatRow(input[i], options)
    enqueue(data)
    if (enableReturn) {
      res += data
    }
  }

  return enableReturn && res
}
34 |
// Serialize one row given as an array of fields; appends the newline.
export const formatArray = (arr, options) => {
  let line = ''
  const total = arr.length
  for (let idx = 0; idx < total; idx += 1) {
    if (idx) {
      line += options.delimiterChar
    }
    line += formatField(arr[idx], null, options)
  }
  return line + options.newlineChar
}
42 |
// Serialize one row given as an object, ordered by `options.header`
// (falling back to the object's own key order); appends the newline.
export const formatObject = (data, options) => {
  const columns = options.header || Object.keys(data)
  const parts = []
  for (let idx = 0; idx < columns.length; idx += 1) {
    parts.push(
      formatField(data[columns[idx]], options.quoteColumn?.[idx], options)
    )
  }
  return parts.join(options.delimiterChar) + options.newlineChar
}
53 |
// Serialize a single field value.
// `needsQuotes` is a per-field override: `false` skips quoting entirely,
// truthy forces quoting; otherwise quoting is decided by content
// (delimiter/newline/quote/BOM present, or leading/trailing space).
export const formatField = (
  field,
  needsQuotes,
  { quoteChar, escapeChar, delimiterChar, newlineChar }
) => {
  // Nullish / empty values serialize to an empty field
  if (field === undefined || field === null || field === '') {
    return ''
  }

  if (field.constructor === Date) {
    return field.toISOString() // JSON.stringify(str).slice(1, 25) faster??
  }

  const text = field.toString()

  // Developer override using options.quotes
  if (needsQuotes === false) {
    return text
  }

  const mustQuote =
    needsQuotes ||
    hasAnyDelimiters(text, [delimiterChar, newlineChar, quoteChar, '\ufeff']) ||
    text[0] === ' ' ||
    text[text.length - 1] === ' '

  if (!mustQuote) {
    return text
  }
  // Escape embedded quote chars, then wrap the whole field
  return quoteChar + text.replaceAll(quoteChar, escapeChar + quoteChar) + quoteChar
}
92 |
// True when `field` contains any of `delimiters`.
// Fix: the original returned `undefined` on the no-match path; `some`
// short-circuits exactly like the loop but always yields a boolean.
const hasAnyDelimiters = (field, delimiters) =>
  delimiters.some((delimiter) => field.includes(delimiter))
100 |
101 | export default format
102 |
--------------------------------------------------------------------------------
/format.test.js:
--------------------------------------------------------------------------------
1 | import test from 'node:test'
2 | import { equal } from 'node:assert'
3 | import { format, formatArray, formatObject, formatField } from './format.js'
4 |
// Shared options for every test case.
// NOTE: newlineChar is the literal two-character string '/n' (slash + n),
// not the escape '\n' — it keeps the newline visible in expected strings.
const defaultOptions = {
  header: true,
  escapeChar: '"',
  quoteChar: '"',
  delimiterChar: ',',
  newlineChar: '/n',

  enableReturn: true,
  enqueue: () => {}
}

// *** format() *** //

test('Should format array of objects w/ header == true', async (t) => {
  const field = format([{ a: '1', b: '2' }], {
    ...defaultOptions,
    header: true
  })
  equal(field, 'a,b/n1,2/n')
})

test('Should format array of objects w/ header == [...]', async (t) => {
  // Explicit header selects and orders columns; extra keys (c) are dropped
  const field = format([{ a: '1', b: '2', c: '3' }], {
    ...defaultOptions,
    header: ['b', 'a']
  })
  equal(field, 'b,a/n2,1/n')
})

test('Should format array of objects w/ header === false', async (t) => {
  const field = format([{ a: '1', b: '2' }], {
    ...defaultOptions,
    header: false
  })
  equal(field, '1,2/n')
})

test('Should format array of arrays w/ header == [...]', async (t) => {
  const field = format([['1', '2']], {
    ...defaultOptions,
    header: ['a', 'b']
  })
  equal(field, 'a,b/n1,2/n')
})

test('Should format array of arrays w/ header === false', async (t) => {
  const field = format([['1', '2']], {
    ...defaultOptions,
    header: false
  })
  equal(field, '1,2/n')
})

// *** formatArray() used for the header row *** //
test('Should format header', async (t) => {
  const field = formatArray(['b', 'a'], {
    ...defaultOptions,
    header: ['b', 'a']
  })
  equal(field, 'b,a/n')
})

// *** formatArray() *** //
test('Should format row array', async (t) => {
  const field = formatArray(['1', '2'], { ...defaultOptions, header: false })
  equal(field, '1,2/n')
})

// *** formatObject() *** //
test('Should format row object', async (t) => {
  const field = formatObject(
    { a: '1', b: '2' },
    { ...defaultOptions, header: ['b', 'a'] }
  )
  equal(field, '2,1/n')
})
test('Should format row object w/ quotes', async (t) => {
  // quoteColumn forces quoting per column position
  const field = formatObject(
    { a: '1', b: '2' },
    { ...defaultOptions, header: ['b', 'a'], quoteColumn: [true, true] }
  )
  equal(field, '"2","1"/n')
})
test('Should format row object w/o quotes', async (t) => {
  const field = formatObject(
    { a: '1', b: '2' },
    { ...defaultOptions, header: ['b', 'a'], quoteColumn: [false, false] }
  )
  equal(field, '2,1/n')
})

// *** formatField() *** //
test('Should format undefined', async (t) => {
  const field = formatField(undefined, undefined, defaultOptions)
  equal(field, '')
})
test('Should format null', async (t) => {
  const field = formatField(null, undefined, defaultOptions)
  equal(field, '')
})
test('Should format empty string', async (t) => {
  const field = formatField('', undefined, defaultOptions)
  equal(field, '')
})
test('Should format date', async (t) => {
  // Dates serialize via toISOString()
  const field = formatField(
    new Date('2000-01-01T00:00:00.000Z'),
    undefined,
    defaultOptions
  )
  equal(field, '2000-01-01T00:00:00.000Z')
})
test('Should format number', async (t) => {
  const field = formatField(0, undefined, defaultOptions)
  equal(field, '0')
})
test('Should format string', async (t) => {
  const field = formatField('column', undefined, defaultOptions)
  equal(field, 'column')
})
test('Should format string with delimiter', async (t) => {
  // Embedded quoteChar triggers quoting and doubles the quote
  const field = formatField('_"_', undefined, defaultOptions)
  equal(field, '"_""_"')
})
test('Should format string with leading space', async (t) => {
  const field = formatField(' space', undefined, defaultOptions)
  equal(field, '" space"')
})
test('Should format string with trailing space', async (t) => {
  const field = formatField('space ', undefined, defaultOptions)
  equal(field, '"space "')
})
test('Should format w/ quotes', async (t) => {
  // needsQuotes === true forces quoting regardless of content
  const field = formatField('column', true, defaultOptions)
  equal(field, '"column"')
})
test('Should format w/o quotes', async (t) => {
  // needsQuotes === false suppresses quoting regardless of content
  const field = formatField('column', false, defaultOptions)
  equal(field, 'column')
})
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | // import {TextDecoder} from 'node:util'
2 | // import {defaultOptions, optionDetectNewlineValue} from './options.js'
3 | import csvParse from 'csv-rex/parse'
4 | import csvParseMini from 'csv-rex/parse-mini'
5 | import csvFormat from 'csv-rex/format'
6 |
// Re-export the three entry points under their public names
export const parse = csvParse
export const parseMini = csvParseMini
export const format = csvFormat

// Bundle the same functions as the default export for convenience
export default { parse, parseMini, format }
16 |
--------------------------------------------------------------------------------
/lint-staged.config.js:
--------------------------------------------------------------------------------
// lint-staged configuration: commands run against staged files on pre-commit
export default {
  '**/*.{json,yml}': ['prettier --write'],
  '**/*.js': ['prettier --write', 'standard --fix']
}
5 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "csv-rex",
3 | "version": "0.7.0",
4 | "description": "A tiny and fast CSV parser for JavaScript.",
5 | "type": "module",
6 | "files": [
7 | "*.mjs",
8 | "*.cjs",
9 | "*.map"
10 | ],
11 | "main": "index.mjs",
12 | "exports": {
13 | ".": {
14 | "import": {
15 | "default": "./index.mjs"
16 | },
17 | "require": {
18 | "default": "./index.cjs"
19 | }
20 | },
21 | "./parse": {
22 | "import": {
23 | "default": "./parse.mjs"
24 | },
25 | "require": {
26 | "default": "./parse.cjs"
27 | }
28 | },
29 | "./parse-mini": {
30 | "import": {
31 | "default": "./parse-mini.mjs"
32 | },
33 | "require": {
34 | "default": "./parse-mini.cjs"
35 | }
36 | },
37 | "./format": {
38 | "import": {
39 | "default": "./format.mjs"
40 | },
41 | "require": {
42 | "default": "./format.cjs"
43 | }
44 | }
45 | },
46 | "scripts": {
47 | "prepare": "husky install",
48 | "commit-msg": "commitlint --config commitlint.config.cjs --edit",
49 | "pre-commit": "lint-staged",
50 | "start": "docsify serve docs",
51 | "lint": "prettier --write *.{js,json} && standard --fix *.js",
52 | "test": "npm run build && c8 node --test",
53 | "prepublishOnly": "npm test",
54 | "build": "./bin/esbuild",
55 | "bench": "npm run build && node parse.bench.js"
56 | },
57 | "repository": {
58 | "type": "git",
59 | "url": "git+https://willfarrell@github.com/willfarrell/csv-rex.git"
60 | },
61 | "keywords": [
62 | "csv",
63 | "parse",
64 | "format",
65 | "json"
66 | ],
67 | "author": {
68 | "name": "willfarrell",
69 | "url": "https://github.com/willfarrell"
70 | },
71 | "license": "MIT",
72 | "bugs": {
73 | "url": "https://github.com/willfarrell/csv-rex/issues"
74 | },
75 | "homepage": "https://github.com/willfarrell/csv-rex#readme",
76 | "dependencies": {
77 | "csv-rex": "0.7.0"
78 | },
79 | "devDependencies": {
80 | "@commitlint/cli": "^17.0.0",
81 | "@commitlint/config-conventional": "^17.0.0",
82 | "benny": "^3.7.1",
83 | "c8": "^7.11.0",
84 | "docsify-cli": "^4.4.4",
85 | "esbuild": "^0.17.0",
86 | "husky": "^8.0.0",
87 | "lint-staged": "^13.0.0",
88 | "prettier": "^2.0.0",
89 | "sinon": "^15.0.0",
90 | "standard": "^17.0.0"
91 | },
92 | "funding": {
93 | "type": "github",
94 | "url": "https://github.com/sponsors/willfarrell"
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/parse-mini.js:
--------------------------------------------------------------------------------
1 | // chunkSize >> largest expected row
2 | const defaultOptions = {
3 | header: true, // false: return array; true: detect headers and return json; [...]: use defined headers and return json
4 | newlineChar: '\r\n', // '': detect newline from chunk; '\r\n': Windows; '\n': Linux/Mac
5 | delimiterChar: ',', // '': detect delimiter from chunk
6 | // quoteChar: '"',
7 | // escapeChar: '"', // default: `quoteChar`
8 |
9 | // Parse
10 | emptyFieldValue: '',
11 | coerceField: (field) => field, // TODO tests
12 | // commentPrefixValue: false, // falsy: disable, '//': enabled
13 | // errorOnComment: true,
14 | // errorOnEmptyLine: true,
15 | errorOnFieldsMismatch: true
16 | // errorOnFieldMalformed: true
17 | }
18 |
19 | const length = (value) => value.length
20 |
21 | export const parse = (opts = {}) => {
22 | const options = { ...defaultOptions, ...opts }
23 | options.escapeChar ??= options.quoteChar
24 |
25 | let { header, newlineChar, delimiterChar } = options
26 | let headerLength = length(header)
27 | const {
28 | // quoteChar,
29 | // escapeChar,
30 | // commentPrefixValue,
31 | emptyFieldValue,
32 | coerceField,
33 | // errorOnEmptyLine,
34 | // errorOnComment,
35 | errorOnFieldsMismatch
36 | // errorOnFieldMalformed
37 | } = options
38 |
39 | let chunk, enqueue
40 | let partialLine = ''
41 | let idx = 0
42 | const enqueueRow = (row) => {
43 | let data = row
44 | idx += 1
45 | if (headerLength) {
46 | const rowLength = length(row)
47 |
48 | if (headerLength !== rowLength) {
49 | if (errorOnFieldsMismatch) {
50 | enqueueError(
51 | 'FieldsMismatch',
52 | `Incorrect number of fields parsed, expected ${headerLength}.`
53 | )
54 | }
55 | return
56 | } else {
57 | data = {}
58 | for (let i = 0; i < rowLength; i++) {
59 | data[header[i]] = row[i]
60 | }
61 | }
62 | }
63 | enqueue({ idx, data })
64 | }
65 |
66 | const enqueueError = (code, message) => {
67 | enqueue({ idx, err: { code, message } })
68 | }
69 |
70 | const transformField = (field, idx) => {
71 | return coerceField(field || emptyFieldValue, idx)
72 | }
73 |
74 | const chunkParse = (string, controller) => {
75 | chunk = string
76 | enqueue = controller.enqueue
77 | const lines = chunk.split(newlineChar) // TODO use cursor pattern
78 | let linesLength = length(lines)
79 | if (linesLength > 1) {
80 | partialLine = lines.pop()
81 | linesLength -= 1
82 | }
83 |
84 | let i = 0
85 | if (header === true) {
86 | header = lines[i].split(delimiterChar)
87 | headerLength = length(header)
88 | i += 1
89 | }
90 |
91 | for (; i < linesLength; i++) {
92 | const line = lines[i]
93 | const row = []
94 | let cursor = 0
95 | while (cursor < line.length) {
96 | const delimiterIndex = line.indexOf(delimiterChar, cursor)
97 | if (delimiterIndex === -1) {
98 | row.push(transformField(line.substring(cursor), row.length))
99 | break
100 | }
101 | row.push(
102 | transformField(line.substring(cursor, delimiterIndex), row.length)
103 | )
104 | cursor = delimiterIndex + 1
105 | }
106 | enqueueRow(row)
107 | }
108 | }
109 |
110 | return {
111 | chunkParse,
112 | header: () => header,
113 | previousChunk: () => partialLine
114 | }
115 | }
116 |
// Convenience driver: parse a whole in-memory string by feeding it to the
// incremental parser in fixed-size windows, then flushing the remainder.
// Returns the collected row data (or `false` when `enableReturn` is falsy).
export default (input, opts) => {
  const options = {
    ...defaultOptions,
    enableReturn: true,
    chunkSize: 64 * 1024 * 1024,
    enqueue: () => {},
    ...opts
  }
  const { chunkSize, enableReturn, enqueue } = options
  const { chunkParse, previousChunk } = parse(options)

  const collected = []
  const controller = { enqueue }
  if (enableReturn) {
    // Also gather each row's data for the return value
    controller.enqueue = (row) => {
      enqueue(row)
      collected.push(row.data)
    }
  }

  // Checking if you can use fastParse slows it down more than checking for quoteChar on every field.
  for (let position = 0; position < input.length; position += chunkSize) {
    const window = input.substring(position, position + chunkSize)
    chunkParse(previousChunk() + window, controller)
  }
  // flush any buffered partial line
  chunkParse(previousChunk(), controller, true)

  return enableReturn && collected
}
155 |
--------------------------------------------------------------------------------
/parse.bench.js:
--------------------------------------------------------------------------------
1 | import { add, cycle, /* save, */ suite } from 'benny'
2 | import parse from './parse.js' // 'csv-rex/parse'
3 |
// Generated CSV fixtures, keyed by a description string (built lazily)
const inputs = {}
// Each entry is one benchmark scenario: fixture shape + parse options
const configs = []
const baseline = {
  columns: 10,
  rows: 1_000,
  quotes: false,
  newlineChar: '\r\n',
  delimiterChar: ',',
  header: false,
  commentPrefixValue: false
}
configs.push({ ...baseline })
// expected to be slower, compare against each other
configs.push({ ...baseline, columns: 100 }) // input has more columns
configs.push({ ...baseline, rows: 10_000 }) // input has more rows
// Options
configs.push({
  ...baseline,
  header: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
}) // pre-defined headers to make object
configs.push({ ...baseline, header: true }) // use header to make object
configs.push({ ...baseline, newlineChar: '\n' }) // shorter newline ** should be fastest
configs.push({ ...baseline, newlineChar: '' }) // detect newline
configs.push({ ...baseline, delimiterChar: '\t' }) // explicit tab delimiter
configs.push({ ...baseline, delimiterChar: '' }) // detect delimiter
configs.push({ ...baseline, commentPrefixValue: '//' }) // detect comments
configs.push({ ...baseline, quotes: true }) // input has quoted fields

configs.push({ ...baseline, newlineChar: '\n', delimiterChar: '\t' }) // TSV
configs.push({ ...baseline }) // repeat baseline to check run-to-run drift
34 |
// Return only the keys of `config` that differ from `baseline`
// (used to label each benchmark with just its deviation).
const baselineDiff = (config) => {
  const diff = {}
  for (const [key, value] of Object.entries(config)) {
    if (value !== baseline[key]) {
      diff[key] = value
    }
  }
  return diff
}
44 |
// Build one benny `add(...)` case per config, generating (and caching)
// the CSV fixture each case parses.
const testBatch = (configs) => {
  return configs.map((config) => {
    const { columns, rows, quotes, ...options } = config
    // '' in options means "auto-detect", so fall back to baseline chars
    // when generating the fixture text
    const delimiterChar = options.delimiterChar || baseline.delimiterChar
    const newlineChar = options.newlineChar || baseline.newlineChar
    // Cache key describing the fixture's shape
    const input = `${columns}x${rows} w/${
      quotes ? '' : 'o'
    } quotes and {newlineChar:${newlineChar},delimiterChar:${delimiterChar}}`
    if (!inputs[input]) {
      // When quoting, every field is wrapped: `"a","b",...`
      const wrapper = quotes ? '"' : ''
      const delimiter = quotes ? `"${delimiterChar}"` : `${delimiterChar}`
      let csv =
        wrapper +
        Array.from({ length: columns + 1 }, (_, x) => `__${x}__`).join(
          delimiter
        ) +
        wrapper +
        newlineChar
      for (let y = 0; y < rows; y++) {
        csv +=
          wrapper +
          Array.from({ length: columns + 1 }, (_, x) => `${x}x${y}`).join(
            delimiter
          ) +
          wrapper +
          newlineChar
      }
      inputs[input] = csv
    }
    return add(
      `parse(${JSON.stringify({ columns, rows, quotes })}, ${JSON.stringify(
        options
      )}) :: ${JSON.stringify(baselineDiff(config))}`,
      () => {
        parse(inputs[input], options)
      }
    )
  })
}
84 |
// Assemble and run the benny suite over every configured scenario
const parseSuite = suite(
  'parse',
  ...testBatch(configs),
  cycle()
  // save({file: 'parse.bench.csv', format: 'csv'})
)

export default () => parseSuite
93 |
--------------------------------------------------------------------------------
/parse.js:
--------------------------------------------------------------------------------
1 | // chunkSize >> largest expected row
2 | const defaultOptions = {
3 | header: true, // false: return array; true: detect headers and return json; [...]: use defined headers and return json
4 | newlineChar: '', // '': detect newline from chunk; '\r\n': Windows; '\n': Linux/Mac
5 | delimiterChar: '', // '': detect delimiter from chunk
6 | quoteChar: '"',
7 | // escapeChar: '"', // default: `quoteChar`
8 | detectCharLength: 1024,
9 |
10 | // Parse
11 | emptyFieldValue: '',
12 | // TODO option to remove empty fields from object
13 | coerceField: (field) => field, // TODO tests
14 | commentPrefixValue: false, // falsy: disable, '//': enabled
15 | errorOnComment: true,
16 | errorOnEmptyLine: true,
17 | errorOnExtraFields: true,
18 | errorOnMissingFields: true
19 | // errorOnFieldMalformed: true
20 | }
21 |
// Length of a string/array (also tolerates non-array values, yielding undefined)
const length = (value) => {
  return value.length
}
// Backslash-escape regex metacharacters so the string matches literally.
// https://github.com/tc39/proposal-regex-escaping
const escapeRegExp = (string) => string.replaceAll(/[\\^$*+?.()|[\]{}]/g, '\\$&')
24 |
/**
 * Create an incremental, quote-aware CSV parser.
 *
 * Returns `{ chunkParse, header, previousChunk }`:
 * - `chunkParse(string, controller, flush)` parses one chunk, emitting
 *   `{ idx, data }` or `{ idx, err }` via `controller.enqueue`; any
 *   incomplete trailing line is buffered rather than emitted
 * - `header()` returns the current header (array once detected/assigned)
 * - `previousChunk()` returns the buffered partial line, to be prepended
 *   to the next chunk by the caller
 */
export const parse = (opts = {}) => {
  const options = { ...defaultOptions, ...opts }
  options.escapeChar ??= options.quoteChar

  // `header`, `newlineChar`, `delimiterChar` may be (re)assigned during
  // parsing (header detection, newline/delimiter auto-detection)
  let { header, newlineChar, delimiterChar } = options
  const {
    detectCharLength,
    quoteChar,
    escapeChar,
    commentPrefixValue,
    emptyFieldValue,
    coerceField,
    errorOnEmptyLine,
    errorOnComment,
    errorOnExtraFields,
    errorOnMissingFields
    // errorOnFieldMalformed
  } = options
  let headerLength = length(header) // undefined until header is an array
  // Candidate delimiters: , \t | ; RS US
  // NOTE(review): these `g` regexes are stateful (lastIndex persists); if
  // detection runs on more than one chunk they could resume mid-string —
  // confirm detectChar resets or detection only runs once per parser.
  const detectDelimiterCharRegExp = /,|\t|\||;|\x1E|\x1F/g // eslint-disable-line no-control-regex
  const detectNewlineCharRegExp = /\r\n|\n|\r/g

  // Regex matching an escaped quote inside a quoted field (e.g. `""`)
  const escapedQuoteChar = escapeChar + quoteChar
  const escapedQuoteCharRegExp = new RegExp(
    `${escapeRegExp(escapedQuoteChar)}`,
    'g'
  )

  const escapedQuoteEqual = escapeChar === quoteChar
  const escapedQuoteNotEqual = escapeChar !== quoteChar

  let newlineCharLength = length(newlineChar)
  // Single-char assumption for delimiter/quote/escape lengths
  const delimiterCharLength = 1 // length(delimiterChar)
  const quoteCharLength = 1 // length(quoteChar)
  const escapeCharLength = 1 // length(escapeChar)
  const escapedQuoteCharLength = 2 // length(escapedQuoteChar)
  // const commentPrefixValueLength = length(commentPrefixValue)

  // Shared parser state, reset by each chunkParse call
  let chunk, chunkLength, cursor, row, enqueue
  let partialLine = '' // unparsed tail carried to the next chunk
  let idx = 0 // 1-based line counter (includes empty/comment lines)

  // Emit one parsed row; first row becomes the header when `header === true`.
  // With a header in use, maps the row to an object and enforces the
  // missing/extra field policies.
  const enqueueRow = (row) => {
    idx += 1
    if (header === true) {
      header = row
      headerLength = length(header)
      return
    }
    let data = row
    if (headerLength) {
      let rowLength = length(row)

      // enqueueError('FieldsMismatch', `Parsed ${rowLength} fields, expected ${headerLength}.`)
      if (errorOnMissingFields && rowLength < headerLength) {
        enqueueError(
          'MissingFields',
          `Too few fields were parsed, expected ${headerLength}.`
        )
        return
      } else if (headerLength < rowLength) {
        if (errorOnExtraFields) {
          enqueueError(
            'ExtraFields',
            `Too many fields were parsed, expected ${headerLength}.`
          )
          return
        }
        // only map fields that have headers
        rowLength = headerLength
      }

      data = {}
      for (let i = 0; i < rowLength; i++) {
        data[header[i]] = row[i]
      }
    }
    enqueue({ idx, data })
  }

  const enqueueError = (code, message) => {
    enqueue({ idx, err: { code, message } })
  }

  // indexOf over the current chunk, defaulting to the current cursor
  const findNext = (searchValue, start = cursor) => {
    return chunk.indexOf(searchValue, start)
  }

  const parseField = (end) => {
    return chunk.substring(cursor, end)
  }
  // '' (the only falsy value for a parsed string field) becomes emptyFieldValue
  const transformField = (field, idx) => {
    return coerceField(field || emptyFieldValue, idx)
  }

  // TODO idea: when header == true/array using a different addFieldToRow function to allow faster key:value mapping
  // const resetRow = () => {
  //   row = []
  // }
  const addFieldToRow = (field, idx) => {
    row.push(transformField(field, idx))
  }

  // Skip (and optionally report) consecutive empty lines and comment lines
  // starting at the cursor; recurses until a data line is reached.
  const checkForEmptyLine = () => {
    if (findNext(newlineChar) === cursor) {
      idx += 1
      cursor += newlineCharLength
      if (errorOnEmptyLine) {
        enqueueError('EmptyLineExists', 'Empty line detected.')
      }
      return checkForEmptyLine()
    } else if (commentPrefixValue && findNext(commentPrefixValue) === cursor) {
      idx += 1
      cursor = findNext(newlineChar) + newlineCharLength
      if (errorOnComment) {
        enqueueError('CommentExists', 'Comment detected.')
      }
      return checkForEmptyLine()
    }
  }

  // Parse one chunk. `flush === true` means this is the final chunk: the
  // chunk end is treated as end-of-input instead of buffering a partial line.
  const chunkParse = (string, controller, flush = false) => {
    chunk = string
    chunkLength = length(chunk)
    enqueue = controller.enqueue
    partialLine = ''
    cursor = 0
    row = [] // resetRow()

    // auto-detect
    if (!newlineChar) {
      newlineChar = detectChar(
        chunk.substring(0, detectCharLength),
        detectNewlineCharRegExp
      )

      if (!newlineChar) {
        if (chunk.length < detectCharLength && !flush) {
          // First chunk is too short
          partialLine = chunk
          return
        } else {
          throw new Error('newlineCharUnknown')
        }
      }
      newlineCharLength = length(newlineChar)
    }
    delimiterChar ||= detectChar(
      chunk.substring(0, detectCharLength),
      detectDelimiterCharRegExp
    )

    checkForEmptyLine()
    let lineStart = 0
    for (;;) {
      let quoted
      let nextCursor = cursor
      let nextCursorLength
      let atNewline
      if (chunk[cursor] === quoteChar) {
        // Quoted field: scan for the closing quote, stepping over escaped
        // quotes (`""` style when escape === quote, `\"` style otherwise)
        cursor += quoteCharLength
        quoted = true
        nextCursor = cursor
        for (;;) {
          nextCursor = findNext(quoteChar, nextCursor)
          if (nextCursor < 0) {
            // Closing quote not in this chunk: buffer the whole line
            partialLine = chunk.substring(lineStart, chunkLength) + partialLine
            if (flush) {
              throw new Error('QuotedFieldMalformed', { cause: idx })
            }
            return
          }
          if (
            escapedQuoteEqual &&
            chunk[nextCursor + quoteCharLength] === quoteChar
          ) {
            nextCursor += escapedQuoteCharLength
            continue
          }
          if (
            escapedQuoteNotEqual &&
            chunk[nextCursor - escapeCharLength] === escapeChar
          ) {
            nextCursor += quoteCharLength
            continue
          }
          break
        }
      }

      // fallback
      // Field ends at the next delimiter or newline, whichever comes first
      const nextDelimiterChar = findNext(delimiterChar, nextCursor)
      let nextNewlineChar = findNext(newlineChar, nextCursor)
      if (nextNewlineChar < 0) {
        if (!flush) {
          // Incomplete line: buffer it and wait for the next chunk
          partialLine = chunk.substring(lineStart, chunkLength) + partialLine
          return
        }
        nextNewlineChar = chunkLength
      }
      if (nextDelimiterChar > -1 && nextDelimiterChar < nextNewlineChar) {
        nextCursor = nextDelimiterChar
        nextCursorLength = delimiterCharLength
      } else {
        nextCursor = nextNewlineChar
        nextCursorLength = newlineCharLength
        atNewline = true
      }

      if (nextCursor < 0 || !nextCursor) {
        break
      }

      let field
      if (quoted) {
        // Trim the closing quote and un-escape embedded quotes
        field = parseField(nextCursor - 1).replace(
          escapedQuoteCharRegExp,
          quoteChar
        )
      } else {
        field = parseField(nextCursor)
      }
      addFieldToRow(field, row.length)

      cursor = nextCursor + nextCursorLength

      if (atNewline) {
        enqueueRow(row)
        row = [] // resetRow()
        checkForEmptyLine()
        lineStart = cursor
      }
      // `row.length === 0` required for when a csv ends with just `,` and no newline
      if (chunkLength <= cursor && row.length === 0) {
        break
      }
    }
  }

  return {
    chunkParse,
    header: () => header,
    previousChunk: () => partialLine
  }
}
269 |
/**
 * Detect which candidate character `pattern` matches most often in `chunk`.
 * Returns early as soon as one candidate has been seen more than 5 times;
 * otherwise returns the most frequent candidate, or `undefined` when there
 * are no matches at all.
 *
 * @param {string} chunk - sample text to scan
 * @param {RegExp} pattern - global regex of candidate characters
 * @returns {string|undefined}
 */
export const detectChar = (chunk, pattern) => {
  // `g` regexes are stateful (lastIndex persists across calls, including the
  // early-return path below) — always start scanning from the beginning
  pattern.lastIndex = 0
  const counts = {}
  let match
  while ((match = pattern.exec(chunk))) {
    const char = match[0]
    counts[char] = (counts[char] ?? 0) + 1
    if (counts[char] > 5) return char // clear winner, stop scanning
  }
  // Fix: fall back to the MOST frequent candidate. The previous
  // implementation sorted counts ascending (`a.value - b.value`) and took
  // the first entry, returning the LEAST frequent character.
  let best
  for (const [char, count] of Object.entries(counts)) {
    if (best === undefined || count > counts[best]) {
      best = char
    }
  }
  return best
}
289 |
// Field coercion helpers for use as `coerceField`. Each returns the coerced
// value on success and the original string on failure, so they can be chained.
export const coerceTo = {
  string: (field) => field,
  boolean: (field) => {
    const boolean = coerceTo.true(field)
    return typeof boolean === 'boolean' ? boolean : coerceTo.false(field)
  },
  true: (field) => (field.toLowerCase() === 'true' ? true : field),
  false: (field) => (field.toLowerCase() === 'false' ? false : field),
  number: (field) => {
    // Fix: parse once with parseFloat and test with Number.isNaN.
    // The old `parseFloat(field) || field` treated 0 as a failure (so '0'
    // stayed a string), and re-parsing integral values with parseInt broke
    // exponent notation ('1e3' became 1 instead of 1000).
    const value = Number.parseFloat(field)
    return Number.isNaN(value) ? field : value
  },
  integer: (field) => {
    // Fix: `parseInt(field, 10) || field` returned the string for '0'
    const value = Number.parseInt(field, 10)
    return Number.isNaN(value) ? field : value
  },
  decimal: (field) => {
    // Fix: `parseFloat(field) || field` returned the string for '0'/'0.0'
    const value = Number.parseFloat(field)
    return Number.isNaN(value) ? field : value
  },
  json: (field) => {
    try {
      return JSON.parse(field)
    } catch (e) {
      return field
    }
  },
  timestamp: (field) => {
    const date = new Date(field)
    return date.toString() !== 'Invalid Date' ? date : field
  },
  null: (field) => (field.toLowerCase() === 'null' ? null : field),
  any: (field) => {
    // Try each coercion in order; stop at the first that changes the type
    const types = ['boolean', 'number', 'null', 'json']
    for (let i = 0, l = types.length; i < l; i++) {
      field = coerceTo[types[i]](field)

      if (typeof field !== 'string') {
        break
      }
    }

    return field
  }
}
329 |
// Convenience driver: parse a whole in-memory string by feeding it to the
// incremental parser in fixed-size windows, then flushing the remainder.
// Returns the collected row data (or `false` when `enableReturn` is falsy).
export default (input, opts) => {
  const options = {
    ...defaultOptions,
    enableReturn: true,
    chunkSize: 64 * 1024 * 1024,
    enqueue: () => {},
    ...opts
  }
  const { chunkSize, enableReturn, enqueue } = options
  const { chunkParse, previousChunk } = parse(options)

  const rows = []
  const controller = { enqueue }
  if (enableReturn) {
    // Wrap the user callback so each row's data is also collected
    controller.enqueue = (row) => {
      enqueue(row)
      rows.push(row.data)
    }
  }

  // Checking if you can use fastParse slows it down more than checking for quoteChar on every field.
  for (let position = 0; position < input.length; position += chunkSize) {
    chunkParse(
      previousChunk() + input.substring(position, position + chunkSize),
      controller
    )
  }
  // flush: parse whatever partial line remains buffered
  chunkParse(previousChunk(), controller, true)

  return enableReturn && rows
}
368 |
--------------------------------------------------------------------------------
/parse.test.js:
--------------------------------------------------------------------------------
1 | import test from 'node:test'
2 | import { equal, deepEqual } from 'node:assert'
3 | import sinon from 'sinon'
4 | import csvParse, { parse, coerceTo } from './parse.js'
5 |
// Parser entry points the suites below iterate over; additional methods
// (e.g. 'testParse') can be re-enabled here to run every test against them.
const allMethods = ['chunkParse'] // , 'testParse']
const quoteMethods = ['chunkParse'] // , 'testParse']
8 |
// With `header: false` every line — including the first — is emitted as an array row.
test('Should parse csv string using mjs', async (t) => {
  const enqueue = sinon.spy()
  const res = csvParse('a,b,c\r\n1,2,3\r\n4,5,6\r\n', { enqueue, header: false })
  deepEqual(res, [
    ['a', 'b', 'c'],
    ['1', '2', '3'],
    ['4', '5', '6']
  ])
  equal(enqueue.callCount, 3)
})
23 |
// *** Default Export *** //
// Default header handling: the first row becomes the object keys; a small
// chunkSize forces the input to be processed across multiple chunks.
test('Should parse csv string', async (t) => {
  const enqueue = sinon.spy()
  const res = csvParse('a,b,c\r\n1,2,3\r\n4,5,6\r\n', { enqueue, chunkSize: 12 })
  deepEqual(res, [
    { a: '1', b: '2', c: '3' },
    { a: '4', b: '5', c: '6' }
  ])
  equal(enqueue.callCount, 2)
})
38 |
// Fixture tests: rows whose first and/or last fields are empty must still
// yield the full set of header keys, with '' for the missing values.
test('Should parse csv string with empty first column', async (t) => {
  const options = {
    enqueue: sinon.spy(),
    delimiterChar: ','
  }
  const input = 'a,b,c,d\r\n1,2,3,4\r\n,5,6,7\r\n,10,11,12\r\n'
  const res = csvParse(input, options)
  deepEqual(res, [
    { a: '1', b: '2', c: '3', d: '4' },
    { a: '', b: '5', c: '6', d: '7' },
    { a: '', b: '10', c: '11', d: '12' }
  ])
  equal(options.enqueue.callCount, 3)
})

test('Should parse csv string with empty last column', async (t) => {
  const options = {
    enqueue: sinon.spy(),
    delimiterChar: ','
  }
  const input = 'a,b,c,d\r\n1,2,3,4\r\n4,5,6,\r\n9,10,11,\r\n'
  const res = csvParse(input, options)
  deepEqual(res, [
    { a: '1', b: '2', c: '3', d: '4' },
    { a: '4', b: '5', c: '6', d: '' },
    { a: '9', b: '10', c: '11', d: '' }
  ])
  equal(options.enqueue.callCount, 3)
})

test('Should parse csv string with empty first and last columns', async (t) => {
  const options = {
    enqueue: sinon.spy(),
    delimiterChar: ','
  }
  const input = 'a,b,c,d\r\n1,2,3,4\r\n,5,6,\r\n,10,11,\r\n'
  const res = csvParse(input, options)
  deepEqual(res, [
    { a: '1', b: '2', c: '3', d: '4' },
    { a: '', b: '5', c: '6', d: '' },
    { a: '', b: '10', c: '11', d: '' }
  ])
  equal(options.enqueue.callCount, 3)
})

test('Should parse csv string with empty last followed by empty first column', async (t) => {
  const options = {
    enqueue: sinon.spy(),
    delimiterChar: ','
  }
  const input = 'a,b,c,d\r\n1,2,3,4\r\n4,5,6,\r\n,10,11,12\r\n'
  const res = csvParse(input, options)
  deepEqual(res, [
    { a: '1', b: '2', c: '3', d: '4' },
    { a: '4', b: '5', c: '6', d: '' },
    { a: '', b: '10', c: '11', d: '12' }
  ])
  equal(options.enqueue.callCount, 3)
})

// A chunkSize of 14 splits the input inside a quoted row on purpose.
test('Should parse csv string w/ quotes', async (t) => {
  const options = {
    enqueue: sinon.spy(),
    chunkSize: 14
  }
  const input = 'a,b,c\r\n1,"2",3\r\n4,"5",6\r\n'
  const res = csvParse(input, options)
  deepEqual(res, [
    { a: '1', b: '2', c: '3' },
    { a: '4', b: '5', c: '6' }
  ])
  equal(options.enqueue.callCount, 2)
})
112 |
// *** General *** //
// Baseline: default options produce header-keyed rows; `idx` is the 1-based line number.
for (const method of allMethods) {
  test(`${method}: Should parse single row with { }`, async (t) => {
    const options = {}
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse multiple rows with { }`, async (t) => {
    const options = {}
    const enqueue = sinon.spy()

    // The last row has no trailing newline; the flush call (third arg `true`) emits it.
    const chunk = 'a,b,c\r\n1,2,3\r\n4,5,6\r\n7,8,9'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    parser[method](parser.previousChunk(), { enqueue }, true)
    equal(enqueue.callCount, 3)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '4', b: '5', c: '6' }, idx: 3 }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '7', b: '8', c: '9' }, idx: 4 }
    ])
  })
}
147 |
// *** Chunking *** //
// previousChunk() must return the unparsed tail of the last chunk so the
// caller can prepend it to the next chunk — including mid-field splits.
for (const method of allMethods) {
  test(`${method}: Should parse with chunking`, async (t) => {
    const options = {}
    const enqueue = sinon.spy()
    let chunk = 'a,b,c\r\n1,2,'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(parser.previousChunk(), '1,2,')
    chunk = parser.previousChunk() + '3\r\n4'
    parser[method](chunk, { enqueue })
    equal(parser.previousChunk(), '4')
    chunk = parser.previousChunk() + ',5,6'
    parser[method](chunk, { enqueue }, true)
    equal(enqueue.callCount, 2)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '4', b: '5', c: '6' }, idx: 3 }
    ])
  })
}

// Same contract when the split lands inside an open quoted field.
for (const method of quoteMethods) {
  test(`${method}: Should parse with chunking and quotes`, async (t) => {
    const options = {}
    const enqueue = sinon.spy()
    let chunk = 'a,b,c\r\n"1","2","'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(parser.previousChunk(), '"1","2","')
    chunk = parser.previousChunk() + '3"\r\n'
    parser[method](chunk, { enqueue })
    equal(parser.previousChunk(), '')
    chunk = parser.previousChunk() + '"4","5","6"'
    parser[method](chunk, { enqueue }, true)
    equal(enqueue.callCount, 2)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '4', b: '5', c: '6' }, idx: 3 }
    ])
  })
}
194 |
// *** Option: header *** //
// header may be an explicit array of keys, true (first row is the header),
// or false (rows are emitted as arrays).
for (const method of allMethods) {
  test(`${method}: Should parse with { header: [...] }`, async (t) => {
    const options = { header: ['a', 'b', 'c'] }
    const enqueue = sinon.spy()
    const chunk = '1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 1 }
    ])
  })

  test(`${method}: Should parse with { header: true }`, async (t) => {
    const options = { header: true }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { header: false }`, async (t) => {
    const options = { header: false }
    const enqueue = sinon.spy()
    const chunk = '1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [{ data: ['1', '2', '3'], idx: 1 }])
  })
}
231 |
// *** Option: newline *** //
// newlineChar: '' auto-detects \r, \n, or \r\n; an explicit value is used as-is.
// Fixed: removed leftover console.log debug statements that polluted test output.
for (const method of allMethods) {
  test(`${method}: Should parse with { newlineChar: "" } (auto detect)`, async (t) => {
    const options = { newlineChar: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r1,2,3\r'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { newlineChar: "\\r\\n" }`, async (t) => {
    const options = { newlineChar: '\r\n' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { newlineChar: "\\n" }`, async (t) => {
    const options = { newlineChar: '\n' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\n1,2,3\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse first chunk is shorter than the headers with { newlineChar: "" }`, async (t) => {
    // Auto-detection must cope with the first chunk ending before the header row does.
    const options = { newlineChar: '' }
    const enqueue = sinon.spy()
    const chunk0 = 'a,b,'
    const chunk1 = 'c\n1,2,3\n1,2,3'
    const parser = parse(options)
    parser[method](chunk0, { enqueue })
    parser[method](parser.previousChunk() + chunk1, { enqueue }, true)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 3 }
    ])
  })
  test(`${method}: Should parse when no newline at end of file`, async (t) => {
    const options = { newlineChar: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\n1,2,3\n1,2,3'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    parser[method](parser.previousChunk(), { enqueue }, true)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 3 }
    ])
  })
  test(`${method}: Should parse when no field and newline at end of file`, async (t) => {
    const options = { newlineChar: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\n1,2,3\n1,2,'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    parser[method](parser.previousChunk(), { enqueue }, true)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '' }, idx: 3 }
    ])
  })
}
312 |
// *** Option: delimiter *** //
// Fixed: the test titles said `delimiterValue`, but the option actually set and
// exercised is `delimiterChar` — the titles now name the real option.
for (const method of allMethods) {
  test(`${method}: Should parse with { delimiterChar: "" }`, async (t) => {
    // '' selects the default unit-separator delimiter (\x1F).
    const options = { delimiterChar: '' }
    const enqueue = sinon.spy()
    const chunk = 'a\x1Fb\x1Fc\r\n1\x1F2\x1F3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { delimiterChar: "," }`, async (t) => {
    const options = { delimiterChar: ',' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { delimiterChar: "|" }`, async (t) => {
    const options = { delimiterChar: '|' }
    const enqueue = sinon.spy()
    const chunk = 'a|b|c\r\n1|2|3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { delimiterChar: "\\t" }`, async (t) => {
    const options = { delimiterChar: '\t' }
    const enqueue = sinon.spy()
    const chunk = 'a\tb\tc\r\n1\t2\t3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })
}
361 |
// *** Option: quoteChar *** //
// Quoted headers and fields are unwrapped using the configured quote character.
for (const method of quoteMethods) {
  test(`${method}: Should parse with { quoteChar: '"' }`, async (t) => {
    const options = { quoteChar: '"' }
    const enqueue = sinon.spy()
    const chunk = '"a","b","c"\r\n"1","2","3"\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { quoteChar: '\`' }`, async (t) => {
    const options = { quoteChar: '`' }
    const enqueue = sinon.spy()
    const chunk = '`a`,`b`,`c`\r\n`1`,`2`,`3`\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
  })
}
386 |
// *** Option: escapeChar *** //
// Escaped quotes inside quoted fields, plus newlines/delimiters inside quotes.
for (const method of quoteMethods) {
  test(`${method}: Should parse with { quoteChar: '"', escapeChar: '"' }`, async (t) => {
    const options = { quoteChar: '"', escapeChar: '"' }
    const enqueue = sinon.spy()
    const chunk = '"a","b""","c"\r\n"1","2""","3"\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', 'b"': '2"', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { quoteChar: '"', escapeChar: '\\' }`, async (t) => {
    const options = { quoteChar: '"', escapeChar: '\\' }
    const enqueue = sinon.spy()
    const chunk = '"a","b\\"","c"\r\n"1","2\\"","3"\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', 'b"': '2"', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { quoteChar: '"' } and field containing newline`, async (t) => {
    const options = { quoteChar: '"' }
    const enqueue = sinon.spy()
    const chunk = '"a","b\r\nb","c"\r\n"1","2\r\n2","3"'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    parser[method](parser.previousChunk(), { enqueue }, true)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', 'b\r\nb': '2\r\n2', c: '3' }, idx: 2 }
    ])
  })

  test(`${method}: Should parse with { quoteChar: '"' } and field containing delimiter`, async (t) => {
    // Fixed: the option key is `delimiterChar` — the previous `delimiterValue`
    // key was silently ignored (the test only passed because ',' is the default).
    const options = { delimiterChar: ',', quoteChar: '"' }
    const enqueue = sinon.spy()
    const chunk = '"a","b,b","c"\r\n"1","2,2","3"\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', 'b,b': '2,2', c: '3' }, idx: 2 }
    ])
  })
}
434 |
// *** coerceFields *** //
// coerceField receives (field, columnIdx); here each column is coerced by the
// coerceTo entry at the same position, so the header row doubles as the type list.
for (const method of quoteMethods) {
  test(`${method}: Should parse with { coerceField: (field) => ... }`, async (t) => {
    const coerceField = (field, idx) => {
      return Object.values(coerceTo)[idx](field)
    }
    const options = { header: true, quoteChar: "'", coerceField }
    const enqueue = sinon.spy()
    const chunk =
      'string,boolean,true,false,number,integer,decimal,json,timestamp,null\r\nstring,true,true,false,0,-1,-1.1,\'{"a":"b"}\',2022-07-30T04:46:24.466Z,null\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      {
        data: {
          boolean: true,
          decimal: -1.1,
          false: false,
          number: 0,
          integer: -1,
          json: {
            a: 'b'
          },
          null: null,
          string: 'string',
          timestamp: new Date('2022-07-30T04:46:24.466Z'),
          true: true
        },
        idx: 2
      }
    ])
  })
}
468 |
// Unit tests for the individual coerceTo helpers.
// NOTE(review): `equal` here is node:assert's loose (==) comparison; e.g.
// coerceTo.integer('0') actually returns the string '0' (the `|| field`
// fallback in parse.js treats the parsed 0 as falsy), which still satisfies
// equal(..., 0). Tightening to strictEqual requires fixing that fallback first.
test('Should coerceTo boolean', async (t) => {
  equal(coerceTo.true('true'), true)
  equal(coerceTo.true('TRUE'), true)

  equal(coerceTo.false('false'), false)
  equal(coerceTo.false('FALSE'), false)

  equal(coerceTo.boolean('true'), true)
  equal(coerceTo.boolean('TRUE'), true)
  equal(coerceTo.boolean('false'), false)
  equal(coerceTo.boolean('FALSE'), false)

  equal(coerceTo.any('true'), true)
  equal(coerceTo.any('TRUE'), true)
  equal(coerceTo.any('false'), false)
  equal(coerceTo.any('FALSE'), false)
})

test('Should not coerceTo boolean', async (t) => {
  // Fixed: previously asserted coerceTo.null, which never exercised the
  // boolean coercers this test claims to cover.
  equal(coerceTo.true('1'), '1')
  equal(coerceTo.true('0'), '0')
  equal(coerceTo.false('1'), '1')
  equal(coerceTo.false('0'), '0')
})

test('Should coerceTo number', async (t) => {
  equal(coerceTo.integer('1.1'), 1)
  equal(coerceTo.integer('1'), 1)
  equal(coerceTo.integer('0'), 0)
  equal(coerceTo.integer('-1'), -1)
  equal(coerceTo.integer('-1'), -1)

  equal(coerceTo.decimal('1.1'), 1.1)
  equal(coerceTo.decimal('1'), 1)
  equal(coerceTo.decimal('0'), 0)
  equal(coerceTo.decimal('-1'), -1)
  equal(coerceTo.decimal('-1.1'), -1.1)

  equal(coerceTo.number('1.1'), 1.1)
  equal(coerceTo.number('1'), 1)
  equal(coerceTo.number('0'), 0)
  equal(coerceTo.number('-1'), -1)
  equal(coerceTo.number('-1.1'), -1.1)

  equal(coerceTo.any('1.1'), 1.1)
  equal(coerceTo.any('1'), 1)
  equal(coerceTo.any('0'), 0)
  equal(coerceTo.any('-1'), -1)
  equal(coerceTo.any('-1.1'), -1.1)
})

test('Should not coerceTo number', async (t) => {
  // Fixed: previously asserted coerceTo.null, which never exercised the
  // number coercers this test claims to cover.
  equal(coerceTo.integer('a'), 'a')
  equal(coerceTo.decimal('a'), 'a')
  equal(coerceTo.number('a'), 'a')
})

test('Should coerceTo null', async (t) => {
  equal(coerceTo.null('null'), null)
  equal(coerceTo.null('NULL'), null)

  equal(coerceTo.any('null'), null)
  equal(coerceTo.any('NULL'), null)
})

test('Should not coerceTo null', async (t) => {
  equal(coerceTo.null('Nil'), 'Nil')
})
533 |
// timestamp/json coercion: invalid inputs fall through unchanged.
test('Should coerceTo timestamp', async (t) => {
  deepEqual(coerceTo.timestamp('2000-01-01'), new Date('2000-01-01'))
  deepEqual(
    coerceTo.timestamp('2000-01-01T00:00:00Z'),
    new Date('2000-01-01T00:00:00Z')
  )

  // `any` doesn't support `date` due conflict with `number`
  // deepEqual(coerceTo.any('2000-01-01'), new Date('2000-01-01'))
  // deepEqual(coerceTo.any('2000-01-01T00:00:00Z'), new Date('2000-01-01T00:00:00Z'))
})

test('Should not coerceTo timestamp', async (t) => {
  equal(coerceTo.timestamp('not a timestamp'), 'not a timestamp')
})

test('Should coerceTo json', async (t) => {
  deepEqual(coerceTo.json('["a"]'), ['a'])
  deepEqual(coerceTo.json('{"a":1}'), { a: 1 })

  deepEqual(coerceTo.any('["a"]'), ['a'])
  deepEqual(coerceTo.any('{"a":1}'), { a: 1 })
})

test('Should not coerceTo json', async (t) => {
  equal(coerceTo.json('not json'), 'not json')
})
561 |
// *** empty fields *** //
// emptyFieldValue is substituted for zero-length fields in any position,
// including non-string values like null and undefined.
for (const method of allMethods) {
  test(`${method}: Should parse with { emptyFieldValue: "" }`, async (t) => {
    const options = { emptyFieldValue: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n,,\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '', b: '', c: '' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { emptyFieldValue: null }`, async (t) => {
    const options = { emptyFieldValue: null }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n,,\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: null, b: null, c: null }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { emptyFieldValue: undefined }`, async (t) => {
    const options = { emptyFieldValue: undefined }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n,,\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: undefined, b: undefined, c: undefined }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { emptyFieldValue: "" } and first field`, async (t) => {
    const options = { emptyFieldValue: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,,\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '', c: '' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { emptyFieldValue: "" } and middle field`, async (t) => {
    const options = { emptyFieldValue: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n,2,\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '', b: '2', c: '' }, idx: 2 }
    ])
  })
  test(`${method}: Should parse with { emptyFieldValue: "" } and last field`, async (t) => {
    const options = { emptyFieldValue: '' }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n,,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '', b: '', c: '3' }, idx: 2 }
    ])
  })
}
631 |
// *** Option: errorOnEmptyLines *** //
// Empty lines are either skipped silently (false) or enqueued as
// EmptyLineExists errors (true); data rows keep their original idx either way.
for (const method of allMethods) {
  test(`${method}: Should parse with { errorOnEmptyLine: false }`, async (t) => {
    const options = { errorOnEmptyLine: false }
    const enqueue = sinon.spy()
    const chunk = '\r\na,b,c\r\n\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
  })

  test(`${method}: Should parse with { errorOnEmptyLine: true }`, async (t) => {
    const options = { errorOnEmptyLine: true }
    const enqueue = sinon.spy()
    const chunk = '\r\na,b,c\r\n\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 3)
    deepEqual(enqueue.firstCall.args, [
      {
        err: { code: 'EmptyLineExists', message: 'Empty line detected.' },
        idx: 1
      }
    ])
    deepEqual(enqueue.secondCall.args, [
      {
        err: { code: 'EmptyLineExists', message: 'Empty line detected.' },
        idx: 3
      }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
  })
}
670 |
// *** Option: commentPrefixValue && errorOnComment *** //
// Lines starting with commentPrefixValue are skipped, or enqueued as
// CommentExists errors when errorOnComment is set.
for (const method of allMethods) {
  test(`${method}: Should parse with { commentPrefixValue: "//", errorOnComment: false }`, async (t) => {
    const options = { commentPrefixValue: '//', errorOnComment: false }
    const enqueue = sinon.spy()
    const chunk = '// header\r\na,b,c\r\n// data\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 1)
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
  })

  test(`${method}: Should parse with { commentPrefixValue: "//", errorOnComment: true }`, async (t) => {
    const options = { commentPrefixValue: '//', errorOnComment: true }
    const enqueue = sinon.spy()
    const chunk = '// header\r\na,b,c\r\n// data\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    equal(enqueue.callCount, 3)
    deepEqual(enqueue.firstCall.args, [
      { err: { code: 'CommentExists', message: 'Comment detected.' }, idx: 1 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { err: { code: 'CommentExists', message: 'Comment detected.' }, idx: 3 }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
  })
}
703 |
// *** Option: errorOnMissingFields *** //
// Short rows either pass through with fewer keys (false) or are enqueued as
// MissingFields errors (true).
for (const method of allMethods) {
  test(`${method}: Should parse with { errorOnMissingFields: false }`, async (t) => {
    const options = { errorOnMissingFields: false }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2\r\n1,2,3\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [{ data: { a: '1', b: '2' }, idx: 2 }])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 3 }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
    equal(enqueue.callCount, 3)
  })

  test(`${method}: Should parse with { errorOnMissingFields: true }`, async (t) => {
    const options = { errorOnMissingFields: true }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2\r\n1,2,3\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      {
        err: {
          code: 'MissingFields',
          message: 'Too few fields were parsed, expected 3.'
        },
        idx: 2
      }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 3 }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
    equal(enqueue.callCount, 3)
  })
}
746 |
// *** Option: errorOnExtraFields *** //
// Long rows either drop the extra values (false — see expected rows) or are
// enqueued as ExtraFields errors (true).
for (const method of allMethods) {
  test(`${method}: Should parse with { errorOnExtraFields: false }`, async (t) => {
    const options = { errorOnExtraFields: false }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2,3,4\r\n1,2,3\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 3 }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
    equal(enqueue.callCount, 3)
  })

  test(`${method}: Should parse with { errorOnExtraFields: true }`, async (t) => {
    const options = { errorOnExtraFields: true }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n1,2,3,4\r\n1,2,3\r\n1,2,3\r\n'
    const parser = parse(options)
    parser[method](chunk, { enqueue })
    deepEqual(enqueue.firstCall.args, [
      {
        err: {
          code: 'ExtraFields',
          message: 'Too many fields were parsed, expected 3.'
        },
        idx: 2
      }
    ])
    deepEqual(enqueue.secondCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 3 }
    ])
    deepEqual(enqueue.thirdCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 4 }
    ])
    equal(enqueue.callCount, 3)
  })
}
791 |
792 | // *** Option: errorOnFieldsMismatch *** //
793 | /* for (const method of allMethods) {
794 | test(`${method}: Should parse with { errorOnFieldsMismatch: false }`, async (t) => {
795 | const options = { errorOnFieldsMismatch: false }
796 | const enqueue = sinon.spy()
797 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3,4\r\n1,2,3\r\n'
798 | const parser = parse(options)
799 | parser[method](chunk, { enqueue })
800 | deepEqual(enqueue.firstCall.args, [
801 | { data: { a: '1', b: '2', c: '3' }, idx: 4 }
802 | ])
803 | equal(enqueue.callCount, 1)
804 | })
805 |
806 | test(`${method}: Should parse with { errorOnFieldsMismatch: true }`, async (t) => {
807 | const options = { errorOnFieldsMismatch: true }
808 | const enqueue = sinon.spy()
809 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3,4\r\n1,2,3\r\n'
810 | const parser = parse(options)
811 | parser[method](chunk, { enqueue })
812 | deepEqual(enqueue.firstCall.args, [
813 | {
814 | err: {
815 | code: 'FieldsMismatchTooFew',
816 | message: 'Too few fields were parsed, expected 3.'
817 | },
818 | idx: 2
819 | }
820 | ])
821 | deepEqual(enqueue.secondCall.args, [
822 | {
823 | err: {
824 | code: 'FieldsMismatchTooMany',
825 | message: 'Too many fields were parsed, expected 3.'
826 | },
827 | idx: 3
828 | }
829 | ])
830 | deepEqual(enqueue.thirdCall.args, [
831 | { data: { a: '1', b: '2', c: '3' }, idx: 4 }
832 | ])
833 | equal(enqueue.callCount, 3)
834 | })
835 | } */
836 |
// *** Option: errorOnFieldMalformed *** //
// A quote left open at end-of-input must throw on the flush call.
// Fixed: removed a leftover console.log debug statement from the catch block.
for (const method of quoteMethods) {
  test(`${method}: Should parse with { errorOnFieldMalformed }`, async (t) => {
    const options = { errorOnFieldMalformed: true }
    const enqueue = sinon.spy()
    const chunk = 'a,b,c\r\n"1","2","3"\r\n"4'
    const parser = parse(options)
    try {
      parser[method](chunk, { enqueue })
      parser[method](parser.previousChunk(), { enqueue }, true)
    } catch (e) {
      // The unterminated quoted field ("4) surfaces as a thrown error on flush.
      equal(e.message, 'QuotedFieldMalformed')
    }
    // Only the complete first data row was enqueued before the failure.
    deepEqual(enqueue.firstCall.args, [
      { data: { a: '1', b: '2', c: '3' }, idx: 2 }
    ])
    equal(enqueue.callCount, 1)
  })
}
857 |
858 | // *** extra spaces *** //
859 | /* test(`${method}: Should parse with space padding`, async (t) => {
860 | const options = { }
861 | const enqueue = sinon.spy()
862 | let chunk = 'a,b,c\r\n"1" ,"2" ,"3" \r\n'
863 | const parser = parse(options)
864 | parser[method](chunk, { enqueue })
865 | equal(enqueue.callCount, 1)
866 | deepEqual(enqueue.firstCall.args, [{data:{ a: '1', b: '2', c: '3' },idx:2}])
867 | }) */
868 |
--------------------------------------------------------------------------------