├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── pull-request.md └── workflows │ ├── lint.yml │ ├── sast.yml │ └── tests.yml ├── .gitignore ├── .husky ├── commit-msg └── pre-commit ├── .prettierrc.json ├── LICENSE ├── README.md ├── benchmark.js ├── bin └── esbuild ├── commitlint.config.cjs ├── docs ├── .nojekyll ├── CNAME ├── README.md ├── docs │ ├── format.md │ ├── migrate │ │ ├── csv.md │ │ └── papaparse.md │ └── parse.md ├── index.html ├── sidebar.md ├── sidebar.png ├── sidebar.sketch └── t-rex.png ├── format.js ├── format.test.js ├── index.js ├── lint-staged.config.js ├── package-lock.json ├── package.json ├── parse-mini.js ├── parse.bench.js ├── parse.js └── parse.test.js /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the 27 | overall community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or 32 | advances of any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 
56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series 87 | of actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or 94 | permanent ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 
100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within 114 | the community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.0, available at 120 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 127 | at [https://www.contributor-covenant.org/translations][translations]. 
128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | In the spirit of Open Source Software, everyone is very welcome to contribute to this repository. Feel free to [raise issues](https://github.com/willfarrell/csv-rex/issues) or to [submit Pull Requests](https://github.com/willfarrell/csv-rex/pulls). 4 | 5 | Before contributing to the project, make sure to have a look at our [Code of Conduct](/.github/CODE_OF_CONDUCT.md). 6 | 7 | 8 | ## Licence 9 | 10 | Licensed under [MIT Licence](LICENSE). Copyright (c) 2022 [will Farrell](https://github.com/willfarrell), and the [csv-rex team](https://github.com/willfarrell/csv-rex/graphs/contributors). 
11 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [willfarrell] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | How to reproduce the behaviour: 15 | 1. Sample code '...' 16 | 2. Input '....' 17 | 3. Unit test '....' 18 | 4. Thrown error 19 | 20 | **Expected behaviour** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Environment (please complete the following information):** 24 | - Node.js: [e.g. 18] 25 | - csv-rex: [e.g. 0.0.0] 26 | - Browser: [e.g. Firefox 100] 27 | 28 | **Additional context** 29 | Add any other context about the problem here. 
30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: feature request 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/pull-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Pull request 3 | about: Pull request 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | **What does this implement/fix? Explain your changes.** 13 | 14 | **Does this close any currently open issues?** 15 | 16 | **Any relevant logs, error output, etc?** 17 | 18 | **Environment:** 19 | - Node.js: [e.g. 18] 20 | - csv-rex: [e.g. 0.0.0] 21 | - Browser: [e.g. 
Firefox 100] 22 | 23 | **Any other comments?** 24 | 25 | **Todo List:** 26 | - [ ] Feature/Fix fully implemented 27 | - [ ] Added tests 28 | - [ ] Unit tests 29 | - [ ] Benchmark tests (if applicable) 30 | - [ ] Updated relevant documentation 31 | - [ ] Updated relevant examples 32 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '*.*.*' 9 | pull_request: 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | node-version: [18.x] 18 | 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v2 22 | - name: Use Node.js ${{ matrix.node-version }} 23 | uses: actions/setup-node@v2 24 | with: 25 | node-version: ${{ matrix.node-version }} 26 | - name: Use npm 8 27 | run: | 28 | npm install -g npm@8 29 | - name: Install dependencies 30 | run: | 31 | npm install 32 | env: 33 | CI: true 34 | - name: Linting 35 | run: npm run lint 36 | env: 37 | CI: true 38 | -------------------------------------------------------------------------------- /.github/workflows/sast.yml: -------------------------------------------------------------------------------- 1 | name: SAST 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | tags-ignore: 8 | - '*.*.*' 9 | pull_request: 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | 15 | permissions: 16 | # required for all workflows 17 | security-events: write 18 | 19 | # only required for workflows in private repositories 20 | actions: read 21 | contents: read 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v3 26 | - name: Initialize CodeQL 27 | uses: github/codeql-action/init@v2 28 | - name: Perform CodeQL Analysis 29 | uses: github/codeql-action/analyze@v2 30 | 
-------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | tags-ignore: 8 | - '*.*.*' 9 | pull_request: 10 | 11 | jobs: 12 | tests: 13 | name: Tests 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | node-version: [18.x] 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v2 23 | - name: Use Node.js ${{ matrix.node-version }} 24 | uses: actions/setup-node@v2 25 | with: 26 | node-version: ${{ matrix.node-version }} 27 | - name: Use npm 8 28 | run: | 29 | npm install --location=global npm@8 30 | - name: Install dependencies 31 | run: | 32 | npm install 33 | env: 34 | CI: true 35 | 36 | - name: Build for Node.js ${{ matrix.node-version }} 37 | run: | 38 | npm run build 39 | 40 | - name: Unit tests 41 | run: | 42 | npm run test 43 | env: 44 | CI: true 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.cjs 2 | *.mjs 3 | *.mjs.map 4 | coverage 5 | *.0x 6 | *.br 7 | 8 | !commitlint.config.cjs 9 | 10 | # Logs 11 | logs 12 | *.log 13 | npm-debug.log* 14 | yarn-debug.log* 15 | yarn-error.log* 16 | lerna-debug.log* 17 | 18 | # Diagnostic reports (https://nodejs.org/api/report.html) 19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 20 | 21 | # Runtime data 22 | pids 23 | *.pid 24 | *.seed 25 | *.pid.lock 26 | 27 | # Directory for instrumented libs generated by jscoverage/JSCover 28 | lib-cov 29 | 30 | # Coverage directory used by tools like istanbul 31 | coverage 32 | *.lcov 33 | 34 | # nyc test coverage 35 | .nyc_output 36 | 37 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 38 | .grunt 39 | 40 | # Bower dependency directory (https://bower.io/) 41 | 
bower_components 42 | 43 | # node-waf configuration 44 | .lock-wscript 45 | 46 | # Compiled binary addons (https://nodejs.org/api/addons.html) 47 | build/Release 48 | 49 | # Dependency directories 50 | node_modules/ 51 | jspm_packages/ 52 | 53 | # TypeScript v1 declaration files 54 | typings/ 55 | 56 | # TypeScript cache 57 | *.tsbuildinfo 58 | 59 | # Optional npm cache directory 60 | .npm 61 | 62 | # Optional eslint cache 63 | .eslintcache 64 | 65 | # Microbundle cache 66 | .rpt2_cache/ 67 | .rts2_cache_cjs/ 68 | .rts2_cache_es/ 69 | .rts2_cache_umd/ 70 | 71 | # Optional REPL history 72 | .node_repl_history 73 | 74 | # Output of 'npm pack' 75 | *.tgz 76 | 77 | # Yarn Integrity file 78 | .yarn-integrity 79 | 80 | # dotenv environment variables file 81 | .env 82 | .env.test 83 | 84 | # parcel-bundler cache (https://parceljs.org/) 85 | .cache 86 | 87 | # Next.js build output 88 | .next 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # Serverless directories 104 | .serverless/ 105 | 106 | # FuseBox cache 107 | .fusebox/ 108 | 109 | # DynamoDB Local files 110 | .dynamodb/ 111 | 112 | # TernJS port file 113 | .tern-port 114 | 115 | # IDE 116 | .idea 117 | *.iml 118 | .nova 119 | 120 | # OS 121 | .DS_Store -------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . "$(dirname "$0")/_/husky.sh" 3 | 4 | npm run commit-msg 5 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . 
"$(dirname "$0")/_/husky.sh" 3 | 4 | npm run pre-commit 5 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "semi": false, 4 | "trailingComma": "none" 5 | } 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 will Farrell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |






3 | 🦖 4 |






5 |

CSV-Rex

6 |

A tiny and fast CSV parser & formatter for JavaScript.

7 |
8 |

9 | 10 | npm version 11 | 12 | 13 | npm install size 14 | 15 | 16 | GitHub Actions CI status badge 17 | 18 |
19 | 20 | Standard Code Style 21 | 22 | 23 | Known Vulnerabilities 24 | 25 | 26 | SAST 27 | 28 | 29 | Core Infrastructure Initiative (CII) Best Practices 30 | 31 |

32 |

33 | See full documentation at https://csv-rex.js.org 34 |

35 |
36 | -------------------------------------------------------------------------------- /benchmark.js: -------------------------------------------------------------------------------- 1 | import parseBenchmarks from './parse.bench.js' 2 | // import formatBenchmarks from './parse.bench.js' 3 | 4 | await parseBenchmarks() 5 | // await formatBenchmarks() 6 | -------------------------------------------------------------------------------- /bin/esbuild: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs index.js --minify --allow-overwrite --outfile=index.cjs 4 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse.js --bundle --minify --allow-overwrite --outfile=parse.cjs 5 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs parse-mini.js --bundle --minify --allow-overwrite --outfile=parse-mini.cjs 6 | node_modules/.bin/esbuild --platform=node --target=node14 --format=cjs format.js --bundle --minify --allow-overwrite --outfile=format.cjs 7 | 8 | node_modules/.bin/esbuild --platform=node --format=esm index.js --minify --sourcemap=external --allow-overwrite --outfile=index.mjs 9 | node_modules/.bin/esbuild --platform=node --format=esm parse.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse.mjs 10 | node_modules/.bin/esbuild --platform=node --format=esm parse-mini.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=parse-mini.mjs 11 | node_modules/.bin/esbuild --platform=node --format=esm format.js --bundle --minify --sourcemap=external --allow-overwrite --outfile=format.mjs 12 | 13 | -------------------------------------------------------------------------------- /commitlint.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: ['@commitlint/config-conventional'] 3 | } 4 | 
-------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/.nojekyll -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | csv-rex.js.org -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 |
2 |






3 | 🦖 4 |






5 |

CSV-Rex

6 |

A tiny and fast CSV parser & formatter for JavaScript.

7 |
8 |

9 | 10 | npm version 11 | 12 | 13 | npm install size 14 | 15 | 16 | GitHub Actions CI status badge 17 | 18 |
19 | 20 | Standard Code Style 21 | 22 | 23 | Known Vulnerabilities 24 | 25 | 26 | SAST 27 | 28 | 29 | Core Infrastructure Initiative (CII) Best Practices 30 | 31 |

32 |
33 | 34 | ## Features 35 | 36 | - Free to use under MIT licence 37 | - Comma-Separated Values (CSV) Files specification compliant ([RFC-4180](https://tools.ietf.org/html/rfc4180)) 38 | - Small bundle size (~1KB compressed = esbuild + minify + br) 39 | - Zero dependencies 40 | - ESM & CJS modules with `.map` files 41 | - NodeJS and WebStream API support via [@datastream/csv](https://github.com/willfarrell/datastream) 42 | - It's just fast. See the [benchmarks](https://github.com/willfarrell/csv-benchmarks). 43 | 44 | ## Why not use `papaparse` or `csv-parse`? 45 | 46 | Both are great libraries, we've used them both in many projects over the years. 47 | 48 | - [`csv-parse`](https://csv.js.org/parse/): Built on top of NodeJS native APIs giving it great stream support. If you want to run it in the browser however, you're going to have to ship a very large polyfill. 49 | - [`papaparse`](https://www.papaparse.com/): Built to be more friendly for browser with an option to run in node as well. Faster than `csv-parse`, but its dadbod and lack of native stream support leaves room for improvement. 50 | 51 | The goal with `csv-rex` is to have a CSV parser and formatter that is as fast as others, reduced bundle size, and have cross-environment stream support. We think we've achieved our goal and hope you enjoy. 
52 | 53 | ## Setup 54 | 55 | ```bash 56 | npm install csv-rex 57 | ``` 58 | 59 | ```javascript 60 | import { parse, format } from 'csv-rex' 61 | 62 | // parse 63 | const linesArray = parse(inputString, {}) 64 | 65 | // format 66 | const csv = format(linesArray, {}) 67 | ``` 68 | -------------------------------------------------------------------------------- /docs/docs/format.md: -------------------------------------------------------------------------------- 1 | # format 2 | 3 | ## Options 4 | 5 | - `header` (`true`): Keys to be used in JSON object for the parsed row 6 | - `true`: Will include header, will use `Object.keys()` for columns 7 | - `[...]`: What columns to include and in what order 8 | - `false`: Will exclude a header line. 9 | - `newlineChar` (`\r\n`): What `newline` character(s) to be used. 10 | - `delimiterChar` (`,`): Characters used to separate fields. 11 | - `quoteChar` (`"`): Character used to wrap fields that need to have special characters within them. 12 | - `escapeChar` (`${quoteChar}`): Character used to escape the `quoteChar`. 13 | - `quoteColumn`: (`undefined`): Array that maps to the headers to indicate what columns need to have quotes. Used to improve performance. 14 | - `true`: Always quote column 15 | - `false`: Never quote column 16 | - `undefined`/`null`/``: Detect if quotes are needed based on contents 17 | - `enqueue` (`(string) => {}`): Function to run on formatted row data. 18 | - `enableReturn` (`true`): Will concat rows into a single string. Set to `false` if handling data within enqueue for performance improvements. 
19 | 20 | ### Array chunk 21 | - `header:[...]` required 22 | 23 | ### Object chunk 24 | 25 | 26 | 27 | ## Examples 28 | 29 | ### Formatting an array of objects to CSV string 30 | 31 | ```javascript 32 | import { format } from 'csv-rex' 33 | 34 | export default (arrayOfObjects) => format(arrayOfObjects, { newlineChar: '\n' }) 35 | ``` 36 | 37 | ### NodeJS Stream 38 | 39 | ```javascript 40 | import { createReadStream } from 'node:fs' 41 | import { pipeline, createReadableStream } from '@datastream/core' 42 | import { csvFormatStream } from '@datastream/csv' 43 | 44 | export default async (filePath, opts = {}) => { 45 | const streams = [ 46 | createReadableStream([ 47 | /*...*/ 48 | ]), 49 | csvFormatStream(opts) 50 | // ... 51 | ] 52 | 53 | const result = await pipeline(streams) 54 | console.log(result.csvErrors) 55 | } 56 | ``` 57 | 58 | ### Web Stream API 59 | 60 | Requires: Chrome v71 , Edge v79, Firefox v102, Safari v14.5, NodeJS v18 (v16 with import). If you want to use WebStreams with node you need to pass `--conditions=webstream` in the cli to force its use. 61 | 62 | ```javascript 63 | import { pipeline } from '@datastream/core' 64 | import { stringReadableStream } from '@datastream/string' 65 | import { csvParseStream } from '@datastream/csv' 66 | 67 | export default async (blob, opts = {}) => { 68 | const streams = [ 69 | stringReadableStream(blob), 70 | csvParseStream() 71 | // ... 72 | ] 73 | 74 | const result = await pipeline(streams) 75 | console.log(result.csvErrors) 76 | } 77 | ``` 78 | 79 | ### WebWorker using a file 80 | 81 | To prevent blocking the main thread it is recommended that CSV parsing is done in a WebWorker, SharedWebWorker, or ServiceWorker instead of the main thread. This example doesn't use streams due to the lack of Firefox stream support mentioned above. 
82 | 83 | ```javascript 84 | /* eslint-env worker */ 85 | import format from 'csv-rex/format' 86 | 87 | const enqueue = ({ data, idx, err }) => { 88 | if (err) { 89 | // handler err 90 | return 91 | } 92 | // handle data 93 | } 94 | 95 | onmessage = async (event) => { 96 | const { file } = event.data 97 | const options = { enqueue } 98 | file.length = file.size // polyfill length 99 | await parse(file, options) 100 | // ... 101 | postMessageEncode() 102 | } 103 | 104 | const postMessageEncode = (str) => { 105 | if (typeof str !== 'string') str = JSON.stringify(str) 106 | const buffer = new TextEncoder().encode(str).buffer 107 | postMessage(buffer, [buffer]) 108 | } 109 | ``` 110 | -------------------------------------------------------------------------------- /docs/docs/migrate/csv.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/docs/migrate/csv.md -------------------------------------------------------------------------------- /docs/docs/migrate/papaparse.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/docs/migrate/papaparse.md -------------------------------------------------------------------------------- /docs/docs/parse.md: -------------------------------------------------------------------------------- 1 | # parse 2 | 3 | ## Options 4 | 5 | - `header` (`true`): Keys to be used in JSON object for the parsed row 6 | - `true`: First row of the `input` contains columns and will need to be parsed out. output object of column value pairs. 7 | - `[...]`: Input doesn't contain columns, pre-assign columns and output object of column value pairs. 8 | - `false`: Input doesn't contain columns, output array of values 9 | - `newlineChar` (`''`): What `newline` character(s) to be used. 
By default will guess from `\r\n`, `\n`, `\r` 10 | - `delimiterChar` (`''`): Characters used to separate fields. Must be length of 1. By default will guess from `,`, `\t`, `|`, `;`, `\x1E`, `\x1F` 11 | - `quoteChar` (`"`): Character used to wrap fields that need to have special characters within them. Must be length of 1 12 | - `escapeChar` (`${quoteChar}`): Character used to escape the `quoteChar`. Must be length of 1 13 | - `enqueue` (`({data, idx, err}) => {}`): Function to run on parsed row data. 14 | - `emptyFieldValue` (`''`): Value to be used instead of an empty string. Can be set to `undefined` to have empty fields not be included. 15 | - `coerceField` (`(field, idx) => field`): Function to apply type/value coercion. 16 | - `commentPrefixValue` (`false`): Lines starting with this value will be ignored (i.e. `#`, `//`). Can be set to `false` if files will never have comments. 17 | - `errorOnEmptyLine` (`true`): When an empty line is encountered. Push row with error when occurs, row ignored otherwise. 18 | - `errorOnComment` (`true`): When a comment is encountered. Push row with error when occurs, row ignored otherwise. 19 | - `errorOnExtraFields` (`true`): When number of headers is less than the number of fields in a row. Push row with error when occurs, row ignored. Set to `false` to have what headers exist be mapped, and passed on. 20 | - `errorOnMissingFields` (`true`): When number of headers is more than the number of fields in a row. Push row with error when occurs, row ignored. Set to `false` to have what fields exist be mapped, and passed on. 21 | - `errorOnFieldMalformed` (`true`): When no closing `quoteChar` is found. Throws parsing error. 22 | - `chunkSize` (`64MB`): Size of chunks to process at once. 23 | - `enableReturn` (`true`): Will concat rows into a single array. Set to `false` if handling data within enqueue for performance improvements. 
24 | 25 | ## Examples 26 | 27 | ### Parsing a CSV formatted string to JSON (`[{...},{...},...]`) 28 | 29 | ```javascript 30 | import { parse } from 'csv-rex' 31 | 32 | const enqueue = ({ idx, data, err }) => { 33 | if (err) { 34 | // handler err 35 | return 36 | } 37 | // modify and/or handle data 38 | } 39 | 40 | export default (csvString) => parse(csvString, { enqueue }) 41 | ``` 42 | 43 | ### NodeJS Stream 44 | 45 | ```javascript 46 | import { createReadStream } from 'node:fs' 47 | import { pipeline } from '@datastream/core' 48 | import { csvParseStream } from '@datastream/csv' 49 | 50 | export default async (filePath, opts = {}) => { 51 | const streams = [ 52 | createReadStream(filePath), 53 | csvParseStream() 54 | // ... 55 | ] 56 | 57 | const result = await pipeline(streams) 58 | console.log(result.csvErrors) 59 | } 60 | ``` 61 | 62 | ### Web Stream API 63 | 64 | Requires: Chrome v71 , Edge v79, Firefox v102, Safari v14.5, NodeJS v18 (v16 with import). If you want to use WebStreams with node you need to pass `--conditions=webstream` in the cli to force its use. 65 | 66 | ```javascript 67 | import { pipeline } from '@datastream/core' 68 | import { stringReadableStream } from '@datastream/string' 69 | import { csvParseStream } from '@datastream/csv' 70 | 71 | export default async (blob, opts = {}) => { 72 | const streams = [ 73 | stringReadableStream(blob), 74 | csvParseStream() 75 | // ... 76 | ] 77 | 78 | const result = await pipeline(streams) 79 | console.log(result.csvErrors) 80 | } 81 | ``` 82 | 83 | ### File from input form in a Browser 84 | 85 | To prevent blocking the main thread it is recommended that CSV parsing is done in a WebWorker, SharedWorker, or ServiceWorker instead of the main thread. 
86 | 87 | ```javascript 88 | /* eslint-env worker */ 89 | import parse from 'csv-rex/parse' 90 | 91 | const enqueue = ({ data, idx, err }) => { 92 | if (err) { 93 | // handler err 94 | return 95 | } 96 | // handle data 97 | } 98 | 99 | onmessage = async (event) => { 100 | const { file } = event.data 101 | const options = { enqueue } 102 | file.length = file.size // polyfill length 103 | await parse(file, options) 104 | // ... 105 | postMessageEncode() 106 | } 107 | 108 | const postMessageEncode = (str) => { 109 | if (typeof str !== 'string') str = JSON.stringify(str) 110 | const buffer = new TextEncoder().encode(str).buffer 111 | postMessage(buffer, [buffer]) 112 | } 113 | ``` 114 | 115 | ```html 116 | 117 | 118 |
123 | 124 |
125 | 149 | 150 | 151 | ``` 152 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | csv-rex - A tiny and fast CSV parser & formatter for JavaScript. 7 | 8 | 12 | 16 | 20 | 21 | 42 | 43 | 44 |
45 | 46 | 55 | 56 | 57 | 59 | 60 | -------------------------------------------------------------------------------- /docs/sidebar.md: -------------------------------------------------------------------------------- 1 | - Getting started 2 | - [Setup](/) 3 | - [Parse](/docs/parse.md) 4 | - [Options](/docs/parse.md) 5 | - [Basic Example](/docs/parse.md#start) 6 | - [Fast Mode Example](/docs/parse.md#start) 7 | - [Stream Example](/docs/parse.md) 8 | - [WebWorker Example](/docs/parse.md) 9 | - [Format](/docs/format.md) 10 | - [Options](/docs/format.md) 11 | - [Basic Example](/docs/format.md) 12 | - [NodeJS Stream Example](/docs/parse.md#nodestream) 13 | - [Web Stream API Example](/docs/parse.md#webstream) 14 | - [WebWorker Example](/docs/parse.md#browser) 15 | - Migrate 16 | - [papaparse](/docs/migrate/papaparse.md) 17 | - [csv](/docs/migrate/csv.md) 18 | -------------------------------------------------------------------------------- /docs/sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/sidebar.png -------------------------------------------------------------------------------- /docs/sidebar.sketch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/sidebar.sketch -------------------------------------------------------------------------------- /docs/t-rex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/willfarrell/csv-rex/196e6072dedd34828944ebba90d7bc7b576de168/docs/t-rex.png -------------------------------------------------------------------------------- /format.js: -------------------------------------------------------------------------------- 1 | export const defaultOptions = { 2 | header: true, // false: don't log out 
header; true: log out header 3 | newlineChar: '\r\n', // undefined: detect newline from file; '\r\n': Windows; '\n': Linux/Mac 4 | delimiterChar: ',', // TODO add in auto detect or function 5 | quoteChar: '"' 6 | // escapeChar: '"' 7 | 8 | // quoteColumn: undefined 9 | } 10 | 11 | export const format = (input, opts = {}) => { 12 | const options = { ...defaultOptions, enqueue: () => {}, ...opts } 13 | options.escapeChar ??= options.quoteChar 14 | const { enableReturn, enqueue } = options 15 | 16 | const isArrayData = Array.isArray(input[0]) 17 | const format = isArrayData ? formatArray : formatObject 18 | if (!isArrayData && options.header === true) { 19 | options.header = Object.keys(input[0]) 20 | } 21 | 22 | let res = options.header !== false ? formatArray(options.header, options) : '' 23 | 24 | for (let i = 0, l = input.length; i < l; i++) { 25 | const data = format(input[i], options) 26 | enqueue(data) 27 | if (enableReturn) { 28 | res += data 29 | } 30 | } 31 | 32 | return enableReturn && res 33 | } 34 | 35 | export const formatArray = (arr, options) => { 36 | let csv = '' 37 | for (let i = 0, l = arr.length; i < l; i++) { 38 | csv += (i ? options.delimiterChar : '') + formatField(arr[i], null, options) 39 | } 40 | return csv + options.newlineChar 41 | } 42 | 43 | export const formatObject = (data, options) => { 44 | let csv = '' 45 | const columns = options.header || Object.keys(data) 46 | for (let i = 0, l = columns.length; i < l; i++) { 47 | csv += 48 | (i ? options.delimiterChar : '') + 49 | formatField(data[columns[i]], options.quoteColumn?.[i], options) 50 | } 51 | return csv + options.newlineChar 52 | } 53 | 54 | export const formatField = ( 55 | field, 56 | needsQuotes, 57 | { quoteChar, escapeChar, delimiterChar, newlineChar } 58 | ) => { 59 | if (field === undefined || field === null || field === '') { 60 | return '' 61 | } 62 | 63 | if (field.constructor === Date) { 64 | return field.toISOString() // JSON.stringify(str).slice(1, 25) faster?? 
65 | } 66 | 67 | field = field.toString() 68 | 69 | // Developer override using options.quotes 70 | if (needsQuotes === false) { 71 | return field 72 | } 73 | 74 | // Test if needs quote 75 | needsQuotes = 76 | needsQuotes || 77 | hasAnyDelimiters(field, [ 78 | delimiterChar, 79 | newlineChar, 80 | quoteChar, 81 | '\ufeff' 82 | ]) || 83 | field[0] === ' ' || 84 | field[field.length - 1] === ' ' 85 | 86 | return needsQuotes 87 | ? quoteChar + 88 | field.replaceAll(quoteChar, escapeChar + quoteChar) + 89 | quoteChar 90 | : field 91 | } 92 | 93 | const hasAnyDelimiters = (field, delimiters) => { 94 | for (const delimiter of delimiters) { 95 | if (field.indexOf(delimiter) > -1) { 96 | return true 97 | } 98 | } 99 | } 100 | 101 | export default format 102 | -------------------------------------------------------------------------------- /format.test.js: -------------------------------------------------------------------------------- 1 | import test from 'node:test' 2 | import { equal } from 'node:assert' 3 | import { format, formatArray, formatObject, formatField } from './format.js' 4 | 5 | const defaultOptions = { 6 | header: true, 7 | escapeChar: '"', 8 | quoteChar: '"', 9 | delimiterChar: ',', 10 | newlineChar: '/n', 11 | 12 | enableReturn: true, 13 | enqueue: () => {} 14 | } 15 | 16 | // *** format() *** // 17 | 18 | test('Should format array of objects w/ header == true', async (t) => { 19 | const field = format([{ a: '1', b: '2' }], { 20 | ...defaultOptions, 21 | header: true 22 | }) 23 | equal(field, 'a,b/n1,2/n') 24 | }) 25 | 26 | test('Should format array of objects w/ header == [...]', async (t) => { 27 | const field = format([{ a: '1', b: '2', c: '3' }], { 28 | ...defaultOptions, 29 | header: ['b', 'a'] 30 | }) 31 | equal(field, 'b,a/n2,1/n') 32 | }) 33 | 34 | test('Should format array of objects w/ header === false', async (t) => { 35 | const field = format([{ a: '1', b: '2' }], { 36 | ...defaultOptions, 37 | header: false 38 | }) 39 | equal(field, '1,2/n') 
40 | }) 41 | 42 | test('Should format array of arrays w/ header == [...]', async (t) => { 43 | const field = format([['1', '2']], { 44 | ...defaultOptions, 45 | header: ['a', 'b'] 46 | }) 47 | equal(field, 'a,b/n1,2/n') 48 | }) 49 | 50 | test('Should format array of arrays w/ header === false', async (t) => { 51 | const field = format([['1', '2']], { 52 | ...defaultOptions, 53 | header: false 54 | }) 55 | equal(field, '1,2/n') 56 | }) 57 | 58 | // *** formatHeader() *** // 59 | test('Should format header', async (t) => { 60 | const field = formatArray(['b', 'a'], { 61 | ...defaultOptions, 62 | header: ['b', 'a'] 63 | }) 64 | equal(field, 'b,a/n') 65 | }) 66 | 67 | // *** formatArray() *** // 68 | test('Should format row array', async (t) => { 69 | const field = formatArray(['1', '2'], { ...defaultOptions, header: false }) 70 | equal(field, '1,2/n') 71 | }) 72 | 73 | // *** formatObject() *** // 74 | test('Should format row object', async (t) => { 75 | const field = formatObject( 76 | { a: '1', b: '2' }, 77 | { ...defaultOptions, header: ['b', 'a'] } 78 | ) 79 | equal(field, '2,1/n') 80 | }) 81 | test('Should format row object w/ quotes', async (t) => { 82 | const field = formatObject( 83 | { a: '1', b: '2' }, 84 | { ...defaultOptions, header: ['b', 'a'], quoteColumn: [true, true] } 85 | ) 86 | equal(field, '"2","1"/n') 87 | }) 88 | test('Should format row object w/o quotes', async (t) => { 89 | const field = formatObject( 90 | { a: '1', b: '2' }, 91 | { ...defaultOptions, header: ['b', 'a'], quoteColumn: [false, false] } 92 | ) 93 | equal(field, '2,1/n') 94 | }) 95 | 96 | // *** formatField() *** // 97 | test('Should format undefined', async (t) => { 98 | const field = formatField(undefined, undefined, defaultOptions) 99 | equal(field, '') 100 | }) 101 | test('Should format null', async (t) => { 102 | const field = formatField(null, undefined, defaultOptions) 103 | equal(field, '') 104 | }) 105 | test('Should format empty string', async (t) => { 106 | const field = 
formatField('', undefined, defaultOptions) 107 | equal(field, '') 108 | }) 109 | test('Should format date', async (t) => { 110 | const field = formatField( 111 | new Date('2000-01-01T00:00:00.000Z'), 112 | undefined, 113 | defaultOptions 114 | ) 115 | equal(field, '2000-01-01T00:00:00.000Z') 116 | }) 117 | test('Should format number', async (t) => { 118 | const field = formatField(0, undefined, defaultOptions) 119 | equal(field, '0') 120 | }) 121 | test('Should format string', async (t) => { 122 | const field = formatField('column', undefined, defaultOptions) 123 | equal(field, 'column') 124 | }) 125 | test('Should format string with delimiter', async (t) => { 126 | const field = formatField('_"_', undefined, defaultOptions) 127 | equal(field, '"_""_"') 128 | }) 129 | test('Should format string with leading space', async (t) => { 130 | const field = formatField(' space', undefined, defaultOptions) 131 | equal(field, '" space"') 132 | }) 133 | test('Should format string with trailing space', async (t) => { 134 | const field = formatField('space ', undefined, defaultOptions) 135 | equal(field, '"space "') 136 | }) 137 | test('Should format w/ quotes', async (t) => { 138 | const field = formatField('column', true, defaultOptions) 139 | equal(field, '"column"') 140 | }) 141 | test('Should format w/o quotes', async (t) => { 142 | const field = formatField('column', false, defaultOptions) 143 | equal(field, 'column') 144 | }) 145 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | // import {TextDecoder} from 'node:util' 2 | // import {defaultOptions, optionDetectNewlineValue} from './options.js' 3 | import csvParse from 'csv-rex/parse' 4 | import csvParseMini from 'csv-rex/parse-mini' 5 | import csvFormat from 'csv-rex/format' 6 | 7 | export const parse = csvParse 8 | export const parseMini = csvParseMini 9 | export const format = csvFormat 10 | 
11 | export default { 12 | parse: csvParse, 13 | parseMini: csvParseMini, 14 | format: csvFormat 15 | } 16 | -------------------------------------------------------------------------------- /lint-staged.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | '**/*.{json,yml}': ['prettier --write'], 3 | '**/*.js': ['prettier --write', 'standard --fix'] 4 | } 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "csv-rex", 3 | "version": "0.7.0", 4 | "description": "A tiny and fast CSV parser for JavaScript.", 5 | "type": "module", 6 | "files": [ 7 | "*.mjs", 8 | "*.cjs", 9 | "*.map" 10 | ], 11 | "main": "index.mjs", 12 | "exports": { 13 | ".": { 14 | "import": { 15 | "default": "./index.mjs" 16 | }, 17 | "require": { 18 | "default": "./index.cjs" 19 | } 20 | }, 21 | "./parse": { 22 | "import": { 23 | "default": "./parse.mjs" 24 | }, 25 | "require": { 26 | "default": "./parse.cjs" 27 | } 28 | }, 29 | "./parse-mini": { 30 | "import": { 31 | "default": "./parse-mini.mjs" 32 | }, 33 | "require": { 34 | "default": "./parse-mini.cjs" 35 | } 36 | }, 37 | "./format": { 38 | "import": { 39 | "default": "./format.mjs" 40 | }, 41 | "require": { 42 | "default": "./format.cjs" 43 | } 44 | } 45 | }, 46 | "scripts": { 47 | "prepare": "husky install", 48 | "commit-msg": "commitlint --config commitlint.config.cjs --edit", 49 | "pre-commit": "lint-staged", 50 | "start": "docsify serve docs", 51 | "lint": "prettier --write *.{js,json} && standard --fix *.js", 52 | "test": "npm run build && c8 node --test", 53 | "prepublishOnly": "npm test", 54 | "build": "./bin/esbuild", 55 | "bench": "npm run build && node parse.bench.js" 56 | }, 57 | "repository": { 58 | "type": "git", 59 | "url": "git+https://willfarrell@github.com/willfarrell/csv-rex.git" 60 | }, 61 | "keywords": [ 62 | "csv", 
63 | "parse", 64 | "format", 65 | "json" 66 | ], 67 | "author": { 68 | "name": "willfarrell", 69 | "url": "https://github.com/willfarrell" 70 | }, 71 | "license": "MIT", 72 | "bugs": { 73 | "url": "https://github.com/willfarrell/csv-rex/issues" 74 | }, 75 | "homepage": "https://github.com/willfarrell/csv-rex#readme", 76 | "dependencies": { 77 | "csv-rex": "0.7.0" 78 | }, 79 | "devDependencies": { 80 | "@commitlint/cli": "^17.0.0", 81 | "@commitlint/config-conventional": "^17.0.0", 82 | "benny": "^3.7.1", 83 | "c8": "^7.11.0", 84 | "docsify-cli": "^4.4.4", 85 | "esbuild": "^0.17.0", 86 | "husky": "^8.0.0", 87 | "lint-staged": "^13.0.0", 88 | "prettier": "^2.0.0", 89 | "sinon": "^15.0.0", 90 | "standard": "^17.0.0" 91 | }, 92 | "funding": { 93 | "type": "github", 94 | "url": "https://github.com/sponsors/willfarrell" 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /parse-mini.js: -------------------------------------------------------------------------------- 1 | // chunkSize >> largest expected row 2 | const defaultOptions = { 3 | header: true, // false: return array; true: detect headers and return json; [...]: use defined headers and return json 4 | newlineChar: '\r\n', // '': detect newline from chunk; '\r\n': Windows; '\n': Linux/Mac 5 | delimiterChar: ',', // '': detect delimiter from chunk 6 | // quoteChar: '"', 7 | // escapeChar: '"', // default: `quoteChar` 8 | 9 | // Parse 10 | emptyFieldValue: '', 11 | coerceField: (field) => field, // TODO tests 12 | // commentPrefixValue: false, // falsy: disable, '//': enabled 13 | // errorOnComment: true, 14 | // errorOnEmptyLine: true, 15 | errorOnFieldsMismatch: true 16 | // errorOnFieldMalformed: true 17 | } 18 | 19 | const length = (value) => value.length 20 | 21 | export const parse = (opts = {}) => { 22 | const options = { ...defaultOptions, ...opts } 23 | options.escapeChar ??= options.quoteChar 24 | 25 | let { header, newlineChar, delimiterChar } = options 26 | 
let headerLength = length(header) 27 | const { 28 | // quoteChar, 29 | // escapeChar, 30 | // commentPrefixValue, 31 | emptyFieldValue, 32 | coerceField, 33 | // errorOnEmptyLine, 34 | // errorOnComment, 35 | errorOnFieldsMismatch 36 | // errorOnFieldMalformed 37 | } = options 38 | 39 | let chunk, enqueue 40 | let partialLine = '' 41 | let idx = 0 42 | const enqueueRow = (row) => { 43 | let data = row 44 | idx += 1 45 | if (headerLength) { 46 | const rowLength = length(row) 47 | 48 | if (headerLength !== rowLength) { 49 | if (errorOnFieldsMismatch) { 50 | enqueueError( 51 | 'FieldsMismatch', 52 | `Incorrect number of fields parsed, expected ${headerLength}.` 53 | ) 54 | } 55 | return 56 | } else { 57 | data = {} 58 | for (let i = 0; i < rowLength; i++) { 59 | data[header[i]] = row[i] 60 | } 61 | } 62 | } 63 | enqueue({ idx, data }) 64 | } 65 | 66 | const enqueueError = (code, message) => { 67 | enqueue({ idx, err: { code, message } }) 68 | } 69 | 70 | const transformField = (field, idx) => { 71 | return coerceField(field || emptyFieldValue, idx) 72 | } 73 | 74 | const chunkParse = (string, controller) => { 75 | chunk = string 76 | enqueue = controller.enqueue 77 | const lines = chunk.split(newlineChar) // TODO use cursor pattern 78 | let linesLength = length(lines) 79 | if (linesLength > 1) { 80 | partialLine = lines.pop() 81 | linesLength -= 1 82 | } 83 | 84 | let i = 0 85 | if (header === true) { 86 | header = lines[i].split(delimiterChar) 87 | headerLength = length(header) 88 | i += 1 89 | } 90 | 91 | for (; i < linesLength; i++) { 92 | const line = lines[i] 93 | const row = [] 94 | let cursor = 0 95 | while (cursor < line.length) { 96 | const delimiterIndex = line.indexOf(delimiterChar, cursor) 97 | if (delimiterIndex === -1) { 98 | row.push(transformField(line.substring(cursor), row.length)) 99 | break 100 | } 101 | row.push( 102 | transformField(line.substring(cursor, delimiterIndex), row.length) 103 | ) 104 | cursor = delimiterIndex + 1 105 | } 106 | 
enqueueRow(row) 107 | } 108 | } 109 | 110 | return { 111 | chunkParse, 112 | header: () => header, 113 | previousChunk: () => partialLine 114 | } 115 | } 116 | 117 | export default (input, opts) => { 118 | const options = { 119 | ...defaultOptions, 120 | ...{ 121 | enableReturn: true, 122 | chunkSize: 64 * 1024 * 1024, 123 | enqueue: () => {} 124 | }, 125 | ...opts 126 | } 127 | const { chunkSize, enableReturn, enqueue } = options 128 | const { chunkParse, previousChunk } = parse(options) 129 | 130 | const res = [] 131 | const controller = { enqueue } 132 | 133 | if (enableReturn) { 134 | controller.enqueue = (row) => { 135 | enqueue(row) 136 | res.push(row.data) 137 | } 138 | } 139 | 140 | let position = 0 141 | while (position < input.length) { 142 | const chunk = 143 | previousChunk() + input.substring(position, position + chunkSize) 144 | 145 | // Checking if you can use fastParse slows it down more than checking for quoteChar on ever field. 146 | chunkParse(chunk, controller) 147 | position += chunkSize 148 | } 149 | // flush 150 | const chunk = previousChunk() 151 | chunkParse(chunk, controller, true) 152 | 153 | return enableReturn && res 154 | } 155 | -------------------------------------------------------------------------------- /parse.bench.js: -------------------------------------------------------------------------------- 1 | import { add, cycle, /* save, */ suite } from 'benny' 2 | import parse from './parse.js' // 'csv-rex/parse' 3 | 4 | const inputs = {} 5 | const configs = [] 6 | const baseline = { 7 | columns: 10, 8 | rows: 1_000, 9 | quotes: false, 10 | newlineChar: '\r\n', 11 | delimiterChar: ',', 12 | header: false, 13 | commentPrefixValue: false 14 | } 15 | configs.push({ ...baseline }) 16 | // expected to be slower, compare against each other 17 | configs.push({ ...baseline, columns: 100 }) // input has move columns 18 | configs.push({ ...baseline, rows: 10_000 }) // input has more rows 19 | // Options 20 | configs.push({ 21 | ...baseline, 22 
| header: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] 23 | }) // pre-defined headers to make object 24 | configs.push({ ...baseline, header: true }) // use header to make object 25 | configs.push({ ...baseline, newlineChar: '\n' }) // shorter newline ** should be fastest 26 | configs.push({ ...baseline, newlineChar: '' }) // detect newline 27 | configs.push({ ...baseline, delimiterChar: '\t' }) // detect delimiter 28 | configs.push({ ...baseline, delimiterChar: '' }) // detect delimiter 29 | configs.push({ ...baseline, commentPrefixValue: '//' }) // detect comments 30 | configs.push({ ...baseline, quotes: true }) // input has quoted fields 31 | 32 | configs.push({ ...baseline, newlineChar: '\n', delimiterChar: '\t' }) // TSV 33 | configs.push({ ...baseline }) 34 | 35 | const baselineDiff = (config) => { 36 | const diff = {} 37 | for (const key in config) { 38 | if (config[key] !== baseline[key]) { 39 | diff[key] = config[key] 40 | } 41 | } 42 | return diff 43 | } 44 | 45 | const testBatch = (configs) => { 46 | return configs.map((config) => { 47 | const { columns, rows, quotes, ...options } = config 48 | const delimiterChar = options.delimiterChar || baseline.delimiterChar 49 | const newlineChar = options.newlineChar || baseline.newlineChar 50 | const input = `${columns}x${rows} w/${ 51 | quotes ? '' : 'o' 52 | } quotes and {newlineChar:${newlineChar},delimiterChar:${delimiterChar}}` 53 | if (!inputs[input]) { 54 | const wrapper = quotes ? '"' : '' 55 | const delimiter = quotes ? 
`"${delimiterChar}"` : `${delimiterChar}` 56 | let csv = 57 | wrapper + 58 | Array.from({ length: columns + 1 }, (_, x) => `__${x}__`).join( 59 | delimiter 60 | ) + 61 | wrapper + 62 | newlineChar 63 | for (let y = 0; y < rows; y++) { 64 | csv += 65 | wrapper + 66 | Array.from({ length: columns + 1 }, (_, x) => `${x}x${y}`).join( 67 | delimiter 68 | ) + 69 | wrapper + 70 | newlineChar 71 | } 72 | inputs[input] = csv 73 | } 74 | return add( 75 | `parse(${JSON.stringify({ columns, rows, quotes })}, ${JSON.stringify( 76 | options 77 | )}) :: ${JSON.stringify(baselineDiff(config))}`, 78 | () => { 79 | parse(inputs[input], options) 80 | } 81 | ) 82 | }) 83 | } 84 | 85 | const parseSuite = suite( 86 | 'parse', 87 | ...testBatch(configs), 88 | cycle() 89 | // save({file: 'parse.bench.csv', format: 'csv'}) 90 | ) 91 | 92 | export default () => parseSuite 93 | -------------------------------------------------------------------------------- /parse.js: -------------------------------------------------------------------------------- 1 | // chunkSize >> largest expected row 2 | const defaultOptions = { 3 | header: true, // false: return array; true: detect headers and return json; [...]: use defined headers and return json 4 | newlineChar: '', // '': detect newline from chunk; '\r\n': Windows; '\n': Linux/Mac 5 | delimiterChar: '', // '': detect delimiter from chunk 6 | quoteChar: '"', 7 | // escapeChar: '"', // default: `quoteChar` 8 | detectCharLength: 1024, 9 | 10 | // Parse 11 | emptyFieldValue: '', 12 | // TODO option to remove empty fields from object 13 | coerceField: (field) => field, // TODO tests 14 | commentPrefixValue: false, // falsy: disable, '//': enabled 15 | errorOnComment: true, 16 | errorOnEmptyLine: true, 17 | errorOnExtraFields: true, 18 | errorOnMissingFields: true 19 | // errorOnFieldMalformed: true 20 | } 21 | 22 | const length = (value) => value.length 23 | const escapeRegExp = (string) => string.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&') // 
https://github.com/tc39/proposal-regex-escaping 24 | 25 | export const parse = (opts = {}) => { 26 | const options = { ...defaultOptions, ...opts } 27 | options.escapeChar ??= options.quoteChar 28 | 29 | let { header, newlineChar, delimiterChar } = options 30 | const { 31 | detectCharLength, 32 | quoteChar, 33 | escapeChar, 34 | commentPrefixValue, 35 | emptyFieldValue, 36 | coerceField, 37 | errorOnEmptyLine, 38 | errorOnComment, 39 | errorOnExtraFields, 40 | errorOnMissingFields 41 | // errorOnFieldMalformed 42 | } = options 43 | let headerLength = length(header) 44 | const detectDelimiterCharRegExp = /,|\t|\||;|\x1E|\x1F/g // eslint-disable-line no-control-regex 45 | const detectNewlineCharRegExp = /\r\n|\n|\r/g 46 | 47 | const escapedQuoteChar = escapeChar + quoteChar 48 | const escapedQuoteCharRegExp = new RegExp( 49 | `${escapeRegExp(escapedQuoteChar)}`, 50 | 'g' 51 | ) 52 | 53 | const escapedQuoteEqual = escapeChar === quoteChar 54 | const escapedQuoteNotEqual = escapeChar !== quoteChar 55 | 56 | let newlineCharLength = length(newlineChar) 57 | const delimiterCharLength = 1 // length(delimiterChar) 58 | const quoteCharLength = 1 // length(quoteChar) 59 | const escapeCharLength = 1 // length(escapeChar) 60 | const escapedQuoteCharLength = 2 // length(escapedQuoteChar) 61 | // const commentPrefixValueLength = length(commentPrefixValue) 62 | 63 | let chunk, chunkLength, cursor, row, enqueue 64 | let partialLine = '' 65 | let idx = 0 66 | const enqueueRow = (row) => { 67 | idx += 1 68 | if (header === true) { 69 | header = row 70 | headerLength = length(header) 71 | return 72 | } 73 | let data = row 74 | if (headerLength) { 75 | let rowLength = length(row) 76 | 77 | // enqueueError('FieldsMismatch', `Parsed ${rowLength} fields, expected ${headerLength}.`) 78 | if (errorOnMissingFields && rowLength < headerLength) { 79 | enqueueError( 80 | 'MissingFields', 81 | `Too few fields were parsed, expected ${headerLength}.` 82 | ) 83 | return 84 | } else if (headerLength 
< rowLength) { 85 | if (errorOnExtraFields) { 86 | enqueueError( 87 | 'ExtraFields', 88 | `Too many fields were parsed, expected ${headerLength}.` 89 | ) 90 | return 91 | } 92 | // only map fields that have headers 93 | rowLength = headerLength 94 | } 95 | 96 | data = {} 97 | for (let i = 0; i < rowLength; i++) { 98 | data[header[i]] = row[i] 99 | } 100 | } 101 | enqueue({ idx, data }) 102 | } 103 | 104 | const enqueueError = (code, message) => { 105 | enqueue({ idx, err: { code, message } }) 106 | } 107 | 108 | const findNext = (searchValue, start = cursor) => { 109 | return chunk.indexOf(searchValue, start) 110 | } 111 | 112 | const parseField = (end) => { 113 | return chunk.substring(cursor, end) 114 | } 115 | const transformField = (field, idx) => { 116 | return coerceField(field || emptyFieldValue, idx) 117 | } 118 | 119 | // TODO idea: when header == true/array using a different addFieldToRow function to allow faster key:value mapping 120 | // const resetRow = () => { 121 | // row = [] 122 | // } 123 | const addFieldToRow = (field, idx) => { 124 | row.push(transformField(field, idx)) 125 | } 126 | 127 | const checkForEmptyLine = () => { 128 | if (findNext(newlineChar) === cursor) { 129 | idx += 1 130 | cursor += newlineCharLength 131 | if (errorOnEmptyLine) { 132 | enqueueError('EmptyLineExists', 'Empty line detected.') 133 | } 134 | return checkForEmptyLine() 135 | } else if (commentPrefixValue && findNext(commentPrefixValue) === cursor) { 136 | idx += 1 137 | cursor = findNext(newlineChar) + newlineCharLength 138 | if (errorOnComment) { 139 | enqueueError('CommentExists', 'Comment detected.') 140 | } 141 | return checkForEmptyLine() 142 | } 143 | } 144 | 145 | const chunkParse = (string, controller, flush = false) => { 146 | chunk = string 147 | chunkLength = length(chunk) 148 | enqueue = controller.enqueue 149 | partialLine = '' 150 | cursor = 0 151 | row = [] // resetRow() 152 | 153 | // auto-detect 154 | if (!newlineChar) { 155 | newlineChar = 
detectChar( 156 | chunk.substring(0, detectCharLength), 157 | detectNewlineCharRegExp 158 | ) 159 | 160 | if (!newlineChar) { 161 | if (chunk.length < detectCharLength && !flush) { 162 | // First chunk is too short 163 | partialLine = chunk 164 | return 165 | } else { 166 | throw new Error('newlineCharUnknown') 167 | } 168 | } 169 | newlineCharLength = length(newlineChar) 170 | } 171 | delimiterChar ||= detectChar( 172 | chunk.substring(0, detectCharLength), 173 | detectDelimiterCharRegExp 174 | ) 175 | 176 | checkForEmptyLine() 177 | let lineStart = 0 178 | for (;;) { 179 | let quoted 180 | let nextCursor = cursor 181 | let nextCursorLength 182 | let atNewline 183 | if (chunk[cursor] === quoteChar) { 184 | cursor += quoteCharLength 185 | quoted = true 186 | nextCursor = cursor 187 | for (;;) { 188 | nextCursor = findNext(quoteChar, nextCursor) 189 | if (nextCursor < 0) { 190 | partialLine = chunk.substring(lineStart, chunkLength) + partialLine 191 | if (flush) { 192 | throw new Error('QuotedFieldMalformed', { cause: idx }) 193 | } 194 | return 195 | } 196 | if ( 197 | escapedQuoteEqual && 198 | chunk[nextCursor + quoteCharLength] === quoteChar 199 | ) { 200 | nextCursor += escapedQuoteCharLength 201 | continue 202 | } 203 | if ( 204 | escapedQuoteNotEqual && 205 | chunk[nextCursor - escapeCharLength] === escapeChar 206 | ) { 207 | nextCursor += quoteCharLength 208 | continue 209 | } 210 | break 211 | } 212 | } 213 | 214 | // fallback 215 | const nextDelimiterChar = findNext(delimiterChar, nextCursor) 216 | let nextNewlineChar = findNext(newlineChar, nextCursor) 217 | if (nextNewlineChar < 0) { 218 | if (!flush) { 219 | partialLine = chunk.substring(lineStart, chunkLength) + partialLine 220 | return 221 | } 222 | nextNewlineChar = chunkLength 223 | } 224 | if (nextDelimiterChar > -1 && nextDelimiterChar < nextNewlineChar) { 225 | nextCursor = nextDelimiterChar 226 | nextCursorLength = delimiterCharLength 227 | } else { 228 | nextCursor = nextNewlineChar 229 | 
nextCursorLength = newlineCharLength 230 | atNewline = true 231 | } 232 | 233 | if (nextCursor < 0 || !nextCursor) { 234 | break 235 | } 236 | 237 | let field 238 | if (quoted) { 239 | field = parseField(nextCursor - 1).replace( 240 | escapedQuoteCharRegExp, 241 | quoteChar 242 | ) 243 | } else { 244 | field = parseField(nextCursor) 245 | } 246 | addFieldToRow(field, row.length) 247 | 248 | cursor = nextCursor + nextCursorLength 249 | 250 | if (atNewline) { 251 | enqueueRow(row) 252 | row = [] // resetRow() 253 | checkForEmptyLine() 254 | lineStart = cursor 255 | } 256 | // `row.length === 0` required for when a csv ends with just `,` and no newline 257 | if (chunkLength <= cursor && row.length === 0) { 258 | break 259 | } 260 | } 261 | } 262 | 263 | return { 264 | chunkParse, 265 | header: () => header, 266 | previousChunk: () => partialLine 267 | } 268 | } 269 | 270 | export const detectChar = (chunk, pattern) => { 271 | let match 272 | const chars = {} 273 | while ((match = pattern.exec(chunk))) { 274 | const char = match[0] 275 | chars[char] ??= 0 276 | chars[char] += 1 277 | if (chars[char] > 5) return char 278 | } 279 | // pattern.lastIndex = 0 // not reused again 280 | const { key } = 281 | Object.keys(chars) 282 | .map((key) => ({ key, value: chars[key] })) 283 | .sort((a, b) => a.value - b.value)?.[0] ?? {} 284 | if (!key) { 285 | return 286 | } 287 | return key 288 | } 289 | 290 | export const coerceTo = { 291 | string: (field) => field, 292 | boolean: (field) => { 293 | const boolean = coerceTo.true(field) 294 | return typeof boolean === 'boolean' ? boolean : coerceTo.false(field) 295 | }, 296 | true: (field) => (field.toLowerCase() === 'true' ? true : field), 297 | false: (field) => (field.toLowerCase() === 'false' ? false : field), 298 | number: (field) => { 299 | const decimal = coerceTo.decimal(field) 300 | return Number.isInteger(decimal) ? 
coerceTo.integer(field) : decimal 301 | }, 302 | integer: (field) => Number.parseInt(field, 10) || field, 303 | decimal: (field) => Number.parseFloat(field) || field, 304 | json: (field) => { 305 | try { 306 | return JSON.parse(field) 307 | } catch (e) { 308 | return field 309 | } 310 | }, 311 | timestamp: (field) => { 312 | const date = new Date(field) 313 | return date.toString() !== 'Invalid Date' ? date : field 314 | }, 315 | null: (field) => (field.toLowerCase() === 'null' ? null : field), 316 | any: (field) => { 317 | const types = ['boolean', 'number', 'null', 'json'] 318 | for (let i = 0, l = types.length; i < l; i++) { 319 | field = coerceTo[types[i]](field) 320 | 321 | if (typeof field !== 'string') { 322 | break 323 | } 324 | } 325 | 326 | return field 327 | } 328 | } 329 | 330 | export default (input, opts) => { 331 | const options = { 332 | ...defaultOptions, 333 | ...{ 334 | enableReturn: true, 335 | chunkSize: 64 * 1024 * 1024, 336 | enqueue: () => {} 337 | }, 338 | ...opts 339 | } 340 | const { chunkSize, enableReturn, enqueue } = options 341 | const { chunkParse, previousChunk } = parse(options) 342 | 343 | const res = [] 344 | const controller = { enqueue } 345 | 346 | if (enableReturn) { 347 | controller.enqueue = (row) => { 348 | enqueue(row) 349 | res.push(row.data) 350 | } 351 | } 352 | 353 | let position = 0 354 | while (position < input.length) { 355 | const chunk = 356 | previousChunk() + input.substring(position, position + chunkSize) 357 | 358 | // Checking if you can use fastParse slows it down more than checking for quoteChar on ever field. 
/**
 * Parse a whole CSV string in fixed-size chunks.
 *
 * @param {string} input - Complete CSV text.
 * @param {Object} [opts] - Parser options; merged over the built-in defaults.
 * @returns {Array|false} Array of parsed row data when `enableReturn` is
 *   truthy, otherwise `false` (rows are still delivered via `enqueue`).
 */
export default (input, opts) => {
  // Merge order: library defaults, then whole-string defaults, then caller opts.
  const options = {
    ...defaultOptions,
    enableReturn: true,
    chunkSize: 64 * 1024 * 1024,
    enqueue: () => {},
    ...opts
  }
  const { chunkSize, enableReturn, enqueue } = options
  const { chunkParse, previousChunk } = parse(options)

  const rows = []
  const controller = { enqueue }

  if (enableReturn) {
    // Wrap the caller's enqueue so results are also collected for the return.
    controller.enqueue = (row) => {
      enqueue(row)
      rows.push(row.data)
    }
  }

  let position = 0
  while (position < input.length) {
    // Prepend any partial line left over from the previous chunk.
    // Note: checking whether a fast path applies costs more than checking
    // for quoteChar on every field, so chunkParse is always used.
    const chunk =
      previousChunk() + input.substring(position, position + chunkSize)
    chunkParse(chunk, controller)
    position += chunkSize
  }
  // Flush whatever partial line remains at end of input.
  chunkParse(previousChunk(), controller, true)

  return enableReturn && rows
}
column', async (t) => { 55 | const options = { 56 | enqueue: sinon.spy(), 57 | delimiterChar: ',' 58 | } 59 | const input = 'a,b,c,d\r\n1,2,3,4\r\n4,5,6,\r\n9,10,11,\r\n' 60 | const res = csvParse(input, options) 61 | deepEqual(res, [ 62 | { a: '1', b: '2', c: '3', d: '4' }, 63 | { a: '4', b: '5', c: '6', d: '' }, 64 | { a: '9', b: '10', c: '11', d: '' } 65 | ]) 66 | equal(options.enqueue.callCount, 3) 67 | }) 68 | 69 | test('Should parse csv string with empty first and last columns', async (t) => { 70 | const options = { 71 | enqueue: sinon.spy(), 72 | delimiterChar: ',' 73 | } 74 | const input = 'a,b,c,d\r\n1,2,3,4\r\n,5,6,\r\n,10,11,\r\n' 75 | const res = csvParse(input, options) 76 | deepEqual(res, [ 77 | { a: '1', b: '2', c: '3', d: '4' }, 78 | { a: '', b: '5', c: '6', d: '' }, 79 | { a: '', b: '10', c: '11', d: '' } 80 | ]) 81 | equal(options.enqueue.callCount, 3) 82 | }) 83 | 84 | test('Should parse csv string with empty last followed by empty first column', async (t) => { 85 | const options = { 86 | enqueue: sinon.spy(), 87 | delimiterChar: ',' 88 | } 89 | const input = 'a,b,c,d\r\n1,2,3,4\r\n4,5,6,\r\n,10,11,12\r\n' 90 | const res = csvParse(input, options) 91 | deepEqual(res, [ 92 | { a: '1', b: '2', c: '3', d: '4' }, 93 | { a: '4', b: '5', c: '6', d: '' }, 94 | { a: '', b: '10', c: '11', d: '12' } 95 | ]) 96 | equal(options.enqueue.callCount, 3) 97 | }) 98 | 99 | test('Should parse csv string w/ quotes', async (t) => { 100 | const options = { 101 | enqueue: sinon.spy(), 102 | chunkSize: 14 103 | } 104 | const input = 'a,b,c\r\n1,"2",3\r\n4,"5",6\r\n' 105 | const res = csvParse(input, options) 106 | deepEqual(res, [ 107 | { a: '1', b: '2', c: '3' }, 108 | { a: '4', b: '5', c: '6' } 109 | ]) 110 | equal(options.enqueue.callCount, 2) 111 | }) 112 | 113 | // *** General *** // 114 | for (const method of allMethods) { 115 | test(`${method}: Should parse single row with { }`, async (t) => { 116 | const options = {} 117 | const enqueue = sinon.spy() 118 | const 
chunk = 'a,b,c\r\n1,2,3\r\n' 119 | const parser = parse(options) 120 | parser[method](chunk, { enqueue }) 121 | equal(enqueue.callCount, 1) 122 | deepEqual(enqueue.firstCall.args, [ 123 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 124 | ]) 125 | }) 126 | 127 | test(`${method}: Should parse multiple rows with { }`, async (t) => { 128 | const options = {} 129 | const enqueue = sinon.spy() 130 | 131 | const chunk = 'a,b,c\r\n1,2,3\r\n4,5,6\r\n7,8,9' 132 | const parser = parse(options) 133 | parser[method](chunk, { enqueue }) 134 | parser[method](parser.previousChunk(), { enqueue }, true) 135 | equal(enqueue.callCount, 3) 136 | deepEqual(enqueue.firstCall.args, [ 137 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 138 | ]) 139 | deepEqual(enqueue.secondCall.args, [ 140 | { data: { a: '4', b: '5', c: '6' }, idx: 3 } 141 | ]) 142 | deepEqual(enqueue.thirdCall.args, [ 143 | { data: { a: '7', b: '8', c: '9' }, idx: 4 } 144 | ]) 145 | }) 146 | } 147 | 148 | // *** Chunking *** // 149 | for (const method of allMethods) { 150 | test(`${method}: Should parse with chunking`, async (t) => { 151 | const options = {} 152 | const enqueue = sinon.spy() 153 | let chunk = 'a,b,c\r\n1,2,' 154 | const parser = parse(options) 155 | parser[method](chunk, { enqueue }) 156 | equal(parser.previousChunk(), '1,2,') 157 | chunk = parser.previousChunk() + '3\r\n4' 158 | parser[method](chunk, { enqueue }) 159 | equal(parser.previousChunk(), '4') 160 | chunk = parser.previousChunk() + ',5,6' 161 | parser[method](chunk, { enqueue }, true) 162 | equal(enqueue.callCount, 2) 163 | deepEqual(enqueue.firstCall.args, [ 164 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 165 | ]) 166 | deepEqual(enqueue.secondCall.args, [ 167 | { data: { a: '4', b: '5', c: '6' }, idx: 3 } 168 | ]) 169 | }) 170 | } 171 | 172 | for (const method of quoteMethods) { 173 | test(`${method}: Should parse with chunking and quotes`, async (t) => { 174 | const options = {} 175 | const enqueue = sinon.spy() 176 | let chunk = 
'a,b,c\r\n"1","2","' 177 | const parser = parse(options) 178 | parser[method](chunk, { enqueue }) 179 | equal(parser.previousChunk(), '"1","2","') 180 | chunk = parser.previousChunk() + '3"\r\n' 181 | parser[method](chunk, { enqueue }) 182 | equal(parser.previousChunk(), '') 183 | chunk = parser.previousChunk() + '"4","5","6"' 184 | parser[method](chunk, { enqueue }, true) 185 | equal(enqueue.callCount, 2) 186 | deepEqual(enqueue.firstCall.args, [ 187 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 188 | ]) 189 | deepEqual(enqueue.secondCall.args, [ 190 | { data: { a: '4', b: '5', c: '6' }, idx: 3 } 191 | ]) 192 | }) 193 | } 194 | 195 | // *** Option: header *** // 196 | for (const method of allMethods) { 197 | test(`${method}: Should parse with { header: [...] }`, async (t) => { 198 | const options = { header: ['a', 'b', 'c'] } 199 | const enqueue = sinon.spy() 200 | const chunk = '1,2,3\r\n' 201 | const parser = parse(options) 202 | parser[method](chunk, { enqueue }) 203 | equal(enqueue.callCount, 1) 204 | deepEqual(enqueue.firstCall.args, [ 205 | { data: { a: '1', b: '2', c: '3' }, idx: 1 } 206 | ]) 207 | }) 208 | 209 | test(`${method}: Should parse with { header: true }`, async (t) => { 210 | const options = { header: true } 211 | const enqueue = sinon.spy() 212 | const chunk = 'a,b,c\r\n1,2,3\r\n' 213 | const parser = parse(options) 214 | parser[method](chunk, { enqueue }) 215 | equal(enqueue.callCount, 1) 216 | deepEqual(enqueue.firstCall.args, [ 217 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 218 | ]) 219 | }) 220 | 221 | test(`${method}: Should parse with { header: false }`, async (t) => { 222 | const options = { header: false } 223 | const enqueue = sinon.spy() 224 | const chunk = '1,2,3\r\n' 225 | const parser = parse(options) 226 | parser[method](chunk, { enqueue }) 227 | equal(enqueue.callCount, 1) 228 | deepEqual(enqueue.firstCall.args, [{ data: ['1', '2', '3'], idx: 1 }]) 229 | }) 230 | } 231 | 232 | // *** Option: newline *** // 233 | for (const 
method of allMethods) { 234 | test(`${method}: Should parse with { newlineChar: "" } (auto detect)`, async (t) => { 235 | const options = { newlineChar: '' } 236 | const enqueue = sinon.spy() 237 | const chunk = 'a,b,c\r1,2,3\r' 238 | const parser = parse(options) 239 | parser[method](chunk, { enqueue }) 240 | deepEqual(enqueue.firstCall.args, [ 241 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 242 | ]) 243 | }) 244 | 245 | test(`${method}: Should parse with { newlineChar: "\\r\\n" }`, async (t) => { 246 | const options = { newlineChar: '\r\n' } 247 | const enqueue = sinon.spy() 248 | const chunk = 'a,b,c\r\n1,2,3\r\n' 249 | const parser = parse(options) 250 | parser[method](chunk, { enqueue }) 251 | deepEqual(enqueue.firstCall.args, [ 252 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 253 | ]) 254 | }) 255 | test(`${method}: Should parse with { newlineChar: "\\n" }`, async (t) => { 256 | const options = { newlineChar: '\n' } 257 | const enqueue = sinon.spy() 258 | const chunk = 'a,b,c\n1,2,3\n' 259 | const parser = parse(options) 260 | parser[method](chunk, { enqueue }) 261 | deepEqual(enqueue.firstCall.args, [ 262 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 263 | ]) 264 | }) 265 | test(`${method}: Should parse first chunk is shorter than the headers with { newlineChar: "" }`, async (t) => { 266 | const options = { newlineChar: '' } 267 | const enqueue = sinon.spy() 268 | const chunk0 = 'a,b,' 269 | const chunk1 = 'c\n1,2,3\n1,2,3' 270 | const parser = parse(options) 271 | console.log('parser', parser.previousChunk() + chunk0) 272 | parser[method](chunk0, { enqueue }) 273 | console.log('parser', parser.previousChunk() + chunk1) 274 | parser[method](parser.previousChunk() + chunk1, { enqueue }, true) 275 | console.log(enqueue.firstCall) 276 | deepEqual(enqueue.firstCall.args, [ 277 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 278 | ]) 279 | deepEqual(enqueue.secondCall.args, [ 280 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 281 | ]) 282 | }) 283 | 
test(`${method}: Should parse when no newline at end of file`, async (t) => { 284 | const options = { newlineChar: '' } 285 | const enqueue = sinon.spy() 286 | const chunk = 'a,b,c\n1,2,3\n1,2,3' 287 | const parser = parse(options) 288 | parser[method](chunk, { enqueue }) 289 | parser[method](parser.previousChunk(), { enqueue }, true) 290 | deepEqual(enqueue.firstCall.args, [ 291 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 292 | ]) 293 | deepEqual(enqueue.secondCall.args, [ 294 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 295 | ]) 296 | }) 297 | test(`${method}: Should parse when no field and newline at end of file`, async (t) => { 298 | const options = { newlineChar: '' } 299 | const enqueue = sinon.spy() 300 | const chunk = 'a,b,c\n1,2,3\n1,2,' 301 | const parser = parse(options) 302 | parser[method](chunk, { enqueue }) 303 | parser[method](parser.previousChunk(), { enqueue }, true) 304 | deepEqual(enqueue.firstCall.args, [ 305 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 306 | ]) 307 | deepEqual(enqueue.secondCall.args, [ 308 | { data: { a: '1', b: '2', c: '' }, idx: 3 } 309 | ]) 310 | }) 311 | } 312 | 313 | // *** Option: delimiter *** // 314 | for (const method of allMethods) { 315 | test(`${method}: Should parse with { delimiterValue: "" }`, async (t) => { 316 | const options = { delimiterChar: '' } 317 | const enqueue = sinon.spy() 318 | const chunk = 'a\x1Fb\x1Fc\r\n1\x1F2\x1F3\r\n' 319 | const parser = parse(options) 320 | parser[method](chunk, { enqueue }) 321 | equal(enqueue.callCount, 1) 322 | deepEqual(enqueue.firstCall.args, [ 323 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 324 | ]) 325 | }) 326 | test(`${method}: Should parse with { delimiterValue: "," }`, async (t) => { 327 | const options = { delimiterChar: ',' } 328 | const enqueue = sinon.spy() 329 | const chunk = 'a,b,c\r\n1,2,3\r\n' 330 | const parser = parse(options) 331 | parser[method](chunk, { enqueue }) 332 | equal(enqueue.callCount, 1) 333 | deepEqual(enqueue.firstCall.args, [ 
334 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 335 | ]) 336 | }) 337 | test(`${method}: Should parse with { delimiterValue: "|" }`, async (t) => { 338 | const options = { delimiterChar: '|' } 339 | const enqueue = sinon.spy() 340 | const chunk = 'a|b|c\r\n1|2|3\r\n' 341 | const parser = parse(options) 342 | parser[method](chunk, { enqueue }) 343 | equal(enqueue.callCount, 1) 344 | deepEqual(enqueue.firstCall.args, [ 345 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 346 | ]) 347 | }) 348 | 349 | test(`${method}: Should parse with { delimiterValue: "\\t" }`, async (t) => { 350 | const options = { delimiterChar: '\t' } 351 | const enqueue = sinon.spy() 352 | const chunk = 'a\tb\tc\r\n1\t2\t3\r\n' 353 | const parser = parse(options) 354 | parser[method](chunk, { enqueue }) 355 | equal(enqueue.callCount, 1) 356 | deepEqual(enqueue.firstCall.args, [ 357 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 358 | ]) 359 | }) 360 | } 361 | 362 | // *** Option: quoteChar *** // 363 | for (const method of quoteMethods) { 364 | test(`${method}: Should parse with { quoteChar: '"' }`, async (t) => { 365 | const options = { quoteChar: '"' } 366 | const enqueue = sinon.spy() 367 | const chunk = '"a","b","c"\r\n"1","2","3"\r\n' 368 | const parser = parse(options) 369 | parser[method](chunk, { enqueue }) 370 | deepEqual(enqueue.firstCall.args, [ 371 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 372 | ]) 373 | }) 374 | 375 | test(`${method}: Should parse with { quoteChar: '\`' }`, async (t) => { 376 | const options = { quoteChar: '`' } 377 | const enqueue = sinon.spy() 378 | const chunk = '`a`,`b`,`c`\r\n`1`,`2`,`3`\r\n' 379 | const parser = parse(options) 380 | parser[method](chunk, { enqueue }) 381 | deepEqual(enqueue.firstCall.args, [ 382 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 383 | ]) 384 | }) 385 | } 386 | 387 | // *** Option: escapeChar *** // 388 | for (const method of quoteMethods) { 389 | test(`${method}: Should parse with { quoteChar: '"', escapeChar: '"' }`, async 
(t) => { 390 | const options = { quoteChar: '"', escapeChar: '"' } 391 | const enqueue = sinon.spy() 392 | const chunk = '"a","b""","c"\r\n"1","2""","3"\r\n' 393 | const parser = parse(options) 394 | parser[method](chunk, { enqueue }) 395 | deepEqual(enqueue.firstCall.args, [ 396 | { data: { a: '1', 'b"': '2"', c: '3' }, idx: 2 } 397 | ]) 398 | }) 399 | 400 | test(`${method}: Should parse with { quoteChar: '"', escapeChar: '\\' }`, async (t) => { 401 | const options = { quoteChar: '"', escapeChar: '\\' } 402 | const enqueue = sinon.spy() 403 | const chunk = '"a","b\\"","c"\r\n"1","2\\"","3"\r\n' 404 | const parser = parse(options) 405 | parser[method](chunk, { enqueue }) 406 | deepEqual(enqueue.firstCall.args, [ 407 | { data: { a: '1', 'b"': '2"', c: '3' }, idx: 2 } 408 | ]) 409 | }) 410 | 411 | test(`${method}: Should parse with { quoteChar: '"' } and field containing newline`, async (t) => { 412 | const options = { quoteChar: '"' } 413 | const enqueue = sinon.spy() 414 | const chunk = '"a","b\r\nb","c"\r\n"1","2\r\n2","3"' 415 | const parser = parse(options) 416 | parser[method](chunk, { enqueue }) 417 | parser[method](parser.previousChunk(), { enqueue }, true) 418 | deepEqual(enqueue.firstCall.args, [ 419 | { data: { a: '1', 'b\r\nb': '2\r\n2', c: '3' }, idx: 2 } 420 | ]) 421 | }) 422 | 423 | test(`${method}: Should parse with { quoteChar: '"' } and field containing delimiter`, async (t) => { 424 | const options = { delimiterValue: ',', quoteChar: '"' } 425 | const enqueue = sinon.spy() 426 | const chunk = '"a","b,b","c"\r\n"1","2,2","3"\r\n' 427 | const parser = parse(options) 428 | parser[method](chunk, { enqueue }) 429 | deepEqual(enqueue.firstCall.args, [ 430 | { data: { a: '1', 'b,b': '2,2', c: '3' }, idx: 2 } 431 | ]) 432 | }) 433 | } 434 | 435 | // *** coerceFields *** // 436 | for (const method of quoteMethods) { 437 | test(`${method}: Should parse with { coerceField: (field) => ... 
}`, async (t) => { 438 | const coerceField = (field, idx) => { 439 | return Object.values(coerceTo)[idx](field) 440 | } 441 | const options = { header: true, quoteChar: "'", coerceField } 442 | const enqueue = sinon.spy() 443 | const chunk = 444 | 'string,boolean,true,false,number,integer,decimal,json,timestamp,null\r\nstring,true,true,false,0,-1,-1.1,\'{"a":"b"}\',2022-07-30T04:46:24.466Z,null\r\n' 445 | const parser = parse(options) 446 | parser[method](chunk, { enqueue }) 447 | deepEqual(enqueue.firstCall.args, [ 448 | { 449 | data: { 450 | boolean: true, 451 | decimal: -1.1, 452 | false: false, 453 | number: 0, 454 | integer: -1, 455 | json: { 456 | a: 'b' 457 | }, 458 | null: null, 459 | string: 'string', 460 | timestamp: new Date('2022-07-30T04:46:24.466Z'), 461 | true: true 462 | }, 463 | idx: 2 464 | } 465 | ]) 466 | }) 467 | } 468 | 469 | test('Should coerceTo boolean', async (t) => { 470 | equal(coerceTo.true('true'), true) 471 | equal(coerceTo.true('TRUE'), true) 472 | 473 | equal(coerceTo.false('false'), false) 474 | equal(coerceTo.false('FALSE'), false) 475 | 476 | equal(coerceTo.boolean('true'), true) 477 | equal(coerceTo.boolean('TRUE'), true) 478 | equal(coerceTo.boolean('false'), false) 479 | equal(coerceTo.boolean('FALSE'), false) 480 | 481 | equal(coerceTo.any('true'), true) 482 | equal(coerceTo.any('TRUE'), true) 483 | equal(coerceTo.any('false'), false) 484 | equal(coerceTo.any('FALSE'), false) 485 | }) 486 | 487 | test('Should not coerceTo boolean', async (t) => { 488 | equal(coerceTo.null('1'), '1') 489 | equal(coerceTo.null('0'), '0') 490 | }) 491 | 492 | test('Should coerceTo number', async (t) => { 493 | equal(coerceTo.integer('1.1'), 1) 494 | equal(coerceTo.integer('1'), 1) 495 | equal(coerceTo.integer('0'), 0) 496 | equal(coerceTo.integer('-1'), -1) 497 | equal(coerceTo.integer('-1'), -1) 498 | 499 | equal(coerceTo.decimal('1.1'), 1.1) 500 | equal(coerceTo.decimal('1'), 1) 501 | equal(coerceTo.decimal('0'), 0) 502 | 
equal(coerceTo.decimal('-1'), -1) 503 | equal(coerceTo.decimal('-1.1'), -1.1) 504 | 505 | equal(coerceTo.number('1.1'), 1.1) 506 | equal(coerceTo.number('1'), 1) 507 | equal(coerceTo.number('0'), 0) 508 | equal(coerceTo.number('-1'), -1) 509 | equal(coerceTo.number('-1.1'), -1.1) 510 | 511 | equal(coerceTo.any('1.1'), 1.1) 512 | equal(coerceTo.any('1'), 1) 513 | equal(coerceTo.any('0'), 0) 514 | equal(coerceTo.any('-1'), -1) 515 | equal(coerceTo.any('-1.1'), -1.1) 516 | }) 517 | 518 | test('Should not coerceTo number', async (t) => { 519 | equal(coerceTo.null('a'), 'a') 520 | }) 521 | 522 | test('Should coerceTo null', async (t) => { 523 | equal(coerceTo.null('null'), null) 524 | equal(coerceTo.null('NULL'), null) 525 | 526 | equal(coerceTo.any('null'), null) 527 | equal(coerceTo.any('NULL'), null) 528 | }) 529 | 530 | test('Should not coerceTo null', async (t) => { 531 | equal(coerceTo.null('Nil'), 'Nil') 532 | }) 533 | 534 | test('Should coerceTo timestamp', async (t) => { 535 | deepEqual(coerceTo.timestamp('2000-01-01'), new Date('2000-01-01')) 536 | deepEqual( 537 | coerceTo.timestamp('2000-01-01T00:00:00Z'), 538 | new Date('2000-01-01T00:00:00Z') 539 | ) 540 | 541 | // `any` doesn't support `date` due conflict with `number` 542 | // deepEqual(coerceTo.any('2000-01-01'), new Date('2000-01-01')) 543 | // deepEqual(coerceTo.any('2000-01-01T00:00:00Z'), new Date('2000-01-01T00:00:00Z')) 544 | }) 545 | 546 | test('Should not coerceTo timestamp', async (t) => { 547 | equal(coerceTo.timestamp('not a timestamp'), 'not a timestamp') 548 | }) 549 | 550 | test('Should coerceTo json', async (t) => { 551 | deepEqual(coerceTo.json('["a"]'), ['a']) 552 | deepEqual(coerceTo.json('{"a":1}'), { a: 1 }) 553 | 554 | deepEqual(coerceTo.any('["a"]'), ['a']) 555 | deepEqual(coerceTo.any('{"a":1}'), { a: 1 }) 556 | }) 557 | 558 | test('Should not coerceTo json', async (t) => { 559 | equal(coerceTo.json('not json'), 'not json') 560 | }) 561 | 562 | // *** empty fields *** // 563 | for 
(const method of allMethods) { 564 | test(`${method}: Should parse with { emptyFieldValue: "" }`, async (t) => { 565 | const options = { emptyFieldValue: '' } 566 | const enqueue = sinon.spy() 567 | const chunk = 'a,b,c\r\n,,\r\n' 568 | const parser = parse(options) 569 | parser[method](chunk, { enqueue }) 570 | equal(enqueue.callCount, 1) 571 | deepEqual(enqueue.firstCall.args, [ 572 | { data: { a: '', b: '', c: '' }, idx: 2 } 573 | ]) 574 | }) 575 | test(`${method}: Should parse with { emptyFieldValue: null }`, async (t) => { 576 | const options = { emptyFieldValue: null } 577 | const enqueue = sinon.spy() 578 | const chunk = 'a,b,c\r\n,,\r\n' 579 | const parser = parse(options) 580 | parser[method](chunk, { enqueue }) 581 | equal(enqueue.callCount, 1) 582 | deepEqual(enqueue.firstCall.args, [ 583 | { data: { a: null, b: null, c: null }, idx: 2 } 584 | ]) 585 | }) 586 | test(`${method}: Should parse with { emptyFieldValue: undefined }`, async (t) => { 587 | const options = { emptyFieldValue: undefined } 588 | const enqueue = sinon.spy() 589 | const chunk = 'a,b,c\r\n,,\r\n' 590 | const parser = parse(options) 591 | parser[method](chunk, { enqueue }) 592 | equal(enqueue.callCount, 1) 593 | deepEqual(enqueue.firstCall.args, [ 594 | { data: { a: undefined, b: undefined, c: undefined }, idx: 2 } 595 | ]) 596 | }) 597 | test(`${method}: Should parse with { emptyFieldValue: "" } and first field`, async (t) => { 598 | const options = { emptyFieldValue: '' } 599 | const enqueue = sinon.spy() 600 | const chunk = 'a,b,c\r\n1,,\r\n' 601 | const parser = parse(options) 602 | parser[method](chunk, { enqueue }) 603 | equal(enqueue.callCount, 1) 604 | deepEqual(enqueue.firstCall.args, [ 605 | { data: { a: '1', b: '', c: '' }, idx: 2 } 606 | ]) 607 | }) 608 | test(`${method}: Should parse with { emptyFieldValue: "" } and middle field`, async (t) => { 609 | const options = { emptyFieldValue: '' } 610 | const enqueue = sinon.spy() 611 | const chunk = 'a,b,c\r\n,2,\r\n' 612 | const 
parser = parse(options) 613 | parser[method](chunk, { enqueue }) 614 | equal(enqueue.callCount, 1) 615 | deepEqual(enqueue.firstCall.args, [ 616 | { data: { a: '', b: '2', c: '' }, idx: 2 } 617 | ]) 618 | }) 619 | test(`${method}: Should parse with { emptyFieldValue: "" } and last field`, async (t) => { 620 | const options = { emptyFieldValue: '' } 621 | const enqueue = sinon.spy() 622 | const chunk = 'a,b,c\r\n,,3\r\n' 623 | const parser = parse(options) 624 | parser[method](chunk, { enqueue }) 625 | equal(enqueue.callCount, 1) 626 | deepEqual(enqueue.firstCall.args, [ 627 | { data: { a: '', b: '', c: '3' }, idx: 2 } 628 | ]) 629 | }) 630 | } 631 | 632 | // *** Option: errorOnEmptyLines *** // 633 | for (const method of allMethods) { 634 | test(`${method}: Should parse with { errorOnEmptyLine: false }`, async (t) => { 635 | const options = { errorOnEmptyLine: false } 636 | const enqueue = sinon.spy() 637 | const chunk = '\r\na,b,c\r\n\r\n1,2,3\r\n' 638 | const parser = parse(options) 639 | parser[method](chunk, { enqueue }) 640 | equal(enqueue.callCount, 1) 641 | deepEqual(enqueue.firstCall.args, [ 642 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 643 | ]) 644 | }) 645 | 646 | test(`${method}: Should parse with { errorOnEmptyLine: true }`, async (t) => { 647 | const options = { errorOnEmptyLine: true } 648 | const enqueue = sinon.spy() 649 | const chunk = '\r\na,b,c\r\n\r\n1,2,3\r\n' 650 | const parser = parse(options) 651 | parser[method](chunk, { enqueue }) 652 | equal(enqueue.callCount, 3) 653 | deepEqual(enqueue.firstCall.args, [ 654 | { 655 | err: { code: 'EmptyLineExists', message: 'Empty line detected.' }, 656 | idx: 1 657 | } 658 | ]) 659 | deepEqual(enqueue.secondCall.args, [ 660 | { 661 | err: { code: 'EmptyLineExists', message: 'Empty line detected.' 
}, 662 | idx: 3 663 | } 664 | ]) 665 | deepEqual(enqueue.thirdCall.args, [ 666 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 667 | ]) 668 | }) 669 | } 670 | 671 | // *** Option: commentPrefixValue && errorOnComment *** // 672 | for (const method of allMethods) { 673 | test(`${method}: Should parse with { commentPrefixValue: "//", errorOnComment: false }`, async (t) => { 674 | const options = { commentPrefixValue: '//', errorOnComment: false } 675 | const enqueue = sinon.spy() 676 | const chunk = '// header\r\na,b,c\r\n// data\r\n1,2,3\r\n' 677 | const parser = parse(options) 678 | parser[method](chunk, { enqueue }) 679 | equal(enqueue.callCount, 1) 680 | deepEqual(enqueue.firstCall.args, [ 681 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 682 | ]) 683 | }) 684 | 685 | test(`${method}: Should parse with { commentPrefixValue: "//", errorOnComment: true }`, async (t) => { 686 | const options = { commentPrefixValue: '//', errorOnComment: true } 687 | const enqueue = sinon.spy() 688 | const chunk = '// header\r\na,b,c\r\n// data\r\n1,2,3\r\n' 689 | const parser = parse(options) 690 | parser[method](chunk, { enqueue }) 691 | equal(enqueue.callCount, 3) 692 | deepEqual(enqueue.firstCall.args, [ 693 | { err: { code: 'CommentExists', message: 'Comment detected.' }, idx: 1 } 694 | ]) 695 | deepEqual(enqueue.secondCall.args, [ 696 | { err: { code: 'CommentExists', message: 'Comment detected.' 
}, idx: 3 } 697 | ]) 698 | deepEqual(enqueue.thirdCall.args, [ 699 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 700 | ]) 701 | }) 702 | } 703 | 704 | // *** Option: errorOnMissingFields *** // 705 | for (const method of allMethods) { 706 | test(`${method}: Should parse with { errorOnMissingFields: false }`, async (t) => { 707 | const options = { errorOnMissingFields: false } 708 | const enqueue = sinon.spy() 709 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3\r\n1,2,3\r\n' 710 | const parser = parse(options) 711 | parser[method](chunk, { enqueue }) 712 | deepEqual(enqueue.firstCall.args, [{ data: { a: '1', b: '2' }, idx: 2 }]) 713 | deepEqual(enqueue.secondCall.args, [ 714 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 715 | ]) 716 | deepEqual(enqueue.thirdCall.args, [ 717 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 718 | ]) 719 | equal(enqueue.callCount, 3) 720 | }) 721 | 722 | test(`${method}: Should parse with { errorOnMissingFields: true }`, async (t) => { 723 | const options = { errorOnMissingFields: true } 724 | const enqueue = sinon.spy() 725 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3\r\n1,2,3\r\n' 726 | const parser = parse(options) 727 | parser[method](chunk, { enqueue }) 728 | deepEqual(enqueue.firstCall.args, [ 729 | { 730 | err: { 731 | code: 'MissingFields', 732 | message: 'Too few fields were parsed, expected 3.' 
733 | }, 734 | idx: 2 735 | } 736 | ]) 737 | deepEqual(enqueue.secondCall.args, [ 738 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 739 | ]) 740 | deepEqual(enqueue.thirdCall.args, [ 741 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 742 | ]) 743 | equal(enqueue.callCount, 3) 744 | }) 745 | } 746 | 747 | // *** Option: errorOnExtraFields *** // 748 | for (const method of allMethods) { 749 | test(`${method}: Should parse with { errorOnExtraFields: false }`, async (t) => { 750 | const options = { errorOnExtraFields: false } 751 | const enqueue = sinon.spy() 752 | const chunk = 'a,b,c\r\n1,2,3,4\r\n1,2,3\r\n1,2,3\r\n' 753 | const parser = parse(options) 754 | parser[method](chunk, { enqueue }) 755 | deepEqual(enqueue.firstCall.args, [ 756 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 757 | ]) 758 | deepEqual(enqueue.secondCall.args, [ 759 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 760 | ]) 761 | deepEqual(enqueue.thirdCall.args, [ 762 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 763 | ]) 764 | equal(enqueue.callCount, 3) 765 | }) 766 | 767 | test(`${method}: Should parse with { errorOnExtraFields: true }`, async (t) => { 768 | const options = { errorOnExtraFields: true } 769 | const enqueue = sinon.spy() 770 | const chunk = 'a,b,c\r\n1,2,3,4\r\n1,2,3\r\n1,2,3\r\n' 771 | const parser = parse(options) 772 | parser[method](chunk, { enqueue }) 773 | deepEqual(enqueue.firstCall.args, [ 774 | { 775 | err: { 776 | code: 'ExtraFields', 777 | message: 'Too many fields were parsed, expected 3.' 
778 | }, 779 | idx: 2 780 | } 781 | ]) 782 | deepEqual(enqueue.secondCall.args, [ 783 | { data: { a: '1', b: '2', c: '3' }, idx: 3 } 784 | ]) 785 | deepEqual(enqueue.thirdCall.args, [ 786 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 787 | ]) 788 | equal(enqueue.callCount, 3) 789 | }) 790 | } 791 | 792 | // *** Option: errorOnFieldsMismatch *** // 793 | /* for (const method of allMethods) { 794 | test(`${method}: Should parse with { errorOnFieldsMismatch: false }`, async (t) => { 795 | const options = { errorOnFieldsMismatch: false } 796 | const enqueue = sinon.spy() 797 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3,4\r\n1,2,3\r\n' 798 | const parser = parse(options) 799 | parser[method](chunk, { enqueue }) 800 | deepEqual(enqueue.firstCall.args, [ 801 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 802 | ]) 803 | equal(enqueue.callCount, 1) 804 | }) 805 | 806 | test(`${method}: Should parse with { errorOnFieldsMismatch: true }`, async (t) => { 807 | const options = { errorOnFieldsMismatch: true } 808 | const enqueue = sinon.spy() 809 | const chunk = 'a,b,c\r\n1,2\r\n1,2,3,4\r\n1,2,3\r\n' 810 | const parser = parse(options) 811 | parser[method](chunk, { enqueue }) 812 | deepEqual(enqueue.firstCall.args, [ 813 | { 814 | err: { 815 | code: 'FieldsMismatchTooFew', 816 | message: 'Too few fields were parsed, expected 3.' 817 | }, 818 | idx: 2 819 | } 820 | ]) 821 | deepEqual(enqueue.secondCall.args, [ 822 | { 823 | err: { 824 | code: 'FieldsMismatchTooMany', 825 | message: 'Too many fields were parsed, expected 3.' 
826 | }, 827 | idx: 3 828 | } 829 | ]) 830 | deepEqual(enqueue.thirdCall.args, [ 831 | { data: { a: '1', b: '2', c: '3' }, idx: 4 } 832 | ]) 833 | equal(enqueue.callCount, 3) 834 | }) 835 | } */ 836 | 837 | // *** Option: errorOnFieldMalformed *** // 838 | for (const method of quoteMethods) { 839 | test(`${method}: Should parse with { errorOnFieldMalformed }`, async (t) => { 840 | const options = { errorOnFieldMalformed: true } 841 | const enqueue = sinon.spy() 842 | const chunk = 'a,b,c\r\n"1","2","3"\r\n"4' 843 | const parser = parse(options) 844 | try { 845 | parser[method](chunk, { enqueue }) 846 | parser[method](parser.previousChunk(), { enqueue }, true) 847 | } catch (e) { 848 | console.log('catch') 849 | equal(e.message, 'QuotedFieldMalformed') 850 | } 851 | deepEqual(enqueue.firstCall.args, [ 852 | { data: { a: '1', b: '2', c: '3' }, idx: 2 } 853 | ]) 854 | equal(enqueue.callCount, 1) 855 | }) 856 | } 857 | 858 | // *** extra spaces *** // 859 | /* test(`${method}: Should parse with space padding`, async (t) => { 860 | const options = { } 861 | const enqueue = sinon.spy() 862 | let chunk = 'a,b,c\r\n"1" ,"2" ,"3" \r\n' 863 | const parser = parse(options) 864 | parser[method](chunk, { enqueue }) 865 | equal(enqueue.callCount, 1) 866 | deepEqual(enqueue.firstCall.args, [{data:{ a: '1', b: '2', c: '3' },idx:2}]) 867 | }) */ 868 | --------------------------------------------------------------------------------