├── .gitignore ├── LICENSE ├── README.md ├── csv2json.js ├── package.json ├── test-browser.html └── test-node.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Martin Drapeau 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSVJSON csv2json() function 2 | 3 | Single function `csv2json` to reliably convert CSV to JSON. Self contained without dependencies. Used to power CSVJSON the online tool found at [www.csvjson.com/csv2json](https://www.csvjson.com/csv2json). Used by thousands everyday. 4 | 5 | [npm package here](https://www.npmjs.com/package/csvjson-csv2json) 6 | 7 | ## Usage 8 | 9 | Simply call `csv2json` passing a string to obtain JSON. The string may be CSV (comma separated values), TSV (tab separated values) or semi-colon separated values (typically used in French Excel). It will auto-detect the separator although you may override or force it via the `separator` option. 10 | 11 | ### Node example 12 | 13 | ```js 14 | const csv2json = require('./csv2json.js'); 15 | const csv = `album, year, US_peak_chart_post 16 | The White Stripes, 1999, - 17 | De Stijl, 2000, - 18 | White Blood Cells, 2001, 61 19 | Elephant, 2003, 6 20 | Get Behind Me Satan, 2005, 3 21 | Icky Thump, 2007, 2 22 | Under Great White Northern Lights, 2010, 11 23 | Live in Mississippi, 2011, - 24 | Live at the Gold Dollar, 2012, - 25 | Nine Miles from the White City, 2013, -`; 26 | 27 | const json = csv2json(csv, {parseNumbers: true}); 28 | console.log(json); 29 | ``` 30 | 31 | ### Browser example 32 | 33 | Note: In the browser, global namespace `CSVJSON` is created. It contains the `csv2json` function. 34 | 35 | ```html 36 | 37 | 53 | ``` 54 | 55 | In both cases, you would get this in the console: 56 | 57 | ```json 58 | [ 59 | { 60 | "album": "The White Stripes", 61 | "year": 1999, 62 | "US_peak_chart_post": "-" 63 | }, 64 | { 65 | "album": "De Stijl", 66 | "year": 2000, 67 | "US_peak_chart_post": "-" 68 | }, 69 | { 70 | "album": "White Blood Cells", 71 | "year": 2001, 72 | "US_peak_chart_post": 61 73 | }, 74 | { 75 | "album": "Elephant", 76 | "year": 2003, 77 | "US_peak_chart_post": 6 78 | }, 79 | { 80 | "album": "Get Behind Me Satan", 81 | "year": 2005, 82 | "US_peak_chart_post": 3 83 | }, 84 | { 85 | "album": "Icky Thump", 86 | "year": 2007, 87 | "US_peak_chart_post": 2 88 | }, 89 | { 90 | "album": "Under Great White Northern Lights", 91 | "year": 2010, 92 | "US_peak_chart_post": 11 93 | }, 94 | { 95 | "album": "Live in Mississippi", 96 | "year": 2011, 97 | "US_peak_chart_post": "-" 98 | }, 99 | { 100 | "album": "Live at the Gold Dollar", 101 | "year": 2012, 102 | "US_peak_chart_post": "-" 103 | }, 104 | { 105 | "album": "Nine Miles from the White City", 106 | "year": 2013, 107 | "US_peak_chart_post": "-" 108 | } 109 | ] 110 | ``` 111 | 112 | ## Documentation 113 | 114 | `csv2json` supports a number of options passed as an optional hash: 115 | 116 | - `separator`: Character which acts as separator. If omitted, will attempt to detect comma `,`, semi-colon `;` or tab `\t`. 117 | - `parseNumbers`: If set to `true` will attempt to convert a value to a number, if possible. 118 | - `parseJSON`: If set to `true` will attempt to convert a value to a valid JSON value if possible. Detects numbers, `null`, `false`, `true`, `[]` and `{}`. 119 | - `transpose`: If set to `true` will pivot the table. The first column becomes the header. 120 | - `hash`: If set to `true` will use the first column as a key and return a hash instead of an array of objects. 121 | 122 | You can of course test all of these options online on [www.csvjson.com/csv2json](https://www.csvjson.com/csv2json). 123 | 124 | ## Tests 125 | 126 | Run the tests in your browser by opening `test-browser.html`. 127 | 128 | Run the tests through node: 129 | 130 | ```sh 131 | node test-node.js 132 | ``` 133 | 134 | ## Companion functions 135 | 136 | [json2csv](https://github.com/martindrapeau/csvjson-csv2json) to convert JSON to CSV. [npm package here](https://www.npmjs.com/package/csvjson-json2csv). 137 | 138 | [json_beautifier](https://github.com/martindrapeau/csvjson-json_beautifier) to beautify and format your JSON. [npm package here](https://www.npmjs.com/package/csvjson-json_beautifier). 139 | 140 | [JSON2_mod](https://github.com/martindrapeau/json2-mod) a replacement of `JSON` with more options to format your JSON. [npm package here](https://www.npmjs.com/package/json2-mod). 141 | -------------------------------------------------------------------------------- /csv2json.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | /** 3 | * 4 | * Node: 5 | * const csv2json = require('./csv2json.js'); 6 | * csv2json(csv, options) 7 | * 8 | * Browser: 9 | * CSVJSON.csv2json(csv, options) 10 | * 11 | * Converts CSV to JSON. Returns an object. Use JSON.stringify to convert to a string. 12 | * 13 | * Available options: 14 | * - separator: Optional. Character which acts as separator. If omitted, 15 | * will attempt to detect comma (,), semi-colon (;) or tab (\t). 16 | * - parseNumbers: Optional. Will attempt to convert a value to a number, if possible. 17 | * - parseJSON: Optional. Will attempt to convert a value to a valid JSON value if possible. 18 | * Detects numbers, null, false, true, [] and {}. 19 | * - transpose: Optional. Will pivot the table. Default is false. 20 | * - hash: Optional. Will use the first column as a key and return a hash instead of 21 | * an array of objects. Default is false. 22 | * 23 | * Copyright (c) 2014-2019 Martin Drapeau 24 | * 25 | */ 26 | 27 | var errorDetectingSeparator = "We could not detect the separator.", 28 | errorNotWellFormed = "CSV is not well formed", 29 | errorEmpty = "Empty CSV. Please provide something.", 30 | errorEmptyHeader = "Could not detect header. Ensure first row contains your column headers.", 31 | separators = [",", ";", "\t"], 32 | pegjsSeparatorNames = { 33 | ",": "comma", 34 | ";": "semicolon", 35 | "\t": "tab" 36 | }; 37 | 38 | // Picks the separator we find the most. 39 | function detectSeparator(csv) { 40 | var counts = {}, 41 | sepMax; 42 | separators.forEach(function(sep, i) { 43 | var re = new RegExp(sep, 'g'); 44 | counts[sep] = (csv.match(re) || []).length; 45 | sepMax = !sepMax || counts[sep] > counts[sepMax] ? sep : sepMax; 46 | }); 47 | return sepMax; 48 | } 49 | 50 | // Source: https://stackoverflow.com/questions/4856717/javascript-equivalent-of-pythons-zip-function 51 | function zip() { 52 | var args = [].slice.call(arguments); 53 | var longest = args.reduce(function(a,b) { 54 | return a.length>b.length ? a : b; 55 | }, []); 56 | 57 | return longest.map(function(_,i) { 58 | return args.map(function(array) { 59 | return array[i]; 60 | }); 61 | }); 62 | } 63 | 64 | function uniquify(keys) { 65 | var counts = {}; 66 | for (var i = 0; i < keys.length; i++) { 67 | var key = keys[i]; 68 | if (counts[key] === undefined) { 69 | counts[key] = 0; 70 | } else { 71 | counts[key]++; 72 | } 73 | } 74 | 75 | var result = []; 76 | for (var i = keys.length-1; i >= 0; i--) { 77 | var key = keys[i]; 78 | if (counts[key] > 0) key = key + '__' + counts[key]--; 79 | result.unshift(key); 80 | } 81 | 82 | return result; 83 | } 84 | 85 | function convert(csv, options) { 86 | options || (options = {}); 87 | if (csv.length == 0) throw errorEmpty; 88 | 89 | var separator = options.separator || detectSeparator(csv); 90 | if (!separator) throw errorDetectingSeparator; 91 | 92 | var a = []; 93 | try { 94 | var a = csvParser.parse(csv, pegjsSeparatorNames[separator]); 95 | } catch(error) { 96 | var start = csv.lastIndexOf('\n', error.offset), 97 | end = csv.indexOf('\n', error.offset), 98 | line = csv.substring(start >= -1 ? start : 0, end > -1 ? end : csv.length); 99 | throw error.message + ' On line ' + error.line + ' and column ' + error.column + '.\n' + line; 100 | } 101 | 102 | if (options.transpose) a = zip.apply(this, a); 103 | 104 | var keys = a.shift(); 105 | if (keys.length == 0) throw errorEmptyHeader; 106 | keys = keys.map(function(key) { 107 | return key.trim().replace(/(^")|("$)/g, ''); 108 | }); 109 | 110 | keys = uniquify(keys); 111 | 112 | var json = options.hash ? {} : []; 113 | for (var l = 0; l < a.length; l++) { 114 | var row = {}, 115 | hashKey; 116 | for (var i = 0; i < keys.length; i++) { 117 | var value = (a[l][i]||'').trim().replace(/(^")|("$)/g, ''); 118 | var number = value === "" ? NaN : value - 0; 119 | if (options.hash && i == 0) { 120 | hashKey = value; 121 | } 122 | else { 123 | if (options.parseJSON || options.parseNumbers && !isNaN(number)) { 124 | try { 125 | row[keys[i]] = JSON.parse(value); 126 | } catch(error) { 127 | row[keys[i]] = value; 128 | } 129 | } 130 | else { 131 | row[keys[i]] = value; 132 | } 133 | } 134 | } 135 | if (options.hash) 136 | json[hashKey] = row; 137 | else 138 | json.push(row); 139 | } 140 | 141 | return json; 142 | }; 143 | 144 | var csvParser = (function(){ 145 | /* 146 | * Generated by PEG.js 0.7.0. 147 | * 148 | * http://pegjs.majda.cz/ 149 | * 150 | * source: https://gist.github.com/trevordixon/3362830 151 | * Martin 2018-04-2: Added parse_semicolon function. 152 | * 153 | */ 154 | 155 | function quote(s) { 156 | /* 157 | * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a 158 | * string literal except for the closing quote character, backslash, 159 | * carriage return, line separator, paragraph separator, and line feed. 160 | * Any character may appear in the form of an escape sequence. 161 | * 162 | * For portability, we also escape escape all control and non-ASCII 163 | * characters. Note that "\0" and "\v" escape sequences are not used 164 | * because JSHint does not like the first and IE the second. 165 | */ 166 | return '"' + s 167 | .replace(/\\/g, '\\\\') // backslash 168 | .replace(/"/g, '\\"') // closing quote character 169 | .replace(/\x08/g, '\\b') // backspace 170 | .replace(/\t/g, '\\t') // horizontal tab 171 | .replace(/\n/g, '\\n') // line feed 172 | .replace(/\f/g, '\\f') // form feed 173 | .replace(/\r/g, '\\r') // carriage return 174 | .replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, escape) 175 | + '"'; 176 | } 177 | 178 | var result = { 179 | /* 180 | * Parses the input with a generated parser. If the parsing is successfull, 181 | * returns a value explicitly or implicitly specified by the grammar from 182 | * which the parser was generated (see |PEG.buildParser|). If the parsing is 183 | * unsuccessful, throws |PEG.parser.SyntaxError| describing the error. 184 | */ 185 | parse: function(input, startRule) { 186 | var parseFunctions = { 187 | "comma": parse_comma, 188 | "semicolon": parse_semicolon, 189 | "tab": parse_tab, 190 | "sv": parse_sv, 191 | "line": parse_line, 192 | "field": parse_field, 193 | "char": parse_char 194 | }; 195 | 196 | if (startRule !== undefined) { 197 | if (parseFunctions[startRule] === undefined) { 198 | throw new Error("Invalid rule name: " + quote(startRule) + "."); 199 | } 200 | } else { 201 | startRule = "comma"; 202 | } 203 | 204 | var pos = 0; 205 | var reportFailures = 0; 206 | var rightmostFailuresPos = 0; 207 | var rightmostFailuresExpected = []; 208 | 209 | function padLeft(input, padding, length) { 210 | var result = input; 211 | 212 | var padLength = length - input.length; 213 | for (var i = 0; i < padLength; i++) { 214 | result = padding + result; 215 | } 216 | 217 | return result; 218 | } 219 | 220 | function escape(ch) { 221 | var charCode = ch.charCodeAt(0); 222 | var escapeChar; 223 | var length; 224 | 225 | if (charCode <= 0xFF) { 226 | escapeChar = 'x'; 227 | length = 2; 228 | } else { 229 | escapeChar = 'u'; 230 | length = 4; 231 | } 232 | 233 | return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length); 234 | } 235 | 236 | function matchFailed(failure) { 237 | if (pos < rightmostFailuresPos) { 238 | return; 239 | } 240 | 241 | if (pos > rightmostFailuresPos) { 242 | rightmostFailuresPos = pos; 243 | rightmostFailuresExpected = []; 244 | } 245 | 246 | rightmostFailuresExpected.push(failure); 247 | } 248 | 249 | function parse_comma() { 250 | var result0, result1; 251 | var pos0, pos1; 252 | 253 | pos0 = pos; 254 | pos1 = pos; 255 | result0 = (function(offset) { return separator = ','; })(pos) ? "" : null; 256 | if (result0 !== null) { 257 | result1 = parse_sv(); 258 | if (result1 !== null) { 259 | result0 = [result0, result1]; 260 | } else { 261 | result0 = null; 262 | pos = pos1; 263 | } 264 | } else { 265 | result0 = null; 266 | pos = pos1; 267 | } 268 | if (result0 !== null) { 269 | result0 = (function(offset, sv) { return sv; })(pos0, result0[1]); 270 | } 271 | if (result0 === null) { 272 | pos = pos0; 273 | } 274 | return result0; 275 | } 276 | 277 | function parse_semicolon() { 278 | var result0, result1; 279 | var pos0, pos1; 280 | 281 | pos0 = pos; 282 | pos1 = pos; 283 | result0 = (function(offset) { return separator = ';'; })(pos) ? "" : null; 284 | if (result0 !== null) { 285 | result1 = parse_sv(); 286 | if (result1 !== null) { 287 | result0 = [result0, result1]; 288 | } else { 289 | result0 = null; 290 | pos = pos1; 291 | } 292 | } else { 293 | result0 = null; 294 | pos = pos1; 295 | } 296 | if (result0 !== null) { 297 | result0 = (function(offset, sv) { return sv; })(pos0, result0[1]); 298 | } 299 | if (result0 === null) { 300 | pos = pos0; 301 | } 302 | return result0; 303 | } 304 | 305 | function parse_tab() { 306 | var result0, result1; 307 | var pos0, pos1; 308 | 309 | pos0 = pos; 310 | pos1 = pos; 311 | result0 = (function(offset) { return separator = '\t'; })(pos) ? "" : null; 312 | if (result0 !== null) { 313 | result1 = parse_sv(); 314 | if (result1 !== null) { 315 | result0 = [result0, result1]; 316 | } else { 317 | result0 = null; 318 | pos = pos1; 319 | } 320 | } else { 321 | result0 = null; 322 | pos = pos1; 323 | } 324 | if (result0 !== null) { 325 | result0 = (function(offset, sv) { return sv; })(pos0, result0[1]); 326 | } 327 | if (result0 === null) { 328 | pos = pos0; 329 | } 330 | return result0; 331 | } 332 | 333 | function parse_sv() { 334 | var result0, result1, result2, result3, result4; 335 | var pos0, pos1, pos2, pos3; 336 | 337 | pos0 = pos; 338 | pos1 = pos; 339 | result0 = []; 340 | if (/^[\n\r]/.test(input.charAt(pos))) { 341 | result1 = input.charAt(pos); 342 | pos++; 343 | } else { 344 | result1 = null; 345 | if (reportFailures === 0) { 346 | matchFailed("[\\n\\r]"); 347 | } 348 | } 349 | while (result1 !== null) { 350 | result0.push(result1); 351 | if (/^[\n\r]/.test(input.charAt(pos))) { 352 | result1 = input.charAt(pos); 353 | pos++; 354 | } else { 355 | result1 = null; 356 | if (reportFailures === 0) { 357 | matchFailed("[\\n\\r]"); 358 | } 359 | } 360 | } 361 | if (result0 !== null) { 362 | result1 = parse_line(); 363 | if (result1 !== null) { 364 | result2 = []; 365 | pos2 = pos; 366 | pos3 = pos; 367 | if (/^[\n\r]/.test(input.charAt(pos))) { 368 | result4 = input.charAt(pos); 369 | pos++; 370 | } else { 371 | result4 = null; 372 | if (reportFailures === 0) { 373 | matchFailed("[\\n\\r]"); 374 | } 375 | } 376 | if (result4 !== null) { 377 | result3 = []; 378 | while (result4 !== null) { 379 | result3.push(result4); 380 | if (/^[\n\r]/.test(input.charAt(pos))) { 381 | result4 = input.charAt(pos); 382 | pos++; 383 | } else { 384 | result4 = null; 385 | if (reportFailures === 0) { 386 | matchFailed("[\\n\\r]"); 387 | } 388 | } 389 | } 390 | } else { 391 | result3 = null; 392 | } 393 | if (result3 !== null) { 394 | result4 = parse_line(); 395 | if (result4 !== null) { 396 | result3 = [result3, result4]; 397 | } else { 398 | result3 = null; 399 | pos = pos3; 400 | } 401 | } else { 402 | result3 = null; 403 | pos = pos3; 404 | } 405 | if (result3 !== null) { 406 | result3 = (function(offset, data) { return data; })(pos2, result3[1]); 407 | } 408 | if (result3 === null) { 409 | pos = pos2; 410 | } 411 | while (result3 !== null) { 412 | result2.push(result3); 413 | pos2 = pos; 414 | pos3 = pos; 415 | if (/^[\n\r]/.test(input.charAt(pos))) { 416 | result4 = input.charAt(pos); 417 | pos++; 418 | } else { 419 | result4 = null; 420 | if (reportFailures === 0) { 421 | matchFailed("[\\n\\r]"); 422 | } 423 | } 424 | if (result4 !== null) { 425 | result3 = []; 426 | while (result4 !== null) { 427 | result3.push(result4); 428 | if (/^[\n\r]/.test(input.charAt(pos))) { 429 | result4 = input.charAt(pos); 430 | pos++; 431 | } else { 432 | result4 = null; 433 | if (reportFailures === 0) { 434 | matchFailed("[\\n\\r]"); 435 | } 436 | } 437 | } 438 | } else { 439 | result3 = null; 440 | } 441 | if (result3 !== null) { 442 | result4 = parse_line(); 443 | if (result4 !== null) { 444 | result3 = [result3, result4]; 445 | } else { 446 | result3 = null; 447 | pos = pos3; 448 | } 449 | } else { 450 | result3 = null; 451 | pos = pos3; 452 | } 453 | if (result3 !== null) { 454 | result3 = (function(offset, data) { return data; })(pos2, result3[1]); 455 | } 456 | if (result3 === null) { 457 | pos = pos2; 458 | } 459 | } 460 | if (result2 !== null) { 461 | result3 = []; 462 | if (/^[\n\r]/.test(input.charAt(pos))) { 463 | result4 = input.charAt(pos); 464 | pos++; 465 | } else { 466 | result4 = null; 467 | if (reportFailures === 0) { 468 | matchFailed("[\\n\\r]"); 469 | } 470 | } 471 | while (result4 !== null) { 472 | result3.push(result4); 473 | if (/^[\n\r]/.test(input.charAt(pos))) { 474 | result4 = input.charAt(pos); 475 | pos++; 476 | } else { 477 | result4 = null; 478 | if (reportFailures === 0) { 479 | matchFailed("[\\n\\r]"); 480 | } 481 | } 482 | } 483 | if (result3 !== null) { 484 | result0 = [result0, result1, result2, result3]; 485 | } else { 486 | result0 = null; 487 | pos = pos1; 488 | } 489 | } else { 490 | result0 = null; 491 | pos = pos1; 492 | } 493 | } else { 494 | result0 = null; 495 | pos = pos1; 496 | } 497 | } else { 498 | result0 = null; 499 | pos = pos1; 500 | } 501 | if (result0 !== null) { 502 | result0 = (function(offset, first, rest) { rest.unshift(first); return rest; })(pos0, result0[1], result0[2]); 503 | } 504 | if (result0 === null) { 505 | pos = pos0; 506 | } 507 | return result0; 508 | } 509 | 510 | function parse_line() { 511 | var result0, result1, result2, result3, result4; 512 | var pos0, pos1, pos2, pos3; 513 | 514 | pos0 = pos; 515 | pos1 = pos; 516 | result0 = parse_field(); 517 | if (result0 !== null) { 518 | result1 = []; 519 | pos2 = pos; 520 | pos3 = pos; 521 | if (input.length > pos) { 522 | result2 = input.charAt(pos); 523 | pos++; 524 | } else { 525 | result2 = null; 526 | if (reportFailures === 0) { 527 | matchFailed("any character"); 528 | } 529 | } 530 | if (result2 !== null) { 531 | result3 = (function(offset, char) { return char == separator; })(pos, result2) ? "" : null; 532 | if (result3 !== null) { 533 | result4 = parse_field(); 534 | if (result4 !== null) { 535 | result2 = [result2, result3, result4]; 536 | } else { 537 | result2 = null; 538 | pos = pos3; 539 | } 540 | } else { 541 | result2 = null; 542 | pos = pos3; 543 | } 544 | } else { 545 | result2 = null; 546 | pos = pos3; 547 | } 548 | if (result2 !== null) { 549 | result2 = (function(offset, char, text) { return text; })(pos2, result2[0], result2[2]); 550 | } 551 | if (result2 === null) { 552 | pos = pos2; 553 | } 554 | while (result2 !== null) { 555 | result1.push(result2); 556 | pos2 = pos; 557 | pos3 = pos; 558 | if (input.length > pos) { 559 | result2 = input.charAt(pos); 560 | pos++; 561 | } else { 562 | result2 = null; 563 | if (reportFailures === 0) { 564 | matchFailed("any character"); 565 | } 566 | } 567 | if (result2 !== null) { 568 | result3 = (function(offset, char) { return char == separator; })(pos, result2) ? "" : null; 569 | if (result3 !== null) { 570 | result4 = parse_field(); 571 | if (result4 !== null) { 572 | result2 = [result2, result3, result4]; 573 | } else { 574 | result2 = null; 575 | pos = pos3; 576 | } 577 | } else { 578 | result2 = null; 579 | pos = pos3; 580 | } 581 | } else { 582 | result2 = null; 583 | pos = pos3; 584 | } 585 | if (result2 !== null) { 586 | result2 = (function(offset, char, text) { return text; })(pos2, result2[0], result2[2]); 587 | } 588 | if (result2 === null) { 589 | pos = pos2; 590 | } 591 | } 592 | if (result1 !== null) { 593 | result2 = (function(offset, first, rest) { return !!first || rest.length; })(pos, result0, result1) ? "" : null; 594 | if (result2 !== null) { 595 | result0 = [result0, result1, result2]; 596 | } else { 597 | result0 = null; 598 | pos = pos1; 599 | } 600 | } else { 601 | result0 = null; 602 | pos = pos1; 603 | } 604 | } else { 605 | result0 = null; 606 | pos = pos1; 607 | } 608 | if (result0 !== null) { 609 | result0 = (function(offset, first, rest) { rest.unshift(first); return rest; })(pos0, result0[0], result0[1]); 610 | } 611 | if (result0 === null) { 612 | pos = pos0; 613 | } 614 | return result0; 615 | } 616 | 617 | function parse_field() { 618 | var result0, result1, result2; 619 | var pos0, pos1, pos2; 620 | 621 | pos0 = pos; 622 | pos1 = pos; 623 | if (input.charCodeAt(pos) === 34) { 624 | result0 = "\""; 625 | pos++; 626 | } else { 627 | result0 = null; 628 | if (reportFailures === 0) { 629 | matchFailed("\"\\\"\""); 630 | } 631 | } 632 | if (result0 !== null) { 633 | result1 = []; 634 | result2 = parse_char(); 635 | while (result2 !== null) { 636 | result1.push(result2); 637 | result2 = parse_char(); 638 | } 639 | if (result1 !== null) { 640 | if (input.charCodeAt(pos) === 34) { 641 | result2 = "\""; 642 | pos++; 643 | } else { 644 | result2 = null; 645 | if (reportFailures === 0) { 646 | matchFailed("\"\\\"\""); 647 | } 648 | } 649 | if (result2 !== null) { 650 | result0 = [result0, result1, result2]; 651 | } else { 652 | result0 = null; 653 | pos = pos1; 654 | } 655 | } else { 656 | result0 = null; 657 | pos = pos1; 658 | } 659 | } else { 660 | result0 = null; 661 | pos = pos1; 662 | } 663 | if (result0 !== null) { 664 | result0 = (function(offset, text) { return text.join(''); })(pos0, result0[1]); 665 | } 666 | if (result0 === null) { 667 | pos = pos0; 668 | } 669 | if (result0 === null) { 670 | pos0 = pos; 671 | result0 = []; 672 | pos1 = pos; 673 | pos2 = pos; 674 | if (/^[^\n\r]/.test(input.charAt(pos))) { 675 | result1 = input.charAt(pos); 676 | pos++; 677 | } else { 678 | result1 = null; 679 | if (reportFailures === 0) { 680 | matchFailed("[^\\n\\r]"); 681 | } 682 | } 683 | if (result1 !== null) { 684 | result2 = (function(offset, char) { return char != separator; })(pos, result1) ? "" : null; 685 | if (result2 !== null) { 686 | result1 = [result1, result2]; 687 | } else { 688 | result1 = null; 689 | pos = pos2; 690 | } 691 | } else { 692 | result1 = null; 693 | pos = pos2; 694 | } 695 | if (result1 !== null) { 696 | result1 = (function(offset, char) { return char; })(pos1, result1[0]); 697 | } 698 | if (result1 === null) { 699 | pos = pos1; 700 | } 701 | while (result1 !== null) { 702 | result0.push(result1); 703 | pos1 = pos; 704 | pos2 = pos; 705 | if (/^[^\n\r]/.test(input.charAt(pos))) { 706 | result1 = input.charAt(pos); 707 | pos++; 708 | } else { 709 | result1 = null; 710 | if (reportFailures === 0) { 711 | matchFailed("[^\\n\\r]"); 712 | } 713 | } 714 | if (result1 !== null) { 715 | result2 = (function(offset, char) { return char != separator; })(pos, result1) ? "" : null; 716 | if (result2 !== null) { 717 | result1 = [result1, result2]; 718 | } else { 719 | result1 = null; 720 | pos = pos2; 721 | } 722 | } else { 723 | result1 = null; 724 | pos = pos2; 725 | } 726 | if (result1 !== null) { 727 | result1 = (function(offset, char) { return char; })(pos1, result1[0]); 728 | } 729 | if (result1 === null) { 730 | pos = pos1; 731 | } 732 | } 733 | if (result0 !== null) { 734 | result0 = (function(offset, text) { return text.join(''); })(pos0, result0); 735 | } 736 | if (result0 === null) { 737 | pos = pos0; 738 | } 739 | } 740 | return result0; 741 | } 742 | 743 | function parse_char() { 744 | var result0, result1; 745 | var pos0, pos1; 746 | 747 | pos0 = pos; 748 | pos1 = pos; 749 | if (input.charCodeAt(pos) === 34) { 750 | result0 = "\""; 751 | pos++; 752 | } else { 753 | result0 = null; 754 | if (reportFailures === 0) { 755 | matchFailed("\"\\\"\""); 756 | } 757 | } 758 | if (result0 !== null) { 759 | if (input.charCodeAt(pos) === 34) { 760 | result1 = "\""; 761 | pos++; 762 | } else { 763 | result1 = null; 764 | if (reportFailures === 0) { 765 | matchFailed("\"\\\"\""); 766 | } 767 | } 768 | if (result1 !== null) { 769 | result0 = [result0, result1]; 770 | } else { 771 | result0 = null; 772 | pos = pos1; 773 | } 774 | } else { 775 | result0 = null; 776 | pos = pos1; 777 | } 778 | if (result0 !== null) { 779 | result0 = (function(offset) { return '"'; })(pos0); 780 | } 781 | if (result0 === null) { 782 | pos = pos0; 783 | } 784 | if (result0 === null) { 785 | if (/^[^"]/.test(input.charAt(pos))) { 786 | result0 = input.charAt(pos); 787 | pos++; 788 | } else { 789 | result0 = null; 790 | if (reportFailures === 0) { 791 | matchFailed("[^\"]"); 792 | } 793 | } 794 | } 795 | return result0; 796 | } 797 | 798 | 799 | function cleanupExpected(expected) { 800 | expected.sort(); 801 | 802 | var lastExpected = null; 803 | var cleanExpected = []; 804 | for (var i = 0; i < expected.length; i++) { 805 | if (expected[i] !== lastExpected) { 806 | cleanExpected.push(expected[i]); 807 | lastExpected = expected[i]; 808 | } 809 | } 810 | return cleanExpected; 811 | } 812 | 813 | function computeErrorPosition() { 814 | /* 815 | * The first idea was to use |String.split| to break the input up to the 816 | * error position along newlines and derive the line and column from 817 | * there. However IE's |split| implementation is so broken that it was 818 | * enough to prevent it. 819 | */ 820 | 821 | var line = 1; 822 | var column = 1; 823 | var seenCR = false; 824 | 825 | for (var i = 0; i < Math.max(pos, rightmostFailuresPos); i++) { 826 | var ch = input.charAt(i); 827 | if (ch === "\n") { 828 | if (!seenCR) { line++; } 829 | column = 1; 830 | seenCR = false; 831 | } else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") { 832 | line++; 833 | column = 1; 834 | seenCR = true; 835 | } else { 836 | column++; 837 | seenCR = false; 838 | } 839 | } 840 | 841 | return { line: line, column: column }; 842 | } 843 | 844 | 845 | var separator = ','; 846 | 847 | 848 | var result = parseFunctions[startRule](); 849 | 850 | /* 851 | * The parser is now in one of the following three states: 852 | * 853 | * 1. The parser successfully parsed the whole input. 854 | * 855 | * - |result !== null| 856 | * - |pos === input.length| 857 | * - |rightmostFailuresExpected| may or may not contain something 858 | * 859 | * 2. The parser successfully parsed only a part of the input. 860 | * 861 | * - |result !== null| 862 | * - |pos < input.length| 863 | * - |rightmostFailuresExpected| may or may not contain something 864 | * 865 | * 3. The parser did not successfully parse any part of the input. 866 | * 867 | * - |result === null| 868 | * - |pos === 0| 869 | * - |rightmostFailuresExpected| contains at least one failure 870 | * 871 | * All code following this comment (including called functions) must 872 | * handle these states. 873 | */ 874 | if (result === null || pos !== input.length) { 875 | var offset = Math.max(pos, rightmostFailuresPos); 876 | var found = offset < input.length ? input.charAt(offset) : null; 877 | var errorPosition = computeErrorPosition(); 878 | 879 | throw new this.SyntaxError( 880 | cleanupExpected(rightmostFailuresExpected), 881 | found, 882 | offset, 883 | errorPosition.line, 884 | errorPosition.column 885 | ); 886 | } 887 | 888 | return result; 889 | }, 890 | 891 | /* Returns the parser source code. */ 892 | toSource: function() { return this._source; } 893 | }; 894 | 895 | /* Thrown when a parser encounters a syntax error. */ 896 | 897 | result.SyntaxError = function(expected, found, offset, line, column) { 898 | function buildMessage(expected, found) { 899 | var expectedHumanized, foundHumanized; 900 | 901 | switch (expected.length) { 902 | case 0: 903 | expectedHumanized = "end of input"; 904 | break; 905 | case 1: 906 | expectedHumanized = expected[0]; 907 | break; 908 | default: 909 | expectedHumanized = expected.slice(0, expected.length - 1).join(", ") 910 | + " or " 911 | + expected[expected.length - 1]; 912 | } 913 | 914 | foundHumanized = found ? quote(found) : "end of input"; 915 | 916 | return "Expected " + expectedHumanized + " but " + foundHumanized + " found."; 917 | } 918 | 919 | this.name = "SyntaxError"; 920 | this.expected = expected; 921 | this.found = found; 922 | this.message = buildMessage(expected, found); 923 | this.offset = offset; 924 | this.line = line; 925 | this.column = column; 926 | }; 927 | 928 | result.SyntaxError.prototype = Error.prototype; 929 | 930 | return result; 931 | })(); 932 | 933 | 934 | // CommonJS or Browser 935 | if (typeof exports !== 'undefined') { 936 | if (typeof module !== 'undefined' && module.exports) { 937 | exports = module.exports = convert; 938 | } 939 | exports.csv2json = convert; 940 | } else { 941 | this.CSVJSON || (this.CSVJSON = {}); 942 | this.CSVJSON.csv2json = convert; 943 | } 944 | 945 | }).call(this); -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "csvjson-csv2json", 3 | "description": "Converts CSV to JSON. Powers the most used online tool CSVJSON https://www.csvjson.com/csv2json. Used by thousands everyday.", 4 | "version": "5.0.6", 5 | "author": "Martin Drapeau ", 6 | "contributors": [], 7 | "dependencies": { }, 8 | "devDependencies": { }, 9 | "main": "csv2json.js", 10 | "keywords": ["csv", "json", "parse", "convert"], 11 | "repository": { 12 | "type": "git", 13 | "url": "git://github.com/martindrapeau/csvjson-csv2json.git" 14 | }, 15 | "license": "MIT" 16 | } -------------------------------------------------------------------------------- /test-browser.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | CSVJSON's csv2json Test 5 | 6 | 7 |

CSVJSON's csv2json Test

8 | 9 |

Test

10 |
 11 | const csv = `album, year, US_peak_chart_post
 12 | The White Stripes, 1999, -
 13 | De Stijl, 2000, -
 14 | White Blood Cells, 2001, 61
 15 | Elephant, 2003, 6
 16 | Get Behind Me Satan, 2005, 3
 17 | Icky Thump, 2007, 2
 18 | Under Great White Northern Lights, 2010, 11
 19 | Live in Mississippi, 2011, -
 20 | Live at the Gold Dollar, 2012, -
 21 | Nine Miles from the White City, 2013, -`;
 22 | 
 23 | const json = CSVJSON.csv2json(csv, {parseNumbers: true});
 24 | 
25 |

Expected

26 |
[
 27 |   {
 28 |     "album": "The White Stripes",
 29 |     "year": 1999,
 30 |     "US_peak_chart_post": "-"
 31 |   },
 32 |   {
 33 |     "album": "De Stijl",
 34 |     "year": 2000,
 35 |     "US_peak_chart_post": "-"
 36 |   },
 37 |   {
 38 |     "album": "White Blood Cells",
 39 |     "year": 2001,
 40 |     "US_peak_chart_post": 61
 41 |   },
 42 |   {
 43 |     "album": "Elephant",
 44 |     "year": 2003,
 45 |     "US_peak_chart_post": 6
 46 |   },
 47 |   {
 48 |     "album": "Get Behind Me Satan",
 49 |     "year": 2005,
 50 |     "US_peak_chart_post": 3
 51 |   },
 52 |   {
 53 |     "album": "Icky Thump",
 54 |     "year": 2007,
 55 |     "US_peak_chart_post": 2
 56 |   },
 57 |   {
 58 |     "album": "Under Great White Northern Lights",
 59 |     "year": 2010,
 60 |     "US_peak_chart_post": 11
 61 |   },
 62 |   {
 63 |     "album": "Live in Mississippi",
 64 |     "year": 2011,
 65 |     "US_peak_chart_post": "-"
 66 |   },
 67 |   {
 68 |     "album": "Live at the Gold Dollar",
 69 |     "year": 2012,
 70 |     "US_peak_chart_post": "-"
 71 |   },
 72 |   {
 73 |     "album": "Nine Miles from the White City",
 74 |     "year": 2013,
 75 |     "US_peak_chart_post": "-"
 76 |   }
 77 | ]
78 | 79 |

Execution

80 |

 81 | 
 82 | 		

Test Result

83 |

84 | 85 | 86 | 112 | 113 | -------------------------------------------------------------------------------- /test-node.js: -------------------------------------------------------------------------------- 1 | const csv2json = require('./csv2json.js'); 2 | const csv = `album, year, US_peak_chart_post 3 | The White Stripes, 1999, - 4 | De Stijl, 2000, - 5 | White Blood Cells, 2001, 61 6 | Elephant, 2003, 6 7 | Get Behind Me Satan, 2005, 3 8 | Icky Thump, 2007, 2 9 | Under Great White Northern Lights, 2010, 11 10 | Live in Mississippi, 2011, - 11 | Live at the Gold Dollar, 2012, - 12 | Nine Miles from the White City, 2013, -`; 13 | const json = csv2json(csv, {parseNumbers: true}); 14 | const expected = [ 15 | { 16 | "album": "The White Stripes", 17 | "year": 1999, 18 | "US_peak_chart_post": "-" 19 | }, 20 | { 21 | "album": "De Stijl", 22 | "year": 2000, 23 | "US_peak_chart_post": "-" 24 | }, 25 | { 26 | "album": "White Blood Cells", 27 | "year": 2001, 28 | "US_peak_chart_post": 61 29 | }, 30 | { 31 | "album": "Elephant", 32 | "year": 2003, 33 | "US_peak_chart_post": 6 34 | }, 35 | { 36 | "album": "Get Behind Me Satan", 37 | "year": 2005, 38 | "US_peak_chart_post": 3 39 | }, 40 | { 41 | "album": "Icky Thump", 42 | "year": 2007, 43 | "US_peak_chart_post": 2 44 | }, 45 | { 46 | "album": "Under Great White Northern Lights", 47 | "year": 2010, 48 | "US_peak_chart_post": 11 49 | }, 50 | { 51 | "album": "Live in Mississippi", 52 | "year": 2011, 53 | "US_peak_chart_post": "-" 54 | }, 55 | { 56 | "album": "Live at the Gold Dollar", 57 | "year": 2012, 58 | "US_peak_chart_post": "-" 59 | }, 60 | { 61 | "album": "Nine Miles from the White City", 62 | "year": 2013, 63 | "US_peak_chart_post": "-" 64 | } 65 | ]; 66 | console.log('==Expected=='); 67 | console.log(JSON.stringify(expected, null, 2)); 68 | console.log('==Execution=='); 69 | console.log(JSON.stringify(json, null, 2)); 70 | console.log('==Result=='); 71 | if (JSON.stringify(json) == JSON.stringify(expected)) { 72 | console.log('Success!'); 73 | } else { 74 | console.log('Failed.'); 75 | } 76 | --------------------------------------------------------------------------------