├── v2 ├── worker.d.ts ├── getEol.d.ts ├── lineToJson.d.ts ├── index.js ├── util.d.ts ├── index.d.ts ├── dataClean.d.ts ├── Processor.js ├── CSVError.d.ts ├── fileline.d.ts ├── Result.d.ts ├── Processor.d.ts ├── ProcessorLocal.d.ts ├── getEol.js ├── fileline.js ├── ProcessFork.d.ts ├── rowSplit.d.ts ├── ParseRuntime.js ├── util.js ├── Parameters.js ├── Converter.d.ts ├── CSVError.js ├── ParseRuntime.d.ts ├── worker.js ├── dataClean.js ├── Parameters.d.ts ├── ProcessFork.js ├── Result.js ├── Converter.js └── lineToJson.js ├── test ├── data │ ├── emptyFile │ ├── dataWithEmptyString │ ├── testEol │ ├── dataNoTrimCRLF │ ├── dataNoTrimBOM │ ├── noheaderWithVaryColumnNum │ ├── pipeAsQuote │ ├── dataWithSlashEscape │ ├── trailingComma │ ├── dataWithMultipleLineRow │ ├── dataWithSlashEscapeAndDelimiterBetweenQuotes │ ├── dataWithLatin1Encoding │ ├── dataWithWhiteSpace │ ├── dataDiffDelimiter │ ├── data#139 │ ├── dataIgnoreEmpty │ ├── twodoublequotes │ ├── columnArray │ ├── testData │ ├── noheadercsv │ ├── dataWithType │ ├── dataWithTripleQoutes │ ├── dataWithQoutes │ ├── dataWithPipeAsDelimiter │ ├── dataWithTabAsDelimiter │ ├── dataWithUnclosedQuotes │ ├── complexJSONCSV │ ├── invalidHeader │ ├── dataWithComma │ ├── dataWithAutoDelimiter │ ├── dataWithMismatchedColumn │ ├── quoteTolerant │ ├── tabsv │ ├── csvWithUnclosedHeader │ ├── lineBreak │ └── longHeader ├── testPrototypePollution.ts └── testErrorHandle.ts ├── bin ├── csvtojson.bat ├── csvtojson ├── genCsv.js ├── options.json └── csvtojson.js ├── index.js ├── index.d.ts ├── v1 ├── interfaces │ ├── cli │ │ ├── index.js │ │ └── main.js │ ├── web │ │ ├── index.js │ │ └── webServer.js │ └── index.js ├── core │ ├── defaultParsers │ │ ├── parser_omit.js │ │ ├── index.js │ │ ├── parser_flat.js │ │ ├── parser_array.js │ │ ├── parser_jsonarray.js │ │ └── parser_json.js │ ├── index.js │ ├── fileline.js │ ├── fileLineToCSVLine.js │ ├── dataToCSVLine.js │ ├── getEol.js │ ├── getDelimiter.js │ ├── filterRow.js │ ├── csvline.js │ ├── CSVError.js │ ├── parserMgr.js │ ├── worker.js │ ├── defParam.js │ ├── parser.js │ ├── rowSplit.js │ ├── workerMgr.js │ └── linesToJson.js └── index.js ├── typings.d.ts ├── .travis.yml ├── src ├── CSVError.test.ts ├── index.ts ├── fileline.test.ts ├── getEol.ts ├── Processor.ts ├── Result.test.ts ├── CSVError.ts ├── fileline.ts ├── util.ts ├── ProcessorLocal.test.ts ├── worker.ts ├── dataClean.ts ├── ParseRuntime.ts ├── rowSplit.test.ts ├── ProcessFork.ts ├── Converter.ts ├── Result.ts ├── Parameters.ts ├── lineToJson.ts ├── rowSplit.ts └── ProcessorLocal.ts ├── .gitignore ├── .npmignore ├── LICENSE ├── docs ├── performance.md └── csvtojson-v2.md ├── webpack.config.js └── tsconfig.json /v2/worker.d.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/data/emptyFile: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bin/csvtojson.bat: -------------------------------------------------------------------------------- 1 | @node csvtojson.js %* -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports=require("./v2"); -------------------------------------------------------------------------------- /test/data/dataWithEmptyString: 
-------------------------------------------------------------------------------- 1 | green,40, "" 2 | -------------------------------------------------------------------------------- /test/data/testEol: -------------------------------------------------------------------------------- 1 | John,25,xxx Samantha,28,yyy 2 | -------------------------------------------------------------------------------- /test/data/dataNoTrimCRLF: -------------------------------------------------------------------------------- 1 | name,age 2 | joe,20 3 | sam,30 -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | import csv=require("./v2"); 2 | 3 | export=csv; 4 | 5 | -------------------------------------------------------------------------------- /v1/interfaces/cli/index.js: -------------------------------------------------------------------------------- 1 | module.exports = require("./main.js"); -------------------------------------------------------------------------------- /test/data/dataNoTrimBOM: -------------------------------------------------------------------------------- 1 | "name","age" 2 | "joe","20" 3 | "sam","30" 4 | -------------------------------------------------------------------------------- /test/data/noheaderWithVaryColumnNum: -------------------------------------------------------------------------------- 1 | John,25,XXX 2 | Samantha,28,YYY,7 3 | -------------------------------------------------------------------------------- /test/data/pipeAsQuote: -------------------------------------------------------------------------------- 1 | test,test2,test3 2 | blag,blagh,|blahhh, blah| 3 | -------------------------------------------------------------------------------- /v1/interfaces/web/index.js: -------------------------------------------------------------------------------- 1 | module.exports = require("./webServer.js"); -------------------------------------------------------------------------------- /bin/csvtojson: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | require('./csvtojson.js')(); 4 | -------------------------------------------------------------------------------- /test/data/dataWithSlashEscape: -------------------------------------------------------------------------------- 1 | id,raw 2 | 0,"{\"hello\":\"world\",\"test\":true}" -------------------------------------------------------------------------------- /test/data/trailingComma: -------------------------------------------------------------------------------- 1 | field1,field2 2 | value1,value2, 3 | value1,value2, 4 | -------------------------------------------------------------------------------- /test/data/dataWithMultipleLineRow: -------------------------------------------------------------------------------- 1 | aa,bb 2 | ss,"12345 3 | 6789,abcde"" 4 | ddee" 5 | -------------------------------------------------------------------------------- /test/data/dataWithSlashEscapeAndDelimiterBetweenQuotes: -------------------------------------------------------------------------------- 1 | id,raw 2 | 0,"\"hello,\"world\"" -------------------------------------------------------------------------------- /typings.d.ts: -------------------------------------------------------------------------------- 1 | declare module "*.json"{ 2 | const value: any; 3 | export default value; 4 | } 
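The `typings.d.ts` declaration above lets TypeScript sources treat any `.json` import as an `any`-typed default export. A minimal sketch of the usage it enables — the imported file name is purely illustrative, and this assumes `resolveJsonModule` is not already enabled in `tsconfig.json`:

```ts
// Hypothetical consumer of the "*.json" wildcard declaration: without it this
// import would fail type checking; with it, `pkg` is typed as `any`.
import pkg from "../package.json";

console.log(pkg.name);
```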
-------------------------------------------------------------------------------- /v1/interfaces/index.js: -------------------------------------------------------------------------------- 1 | module.exports.web=require("./web"); 2 | module.exports.cli=require("./cli"); -------------------------------------------------------------------------------- /test/data/dataWithLatin1Encoding: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Keyang/node-csvtojson/HEAD/test/data/dataWithLatin1Encoding -------------------------------------------------------------------------------- /test/data/dataWithWhiteSpace: -------------------------------------------------------------------------------- 1 | "Column 1" , "Column 2", "Column 3" ,"Email" 2 | Column1Row1,Column2Row1,Column3Row1,Row1@example.com -------------------------------------------------------------------------------- /v2/getEol.d.ts: -------------------------------------------------------------------------------- 1 | import { ParseRuntime } from "./ParseRuntime"; 2 | export default function (data: string, param: ParseRuntime): string; 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "10" 4 | - "8" 5 | - "6" 6 | after_success: 'npm run coveralls' 7 | script: "npm run travis" -------------------------------------------------------------------------------- /test/data/dataDiffDelimiter: -------------------------------------------------------------------------------- 1 | annee;jour;date;b_1;b_2;devise; 2 | 2015029;LUNDI ;09/03/2015;35;31;eur; 3 | 2015028;SAMEDI ;07/03/2015;48;9;eur; 4 | -------------------------------------------------------------------------------- /test/data/data#139: -------------------------------------------------------------------------------- 1 | field1, field2, field3, field4, 2 | 2005088801,A1,9001009395, 3 | 2005088806,A6,9001009395 9001009990, 4 | 2005088807,A7,9001009989, 5 | -------------------------------------------------------------------------------- /test/data/dataIgnoreEmpty: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | col1, col2.0, col2.1,col4.col3,col4.col5 5 | d1,,d3,,world 6 | 7 | d2,d,d,d,d 8 | ,,,, 9 | d4,d2,d3 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/CSVError.test.ts: -------------------------------------------------------------------------------- 1 | // import CSVError from "./CSVError"; 2 | // import assert from "assert"; 3 | // describe("CSVError",()=>{ 4 | // it ("should toString()",()=>{ 5 | // }) 6 | // }) -------------------------------------------------------------------------------- /test/data/twodoublequotes: -------------------------------------------------------------------------------- 1 | title,actors,data,uuid,fieldA,fieldB 2 | """","[""Neil"", ""Bill"""", ""Carl"",""Richard"",""Linus""]",xyabcde,"fejal""eifa","bnej""""falkfe","""eisjfes""" 3 | -------------------------------------------------------------------------------- /v1/core/defaultParsers/parser_omit.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "name": "omit", 3 | "regExp": /^\*omit\*/, 4 | "processSafe":true, 5 | "parserFunc": function parser_omit() {} 6 | }; 7 | 
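`parser_omit` above is one of the v1 default parsers, each selected by a prefix in the CSV header (`*omit*`, `*array*`, `*json*`, `*jsonarray*`, `*flat*`; the others appear later in this dump) and exercised by test data such as `testData` and `dataWithPipeAsDelimiter`. A minimal sketch of how those prefixes shape the output — the require path, the constructor options and the exact result shape are assumptions based on the v1 code and tests in this repo:

```ts
// v1 event-based API; `from` and the "end_parsed" event also appear in
// v1/interfaces/cli/main.js later in this dump.
const csvV1 = require("./v1");

const conv = new csvV1.Converter({ delimiter: "|" });
conv.on("end_parsed", (result) => {
  // Expected shape given the default parsers: the *omit* column is dropped,
  // *json* nests the value under employee.name, *array* collects into an array:
  // [{ date: "2012-02-12", employee: { name: "Eric" }, address: ["Dunno Street"] }]
  console.log(result);
});
conv.from("date|*json*employee.name|*array*address|*omit*id\n2012-02-12|Eric|Dunno Street|2");
```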
-------------------------------------------------------------------------------- /v2/lineToJson.d.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | export default function (csvRows: string[][], conv: Converter): JSONResult[]; 3 | export type JSONResult = { 4 | [key: string]: any; 5 | }; 6 | -------------------------------------------------------------------------------- /v1/core/index.js: -------------------------------------------------------------------------------- 1 | module.exports = constructor; 2 | module.exports.Converter = require("./Converter.js"); 3 | 4 | function constructor(param,options) { 5 | return new module.exports.Converter(param, options); 6 | } 7 | -------------------------------------------------------------------------------- /v1/index.js: -------------------------------------------------------------------------------- 1 | //deprecated but leave it for backward compatibility 2 | module.exports.core=require("./core"); 3 | 4 | //live apis 5 | module.exports=require("./core"); 6 | module.exports.interfaces = require("./interfaces"); 7 | -------------------------------------------------------------------------------- /test/data/columnArray: -------------------------------------------------------------------------------- 1 | TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED 2 | 1395426422,n,10028,1213,5461 3 | 1395426422,n,10013,9954,13560 4 | 1395426422,n,10109,221391500,141836 5 | 1395426422,n,10007,53448,308549 6 | 1395426422,n,10022,15506,72125 -------------------------------------------------------------------------------- /v1/core/defaultParsers/index.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | require('./parser_array.js'), 3 | require('./parser_json.js'), 4 | require('./parser_omit.js'), 5 | require('./parser_jsonarray.js'), 6 | require("./parser_flat.js") 7 | ]; 8 | -------------------------------------------------------------------------------- /test/data/testData: -------------------------------------------------------------------------------- 1 | date,employee.name,employee.age,employee.number,address.0,address.1,employee.key.0,employee.key.1,*omit*id 2 | 2012-02-12,Eric,31,51234,Dunno Street,Kilkeny Road,key1,key2,2 3 | 2012-03-06,Ted,28,51289,O FUTEBOL.¿,Tormore,key3,key4,4 -------------------------------------------------------------------------------- /test/data/noheadercsv: -------------------------------------------------------------------------------- 1 | CC102-PDMI-001,eClass_5.1.3,10/3/2014,12,40,green,40 2 | CC200-009-001,eClass_5.1.3,11/3/2014,5,3,blue,38 3 | CC200-070-001,eClass_5.1.3,10/4/2014,15,13,red,45 4 | CC200-099,eClass_5.1.3,10/5/2014,20,17,orange,28 5 | CC200-100,eClass_5.1.3,10/3/2014,5,4,black,32 6 | -------------------------------------------------------------------------------- /v2/index.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var Converter_1 = require("./Converter"); 3 | var helper = function (param, options) { 4 | return new Converter_1.Converter(param, options); 5 | }; 6 | helper["csv"] = helper; 7 | helper["Converter"] = Converter_1.Converter; 8 | module.exports = helper; 9 | -------------------------------------------------------------------------------- /test/data/dataWithType: -------------------------------------------------------------------------------- 1 | column1,user.name,column2,column3, colume4, column5, column6 
, column7, column8, column9,column10.0,column10.1,name#!,column11 2 | 1234,hello world,a1234,2012-01-01,someinvaliddate, {"hello":"world"}, {"hello":"world"}, 1234,abcd, true,23,31,false,[{"hello":"world"}] 3 | -------------------------------------------------------------------------------- /test/data/dataWithTripleQoutes: -------------------------------------------------------------------------------- 1 | Year,Make,Model,Description,Price 2 | 1997,Ford,E350,"ac, abs, moon",3000.00 3 | 1999,Chevy,"Venture ""Extended Edition""","",4900.00 4 | 1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00 5 | 1996,Jeep,Grand Cherokee,"MUST SELL!air, moon roof, loaded",4799.00 -------------------------------------------------------------------------------- /test/data/dataWithQoutes: -------------------------------------------------------------------------------- 1 | TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED 2 | "13954264""22","n",""10028"",""1213"",""5461"" 3 | "abc, def, ccc", n,10013,9954,13560 4 | 1395426422,n,10013,9954,13560 5 | 1395426422,n,10109,221391500,141836 6 | 1395426422,n,10007,53448,308549 7 | 1395426422,n,10022,15506,72125 8 | 9 | -------------------------------------------------------------------------------- /test/data/dataWithPipeAsDelimiter: -------------------------------------------------------------------------------- 1 | date|*json*employee.name|*json*employee.age|*json*employee.number|*array*address|*array*address|*jsonarray*employee.key|*jsonarray*employee.key|*omit*id 2 | 2012-02-12|Eric|31|51234|Dunno Street|Kilkeny Road|key1|key2|2 3 | 2012-03-06|Ted|28|51289|O FUTEBOL.¿|Tormore|key3|key4|4 4 | -------------------------------------------------------------------------------- /test/data/dataWithTabAsDelimiter: -------------------------------------------------------------------------------- 1 | date *json*employee.name *json*employee.age *json*employee.number *array*address *array*address *jsonarray*employee.key *jsonarray*employee.key *omit*id 2 | 2012-02-12 Eric 31 51234 Dunno Street Kilkeny Road key1 key2 2 3 | 2012-03-06 Ted 28 51289 O FUTEBOL.¿ Tormore key3 key4 4 4 | -------------------------------------------------------------------------------- /test/data/dataWithUnclosedQuotes: -------------------------------------------------------------------------------- 1 | TIMESTAMP,UPDATE,UID,BYTES SENT,BYTES RCVED 2 | "13954264""22","n,""10028"",""1213"",""5461"" 3 | "abc, 4 | def, ccc", n,10013,9954,13560 5 | 1395426422,n,10013,9954,13560 6 | 1395426422,n,10109,221391500,141836 7 | 1395426422,n,10007,53448,308549 8 | "1395426422,n,10022,15506,72125 9 | -------------------------------------------------------------------------------- /v1/core/defaultParsers/parser_flat.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "name": "flat", 3 | "processSafe": true, 4 | "regExp": /^\*flat\*/, 5 | "parserFunc": function parser_flat (params) { 6 | var key = this.getHeadStr(); 7 | var val = params.item; 8 | params.resultRow[key] = val; 9 | } 10 | }; 11 | -------------------------------------------------------------------------------- /v2/util.d.ts: -------------------------------------------------------------------------------- 1 | export declare function bufFromString(str: string): Buffer; 2 | export declare function emptyBuffer(): Buffer; 3 | export declare function filterArray(arr: any[], filter: number[]): any[]; 4 | export declare const trimLeft: (str: string) => string; 5 | export declare const trimRight: (str: 
string) => string; 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Thumbs.db 3 | node_modules/ 4 | *.swp 5 | *.swo 6 | .idea/ 7 | 8 | # intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 9 | .grunt 10 | 11 | pids 12 | logs 13 | results 14 | graph.png 15 | .project 16 | .settings 17 | .vscode 18 | coverage/ 19 | .nyc_output 20 | .coveralls.yml 21 | .ts-node 22 | -------------------------------------------------------------------------------- /v2/index.d.ts: -------------------------------------------------------------------------------- 1 | import { TransformOptions } from "stream"; 2 | import { CSVParseParam } from "./Parameters"; 3 | import { Converter } from "./Converter"; 4 | declare const helper: { 5 | (param?: Partial, options?: TransformOptions): Converter; 6 | csv: /*elided*/ any; 7 | Converter: typeof Converter; 8 | }; 9 | export = helper; 10 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Thumbs.db 3 | node_modules/ 4 | *.swp 5 | *.swo 6 | 7 | # intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 8 | .grunt 9 | 10 | pids 11 | logs 12 | results 13 | build 14 | graph.png 15 | .project 16 | .settings 17 | .vscode 18 | docs/ 19 | devops 20 | coverage/ 21 | .nyc_output/ 22 | .coveralls.yml 23 | .ts-node 24 | -------------------------------------------------------------------------------- /v2/dataClean.d.ts: -------------------------------------------------------------------------------- 1 | import { ParseRuntime } from "./ParseRuntime"; 2 | /** 3 | * For each data chunk coming to parser: 4 | * 1. append the data to the buffer that is left from last chunk 5 | * 2. check if utf8 chars being split, if does, stripe the bytes and add to left buffer. 6 | * 3. 
stripBom 7 | */ 8 | export declare function prepareData(chunk: Buffer, runtime: ParseRuntime): string; 9 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import { TransformOptions } from "stream"; 2 | import { CSVParseParam } from "./Parameters"; 3 | import { Converter } from "./Converter"; 4 | 5 | const helper = function (param?: Partial, options?: TransformOptions): Converter { 6 | return new Converter(param, options); 7 | } 8 | helper["csv"] = helper; 9 | helper["Converter"] = Converter; 10 | export =helper; 11 | -------------------------------------------------------------------------------- /test/data/complexJSONCSV: -------------------------------------------------------------------------------- 1 | fieldA.title, fieldA.children.0.name, fieldA.children.0.id,fieldA.children.1.name, fieldA.children.1.employee.0.name,fieldA.children.1.employee.1.name, fieldA.address.0,fieldA.address.1, description 2 | Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory 3 | Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle 4 | -------------------------------------------------------------------------------- /test/data/invalidHeader: -------------------------------------------------------------------------------- 1 | header1,header1.filed1,header1.file2,header2,header2.field1.0,header2.field1.1,header2.filed2 2 | 5OlFPc,q7,ejpJdw,DIgNVqB7h9jI,f8ayrzv,undefinedzvTY3Qd3pSkKOk,S7cVvW7m50t9U 3 | 0TaUGQVPqPkOr,lT,GA,UPUuORnuaDjXdl,V6G4QFddmPH8b,65NxWPl,Lclhl0fy 4 | ,ex4,1gjT4YPJ,QtJ8S5TQ,M4zO4OppCAR4,Pg7VipESqZmHwY5,mPzz 5 | FEEa,,B1B,U9,kjxHGy,McW,6FWKBP0JoCiWn 6 | 0,,Fj0o9IPB,jwFuChbeqZdVVR,hGqs3Ps7dMEo6,Zundefined00Xqvw,hT7lN6iVUHDx 7 | -------------------------------------------------------------------------------- /v2/Processor.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | exports.Processor = void 0; 4 | var Processor = /** @class */ (function () { 5 | function Processor(converter) { 6 | this.converter = converter; 7 | this.params = converter.parseParam; 8 | this.runtime = converter.parseRuntime; 9 | } 10 | return Processor; 11 | }()); 12 | exports.Processor = Processor; 13 | -------------------------------------------------------------------------------- /v1/core/defaultParsers/parser_array.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "name": "array", 3 | "processSafe":true, 4 | "regExp": /^\*array\*/, 5 | "parserFunc": function parser_array(params) { 6 | var fieldName = params.head.replace(this.regExp, ''); 7 | if (params.resultRow[fieldName] === undefined) { 8 | params.resultRow[fieldName] = []; 9 | } 10 | params.resultRow[fieldName].push(params.item); 11 | } 12 | }; 13 | -------------------------------------------------------------------------------- /test/data/dataWithComma: -------------------------------------------------------------------------------- 1 | col1,col2,col3,col4,col5,col6,col7,col8,col9,col10 2 | "Mini. Sectt,hisar S.O",125001,S.O,Non-Delivery,Hissar,Ambala HQ,Haryana,Hisar,Hisar,HARYANA 3 | #Mini. Sectt,hisar S.O#,125001,S.O,Non-Delivery,Hissar,Ambala HQ,Haryana,Hisar,Hisar,HARYANA 4 | #Mini. 
Sectt,hisar S.O#,125001,S.O,Non-Delivery,Hissar,Ambala HQ,Haryana,Hisar,Hisar,HARYANA 5 | #Mini. Sectt,hisar S.O#,#125001,fenvkdsf#,S.O,Non-Delivery,Hissar,Ambala HQ,Haryana,Hisar,Hisar,HARYANA -------------------------------------------------------------------------------- /test/data/dataWithAutoDelimiter: -------------------------------------------------------------------------------- 1 | col1:col2:col3:col4:col5:col6:col7:col8:col9:col10 2 | "Mini. Sectt:hisar S.O":125001:S.O:Non-Delivery:Hissar:Ambala HQ:Haryana:Hisar:Hisar:HARYANA 3 | #Mini. Sectt:hisar S.O#:125001:S.O:Non-Delivery:Hissar:Ambala HQ:Haryana:Hisar:Hisar:HARYANA 4 | #Mini. Sectt:hisar S.O#:125001:S.O:Non-Delivery:Hissar:Ambala HQ:Haryana:Hisar:Hisar:HARYANA 5 | #Mini. Sectt:hisar S.O#:#125001:fenvkdsf#:S.O:Non-Delivery:Hissar:Ambala HQ:Haryana:Hisar:Hisar:HARYANA -------------------------------------------------------------------------------- /v1/core/fileline.js: -------------------------------------------------------------------------------- 1 | var getEol = require("./getEol"); 2 | /** 3 | * convert data chunk to file lines array 4 | * @param {string} data data chunk as utf8 string 5 | * @param {object} param Converter param object 6 | * @return {Object} {lines:[line1,line2...],partial:String} 7 | */ 8 | module.exports = function(data, param) { 9 | var eol = getEol(data,param); 10 | var lines = data.split(eol); 11 | var partial = lines.pop(); 12 | return {lines: lines, partial: partial}; 13 | }; 14 | -------------------------------------------------------------------------------- /test/data/dataWithMismatchedColumn: -------------------------------------------------------------------------------- 1 | fieldA.title, fieldA.children[0].name, fieldA.children[0].id,fieldA.children[1].name, fieldA.children[1].employee[].name,fieldA.children[1].employee[].name, fieldA.address[],fieldA.address[], description 2 | Food Factory, Oscar, 0023, Tikka, Tim, Joe, 3 Lame Road, Grantstown, A fresh new food factory 3 | Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle 4 | Kindom Garden, Ceil, 5 | Kindom Garden, Ceil, 54, Pillow, Amst, Tom, 24 Shaker Street, HelloTown, Awesome castle -------------------------------------------------------------------------------- /v1/core/fileLineToCSVLine.js: -------------------------------------------------------------------------------- 1 | var csvline=require("./csvline"); 2 | /** 3 | * Convert data chunk to csv lines with cols 4 | * @param {[type]} data [description] 5 | * @param {[type]} params [description] 6 | * @return {[type]} {lines:[[col1,col2,col3]],partial:String} 7 | */ 8 | module.exports = function(fileLine, params) { 9 | var lines = fileLine.lines; 10 | var csvLines = csvline(lines,params); 11 | return { 12 | lines: csvLines.lines, 13 | partial: csvLines.partial + fileLine.partial 14 | }; 15 | }; 16 | -------------------------------------------------------------------------------- /v2/CSVError.d.ts: -------------------------------------------------------------------------------- 1 | export default class CSVError extends Error { 2 | err: string; 3 | line: number; 4 | extra?: string | undefined; 5 | static column_mismatched(index: number, extra?: string): CSVError; 6 | static unclosed_quote(index: number, extra?: string): CSVError; 7 | static fromJSON(obj: any): CSVError; 8 | constructor(err: string, line: number, extra?: string | undefined); 9 | toJSON(): { 10 | err: string; 11 | line: number; 12 | extra: string | undefined; 13 | }; 14 | } 15 | 
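The declaration above describes the error object the v2 parser produces. A short sketch of how it surfaces to callers, modelled on `test/testErrorHandle.ts` later in this dump (the import paths relative to the repo root are assumptions):

```ts
import { Converter } from "./src/Converter";
import CSVError from "./src/CSVError";

const conv = new Converter({ checkColumn: true });
conv.on("error", (err: CSVError) => {
  // err.err is the machine-readable code, e.g. "column_mismatched" or
  // "unclosed_quote"; err.line and err.extra give the location and context.
  console.error(err.err, err.line, err.extra);
});
// The second row has fewer cells than the header, so checkColumn should
// reject it with a column_mismatched CSVError via the "error" event.
conv.fromString("a,b,c\n1,2");
```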
-------------------------------------------------------------------------------- /test/data/quoteTolerant: -------------------------------------------------------------------------------- 1 | "Style","Description","SKU","BusinessAreaCode","BusinessAreaDescription","DepartmentCode","DepartmentDescription","ClassCode","ClassDescription","ColorCode","ColorDescription","SizeCode","SizeDescription","PrimaryMaterialCode","PrimaryMaterialDescription","VendorCode","VendorDescription","CurrentRetail" 2 | "503951518","OVERSIZE ROUND GOLD PENDANT ON 32" THICK BLACK CORD","00505039515193","W12","Jewelry","1231","Jewelry","123111","D2 New Jewelry Necklace","70","GOLD","100","1 Size","065","Costume/Imitation","3522","KENNETH JAY LANE, INC.","79.0000" 3 | -------------------------------------------------------------------------------- /src/fileline.test.ts: -------------------------------------------------------------------------------- 1 | import {stringToLines} from "./fileline"; 2 | import { mergeParams } from "./Parameters"; 3 | import { Converter } from "./Converter"; 4 | var assert = require("assert"); 5 | describe("fileline function", function() { 6 | it ("should convert data to multiple lines ", function() { 7 | const conv=new Converter(); 8 | var data = "abcde\nefef"; 9 | var result = stringToLines(data, conv.parseRuntime); 10 | assert.equal(result.lines.length, 1); 11 | assert.equal(result.partial, "efef"); 12 | assert.equal(result.lines[0], "abcde"); 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /v1/core/dataToCSVLine.js: -------------------------------------------------------------------------------- 1 | var fileline=require("./fileline"); 2 | var csvline=require("./csvline"); 3 | /** 4 | * Convert data chunk to csv lines with cols 5 | * @param {[type]} data [description] 6 | * @param {[type]} params [description] 7 | * @return {[type]} {lines:[[col1,col2,col3]],partial:String} 8 | */ 9 | module.exports = function(data, params) { 10 | var line = fileline(data, params); 11 | var lines = line.lines; 12 | var csvLines = csvline(lines, params); 13 | return { 14 | lines: csvLines.lines, 15 | partial: csvLines.partial + line.partial 16 | }; 17 | }; 18 | -------------------------------------------------------------------------------- /v1/core/getEol.js: -------------------------------------------------------------------------------- 1 | //return eol from a data chunk. 
2 | var eol = require("os").EOL; 3 | module.exports = function(data, param) { 4 | if (!param.eol && data) { 5 | for (var i = 0, len = data.length; i < len; i++) { 6 | if (data[i] === "\r") { 7 | if (data[i + 1] === "\n") { 8 | param.eol = "\r\n"; 9 | } else if (data[i + 1]) { 10 | param.eol = "\r"; 11 | } 12 | return param.eol; 13 | } else if (data[i] === "\n") { 14 | param.eol = "\n"; 15 | return param.eol; 16 | } 17 | } 18 | } 19 | return param.eol; 20 | }; 21 | -------------------------------------------------------------------------------- /v2/fileline.d.ts: -------------------------------------------------------------------------------- 1 | import { ParseRuntime } from "./ParseRuntime"; 2 | /** 3 | * convert data chunk to file lines array 4 | * @param {string} data data chunk as utf8 string 5 | * @param {object} param Converter param object 6 | * @return {Object} {lines:[line1,line2...],partial:String} 7 | */ 8 | export declare function stringToLines(data: string, param: ParseRuntime): StringToLinesResult; 9 | export interface StringToLinesResult { 10 | lines: Fileline[]; 11 | /** 12 | * last line which could be incomplete line. 13 | */ 14 | partial: string; 15 | } 16 | export type Fileline = string; 17 | -------------------------------------------------------------------------------- /v2/Result.d.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | import { ProcessLineResult } from "./Processor"; 3 | import CSVError from "./CSVError"; 4 | export declare class Result { 5 | private converter; 6 | private get needEmitLine(); 7 | private _needPushDownstream?; 8 | private get needPushDownstream(); 9 | private get needEmitAll(); 10 | private finalResult; 11 | constructor(converter: Converter); 12 | processResult(resultLines: ProcessLineResult[]): Promise; 13 | appendFinalResult(lines: any[]): void; 14 | processError(err: CSVError): void; 15 | endProcess(): void; 16 | } 17 | -------------------------------------------------------------------------------- /test/data/tabsv: -------------------------------------------------------------------------------- 1 | JournalCode JournalLib EcritureNum EcritureDate CompteNum CompteLib CompAuxNum CompAuxLib PieceRef PieceDate EcritureLib Debit Credit EcritureLet DateLet ValidDate Montantdevise Idevise 2 | AA BBBBBB 0 20140701 CCCCCCCC ddddddddddddddd ZZZZZZZ1 20140701 EEEEE EEE EEEEEE 34,81 0,00 20150630 0,00 3 | AA BBBBBB 0 20140701 CCCCCCCC ddddddddddddddd ZZZZZZZ2 20140701 EEEEE EEE EEEEEE 12,02 0,00 20150630 0,00 4 | AA BBBBBB 0 20140701 CCCCCCCC ddddddddddddddd ZZZZZZZ3 20140701 EEEEE EEE EEEEEE 37,98 0,00 20150630 0,00 5 | AA BBBBBB 0 20140701 CCCCCCCC ddddddddddddddd ZZZZZZZ4 20140701 EEEEE EEE EEEEEE 27,48 0,00 20150630 0,00 6 | -------------------------------------------------------------------------------- /v1/core/getDelimiter.js: -------------------------------------------------------------------------------- 1 | module.exports = getDelimiter; 2 | var defaulDelimiters = [",", "|", "\t", ";", ":"]; 3 | function getDelimiter(rowStr,param) { 4 | var checker; 5 | if (param.delimiter === "auto"){ 6 | checker = defaulDelimiters; 7 | } else if (param.delimiter instanceof Array) { 8 | checker = param.delimiter; 9 | } else { 10 | return param.delimiter; 11 | } 12 | var count = 0; 13 | var rtn = ","; 14 | checker.forEach(function(delim) { 15 | var delimCount = rowStr.split(delim).length; 16 | if (delimCount > count) { 17 | rtn = delim; 18 | count = delimCount; 19 | } 20 | 
}); 21 | return rtn; 22 | } 23 | -------------------------------------------------------------------------------- /src/getEol.ts: -------------------------------------------------------------------------------- 1 | import { ParseRuntime } from "./ParseRuntime"; 2 | //return first eol found from a data chunk. 3 | export default function (data: string, param: ParseRuntime): string { 4 | if (!param.eol && data) { 5 | for (var i = 0, len = data.length; i < len; i++) { 6 | if (data[i] === "\r") { 7 | if (data[i + 1] === "\n") { 8 | param.eol = "\r\n"; 9 | break; 10 | } else if (data[i + 1]) { 11 | param.eol = "\r"; 12 | break; 13 | } 14 | } else if (data[i] === "\n") { 15 | param.eol = "\n"; 16 | break; 17 | } 18 | } 19 | } 20 | return param.eol || "\n"; 21 | }; 22 | -------------------------------------------------------------------------------- /v2/Processor.d.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | import { JSONResult } from "./lineToJson"; 3 | import { CSVParseParam } from "./Parameters"; 4 | import { ParseRuntime } from "./ParseRuntime"; 5 | export declare abstract class Processor { 6 | protected converter: Converter; 7 | protected params: CSVParseParam; 8 | protected runtime: ParseRuntime; 9 | constructor(converter: Converter); 10 | abstract process(chunk: Buffer, finalChunk?: boolean): Promise; 11 | abstract destroy(): Promise; 12 | abstract flush(): Promise; 13 | } 14 | export type ProcessLineResult = string | string[] | JSONResult; 15 | -------------------------------------------------------------------------------- /v2/ProcessorLocal.d.ts: -------------------------------------------------------------------------------- 1 | import { Processor, ProcessLineResult } from "./Processor"; 2 | export declare class ProcessorLocal extends Processor { 3 | flush(): Promise; 4 | destroy(): Promise; 5 | private rowSplit; 6 | private eolEmitted; 7 | private _needEmitEol?; 8 | private get needEmitEol(); 9 | private headEmitted; 10 | private _needEmitHead?; 11 | private get needEmitHead(); 12 | process(chunk: Buffer, finalChunk?: boolean): Promise; 13 | private processCSV; 14 | private processDataWithHead; 15 | private filterHeader; 16 | private processCSVBody; 17 | private prependLeftBuf; 18 | private runPreLineHook; 19 | } 20 | -------------------------------------------------------------------------------- /src/Processor.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | import { JSONResult } from "./lineToJson"; 3 | import { CSVParseParam } from "./Parameters"; 4 | import { ParseRuntime } from "./ParseRuntime"; 5 | 6 | export abstract class Processor { 7 | protected params: CSVParseParam; 8 | protected runtime: ParseRuntime; 9 | constructor(protected converter: Converter) { 10 | this.params = converter.parseParam; 11 | this.runtime = converter.parseRuntime; 12 | } 13 | abstract process(chunk: Buffer,finalChunk?:boolean): Promise 14 | abstract destroy():Promise; 15 | abstract flush(): Promise; 16 | } 17 | export type ProcessLineResult = string | string[] | JSONResult; 18 | -------------------------------------------------------------------------------- /src/Result.test.ts: -------------------------------------------------------------------------------- 1 | import {Result} from "./Result"; 2 | import { Converter } from "./Converter"; 3 | import {readFileSync} from "fs"; 4 | import * as path from "path"; 5 | import assert = 
require("assert"); 6 | import { JSONResult } from "./lineToJson"; 7 | const dataDir=path.join(__dirname,"../test/data/"); 8 | 9 | describe("Result",()=>{ 10 | it ("should return need push downstream based on needEmitAll parameter",function (){ 11 | const conv=new Converter(); 12 | const res=new Result(conv); 13 | assert.equal(res["needEmitAll"],false); 14 | conv.then(); 15 | assert.equal(res["needEmitAll"],true); 16 | conv.parseParam.needEmitAll=false; 17 | assert.equal(res["needEmitAll"],false); 18 | }); 19 | 20 | }) 21 | -------------------------------------------------------------------------------- /v1/core/filterRow.js: -------------------------------------------------------------------------------- 1 | module.exports=function filterRow(row, param) { 2 | if (param.ignoreColumns instanceof Array && param.ignoreColumns.length > 0) { 3 | for (var igRow = 0, igColLen = param.ignoreColumns.length; igRow < igColLen; igRow++) { 4 | if (param.ignoreColumns[igRow] >= 0) { 5 | row.splice(param.ignoreColumns[igRow], 1); 6 | } 7 | } 8 | } 9 | if (param.includeColumns instanceof Array && param.includeColumns.length > 0) { 10 | var cleanRowArr = []; 11 | for (var inRow = 0, inColLen = param.includeColumns.length; inRow < inColLen; inRow++) { 12 | if (param.includeColumns[inRow] >= 0) { 13 | cleanRowArr.push(row[param.includeColumns[inRow]]); 14 | } 15 | } 16 | row = cleanRowArr; 17 | } 18 | return row; 19 | } -------------------------------------------------------------------------------- /v1/core/csvline.js: -------------------------------------------------------------------------------- 1 | var getEol = require("./getEol"); 2 | var rowSplit = require("./rowSplit"); 3 | /** 4 | * Convert lines to csv columns 5 | * @param {[type]} lines [file lines] 6 | * @param {[type]} param [Converter param] 7 | * @return {[type]} {lines:[[col1,col2,col3...]],partial:String} 8 | */ 9 | module.exports = function(lines, param) { 10 | var csvLines = []; 11 | var left = ""; 12 | while (lines.length) { 13 | var line = left + lines.shift(); 14 | var row = rowSplit(line, param); 15 | if (row.closed || param.alwaysSplitAtEOL) { 16 | csvLines.push(row.cols); 17 | left = ""; 18 | } else { 19 | left = line + (getEol(line, param) || "\n"); // if unable to getEol from data, assume "\n" 20 | } 21 | } 22 | return {lines: csvLines, partial: left}; 23 | }; 24 | -------------------------------------------------------------------------------- /src/CSVError.ts: -------------------------------------------------------------------------------- 1 | export default class CSVError extends Error { 2 | static column_mismatched(index: number, extra?: string) { 3 | return new CSVError("column_mismatched", index, extra); 4 | } 5 | static unclosed_quote(index: number, extra?: string) { 6 | return new CSVError("unclosed_quote", index, extra); 7 | } 8 | static fromJSON(obj) { 9 | return new CSVError(obj.err, obj.line, obj.extra); 10 | } 11 | constructor( 12 | public err: string, 13 | public line: number, 14 | public extra?: string 15 | ) { 16 | super("Error: " + err + ". JSON Line number: " + line + (extra ? 
" near: " + extra : "")); 17 | this.name = "CSV Parse Error"; 18 | } 19 | toJSON() { 20 | return { 21 | err: this.err, 22 | line: this.line, 23 | extra: this.extra 24 | } 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/fileline.ts: -------------------------------------------------------------------------------- 1 | import { ParseRuntime } from "./ParseRuntime"; 2 | import getEol from "./getEol"; 3 | // const getEol = require("./getEol"); 4 | /** 5 | * convert data chunk to file lines array 6 | * @param {string} data data chunk as utf8 string 7 | * @param {object} param Converter param object 8 | * @return {Object} {lines:[line1,line2...],partial:String} 9 | */ 10 | export function stringToLines(data: string, param: ParseRuntime): StringToLinesResult { 11 | const eol = getEol(data, param); 12 | const lines = data.split(eol); 13 | const partial = lines.pop() || ""; 14 | return { lines: lines, partial: partial }; 15 | }; 16 | 17 | 18 | export interface StringToLinesResult { 19 | lines: Fileline[], 20 | /** 21 | * last line which could be incomplete line. 22 | */ 23 | partial: string 24 | } 25 | export type Fileline = string; -------------------------------------------------------------------------------- /v2/getEol.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | exports.default = default_1; 4 | //return first eol found from a data chunk. 5 | function default_1(data, param) { 6 | if (!param.eol && data) { 7 | for (var i = 0, len = data.length; i < len; i++) { 8 | if (data[i] === "\r") { 9 | if (data[i + 1] === "\n") { 10 | param.eol = "\r\n"; 11 | break; 12 | } 13 | else if (data[i + 1]) { 14 | param.eol = "\r"; 15 | break; 16 | } 17 | } 18 | else if (data[i] === "\n") { 19 | param.eol = "\n"; 20 | break; 21 | } 22 | } 23 | } 24 | return param.eol || "\n"; 25 | } 26 | ; 27 | -------------------------------------------------------------------------------- /v2/fileline.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __importDefault = (this && this.__importDefault) || function (mod) { 3 | return (mod && mod.__esModule) ? mod : { "default": mod }; 4 | }; 5 | Object.defineProperty(exports, "__esModule", { value: true }); 6 | exports.stringToLines = stringToLines; 7 | var getEol_1 = __importDefault(require("./getEol")); 8 | // const getEol = require("./getEol"); 9 | /** 10 | * convert data chunk to file lines array 11 | * @param {string} data data chunk as utf8 string 12 | * @param {object} param Converter param object 13 | * @return {Object} {lines:[line1,line2...],partial:String} 14 | */ 15 | function stringToLines(data, param) { 16 | var eol = (0, getEol_1.default)(data, param); 17 | var lines = data.split(eol); 18 | var partial = lines.pop() || ""; 19 | return { lines: lines, partial: partial }; 20 | } 21 | ; 22 | -------------------------------------------------------------------------------- /v1/core/CSVError.js: -------------------------------------------------------------------------------- 1 | var util = require("util"); 2 | module.exports = CSVError; 3 | function CSVError(err, index, extra) { 4 | Error.call(this, ""); 5 | this.err = err; 6 | this.line = index; 7 | this.extra = extra; 8 | this.message = "Error: " + err + ". JSON Line number: " + index + (extra ? 
" near: " + extra : ""); 9 | this.name = "CSV Error"; 10 | } 11 | util.inherits(CSVError, Error); 12 | 13 | CSVError.prototype.toString = function() { 14 | return JSON.stringify([this.err, this.line, this.extra]); 15 | }; 16 | 17 | CSVError.column_mismatched = function(index, extra) { 18 | return new CSVError("column_mismatched", index, extra); 19 | }; 20 | 21 | CSVError.unclosed_quote = function(index, extra) { 22 | return new CSVError("unclosed_quote", index, extra); 23 | }; 24 | 25 | CSVError.fromArray = function(arr) { 26 | return new CSVError(arr[0], arr[1], arr[2]); 27 | }; 28 | -------------------------------------------------------------------------------- /src/util.ts: -------------------------------------------------------------------------------- 1 | export function bufFromString(str: string): Buffer { 2 | const length = Buffer.byteLength(str); 3 | const buffer = Buffer.allocUnsafe 4 | ? Buffer.allocUnsafe(length) 5 | : new Buffer(length); 6 | buffer.write(str); 7 | return buffer; 8 | } 9 | 10 | export function emptyBuffer(): Buffer { 11 | const buffer = Buffer.allocUnsafe 12 | ? Buffer.allocUnsafe(0) 13 | : new Buffer(0); 14 | return buffer; 15 | } 16 | 17 | export function filterArray(arr: any[], filter: number[]): any[] { 18 | const rtn: any[] = []; 19 | for (let i = 0; i < arr.length; i++) { 20 | if (filter.indexOf(i) > -1) { 21 | rtn.push(arr[i]); 22 | } 23 | } 24 | return rtn; 25 | } 26 | 27 | export const trimLeft = function trimLeftNative(str: string) { 28 | return str.trimStart(); 29 | } 30 | export const trimRight = function trimRightNative(str: string) { 31 | return str.trimEnd(); 32 | } -------------------------------------------------------------------------------- /v1/core/defaultParsers/parser_jsonarray.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "name": "jsonarray", 3 | "processSafe":true, 4 | "regExp": /^\*jsonarray\*/, 5 | "parserFunc": function parser_jsonarray (params) { 6 | var fieldStr = params.head.replace(this.regExp, ""); 7 | var headArr = fieldStr.split('.'); 8 | var pointer = params.resultRow; 9 | while (headArr.length > 1) { 10 | var headStr = headArr.shift(); 11 | if (headStr==="__proto__" || headStr==="prototype" ||headStr==="constructor" ){ 12 | continue; 13 | } 14 | if (pointer[headStr] === undefined) { 15 | pointer[headStr] = {}; 16 | } 17 | pointer = pointer[headStr]; 18 | } 19 | var arrFieldName = headArr.shift(); 20 | if (pointer[arrFieldName] === undefined) { 21 | pointer[arrFieldName] = []; 22 | } 23 | pointer[arrFieldName].push(params.item); 24 | } 25 | }; 26 | -------------------------------------------------------------------------------- /v1/interfaces/web/webServer.js: -------------------------------------------------------------------------------- 1 | var http = require("http"); 2 | var Converter = require("../../core/Converter.js"); 3 | function startWebServer (args) { 4 | args = args || {}; 5 | var serverArgs = { 6 | port: args.port || '8801', 7 | urlpath: args.urlpath || '/parseCSV' 8 | }; 9 | var server = http.createServer(); 10 | server.on("request", function(req, res){ 11 | if (req.url === serverArgs.urlpath && req.method === "POST"){ 12 | req.pipe(new Converter({constructResult:false})).pipe(res); 13 | } else { 14 | res.end("Please post data to: " + serverArgs.urlpath); 15 | } 16 | }); 17 | 18 | server.listen(serverArgs.port); 19 | console.log("CSV Web Server Listen On:" + serverArgs.port); 20 | console.log("POST to " + serverArgs.urlpath + " with CSV data to 
get parsed."); 21 | return server; 22 | } 23 | module.exports.startWebServer = startWebServer; 24 | -------------------------------------------------------------------------------- /v2/ProcessFork.d.ts: -------------------------------------------------------------------------------- 1 | import { Processor, ProcessLineResult } from "./Processor"; 2 | import { Converter } from "./Converter"; 3 | import { ChildProcess } from "child_process"; 4 | export declare class ProcessorFork extends Processor { 5 | protected converter: Converter; 6 | flush(): Promise; 7 | destroy(): Promise; 8 | childProcess: ChildProcess; 9 | inited: boolean; 10 | private resultBuf; 11 | private leftChunk; 12 | private finalChunk; 13 | private next?; 14 | constructor(converter: Converter); 15 | private prepareParam; 16 | private initWorker; 17 | private flushResult; 18 | private appendBuf; 19 | process(chunk: Buffer): Promise; 20 | } 21 | export interface Message { 22 | cmd: string; 23 | } 24 | export interface InitMessage extends Message { 25 | params: any; 26 | } 27 | export interface StringMessage extends Message { 28 | value: string; 29 | } 30 | export declare const EOM = "\u0003"; 31 | -------------------------------------------------------------------------------- /v2/rowSplit.d.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | import { Fileline } from "./fileline"; 3 | export declare class RowSplit { 4 | private conv; 5 | private quote; 6 | private trim; 7 | private escape; 8 | private cachedRegExp; 9 | private delimiterEmitted; 10 | private _needEmitDelimiter?; 11 | private get needEmitDelimiter(); 12 | constructor(conv: Converter); 13 | parse(fileline: Fileline): RowSplitResult; 14 | private toCSVRow; 15 | private getDelimiter; 16 | private isQuoteOpen; 17 | private isQuoteClose; 18 | private escapeQuote; 19 | parseMultiLines(lines: Fileline[]): MultipleRowResult; 20 | } 21 | export interface MultipleRowResult { 22 | rowsCells: string[][]; 23 | partial: string; 24 | } 25 | export interface RowSplitResult { 26 | /** 27 | * csv row array. ["a","b","c"] 28 | */ 29 | cells: string[]; 30 | /** 31 | * if the passed fileline is a complete row 32 | */ 33 | closed: boolean; 34 | } 35 | -------------------------------------------------------------------------------- /bin/genCsv.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var minimist = require("minimist"); 3 | var argv = process.argv; 4 | argv.shift(); 5 | argv.shift(); 6 | var args = minimist(argv); 7 | var headers = ["name", "header1", "file2", "description", "header2", "field2", "header3"]; 8 | 9 | if (args.headers) { 10 | headers = JSON.parse(args.headers); 11 | } 12 | var rowNum = args.row ? args.row : 10000; 13 | var chars = args.chars ? args.chars : "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890"; 14 | var maxLength = parseInt(args.max ? 
args.max : "15"); 15 | console.log(headers.join(",")); 16 | for (var i = 0; i < rowNum; i++) { 17 | var row = []; 18 | for (var j = 0; j < headers.length; j++) { 19 | row.push(genWord()); 20 | } 21 | console.log(row.join(",")); 22 | } 23 | 24 | function genWord() { 25 | var len = Math.round(Math.random() * maxLength); 26 | var rtn = ""; 27 | for (var i = 0; i < len; i++) { 28 | var pos = Math.round(Math.random() * chars.length); 29 | rtn += chars[pos]; 30 | } 31 | return rtn; 32 | } 33 | -------------------------------------------------------------------------------- /v2/ParseRuntime.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | exports.initParseRuntime = initParseRuntime; 4 | function initParseRuntime(converter) { 5 | var params = converter.parseParam; 6 | var rtn = { 7 | needProcessIgnoreColumn: false, 8 | needProcessIncludeColumn: false, 9 | selectedColumns: undefined, 10 | ended: false, 11 | hasError: false, 12 | error: undefined, 13 | delimiter: converter.parseParam.delimiter, 14 | eol: converter.parseParam.eol, 15 | columnConv: [], 16 | headerType: [], 17 | headerTitle: [], 18 | headerFlag: [], 19 | headers: undefined, 20 | started: false, 21 | parsedLineNumber: 0, 22 | columnValueSetter: [], 23 | }; 24 | if (params.ignoreColumns) { 25 | rtn.needProcessIgnoreColumn = true; 26 | } 27 | if (params.includeColumns) { 28 | rtn.needProcessIncludeColumn = true; 29 | } 30 | return rtn; 31 | } 32 | -------------------------------------------------------------------------------- /test/testPrototypePollution.ts: -------------------------------------------------------------------------------- 1 | const csv = require("../src"); 2 | const assert = require("assert"); 3 | 4 | describe("Prototype Pollution", function () { 5 | it("should not allow prototype pollution", async function () { 6 | const csvData = "a.__proto__.polluted,b.prototype.polluted\n1,2"; 7 | let polluted = false; 8 | 9 | if (({} as any).polluted) { 10 | polluted = true; 11 | } 12 | delete (Object.prototype as any).polluted; 13 | 14 | await csv().fromString(csvData); 15 | assert.strictEqual(({} as any).polluted, undefined, "Prototype should not be polluted"); 16 | if (polluted) { 17 | delete (Object.prototype as any).polluted; 18 | } 19 | }); 20 | it("should not allow prototype pollution for v1",function(){ 21 | require("../v1/core/defaultParsers/parser_jsonarray").parserFunc({ head: "a.__proto__.injectedProp", resultRow: {}, item: 'pollutedValue', regExp: /\./ }); 22 | assert.strictEqual(({} as any).injectedProp, undefined, "Prototype should not be polluted"); 23 | }) 24 | }); 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013 Keyang Xiang 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /test/data/csvWithUnclosedHeader: -------------------------------------------------------------------------------- 1 | Date and time,Sample no.,Status,Sample type,Patient ID,Last name,First name,Sex of patient,Patient birth date,Patient note,Patient department,Accession number,Sample site,Physician,Operator,Department,Note,Test order code,Draw time,Approval status,Approval time,Report layout,Patient account number,,Errors detected during measurement,"Age" (years),Error code,"Weight" (kg),Error code,"Height" (m),Error code,"HCG_Beta(P)" (mIU/mL),Error code,"Troponin_I(P)" (ng/mL),Error code,"CK_MB(P)" (ng/mL),Error code,"D-Dimer(P)" (ng/mL),Error code,"hsCRP(P)" (mg/L),Error code,"Myoglobin(P)" (ng/mL),Error code,"NT-proBNP" (pg/mL),Error code,"CRP" (mg/dL),Error code,"BNP" (ng/L),Error code,"TnT" (ng/mL),Error code,"Demo(P)" (AQT),Error code,"PCT" (ng/mL),Error code, 2 | "06/14/2018 10:34 AM","12669","OK","","4206454447","","","Unknown","","","","4206454447","","","Anonymous","","","400",,"Not reviewed","","New","",,"",,,,,,,,,,,,,,,,,,,168,"",,,,,,,,,, 3 | "06/14/2018 10:19 AM","12668","OK","","4206454403","","","Unknown","","","","4206454403","","","Anonymous","","","101",,"Not reviewed","","New","",,"",,,,,,,,,<0.010,"",4.2,"",,,,,,,,,,,,,,,,,, 4 | -------------------------------------------------------------------------------- /v2/util.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | exports.trimRight = exports.trimLeft = void 0; 4 | exports.bufFromString = bufFromString; 5 | exports.emptyBuffer = emptyBuffer; 6 | exports.filterArray = filterArray; 7 | function bufFromString(str) { 8 | var length = Buffer.byteLength(str); 9 | var buffer = Buffer.allocUnsafe 10 | ? Buffer.allocUnsafe(length) 11 | : new Buffer(length); 12 | buffer.write(str); 13 | return buffer; 14 | } 15 | function emptyBuffer() { 16 | var buffer = Buffer.allocUnsafe 17 | ? Buffer.allocUnsafe(0) 18 | : new Buffer(0); 19 | return buffer; 20 | } 21 | function filterArray(arr, filter) { 22 | var rtn = []; 23 | for (var i = 0; i < arr.length; i++) { 24 | if (filter.indexOf(i) > -1) { 25 | rtn.push(arr[i]); 26 | } 27 | } 28 | return rtn; 29 | } 30 | var trimLeft = function trimLeftNative(str) { 31 | return str.trimStart(); 32 | }; 33 | exports.trimLeft = trimLeft; 34 | var trimRight = function trimRightNative(str) { 35 | return str.trimEnd(); 36 | }; 37 | exports.trimRight = trimRight; 38 | -------------------------------------------------------------------------------- /docs/performance.md: -------------------------------------------------------------------------------- 1 | # Extremely Fast 2 | 3 | `csvtojson` takes care of performance and optimise for Node.js apps. 4 | 5 | [This project](https://github.com/Keyang/csvbench) shows `csvtojson` is about **4 - 6 times** faster than other popular csv parsing libraries. 
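For reference, the operation being benchmarked is a plain CSV-to-JSON conversion. A minimal sketch, assuming the package's `fromFile` convenience method and an illustrative file path:

```ts
// Parse a CSV file and receive the rows as an array of JSON objects.
const csv = require("csvtojson");

csv()
  .fromFile("./test/data/complexJSONCSV") // any CSV file path
  .then((rows) => {
    console.log(rows.length, "rows parsed");
  });
```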
6 | 7 | # Performance Optimisation 8 | 9 | Version 1.1.0 is much faster than previous versions. 10 | 11 | The test below parses a CSV file with 300k records on a 4-core machine. 12 | 13 | Time | workerNum = 1 | workerNum = 2 (fork)* | workerNum = 3 | workerNum = 4 14 | --- | --- | --- | --- | --- 15 | **1.0.3** | 11.806s | 15.945s | 8.611s | 8.314s 16 | **1.1.0** | 9.707s | 10.065s | 5.955s | 4.563s 17 | 18 | *When workerNum=2, only 1 extra worker is created to unblock the main process, so it is reasonable that it is slightly slower than workerNum=1 (which just uses the main process). 19 | 20 | The result shows v1.1.0 gives roughly a 30% - 50% performance boost. 21 | 22 | # CPU usage leverage 23 | 24 | Below are the CPU core usages for v1.1.0 when running the test: 25 | 26 | Core | workerNum = 1 | workerNum = 2 | workerNum = 3 | workerNum = 4 27 | --- | --- | --- | --- | --- 28 | Core 1 (Main) | 100% | 25% | 60% | 90% 29 | Core 2 | N/A | 80% | 70% | 70% 30 | Core 3 | N/A | N/A | 70% | 70% 31 | Core 4 | N/A | N/A | N/A | 70% 32 | 33 | -------------------------------------------------------------------------------- /v1/interfaces/cli/main.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Convert input to process stdout 3 | */ 4 | 5 | //implementation 6 | var Converter = require("../../core/Converter.js"); 7 | function _initConverter(){ 8 | var csvConverter = new Converter(); 9 | var started = false; 10 | var writeStream = process.stdout; 11 | csvConverter.on("record_parsed",function(rowJSON){ 12 | if (started){ 13 | writeStream.write(",\n"); 14 | } 15 | writeStream.write(JSON.stringify(rowJSON)); //write parsed JSON object one by one. 16 | if (started === false){ 17 | started = true; 18 | } 19 | }); 20 | writeStream.write("[\n"); //write array symbol 21 | 22 | csvConverter.on("end_parsed",function(){ 23 | writeStream.write("\n]"); //end array symbol 24 | }); 25 | csvConverter.on("error",function(err){ 26 | console.error(err); 27 | process.exit(-1); 28 | }); 29 | return csvConverter; 30 | } 31 | function convertFile(fileName){ 32 | var csvConverter=_initConverter(); 33 | csvConverter.from(fileName); 34 | } 35 | 36 | function convertString(csvString){ 37 | var csvConverter=_initConverter(); 38 | csvConverter.from(csvString); 39 | } 40 | //module interfaces 41 | module.exports.convertFile = convertFile; 42 | module.exports.convertString = convertString; -------------------------------------------------------------------------------- /src/ProcessorLocal.test.ts: -------------------------------------------------------------------------------- 1 | import {ProcessorLocal} from "./ProcessorLocal"; 2 | import { Converter } from "./Converter"; 3 | import {readFileSync} from "fs"; 4 | import * as path from "path"; 5 | import assert = require("assert"); 6 | import { JSONResult } from "./lineToJson"; 7 | const dataDir=path.join(__dirname,"../test/data/"); 8 | describe("ProcessLocal",()=>{ 9 | it ("should process csv chunks and output json",async function (){ 10 | const processor=new ProcessorLocal(new Converter()); 11 | const data=readFileSync(dataDir+"/complexJSONCSV"); 12 | const lines=await processor.process(data); 13 | assert(lines.length === 2); 14 | const line0=lines[0] as JSONResult; 15 | assert.equal(line0.fieldA.title,"Food Factory"); 16 | assert.equal(line0.fieldA.children.length,2); 17 | assert.equal(line0.fieldA.children[1].employee[0].name,"Tim"); 18 | }) 19 | it ("should process csv chunks and output csv rows",async function (){ 20 | const processor=new ProcessorLocal(new 
Converter({output:"line"})); 21 | const data=readFileSync(dataDir+"/complexJSONCSV"); 22 | const lines=await processor.process(data); 23 | 24 | assert(lines.length === 2); 25 | }) 26 | it ("should return empty array if preRawHook removed the data",()=>{ 27 | const conv=new Converter(); 28 | conv.preRawData((str)=>{ 29 | return ""; 30 | }); 31 | const processor=new ProcessorLocal(conv); 32 | const data=readFileSync(dataDir+"/complexJSONCSV"); 33 | return processor.process(data) 34 | .then((list)=>{ 35 | assert.equal(list.length,0); 36 | }) 37 | }) 38 | }) 39 | -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | var webpack = require('webpack') 2 | var path = require('path') 3 | 4 | 5 | 6 | 7 | /* 8 | * SplitChunksPlugin is enabled by default and replaced 9 | * deprecated CommonsChunkPlugin. It automatically identifies modules which 10 | * should be splitted of chunk by heuristics using module duplication count and 11 | * module category (i. e. node_modules). And splits the chunks… 12 | * 13 | * It is safe to remove "splitChunks" from the generated configuration 14 | * and was added as an educational example. 15 | * 16 | * https://webpack.js.org/plugins/split-chunks-plugin/ 17 | * 18 | */ 19 | 20 | /* 21 | * We've enabled UglifyJSPlugin for you! This minifies your app 22 | * in order to load faster and run less javascript. 23 | * 24 | * https://github.com/webpack-contrib/uglifyjs-webpack-plugin 25 | * 26 | */ 27 | 28 | var UglifyJSPlugin = require('uglifyjs-webpack-plugin'); 29 | 30 | 31 | 32 | 33 | module.exports = { 34 | module: { 35 | rules: [] 36 | }, 37 | 38 | entry: "./index.js", 39 | 40 | output: { 41 | filename: 'browser.js', 42 | path: path.resolve(__dirname, 'browser'), 43 | libraryTarget: "commonjs2" 44 | }, 45 | 46 | mode: 'production', 47 | plugins: [ 48 | new UglifyJSPlugin(), 49 | new webpack.IgnorePlugin(/fs/), 50 | ], 51 | optimization: { 52 | splitChunks: { 53 | cacheGroups: { 54 | vendors: { 55 | priority: -10, 56 | test: /[\\/]node_modules[\\/]/ 57 | } 58 | }, 59 | 60 | chunks: 'async', 61 | minChunks: 1, 62 | minSize: 30000, 63 | name: true 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /v2/Parameters.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) { 3 | if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { 4 | if (ar || !(i in from)) { 5 | if (!ar) ar = Array.prototype.slice.call(from, 0, i); 6 | ar[i] = from[i]; 7 | } 8 | } 9 | return to.concat(ar || Array.prototype.slice.call(from)); 10 | }; 11 | Object.defineProperty(exports, "__esModule", { value: true }); 12 | exports.mergeParams = mergeParams; 13 | function mergeParams(params) { 14 | var defaultParam = { 15 | delimiter: ',', 16 | ignoreColumns: undefined, 17 | includeColumns: undefined, 18 | quote: '"', 19 | trim: true, 20 | checkType: false, 21 | ignoreEmpty: false, 22 | // fork: false, 23 | noheader: false, 24 | headers: undefined, 25 | flatKeys: false, 26 | maxRowLength: 0, 27 | checkColumn: false, 28 | escape: '"', 29 | colParser: {}, 30 | eol: undefined, 31 | alwaysSplitAtEOL: false, 32 | output: "json", 33 | nullObject: false, 34 | downstreamFormat: "line", 35 | needEmitAll: true 36 | }; 37 | if (!params) { 38 | params = {}; 39 | } 40 | for (var key in 
params) { 41 | if (params.hasOwnProperty(key)) { 42 | if (Array.isArray(params[key])) { 43 | defaultParam[key] = __spreadArray([], params[key], true); 44 | } 45 | else { 46 | defaultParam[key] = params[key]; 47 | } 48 | } 49 | } 50 | return defaultParam; 51 | } 52 | -------------------------------------------------------------------------------- /test/testErrorHandle.ts: -------------------------------------------------------------------------------- 1 | import {Converter} from "../src/Converter"; 2 | import CSVError from "../src/CSVError"; 3 | var assert = require("assert"); 4 | var fs = require("fs"); 5 | 6 | describe("Converter error handling", function() { 7 | it("should handle quote not closed", function(done) { 8 | var rs = fs.createReadStream(__dirname + "/data/dataWithUnclosedQuotes"); 9 | var conv = new Converter({}); 10 | conv.on("error", function(err:CSVError) { 11 | assert(err.err === "unclosed_quote"); 12 | done(); 13 | }); 14 | rs.pipe(conv); 15 | }); 16 | 17 | 18 | it ("should handle column number mismatched error", function(done) { 19 | var rs = fs.createReadStream(__dirname + "/data/dataWithMismatchedColumn"); 20 | var conv = new Converter({ 21 | checkColumn:true 22 | }); 23 | var tested = false; 24 | conv.on("error", function(err:CSVError) { 25 | if (tested === false) { 26 | assert(err.err === "column_mismatched"); 27 | tested = true; 28 | // done(); 29 | } 30 | }); 31 | conv.on('done',function() { 32 | assert(tested); 33 | done(); 34 | }); 35 | rs.pipe(conv); 36 | }); 37 | 38 | it("should treat quote not closed as column_mismatched when alwaysSplitAtEOL is true", function(done) { 39 | var rs = fs.createReadStream(__dirname + "/data/dataWithUnclosedQuotes"); 40 | var conv = new Converter({ 41 | checkColumn:true, 42 | alwaysSplitAtEOL:true, 43 | }); 44 | var tested = false; 45 | conv.on("error", function(err:CSVError) { 46 | if (tested === false) { 47 | assert(err.err === "column_mismatched"); 48 | tested = true; 49 | } 50 | }); 51 | conv.on('done',function() { 52 | assert(tested); 53 | done(); 54 | }); 55 | rs.pipe(conv); 56 | }); 57 | }); 58 | -------------------------------------------------------------------------------- /test/data/lineBreak: -------------------------------------------------------------------------------- 1 | F_Year,Event ID,Event Date,Time,Location,Rotue,TOC,Person type,Injury degree,FWI,Precursor,Gender,Under 16,Apparent age,Fatal,Alcohol/Drugs,Impairment,Encumbrances & Group Travel,Risk-taking Behaviour,Sub-standard conditions,Design,Operational error,Crowd management,Non-standard operation,Rushing,3rd Party Behaviour,Narrative Full,Period 2 | 2013/2014,2914930,3/2/14,23:02:00,Pembrey and Burry Port,Wales,Arriva Trains Wales,Passenger,Minor non-reportable,0.001,Passenger falls from platform onto track (no electric shock nor struck by train) under the influence,Male,,Unknown,,x,,,,,,,,,,,"A report was initially received from Dyfed Powys Police that a person had been struck by a train at Pembrey and Burry Port station and that an ambulance was in attendance (the ambulance was the source of the police report). It later transpired that the person was inebriated and had fallen off of Pembrey platform after attempting to board the train after the doors were closed and the train was ready to depart. It has been confirmed (by Pembrey signaller) that the person has not been struck by the train and has NOT fallen under the train. One police officer on site. 
3 | 4 | Later advice from the train crew working 2B21 2135 Milford Haven to Cardiff Central stated the person had attempted to board 2B21 as it was ready to depart from the platform, bounced off of the train falling backwards, had then gotten back up and then fallen off of the platform and onto the track behind the train. The persons injuries (if any) have not been disclosed. 5 | 6 | The person was been removed from the track and normal working resumed from 2326. The on call MOM was called to attend and act a RIO if required, but was stood down after the incident was resolved. 7 | 8 | The initial call came from Dyfed Powys Police under reference 333 when it was advised that their report had come from the Ambulance Service (reference 1523673) and the BTP were also made aware by Dyfed Powys Police under their reference 470.",13 9 | -------------------------------------------------------------------------------- /v2/Converter.d.ts: -------------------------------------------------------------------------------- 1 | import { Transform, TransformOptions, Readable } from "stream"; 2 | import { CSVParseParam } from "./Parameters"; 3 | import { ParseRuntime } from "./ParseRuntime"; 4 | import CSVError from "./CSVError"; 5 | export declare class Converter extends Transform implements PromiseLike { 6 | options: TransformOptions; 7 | preRawData(onRawData: PreRawDataCallback): Converter; 8 | preFileLine(onFileLine: PreFileLineCallback): Converter; 9 | subscribe(onNext?: (data: any, lineNumber: number) => void | PromiseLike, onError?: (err: CSVError) => void, onCompleted?: () => void): Converter; 10 | fromFile(filePath: string, options?: string | CreateReadStreamOption | undefined): Converter; 11 | fromStream(readStream: Readable): Converter; 12 | fromString(csvString: string): Converter; 13 | then(onfulfilled?: (value: any[]) => TResult1 | PromiseLike, onrejected?: (reason: any) => TResult2 | PromiseLike): PromiseLike; 14 | get parseParam(): CSVParseParam; 15 | get parseRuntime(): ParseRuntime; 16 | private params; 17 | private runtime; 18 | private processor; 19 | private result; 20 | constructor(param?: Partial, options?: TransformOptions); 21 | _transform(chunk: any, encoding: string, cb: Function): void; 22 | _flush(cb: Function): void; 23 | private processEnd; 24 | get parsedLineNumber(): number; 25 | } 26 | export interface CreateReadStreamOption { 27 | flags?: string; 28 | encoding?: string; 29 | fd?: number; 30 | mode?: number; 31 | autoClose?: boolean; 32 | start?: number; 33 | end?: number; 34 | highWaterMark?: number; 35 | } 36 | export type CallBack = (err: Error, data: Array) => void; 37 | export type PreFileLineCallback = (line: string, lineNumber: number) => string | PromiseLike; 38 | export type PreRawDataCallback = (csvString: string) => string | PromiseLike; 39 | -------------------------------------------------------------------------------- /v2/CSVError.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __extends = (this && this.__extends) || (function () { 3 | var extendStatics = function (d, b) { 4 | extendStatics = Object.setPrototypeOf || 5 | ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || 6 | function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; 7 | return extendStatics(d, b); 8 | }; 9 | return function (d, b) { 10 | if (typeof b !== "function" && b !== null) 11 | throw new TypeError("Class extends value " + String(b) + " is not a constructor 
or null"); 12 | extendStatics(d, b); 13 | function __() { this.constructor = d; } 14 | d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); 15 | }; 16 | })(); 17 | Object.defineProperty(exports, "__esModule", { value: true }); 18 | var CSVError = /** @class */ (function (_super) { 19 | __extends(CSVError, _super); 20 | function CSVError(err, line, extra) { 21 | var _this = _super.call(this, "Error: " + err + ". JSON Line number: " + line + (extra ? " near: " + extra : "")) || this; 22 | _this.err = err; 23 | _this.line = line; 24 | _this.extra = extra; 25 | _this.name = "CSV Parse Error"; 26 | return _this; 27 | } 28 | CSVError.column_mismatched = function (index, extra) { 29 | return new CSVError("column_mismatched", index, extra); 30 | }; 31 | CSVError.unclosed_quote = function (index, extra) { 32 | return new CSVError("unclosed_quote", index, extra); 33 | }; 34 | CSVError.fromJSON = function (obj) { 35 | return new CSVError(obj.err, obj.line, obj.extra); 36 | }; 37 | CSVError.prototype.toJSON = function () { 38 | return { 39 | err: this.err, 40 | line: this.line, 41 | extra: this.extra 42 | }; 43 | }; 44 | return CSVError; 45 | }(Error)); 46 | exports.default = CSVError; 47 | -------------------------------------------------------------------------------- /v2/ParseRuntime.d.ts: -------------------------------------------------------------------------------- 1 | import { CellParser } from "./Parameters"; 2 | import { Converter, PreRawDataCallback, PreFileLineCallback } from "./Converter"; 3 | import CSVError from "./CSVError"; 4 | export interface ParseRuntime { 5 | /** 6 | * If need convert ignoreColumn from column name(string) to column index (number). Parser needs column index. 7 | */ 8 | needProcessIgnoreColumn: boolean; 9 | /** 10 | * If need convert includeColumn from column name(string) to column index (number). Parser needs column index. 11 | */ 12 | needProcessIncludeColumn: boolean; 13 | /** 14 | * the indexes of columns to reserve, undefined means reserve all, [] means hide all 15 | */ 16 | selectedColumns?: number[]; 17 | ended: boolean; 18 | hasError: boolean; 19 | error?: Error; 20 | /** 21 | * Inferred delimiter 22 | */ 23 | delimiter: string | string[]; 24 | /** 25 | * Inferred eol 26 | */ 27 | eol?: string; 28 | /** 29 | * Converter function for a column. Populated at runtime. 30 | */ 31 | columnConv: (CellParser | null)[]; 32 | headerType: any[]; 33 | headerTitle: string[]; 34 | headerFlag: any[]; 35 | /** 36 | * Inferred headers 37 | */ 38 | headers?: any[]; 39 | csvLineBuffer?: Buffer; 40 | /** 41 | * after first chunk of data being processed and emitted, started will become true. 
42 | */ 43 | started: boolean; 44 | preRawDataHook?: PreRawDataCallback; 45 | preFileLineHook?: PreFileLineCallback; 46 | parsedLineNumber: number; 47 | columnValueSetter: Function[]; 48 | subscribe?: { 49 | onNext?: (data: any, lineNumber: number) => void | PromiseLike; 50 | onError?: (err: CSVError) => void; 51 | onCompleted?: () => void; 52 | }; 53 | then?: { 54 | onfulfilled: (value: any[]) => any; 55 | onrejected: (err: Error) => any; 56 | }; 57 | } 58 | export declare function initParseRuntime(converter: Converter): ParseRuntime; 59 | -------------------------------------------------------------------------------- /v1/core/parserMgr.js: -------------------------------------------------------------------------------- 1 | //implementation 2 | var registeredParsers = []; 3 | var Parser = require("./parser.js"); 4 | var defaultParser = require("./defaultParsers"); 5 | 6 | function registerParser (parser) { 7 | if (parser instanceof Parser && registeredParsers.indexOf(parser) === -1) { 8 | registeredParsers.push(parser); // TODO indexOf doesn't work with object references 9 | } 10 | } 11 | 12 | function getParser(columnTitle, param) { 13 | var inst, parser; 14 | function getParserByName(parserName) { 15 | var parser; 16 | registeredParsers.forEach(function(p){ 17 | if (p.getName() === parserName){ 18 | parser = p; 19 | } 20 | }); 21 | if (parser) { 22 | var inst = parser.clone(); 23 | return inst; 24 | } 25 | return new Parser(); //TODO remove new 26 | } 27 | columnTitle = columnTitle ? columnTitle : ''; 28 | registeredParsers.forEach(function(p) { 29 | if (p.test(columnTitle)) { 30 | parser=p; 31 | } 32 | }); 33 | if (parser) { 34 | inst = parser.clone(); 35 | inst.head = columnTitle; 36 | } else { 37 | inst = getParserByName("json", columnTitle); 38 | } 39 | inst.setParam(param); 40 | inst.initHead(columnTitle); 41 | return inst; 42 | } 43 | 44 | function addParser(name, regExp, parseFunc) { 45 | var parser = new Parser(name, regExp, parseFunc,false); //TODO remove new 46 | registerParser(parser); 47 | } 48 | 49 | function addSafeParser(parserPath) { 50 | //TODO impl 51 | } 52 | 53 | function initParsers(row, param) { 54 | var parsers = []; 55 | row.forEach(function (columnTitle) { 56 | parsers.push(getParser(columnTitle, param)); 57 | }); 58 | return parsers; 59 | } 60 | 61 | defaultParser.forEach(function (parserCfg){ 62 | //TODO refactor this 63 | addParser(parserCfg.name, parserCfg.regExp, parserCfg.parserFunc, parserCfg.processSafe); 64 | }); 65 | 66 | //module interfaces 67 | module.exports.addParser = addParser; 68 | module.exports.initParsers = initParsers; 69 | module.exports.getParser = getParser; 70 | -------------------------------------------------------------------------------- /v1/core/worker.js: -------------------------------------------------------------------------------- 1 | var param = null; 2 | var fileLine = require("./fileline"); 3 | var csvline = require("./csvline"); 4 | var linesToJson = require("./linesToJson"); 5 | var CSVError = require('./CSVError'); 6 | var eom = "\x03"; 7 | var eom1 = "\x0e"; 8 | var eom2 = "\x0f"; 9 | /** 10 | * message is like : 11 | * 0{"a":"b"} 12 | * 13345|a,b,c 13 | * 14 | * is 0-9 15 | */ 16 | var buffer=""; 17 | process.stdin.on("data", function(d) { 18 | var str = d.toString("utf8"); 19 | var all = buffer + str; 20 | var cmdArr = all.split(eom); 21 | while (cmdArr.length > 1) { 22 | processMsg(cmdArr.shift()); 23 | } 24 | buffer = cmdArr[0]; 25 | }); 26 | 27 | process.on("message", processMsg); 28 | function processMsg(msg) { 29 
| if (msg) { 30 | var cmd = msg[0]; 31 | var data = msg.substr(1); 32 | switch (cmd) { 33 | case "0": 34 | initParams(data); 35 | break; 36 | case "1": 37 | processData(data); 38 | break; 39 | default: 40 | console.error("Unknown command: " + msg); 41 | } 42 | } 43 | } 44 | 45 | function initParams(data) { 46 | param = JSON.parse(data); 47 | } 48 | 49 | /** 50 | * e.g. 51 | * 1023|a,b,c,d\ne,f,g,h\n 52 | * | 53 | */ 54 | function processData(data) { 55 | if (!param){ 56 | console.error("Parameter not initialised when processing data."); 57 | process.exit(1); 58 | } 59 | var sepIdx = data.indexOf("|"); 60 | var startIdx = parseInt(data.substr(0, sepIdx)); 61 | var csvData = data.substr(sepIdx + 1); 62 | var lines = fileLine(csvData,param); //convert to file lines. 63 | // process.send("0"+lines.lines.length+"|"+lines.partial); 64 | var csvLines = csvline(lines.lines,param); 65 | var res = linesToJson(csvLines.lines,param,startIdx); 66 | if (csvLines.partial) { 67 | var lastIdx = res.length > 0 ? res[res.length - 1].index + 1 : startIdx; 68 | res.push({ 69 | err:CSVError.unclosed_quote(lastIdx, csvLines.partial) 70 | }); 71 | } 72 | // console.error(res) 73 | //1|^|^err|^data|&|^|^err|^data 74 | var str="1"; 75 | res.forEach(function(item) { 76 | var errStr = item.err ? item.err.toString() : ""; 77 | str += item.index + eom2 + JSON.stringify(item.row) + eom2 + errStr + eom2 + JSON.stringify(item.json) + eom1; 78 | }); 79 | sendData(str); 80 | } 81 | 82 | function sendData(str) { 83 | process.stdout.write(str + eom); 84 | } 85 | -------------------------------------------------------------------------------- /v2/worker.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | // import { Converter } from "./Converter"; 3 | // import { Message, InitMessage, EOM } from "./ProcessFork"; 4 | // import CSVError from "./CSVError"; 5 | // import { CSVParseParam } from "./Parameters"; 6 | // process.on("message", processMsg); 7 | // let conv: Converter; 8 | // function processMsg(msg: Message) { 9 | // if (msg.cmd === "init") { 10 | // const param = prepareParams((msg as InitMessage).params); 11 | // param.fork = false; 12 | // conv = new Converter(param); 13 | // process.stdin.pipe(conv).pipe(process.stdout); 14 | // conv.on("error", (err) => { 15 | // if ((err as CSVError).line) { 16 | // process.stderr.write(JSON.stringify({ 17 | // err: (err as CSVError).err, 18 | // line: (err as CSVError).line, 19 | // extra: (err as CSVError).extra 20 | // })) 21 | // } else { 22 | // process.stderr.write(JSON.stringify({ 23 | // err: err.message, 24 | // line: -1, 25 | // extra: "Unknown error" 26 | // })); 27 | // } 28 | // }); 29 | // conv.on("eol", (eol) => { 30 | // // console.log("eol!!!",eol); 31 | // if (process.send) 32 | // process.send({ cmd: "eol", "value": eol }); 33 | // }) 34 | // conv.on("header", (header) => { 35 | // if (process.send) 36 | // process.send({ cmd: "header", "value": header }); 37 | // }) 38 | // conv.on("done", () => { 39 | // const drained = process.stdout.write("", () => { 40 | // if (drained) { 41 | // gracelyExit(); 42 | // } 43 | // }); 44 | // if (!drained) { 45 | // process.stdout.on("drain", gracelyExit) 46 | // } 47 | // // process.stdout.write(EOM); 48 | // }) 49 | // if (process.send) { 50 | // process.send({ cmd: "inited" }); 51 | // } 52 | // } 53 | // } 54 | // function gracelyExit(){ 55 | // setTimeout(()=>{ 56 | // conv.removeAllListeners(); 57 | // process.removeAllListeners(); 58 | // },50); 59 | // } 60 | // 
function prepareParams(p: any): CSVParseParam { 61 | // if (p.ignoreColumns) { 62 | // p.ignoreColumns = new RegExp(p.ignoreColumns.source, p.ignoreColumns.flags) 63 | // } 64 | // if (p.includeColumns) { 65 | // p.includeColumns = new RegExp(p.includeColumns.source, p.includeColumns.flags) 66 | // } 67 | // return p; 68 | // } 69 | // process.on("disconnect", () => { 70 | // process.exit(-1); 71 | // }); 72 | -------------------------------------------------------------------------------- /src/worker.ts: -------------------------------------------------------------------------------- 1 | // import { Converter } from "./Converter"; 2 | // import { Message, InitMessage, EOM } from "./ProcessFork"; 3 | // import CSVError from "./CSVError"; 4 | // import { CSVParseParam } from "./Parameters"; 5 | // process.on("message", processMsg); 6 | // let conv: Converter; 7 | // function processMsg(msg: Message) { 8 | // if (msg.cmd === "init") { 9 | // const param = prepareParams((msg as InitMessage).params); 10 | // param.fork = false; 11 | // conv = new Converter(param); 12 | // process.stdin.pipe(conv).pipe(process.stdout); 13 | // conv.on("error", (err) => { 14 | // if ((err as CSVError).line) { 15 | // process.stderr.write(JSON.stringify({ 16 | // err: (err as CSVError).err, 17 | // line: (err as CSVError).line, 18 | // extra: (err as CSVError).extra 19 | // })) 20 | // } else { 21 | // process.stderr.write(JSON.stringify({ 22 | // err: err.message, 23 | // line: -1, 24 | // extra: "Unknown error" 25 | // })); 26 | // } 27 | 28 | // }); 29 | // conv.on("eol", (eol) => { 30 | // // console.log("eol!!!",eol); 31 | // if (process.send) 32 | // process.send({ cmd: "eol", "value": eol }); 33 | // }) 34 | // conv.on("header", (header) => { 35 | // if (process.send) 36 | // process.send({ cmd: "header", "value": header }); 37 | // }) 38 | // conv.on("done", () => { 39 | // const drained = process.stdout.write("", () => { 40 | // if (drained) { 41 | // gracelyExit(); 42 | // } 43 | // }); 44 | // if (!drained) { 45 | // process.stdout.on("drain", gracelyExit) 46 | // } 47 | 48 | 49 | // // process.stdout.write(EOM); 50 | // }) 51 | // if (process.send) { 52 | // process.send({ cmd: "inited" }); 53 | // } 54 | 55 | 56 | // } 57 | // } 58 | // function gracelyExit(){ 59 | // setTimeout(()=>{ 60 | // conv.removeAllListeners(); 61 | // process.removeAllListeners(); 62 | // },50); 63 | // } 64 | // function prepareParams(p: any): CSVParseParam { 65 | // if (p.ignoreColumns) { 66 | // p.ignoreColumns = new RegExp(p.ignoreColumns.source, p.ignoreColumns.flags) 67 | // } 68 | // if (p.includeColumns) { 69 | // p.includeColumns = new RegExp(p.includeColumns.source, p.includeColumns.flags) 70 | // } 71 | // return p; 72 | // } 73 | 74 | // process.on("disconnect", () => { 75 | // process.exit(-1); 76 | // }); -------------------------------------------------------------------------------- /src/dataClean.ts: -------------------------------------------------------------------------------- 1 | import { ParseRuntime } from "./ParseRuntime"; 2 | function stripBom(string) { 3 | if (typeof string !== 'string') { 4 | throw new TypeError(`Expected a string, got ${typeof string}`); 5 | } 6 | 7 | // Catches EFBBBF (UTF-8 BOM) because the buffer-to-string 8 | // conversion translates it to FEFF (UTF-16 BOM). 9 | if (string.charCodeAt(0) === 0xFEFF) { 10 | return string.slice(1); 11 | } 12 | 13 | return string; 14 | } 15 | 16 | /** 17 | * For each data chunk coming to parser: 18 | * 1. 
append the data to the buffer that is left from last chunk 19 | * 2. check if utf8 chars being split, if does, stripe the bytes and add to left buffer. 20 | * 3. stripBom 21 | */ 22 | export function prepareData(chunk: Buffer, runtime: ParseRuntime): string { 23 | const workChunk = concatLeftChunk(chunk, runtime); 24 | runtime.csvLineBuffer = undefined; 25 | const cleanCSVString = cleanUtf8Split(workChunk, runtime).toString("utf8"); 26 | if (runtime.started === false) { 27 | return stripBom(cleanCSVString); 28 | } else { 29 | return cleanCSVString; 30 | } 31 | } 32 | /** 33 | * append data to buffer that is left form last chunk 34 | */ 35 | function concatLeftChunk(chunk: Buffer, runtime: ParseRuntime): Buffer { 36 | if (runtime.csvLineBuffer && runtime.csvLineBuffer.length > 0) { 37 | return Buffer.concat([runtime.csvLineBuffer, chunk]); 38 | } else { 39 | return chunk; 40 | } 41 | } 42 | /** 43 | * check if utf8 chars being split, if does, stripe the bytes and add to left buffer. 44 | */ 45 | function cleanUtf8Split(chunk: Buffer, runtime: ParseRuntime): Buffer { 46 | let idx = chunk.length - 1; 47 | /** 48 | * From Keyang: 49 | * The code below is to check if a single utf8 char (which could be multiple bytes) being split. 50 | * If the char being split, the buffer from two chunk needs to be concat 51 | * check how utf8 being encoded to understand the code below. 52 | * If anyone has any better way to do this, please let me know. 53 | */ 54 | if ((chunk[idx] & 1 << 7) != 0) { 55 | while ((chunk[idx] & 3 << 6) === 128) { 56 | idx--; 57 | } 58 | idx--; 59 | } 60 | if (idx != chunk.length - 1) { 61 | runtime.csvLineBuffer = chunk.slice(idx + 1); 62 | return chunk.slice(0, idx + 1) 63 | // var _cb=cb; 64 | // var self=this; 65 | // cb=function(){ 66 | // if (self._csvLineBuffer){ 67 | // self._csvLineBuffer=Buffer.concat([bufFromString(self._csvLineBuffer,"utf8"),left]); 68 | // }else{ 69 | // self._csvLineBuffer=left; 70 | // } 71 | // _cb(); 72 | // } 73 | } else { 74 | return chunk; 75 | } 76 | } -------------------------------------------------------------------------------- /src/ParseRuntime.ts: -------------------------------------------------------------------------------- 1 | import { CSVParseParam, CellParser } from "./Parameters"; 2 | import { Converter, PreRawDataCallback, PreFileLineCallback } from "./Converter"; 3 | import { ChildProcess } from "child_process"; 4 | import CSVError from "./CSVError"; 5 | 6 | export interface ParseRuntime { 7 | /** 8 | * If need convert ignoreColumn from column name(string) to column index (number). Parser needs column index. 9 | */ 10 | needProcessIgnoreColumn: boolean; 11 | /** 12 | * If need convert includeColumn from column name(string) to column index (number). Parser needs column index. 13 | */ 14 | needProcessIncludeColumn: boolean; 15 | /** 16 | * the indexes of columns to reserve, undefined means reserve all, [] means hide all 17 | */ 18 | selectedColumns?: number[]; 19 | ended: boolean; 20 | hasError: boolean; 21 | error?: Error; 22 | /** 23 | * Inferred delimiter 24 | */ 25 | delimiter: string | string[]; 26 | /** 27 | * Inferred eol 28 | */ 29 | eol?: string; 30 | /** 31 | * Converter function for a column. Populated at runtime. 
32 | */ 33 | columnConv: (CellParser | null)[], 34 | headerType: any[], 35 | headerTitle: string[], 36 | headerFlag: any[], 37 | /** 38 | * Inferred headers 39 | */ 40 | headers?: any[], 41 | csvLineBuffer?: Buffer, 42 | 43 | /** 44 | * after first chunk of data being processed and emitted, started will become true. 45 | */ 46 | started: boolean, 47 | preRawDataHook?: PreRawDataCallback, 48 | preFileLineHook?: PreFileLineCallback, 49 | parsedLineNumber: number, 50 | 51 | columnValueSetter: Function[]; 52 | subscribe?: { 53 | onNext?: (data: any, lineNumber:number) => void | PromiseLike; 54 | onError?: (err: CSVError) => void; 55 | onCompleted?: () => void; 56 | }; 57 | then?: { 58 | onfulfilled: (value: any[]) => any; 59 | onrejected: (err: Error) => any; 60 | } 61 | 62 | } 63 | export function initParseRuntime(converter: Converter): ParseRuntime { 64 | const params = converter.parseParam; 65 | const rtn: ParseRuntime = { 66 | needProcessIgnoreColumn: false, 67 | needProcessIncludeColumn: false, 68 | selectedColumns: undefined, 69 | ended: false, 70 | hasError: false, 71 | error: undefined, 72 | delimiter: converter.parseParam.delimiter, 73 | eol: converter.parseParam.eol, 74 | columnConv: [], 75 | headerType: [], 76 | headerTitle: [], 77 | headerFlag: [], 78 | headers: undefined, 79 | started: false, 80 | parsedLineNumber: 0, 81 | columnValueSetter: [], 82 | } 83 | if (params.ignoreColumns) { 84 | rtn.needProcessIgnoreColumn = true; 85 | } 86 | if (params.includeColumns) { 87 | rtn.needProcessIncludeColumn = true; 88 | } 89 | return rtn; 90 | } -------------------------------------------------------------------------------- /v1/core/defaultParsers/parser_json.js: -------------------------------------------------------------------------------- 1 | var arrReg = /\[([0-9]*)\]/; 2 | 3 | 4 | function processHead(pointer, headArr, arrReg, flatKeys) { 5 | var headStr, match, index; 6 | while (headArr.length > 1) { 7 | headStr = headArr.shift(); 8 | // match = headStr.match(arrReg); 9 | match = flatKeys ? false : headStr.match(arrReg); 10 | if (match) { //if its array, we need add an empty json object into specified index. 11 | if (pointer[headStr.replace(match[0], '')] === undefined) { 12 | pointer[headStr.replace(match[0], '')] = []; 13 | } 14 | index = match[1]; //get index where json object should stay 15 | pointer = pointer[headStr.replace(match[0], '')]; 16 | if (index === '') { //if its dynamic array index, push to the end 17 | index = pointer.length; 18 | } 19 | if (!pointer[index]) { //current index in the array is empty. we need create a new json object. 20 | pointer[index] = {}; 21 | } 22 | pointer = pointer[index]; 23 | } else { //not array, just normal JSON object. we get the reference of it 24 | if (pointer[headStr] === undefined) { 25 | pointer[headStr] = {}; 26 | } 27 | pointer = pointer[headStr]; 28 | } 29 | } 30 | return pointer; 31 | } 32 | module.exports = { 33 | "name": "json", 34 | "processSafe": true, 35 | "regExp": /^\*json\*/, 36 | "parserFunc": function parser_json(params) { 37 | var fieldStr = this.getHeadStr(); 38 | var headArr = (params.config && params.config.flatKeys) ? [fieldStr] : fieldStr.split('.'); 39 | var match, index, key; 40 | //now the pointer is pointing the position to add a key/value pair. 41 | var pointer = processHead(params.resultRow, headArr, arrReg, params.config && params.config.flatKeys); 42 | key = headArr.shift(); 43 | match = (params.config && params.config.flatKeys) ? 
false : key.match(arrReg); 44 | if (match) { // the last element is an array, we need check and treat it as an array. 45 | try { 46 | key = key.replace(match[0], ''); 47 | if (!pointer[key] || !(pointer[key] instanceof Array)) { 48 | pointer[key] = []; 49 | } 50 | if (pointer[key]) { 51 | index = match[1]; 52 | if (index === '') { 53 | index = pointer[key].length; 54 | } 55 | pointer[key][index] = params.item; 56 | } else { 57 | params.resultRow[fieldStr] = params.item; 58 | } 59 | } catch (e) { 60 | params.resultRow[fieldStr] = params.item; 61 | } 62 | } else { 63 | if (typeof pointer === "string"){ 64 | params.resultRow[fieldStr] = params.item; 65 | }else{ 66 | pointer[key] = params.item; 67 | } 68 | } 69 | } 70 | }; 71 | -------------------------------------------------------------------------------- /v2/dataClean.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | exports.prepareData = prepareData; 4 | function stripBom(string) { 5 | if (typeof string !== 'string') { 6 | throw new TypeError("Expected a string, got ".concat(typeof string)); 7 | } 8 | // Catches EFBBBF (UTF-8 BOM) because the buffer-to-string 9 | // conversion translates it to FEFF (UTF-16 BOM). 10 | if (string.charCodeAt(0) === 0xFEFF) { 11 | return string.slice(1); 12 | } 13 | return string; 14 | } 15 | /** 16 | * For each data chunk coming to parser: 17 | * 1. append the data to the buffer that is left from last chunk 18 | * 2. check if utf8 chars being split, if does, stripe the bytes and add to left buffer. 19 | * 3. stripBom 20 | */ 21 | function prepareData(chunk, runtime) { 22 | var workChunk = concatLeftChunk(chunk, runtime); 23 | runtime.csvLineBuffer = undefined; 24 | var cleanCSVString = cleanUtf8Split(workChunk, runtime).toString("utf8"); 25 | if (runtime.started === false) { 26 | return stripBom(cleanCSVString); 27 | } 28 | else { 29 | return cleanCSVString; 30 | } 31 | } 32 | /** 33 | * append data to buffer that is left form last chunk 34 | */ 35 | function concatLeftChunk(chunk, runtime) { 36 | if (runtime.csvLineBuffer && runtime.csvLineBuffer.length > 0) { 37 | return Buffer.concat([runtime.csvLineBuffer, chunk]); 38 | } 39 | else { 40 | return chunk; 41 | } 42 | } 43 | /** 44 | * check if utf8 chars being split, if does, stripe the bytes and add to left buffer. 45 | */ 46 | function cleanUtf8Split(chunk, runtime) { 47 | var idx = chunk.length - 1; 48 | /** 49 | * From Keyang: 50 | * The code below is to check if a single utf8 char (which could be multiple bytes) being split. 51 | * If the char being split, the buffer from two chunk needs to be concat 52 | * check how utf8 being encoded to understand the code below. 53 | * If anyone has any better way to do this, please let me know. 
54 | */ 55 | if ((chunk[idx] & 1 << 7) != 0) { 56 | while ((chunk[idx] & 3 << 6) === 128) { 57 | idx--; 58 | } 59 | idx--; 60 | } 61 | if (idx != chunk.length - 1) { 62 | runtime.csvLineBuffer = chunk.slice(idx + 1); 63 | return chunk.slice(0, idx + 1); 64 | // var _cb=cb; 65 | // var self=this; 66 | // cb=function(){ 67 | // if (self._csvLineBuffer){ 68 | // self._csvLineBuffer=Buffer.concat([bufFromString(self._csvLineBuffer,"utf8"),left]); 69 | // }else{ 70 | // self._csvLineBuffer=left; 71 | // } 72 | // _cb(); 73 | // } 74 | } 75 | else { 76 | return chunk; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /v1/core/defParam.js: -------------------------------------------------------------------------------- 1 | var numExp = /^[0-9]+$/; 2 | module.exports = function (params) { 3 | var _param = { 4 | constructResult: true, //set to false to not construct result in memory. suitable for big csv data 5 | delimiter: ',', // change the delimiter of csv columns. It is able to use an array to specify potencial delimiters. e.g. [",","|",";"] 6 | ignoreColumns: [], // columns to ignore upon input. 7 | includeColumns: [], // columns to include upon input. 8 | quote: '"', //quote for a column containing delimiter. 9 | trim: true, //trim column's space charcters 10 | checkType: false, //whether check column type 11 | toArrayString: false, //stream down stringified json array instead of string of json. (useful if downstream is file writer etc) 12 | ignoreEmpty: false, //Ignore empty value while parsing. if a value of the column is empty, it will be skipped parsing. 13 | workerNum: getEnv("CSV_WORKER", 1), //number of parallel workers. If multi-core CPU available, increase the number will get better performance for large csv data. 14 | fork: false, //use another CPU core to convert the csv stream 15 | noheader: false, //indicate if first line of CSV file is header or not. 16 | headers: null, //an array of header strings. If noheader is false and headers is array, csv header will be ignored. 17 | flatKeys: false, // Don't interpret dots and square brackets in header fields as nested object or array identifiers at all. 18 | maxRowLength: 0, //the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit "error" of "row_exceed". if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 0 19 | checkColumn: false, //whether check column number of a row is the same as headers. If column number mismatched headers number, an error of "mismatched_column" will be emitted.. default: false 20 | escape: '"', //escape char for quoted column 21 | colParser:{}, //flags on columns to alter field processing. 
22 | 23 | /**below are internal params */ 24 | _columnConv:[], 25 | _headerType: [], 26 | _headerTitle: [], 27 | _headerFlag: [], 28 | _headers: null, 29 | _needFilterRow: false 30 | }; 31 | if (!params) { 32 | params = {}; 33 | } 34 | for (var key in params) { 35 | if (params.hasOwnProperty(key)) { 36 | if (Array.isArray(params[key])) { 37 | _param[key] = [].concat(params[key]); 38 | } else { 39 | _param[key] = params[key]; 40 | } 41 | } 42 | } 43 | if (_param.ignoreColumns.length > 0 && !numExp.test(_param.ignoreColumns.join(""))) { 44 | _param._postIgnoreColumns = true; 45 | } 46 | if (_param.includeColumns.length > 0 && !numExp.test(_param.includeColumns.join(""))) { 47 | _param._postIncludeColumns = true; 48 | } 49 | 50 | if (_param.ignoreColumns.length || _param.includeColumns.length) { 51 | _param._needFilterRow = true; 52 | if (!_param._postIgnoreColumns){ 53 | _param.ignoreColumns.sort(function (a, b) { return b-a;}); 54 | } 55 | } 56 | 57 | 58 | return _param; 59 | }; 60 | 61 | function getEnv(key, def) { 62 | if (process.env[key]) { 63 | return process.env[key]; 64 | } else { 65 | return def; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /v1/core/parser.js: -------------------------------------------------------------------------------- 1 | var explicitTypes = ["number", "string"]; 2 | 3 | function Parser(name, regExp, parser, processSafe) { 4 | this.name = typeof name === "undefined" ? "Default" : name; 5 | this.regExp = null; 6 | this.type = ""; 7 | this.processSafe = processSafe; 8 | if (typeof regExp !== "undefined") { 9 | if (typeof regExp === "string") { 10 | this.regExp = new RegExp(regExp); 11 | } else { 12 | this.regExp = regExp; 13 | } 14 | } 15 | if (typeof parser !== "undefined") { 16 | this.parse = parser; 17 | } 18 | } 19 | // var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; 20 | Parser.prototype.convertType = function(item) { 21 | var type=this.type; 22 | if (type === 'number') { 23 | var rtn = parseFloat(item); 24 | if (isNaN(rtn)) { 25 | return 0; 26 | } else { 27 | return rtn; 28 | } 29 | } else if (this.param && this.param.checkType && type === '') { 30 | var trimed = item.trim(); 31 | if (trimed === ""){ 32 | return trimed; 33 | } 34 | if (!isNaN(trimed)) { 35 | return parseFloat(trimed); 36 | } else if (trimed.length === 5 && trimed.toLowerCase() === "false") { 37 | return false; 38 | } else if (trimed.length === 4 && trimed.toLowerCase() === "true") { 39 | return true; 40 | } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1]==="]") { 41 | try { 42 | return JSON.parse(trimed); 43 | } catch (e) { 44 | return item; 45 | } 46 | } else { 47 | return item; 48 | } 49 | } 50 | return item; 51 | }; 52 | 53 | Parser.prototype.setParam = function(param) { 54 | this.param = param; 55 | }; 56 | 57 | Parser.prototype.test = function(str) { 58 | return this.regExp && this.regExp.test(str); 59 | }; 60 | 61 | Parser.prototype.parse = function(params) { 62 | params.resultRow[params.head] = params.item; 63 | }; 64 | 65 | Parser.prototype.getHeadStr = function() { 66 | if (this.headStr) { 67 | return this.headStr; 68 | } else { 69 | var head = this.head; 70 | this.headStr = head.replace(this.regExp, ''); 71 | if (!this.headStr) { 72 | this.headStr = "Unknown Header"; 73 | } 74 | return this.getHeadStr(); 75 | } 76 | }; 77 | 78 | Parser.prototype.getHead = function() { 79 | return this.head; 80 | }; 81 | 82 | Parser.prototype.initHead = 
function(columnTitle) { 83 | this.head = columnTitle; 84 | var wholeHead = columnTitle.replace(this.regExp, ''); 85 | //init type && headStr 86 | var splitArr = wholeHead.split("#!"); 87 | if (splitArr.length === 1) { //no explicit type 88 | this.headStr = splitArr[0]; 89 | } else { 90 | var type = splitArr.shift(); 91 | if (explicitTypes.indexOf(type.toLowerCase()) > -1) { 92 | this.type = type; 93 | this.headStr = splitArr.join("#!"); 94 | } else { //no explicit type 95 | this.headStr = wholeHead; 96 | } 97 | } 98 | if (!this.headStr) { 99 | this.headStr = wholeHead ? wholeHead : "Unknown Head"; 100 | } 101 | }; 102 | 103 | Parser.prototype.clone = function() { 104 | var obj = Object.create(this); 105 | var newParser = new Parser(); 106 | for (var key in obj) { 107 | newParser[key] = obj[key]; 108 | } 109 | return newParser; 110 | }; 111 | 112 | Parser.prototype.getName = function() { 113 | return this.name; 114 | }; 115 | 116 | module.exports = Parser; 117 | -------------------------------------------------------------------------------- /test/data/longHeader: -------------------------------------------------------------------------------- 1 | Date,Advertiser,Advertiser ID,Advertiser Status,Advertiser Integration Code,Insertion Order,Insertion Order ID,Insertion Order Status,Insertion Order Integration Code,Partner Currency,Advertiser Currency,Clicks,% Clicks Leading to Conversions,Conversions per 1000 Impressions,CPM Fee 1 (Adv Currency),CPM Fee 1 (Partner Currency),CPM Fee 1 (USD),CPM Fee 2 (Adv Currency),CPM Fee 2 (Partner Currency),CPM Fee 2 (USD),Click Rate (CTR),Data Fees (Adv Currency),Data Fees (Partner Currency),Data Fees (USD),Impressions,% Impressions Leading to Conversions,Post-Click Conversions,Post-View Conversions,Media Cost (Advertiser Currency),Media Cost eCPA (PC) (Adv Currency),Media Cost eCPA (PC) (Partner Currency),Media Cost eCPA (PC) (USD),Media Cost eCPA (PV) (Adv Currency),Media Cost eCPA (PV) (Partner Currency),Media Cost eCPA (PV) (USD),Media Cost eCPA (Adv Currency),Media Cost eCPA (Partner Currency),Media Cost eCPA (USD),Video Media Cost eCPCV (Adv Currency),Video Media Cost eCPCV (Partner Currency),Video Media Cost eCPCV (USD),Media Cost eCPC (Adv Currency),Media Cost eCPC (Partner Currency),Media Cost eCPC (USD),Media Cost eCPM (Adv Currency),Media Cost eCPM (Partner Currency),Media Cost eCPM (USD),Media Cost (Partner Currency),Media Cost (USD),Media Fee 1 (Adv Currency),Media Fee 1 (Partner Currency),Media Fee 1 (USD),Media Fee 2 (Adv Currency),Media Fee 2 (Partner Currency),Media Fee 2 (USD),DCM Post-Click Revenue,DCM Post-View Revenue,Profit (Advertiser Currency),Profit eCPM (Adv Currency),Profit eCPM (Partner Currency),Profit eCPM (USD),Profit Margin,Profit (Partner Currency),Profit (USD),Revenue (Adv Currency),Revenue eCPA (PC) (Adv Currency),Revenue eCPA (PC) (Partner Currency),Revenue eCPA (PC) (USD),Revenue eCPA (PV) (Adv Currency),Revenue eCPA (PV) (Partner Currency),Revenue eCPA (PV) (USD),Revenue eCPA (Adv Currency),Revenue eCPA (Partner Currency),Revenue eCPA (USD),Video Revenue eCPCV (Adv Currency),Video Revenue eCPCV (Partner Currency),Video Revenue eCPCV (USD),Revenue eCPC (Adv Currency),Revenue eCPC (Partner Currency),Revenue eCPC (USD),Revenue eCPM (Adv Currency),Revenue eCPM (Partner Currency),Revenue eCPM (USD),Revenue (Partner Currency),Revenue (USD),Complete Views (Video),First-Quartile Views (Video),Fullscreens (Video),Midpoint Views (Video),Audio Mutes (Video),Pauses (Video),Starts (Video),Skips (Video),Third-Quartile Views 
(Video),Total Conversions,Total Media Cost (Advertiser Currency),Total Media Cost eCPA (PC) (Adv Currency),Total Media Cost eCPA (PC) (Partner Currency),Total Media Cost eCPA (PC) (USD),Total Media Cost eCPA (PV) (Adv Currency),Total Media Cost eCPA (PV) (Partner Currency),Total Media Cost eCPA (PV) (USD),Total Media Cost eCPA (Adv Currency),Total Media Cost eCPA (Partner Currency),Total Media Cost eCPA (USD),Total Video Media Cost eCPCV (Adv Currency),Total Video Media Cost eCPCV (Partner Currency),Total Video Media Cost eCPCV (USD),Total Media Cost eCPC (Adv Currency),Total Media Cost eCPC (Partner Currency),Total Media Cost eCPC (USD),Total Media Cost eCPM (Adv Currency),Total Media Cost eCPM (Partner Currency),Total Media Cost eCPM (USD),Total Media Cost (Partner Currency),Total Media Cost (USD),Completion Rate (Video) 2 | 8/26/16,1000 AAAAAA AAAA AAAA - AAAAAA - #AAAAAAAA - AAA,1161431,Active,,1010101010 - 1000 AAAAAAA AAAA AAAA - AAAAAAA AAAA - 28 Jun 2016 - AAAAAAA - #AAAAAAAA - AAAAAAA,2427940,Paused,CTR,CAD,CAD,2,0.00%,0,0,0,0,0,0,0,0.13%,0,0,0,1540,0.00%,0,0,7.476659,0,0,0,0,0,0,0,0,0,0,0,0,3.738329,3.73833,2.892061,4.854973,4.854973,3.755923,7.476659,5.784121,0.971966,0.971966,0.751936,0,0,0,0,0,10.031375,6.51388,6.51388,5.035214,54.26%,10.031375,7.75423,18.48,0,0,0,0,0,0,0,0,0,0,0,0,9.24,9.24,7.145143,12,12,9.279407,18.48,14.290287,0,0,0,0,0,0,0,0,0,0,8.448625,0,0,0,0,0,0,0,0,0,0,0,0,4.224312,4.224312,3.268029,5.48612,5.48612,4.244193,8.448625,6.536057,0.00% -------------------------------------------------------------------------------- /v1/core/rowSplit.js: -------------------------------------------------------------------------------- 1 | var getDelimiter = require("./getDelimiter"); 2 | var filterRow=require("./filterRow"); 3 | /** 4 | * Convert a line of string to csv columns according to its delimiter 5 | * the param._header may not be ready when this is called. 
6 | * @param {[type]} rowStr [description] 7 | * @param {[type]} param [Converter param] 8 | * @return {[type]} {cols:["a","b","c"],closed:boolean} the closed field indicate if the row is a complete row 9 | */ 10 | module.exports = function rowSplit(rowStr, param) { 11 | if (rowStr === "") { 12 | return { cols: [], closed: true }; 13 | } 14 | var quote = param.quote; 15 | var trim = param.trim; 16 | var escape = param.escape; 17 | if (param.delimiter instanceof Array || param.delimiter.toLowerCase() === "auto") { 18 | param.delimiter = getDelimiter(rowStr, param); 19 | } 20 | var delimiter = param.delimiter; 21 | var rowArr = rowStr.split(delimiter); 22 | if (quote === "off") { 23 | return { cols: rowArr, closed: true }; 24 | } 25 | var row = []; 26 | var inquote = false; 27 | var quoteBuff = ''; 28 | for (var i = 0, rowLen = rowArr.length; i < rowLen; i++) { 29 | var e = rowArr[i]; 30 | if (!inquote && trim) { 31 | e = e.trim(); 32 | } 33 | var len = e.length; 34 | if (!inquote) { 35 | if (isQuoteOpen(e, param)) { //quote open 36 | e = e.substr(1); 37 | if (isQuoteClose(e, param)) { //quote close 38 | e = e.substring(0, e.length - 1); 39 | e = _escapeQuote(e, quote, escape); 40 | row.push(e); 41 | continue; 42 | } else { 43 | inquote = true; 44 | quoteBuff += e; 45 | continue; 46 | } 47 | } else { 48 | row.push(e); 49 | continue; 50 | } 51 | } else { //previous quote not closed 52 | if (isQuoteClose(e, param)) { //close double quote 53 | inquote = false; 54 | e = e.substr(0, len - 1); 55 | quoteBuff += delimiter + e; 56 | quoteBuff = _escapeQuote(quoteBuff, quote, escape); 57 | if (trim) { 58 | quoteBuff = quoteBuff.replace(/\s+$/, ""); 59 | } 60 | row.push(quoteBuff); 61 | quoteBuff = ""; 62 | } else { 63 | quoteBuff += delimiter + e; 64 | } 65 | } 66 | } 67 | 68 | if (!inquote && param._needFilterRow) { 69 | row = filterRow(row, param); 70 | } 71 | 72 | return { cols: row, closed: !inquote }; 73 | // if (param.workerNum<=1){ 74 | // }else{ 75 | // if (inquote && quoteBuff.length>0){//for multi core, quote will be closed at the end of line 76 | // quoteBuff=_escapeQuote(quoteBuff,quote,escape);; 77 | // if (trim){ 78 | // quoteBuff=quoteBuff.trimRight(); 79 | // } 80 | // row.push(quoteBuff); 81 | // } 82 | // return {cols:row,closed:true}; 83 | // } 84 | 85 | }; 86 | 87 | 88 | 89 | function isQuoteOpen(str, param) { 90 | var quote = param.quote; 91 | var escape = param.escape; 92 | return str[0] === quote && ( 93 | str[1] !== quote || 94 | str[1] === escape && (str[2] === quote || str.length === 2)); 95 | } 96 | function isQuoteClose(str, param) { 97 | var quote = param.quote; 98 | var count = 0; 99 | var idx = str.length - 1; 100 | var escape = param.escape; 101 | while (str[idx] === quote || str[idx] === escape) { 102 | idx--; 103 | count++; 104 | } 105 | return count % 2 !== 0; 106 | } 107 | 108 | function twoDoubleQuote(str, quote) { 109 | var twoQuote = quote + quote; 110 | var curIndex = -1; 111 | while ((curIndex = str.indexOf(twoQuote, curIndex)) > -1) { 112 | str = str.substring(0, curIndex) + str.substring(++curIndex); 113 | } 114 | return str; 115 | } 116 | 117 | var cachedRegExp = {}; 118 | function _escapeQuote(segment, quote, escape) { 119 | 120 | var key = "es|" + quote + "|" + escape; 121 | if (cachedRegExp[key] === undefined) { 122 | 123 | // if (escape === "\\") { 124 | // escape = "\\\\"; 125 | // } 126 | cachedRegExp[key] = new RegExp('\\'+escape + '\\'+quote, 'g'); 127 | } 128 | var regExp = cachedRegExp[key]; 129 | // console.log(regExp,segment); 130 | return 
segment.replace(regExp, quote); 131 | } 132 | -------------------------------------------------------------------------------- /v1/core/workerMgr.js: -------------------------------------------------------------------------------- 1 | module.exports = workerMgr; 2 | var eom = "\x03"; 3 | var eom1 = "\x0e"; 4 | var eom2 = "\x0f"; 5 | var CSVError = require('./CSVError'); 6 | function workerMgr() { 7 | var exports = { 8 | initWorker: initWorker, 9 | sendWorker: sendWorker, 10 | setParams: setParams, 11 | drain: function(){}, 12 | isRunning: isRunning, 13 | destroyWorker: destroyWorker 14 | }; 15 | var workers = []; 16 | var running = 0; 17 | var waiting = null; 18 | function initWorker(num, params) { 19 | workers = []; 20 | running = 0; 21 | waiting = null; 22 | for (var i = 0; i < num; i++) { 23 | workers.push(new Worker(params)); 24 | } 25 | 26 | } 27 | function isRunning() { 28 | return running > 0; 29 | } 30 | function destroyWorker() { 31 | workers.forEach(function(w) { 32 | w.destroy(); 33 | }); 34 | } 35 | 36 | function sendWorker(data, startIdx, transformCb, cbResult) { 37 | if (workers.length > 0) { 38 | var worker = workers.shift(); 39 | running++; 40 | worker.parse(data, startIdx, function(result) { 41 | // var arr=JSON.parse(result); 42 | // arr.forEach(function(item){ 43 | // console.log('idx',item.index) 44 | // }) 45 | workers.push(worker); 46 | cbResult(result, startIdx); 47 | running--; 48 | if (waiting === null && running === 0) { 49 | exports.drain(); 50 | } else if (waiting) { 51 | sendWorker.apply(this, waiting); 52 | waiting = null; 53 | } 54 | }); 55 | process.nextTick(transformCb); 56 | } else { 57 | waiting = [data, startIdx, transformCb, cbResult]; 58 | } 59 | } 60 | 61 | function setParams(params) { 62 | workers.forEach(function(w) { 63 | w.setParams(params); 64 | }); 65 | } 66 | return exports; 67 | } 68 | 69 | function Worker(params) { 70 | var spawn = require("child_process").spawn; 71 | this.cp = spawn(process.execPath, [__dirname + "/worker.js"], { 72 | env: { 73 | child:true 74 | }, 75 | stdio:['pipe', 'pipe', 2, 'ipc'] 76 | // stdio:[0,1,2,'ipc'] 77 | }); 78 | this.setParams(params); 79 | this.cp.on("message", this.onChildMsg.bind(this)); 80 | this.buffer = ""; 81 | var self = this; 82 | this.cp.stdout.on("data", function(d) { 83 | var str = d.toString("utf8"); 84 | var all = self.buffer + str; 85 | var cmdArr = all.split(eom); 86 | while (cmdArr.length > 1) { 87 | self.onChildMsg(cmdArr.shift()); 88 | } 89 | self.buffer = cmdArr[0]; 90 | }); 91 | } 92 | 93 | Worker.prototype.setParams = function(params) { 94 | var msg = "0" + JSON.stringify(params); 95 | this.sendMsg(msg); 96 | }; 97 | 98 | /** 99 | * msg is like: 100 | * 101 | * cmd is from 0-9 102 | */ 103 | Worker.prototype.onChildMsg = function(msg) { 104 | if (msg) { 105 | var cmd = msg[0]; 106 | var data = msg.substr(1); 107 | switch (cmd) { 108 | case "0": //total line number of current chunk 109 | if (this.cbLine) { 110 | var sp = data.split("|"); 111 | var len = parseInt(sp[0]); 112 | var partial = sp[1]; 113 | this.cbLine(len, partial); 114 | } 115 | break; 116 | case "1": // json array of current chunk 117 | if (this.cbResult) { 118 | var rows = data.split(eom1); 119 | rows.pop(); 120 | var res = []; 121 | rows.forEach(function(row) { 122 | var sp = row.split(eom2); 123 | res.push({ 124 | index: sp[0], 125 | row: sp[1], 126 | err: sp[2] ? 
CSVError.fromArray(JSON.parse(sp[2])) : null, 127 | json: sp[3] 128 | }); 129 | }); 130 | this.cbResult(res); 131 | } 132 | break; 133 | } 134 | } 135 | }; 136 | 137 | Worker.prototype.parse = function(data, startIdx, cbResult) { 138 | this.cbResult = cbResult; 139 | var msg = "1" + startIdx + "|" + data; 140 | this.sendMsg(msg); 141 | }; 142 | 143 | Worker.prototype.destroy = function() { 144 | this.cp.kill(); 145 | }; 146 | 147 | Worker.prototype.sendMsg = function(msg) { 148 | this.cp.stdin.write(msg + eom, "utf8"); 149 | // this.cp.send(msg) 150 | }; 151 | -------------------------------------------------------------------------------- /src/rowSplit.test.ts: -------------------------------------------------------------------------------- 1 | import { RowSplit, MultipleRowResult, RowSplitResult } from "./rowSplit"; 2 | import { Converter } from "./Converter"; 3 | const assert = require("assert"); 4 | 5 | describe("Test delimiters", function () { 6 | const getDelimiter = (str, opt: { delimiter: string | string[] }): string => { 7 | return RowSplit.prototype["getDelimiter"].call({ 8 | conv: { 9 | parseParam: { 10 | delimiter: opt.delimiter 11 | } 12 | } 13 | }, str); 14 | } 15 | 16 | it("should return the explicitly specified delimiter", function () { 17 | var delimiter = ";"; 18 | var rowStr = "a;b;c"; 19 | var returnedDelimiter = getDelimiter(rowStr, { delimiter: ";" }); 20 | assert.equal(returnedDelimiter, delimiter); 21 | }); 22 | 23 | it("should return the autodetected delimiter if 'auto' specified", function () { 24 | var rowStr = "a;b;c"; 25 | var returnedDelimiter = getDelimiter(rowStr, { delimiter: "auto" }); 26 | assert(returnedDelimiter === ";"); 27 | }); 28 | 29 | it("should return the ',' delimiter if delimiter cannot be specified, in case of 'auto'", function () { 30 | var rowStr = "abc"; 31 | var returnedDelimiter = getDelimiter(rowStr, { delimiter: "auto" }); 32 | assert(returnedDelimiter === ","); 33 | }); 34 | 35 | it("should accept an array with potential delimiters", function () { 36 | var rowStr = "a$b$c"; 37 | var returnedDelimiter = getDelimiter(rowStr, { delimiter: [",", ";", "$"] }); 38 | assert(returnedDelimiter === '$'); 39 | }); 40 | }); 41 | 42 | describe("ParseMultiLine function", function () { 43 | const rowSplit = new RowSplit(new Converter()); 44 | const func = (lines: string[]): MultipleRowResult => { 45 | return rowSplit.parseMultiLines(lines); 46 | } 47 | it("should convert lines to csv lines", function () { 48 | var lines = [ 49 | "a,b,c,d", 50 | "hello,world,csvtojson,abc", 51 | "1,2,3,4" 52 | ]; 53 | var res = func(lines); 54 | assert.equal(res.rowsCells.length, 3); 55 | assert.equal(res.partial, ""); 56 | }); 57 | 58 | it("should process line breaks", function () { 59 | var lines = [ 60 | "a,b,c", 61 | '15",hello,"ab', 62 | "cde\"", 63 | "\"b\"\"b\",cc,dd" 64 | ]; 65 | var res = func(lines); 66 | assert.equal(res.rowsCells.length, 3); 67 | assert.equal(res.rowsCells[1][0], "15\""); 68 | assert.equal(res.rowsCells[1][2], "ab\ncde"); 69 | assert.equal(res.rowsCells[2][0], "b\"b"); 70 | assert.equal(res.partial, ""); 71 | }); 72 | 73 | it("should return partial if line not closed", function () { 74 | var lines = [ 75 | "a,b,c", 76 | '15",hello,"ab', 77 | "d,e,f" 78 | ]; 79 | var res = func(lines); 80 | assert.equal(res.rowsCells.length, 1); 81 | assert.equal(res.partial, "15\",hello,\"ab\nd,e,f\n"); 82 | }); 83 | }); 84 | 85 | describe("RowSplit.parse function", function () { 86 | const rowSplit = new RowSplit(new Converter()); 87 | const func = (str): 
RowSplitResult => { 88 | return rowSplit.parse(str); 89 | } 90 | it("should split complete csv line", function () { 91 | var str = "hello,world,csvtojson,awesome"; 92 | var res = func(str); 93 | assert.equal(res.cells.length, 4); 94 | assert.equal(res.closed, true); 95 | }); 96 | 97 | it("should split incomplete csv line", function () { 98 | var str = "hello,world,\"csvtojson,awesome"; 99 | var res = func(str); 100 | assert.equal(res.closed, false); 101 | }); 102 | 103 | it("should allow multiple line", function () { 104 | var str = "\"he\"llo\",world,\"csvtojson,a\"\nwesome\""; 105 | var res = func(str); 106 | assert.equal(res.closed, true); 107 | assert.equal(res.cells[2], 'csvtojson,a"\nwesome'); 108 | }); 109 | it("should allow blank quotes", () => { 110 | const data = "a|^^|^b^"; 111 | 112 | const rowSplit = new RowSplit(new Converter({ 113 | delimiter: '|', 114 | quote: '^', 115 | noheader: true 116 | })); 117 | const res = rowSplit.parse(data); 118 | assert.equal(res.cells[1], ""); 119 | }) 120 | it("should allow blank quotes in quotes", () => { 121 | const data = 'a,"hello,this,"", test"'; 122 | 123 | const rowSplit = new RowSplit(new Converter({ 124 | noheader: true 125 | })); 126 | const res = rowSplit.parse(data); 127 | assert.equal(res.cells[1], 'hello,this,", test'); 128 | }) 129 | it("should smart detect if an initial quote is only part of value ", () => { 130 | const data = '"Weight" (kg),Error code,"Height" (m)'; 131 | const rowSplit = new RowSplit(new Converter({ 132 | noheader: true 133 | })); 134 | const res = rowSplit.parse(data); 135 | assert.equal(res.cells.length, 3); 136 | assert(res.closed); 137 | assert.equal(res.cells[0],'"Weight" (kg)'); 138 | assert.equal(res.cells[1],'Error code'); 139 | assert.equal(res.cells[2],'"Height" (m)'); 140 | 141 | }) 142 | }); 143 | -------------------------------------------------------------------------------- /src/ProcessFork.ts: -------------------------------------------------------------------------------- 1 | import { Processor, ProcessLineResult } from "./Processor"; 2 | import { Converter } from "./Converter"; 3 | import { ChildProcess } from "child_process"; 4 | import { CSVParseParam, mergeParams } from "./Parameters"; 5 | import { ParseRuntime } from "./ParseRuntime"; 6 | import { Readable, Writable } from "stream"; 7 | import { bufFromString, emptyBuffer } from "./util"; 8 | import CSVError from "./CSVError"; 9 | 10 | export class ProcessorFork extends Processor { 11 | flush(): Promise { 12 | return new Promise((resolve, reject) => { 13 | // console.log("flush"); 14 | this.finalChunk = true; 15 | this.next = resolve; 16 | this.childProcess.stdin?.end(); 17 | // this.childProcess.stdout.on("end",()=>{ 18 | // // console.log("!!!!"); 19 | // this.flushResult(); 20 | // }) 21 | }); 22 | } 23 | destroy(): Promise { 24 | this.childProcess.kill(); 25 | return Promise.resolve(); 26 | } 27 | childProcess: ChildProcess; 28 | inited: boolean = false; 29 | private resultBuf: ProcessLineResult[] = []; 30 | private leftChunk: string = ""; 31 | private finalChunk: boolean = false; 32 | private next?: (result: ProcessLineResult[]) => any; 33 | constructor(protected converter: Converter) { 34 | super(converter); 35 | this.childProcess = require("child_process").spawn(process.execPath, [__dirname + "/../v2/worker.js"], { 36 | stdio: ["pipe", "pipe", "pipe", "ipc"] 37 | }); 38 | this.initWorker(); 39 | } 40 | private prepareParam(param:CSVParseParam):any{ 41 | const clone:any=mergeParams(param); 42 | if (clone.ignoreColumns){ 43 | 
clone.ignoreColumns={ 44 | source:clone.ignoreColumns.source, 45 | flags:clone.ignoreColumns.flags 46 | } 47 | } 48 | if (clone.includeColumns){ 49 | clone.includeColumns={ 50 | source:clone.includeColumns.source, 51 | flags:clone.includeColumns.flags 52 | } 53 | } 54 | return clone; 55 | } 56 | private initWorker() { 57 | this.childProcess.on("exit",()=>{ 58 | this.flushResult(); 59 | }) 60 | this.childProcess.send({ 61 | cmd: "init", 62 | params: this.prepareParam(this.converter.parseParam) 63 | } as InitMessage); 64 | this.childProcess.on("message", (msg: Message) => { 65 | if (msg.cmd === "inited") { 66 | this.inited = true; 67 | } else if (msg.cmd === "eol") { 68 | if (this.converter.listeners("eol").length > 0){ 69 | this.converter.emit("eol",(msg as StringMessage).value); 70 | } 71 | }else if (msg.cmd === "header") { 72 | if (this.converter.listeners("header").length > 0){ 73 | this.converter.emit("header",(msg as StringMessage).value); 74 | } 75 | }else if (msg.cmd === "done"){ 76 | 77 | // this.flushResult(); 78 | } 79 | 80 | }); 81 | this.childProcess.stdout?.on("data", (data) => { 82 | // console.log("stdout", data.toString()); 83 | const res = data.toString(); 84 | // console.log(res); 85 | this.appendBuf(res); 86 | 87 | }); 88 | this.childProcess.stderr?.on("data", (data) => { 89 | // console.log("stderr", data.toString()); 90 | this.converter.emit("error", CSVError.fromJSON(JSON.parse(data.toString()))); 91 | }); 92 | 93 | } 94 | private flushResult() { 95 | // console.log("flush result", this.resultBuf.length); 96 | if (this.next) { 97 | this.next(this.resultBuf); 98 | } 99 | this.resultBuf = []; 100 | } 101 | private appendBuf(data: string) { 102 | const res = this.leftChunk + data; 103 | const list = res.split("\n"); 104 | let counter = 0; 105 | const lastBit = list[list.length - 1]; 106 | if (lastBit !== "") { 107 | this.leftChunk = list.pop() || ""; 108 | } else { 109 | this.leftChunk = ""; 110 | } 111 | this.resultBuf=this.resultBuf.concat(list); 112 | // while (list.length) { 113 | // let item = list.shift() || ""; 114 | // if (item.length === 0 ) { 115 | // continue; 116 | // } 117 | // // if (this.params.output !== "line") { 118 | // // item = JSON.parse(item); 119 | // // } 120 | // this.resultBuf.push(item); 121 | // counter++; 122 | // } 123 | // console.log("buf length",this.resultBuf.length); 124 | } 125 | 126 | process(chunk: Buffer): Promise { 127 | return new Promise((resolve, reject) => { 128 | // console.log("chunk", chunk.length); 129 | this.next = resolve; 130 | // this.appendReadBuf(chunk); 131 | this.childProcess.stdin?.write(chunk, () => { 132 | // console.log("chunk callback"); 133 | this.flushResult(); 134 | }); 135 | }); 136 | } 137 | } 138 | 139 | export interface Message { 140 | cmd: string 141 | } 142 | 143 | export interface InitMessage extends Message { 144 | params: any; 145 | } 146 | export interface StringMessage extends Message { 147 | value: string 148 | } 149 | export const EOM = "\x03"; 150 | -------------------------------------------------------------------------------- /v2/Parameters.d.ts: -------------------------------------------------------------------------------- 1 | export interface CSVParseParam { 2 | /** 3 | * delimiter used for seperating columns. Use "auto" if delimiter is unknown in advance, in this case, delimiter will be auto-detected (by best attempt). Use an array to give a list of potential delimiters e.g. [",","|","$"]. 
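* A minimal usage sketch, assuming the package's default export is the converter
* factory (the sample data here is made up):
*   const csv = require("csvtojson");
*   csv({ delimiter: "auto" }).fromString("a;b;c\n1;2;3").then(rows => console.log(rows));
*   // auto-detection settles on ";" here; pass e.g. [",", ";", "|"] to restrict the candidates.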
default: "," 4 | */ 5 | delimiter: string | string[]; 6 | /** 7 | * This parameter instructs the parser to ignore columns as specified by the regular expression. Example: /(name|age)/ will ignore columns whose header contains "name" or "age" 8 | */ 9 | ignoreColumns?: RegExp; 10 | /** 11 | * This parameter instructs the parser to include only those columns as specified by the regular expression. Example: /(name|age)/ will parse and include columns whose header contains "name" or "age" 12 | */ 13 | includeColumns?: RegExp; 14 | /** 15 | * If a column contains delimiter, it is able to use quote character to surround the column content. e.g. "hello, world" wont be split into two columns while parsing. Set to "off" will ignore all quotes. default: " (double quote) 16 | */ 17 | quote: string; 18 | /** 19 | * Indicate if parser trim off spaces surrounding column content. e.g. " content " will be trimmed to "content". Default: true 20 | */ 21 | trim: boolean; 22 | /** 23 | * This parameter turns on and off whether check field type. Default is false. 24 | */ 25 | checkType: boolean; 26 | /** 27 | * Ignore the empty value in CSV columns. If a column value is not given, set this to true to skip them. Default: false. 28 | */ 29 | ignoreEmpty: boolean; 30 | /** 31 | * Delegate parsing work to another process. 32 | */ 33 | /** 34 | * Indicating csv data has no header row and first row is data row. Default is false. 35 | */ 36 | noheader: boolean; 37 | /** 38 | * An array to specify the headers of CSV data. If --noheader is false, this value will override CSV header row. Default: null. Example: ["my field","name"]. 39 | */ 40 | headers?: string[]; 41 | /** 42 | * Don't interpret dots (.) and square brackets in header fields as nested object or array identifiers at all (treat them like regular characters for JSON field identifiers). Default: false. 43 | */ 44 | flatKeys: boolean; 45 | /** 46 | * the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit "error" of "row_exceed". if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 0 47 | */ 48 | maxRowLength: number; 49 | /** 50 | * whether check column number of a row is the same as headers. If column number mismatched headers number, an error of "mismatched_column" will be emitted.. default: false 51 | */ 52 | checkColumn: boolean; 53 | /** 54 | * escape character used in quoted column. Default is double quote (") according to RFC4108. Change to back slash (\) or other chars for your own case. 55 | */ 56 | escape: string; 57 | /** 58 | * Allows override parsing logic for a specific column. It accepts a JSON object with fields like: headName: . e.g. {field1:'number'} will use built-in number parser to convert value of the field1 column to number. Another example {"name":nameProcessFunc} will use specified function to parse the value. 59 | */ 60 | colParser: { 61 | [key: string]: string | CellParser | ColumnParam; 62 | }; 63 | /** 64 | * End of line character. If omitted, parser will attempt to retrieve it from the first chunks of CSV data 65 | */ 66 | eol?: string; 67 | /** 68 | * Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). Default is false. Change to true if you are confident no inline line breaks (like line break in a cell which has multi line text) 69 | */ 70 | alwaysSplitAtEOL: boolean; 71 | /** 72 | * The format to be converted to. 
"json" (default) -- convert csv to json. "csv" -- convert csv to csv row array. "line" -- convert csv to csv line string 73 | */ 74 | output: "json" | "csv" | "line"; 75 | /** 76 | * Convert string "null" to null object in JSON outputs. Default is false. 77 | */ 78 | nullObject: boolean; 79 | /** 80 | * Define the format required by downstream (this parameter does not work if objectMode is on). `line` -- json is emitted in a single line separated by a line breake like "json1\njson2" . `array` -- downstream requires array format like "[json1,json2]". Default is line. 81 | */ 82 | downstreamFormat: "line" | "array"; 83 | /** 84 | * Define whether .then(callback) returns all JSON data in its callback. Default is true. Change to false to save memory if subscribing json lines. 85 | */ 86 | needEmitAll: boolean; 87 | } 88 | export type CellParser = (item: string, head: string, resultRow: any, row: string[], columnIndex: number) => any; 89 | export interface ColumnParam { 90 | flat?: boolean; 91 | cellParser?: string | CellParser; 92 | } 93 | export declare function mergeParams(params?: Partial): CSVParseParam; 94 | -------------------------------------------------------------------------------- /bin/options.json: -------------------------------------------------------------------------------- 1 | { 2 | "commands": { 3 | "parse": "(Default)Parse a csv file to json", 4 | "version": "Show version of current csvtojson" 5 | }, 6 | "options": { 7 | "--output":{ 8 | "desc": "The format to be converted to. \"json\" (default) -- convert csv to json. \"csv\" -- convert csv to csv row array. \"line\" -- convert csv to csv line string", 9 | "type": "string" 10 | }, 11 | "--delimiter": { 12 | "desc": "delimiter to separate columns. Possible to give an array or just use 'auto'. default comma (,). e.g. --delimiter=# --delimiter='[\",\",\";\"]' --delimiter=auto", 13 | "type": "~object" 14 | }, 15 | "--quote": { 16 | "desc": "quote surrounding a column content containing delimiters. To turn off quote, please use 'off' --quote=off. default double quote (\"). e.g. chage to hash: --quote=# ", 17 | "type": "string" 18 | }, 19 | "--trim": { 20 | "desc": "Indicate if parser trim off spaces surrounding column content. e.g. \" content \" will be trimmed to \"content\". Default: true", 21 | "type": "boolean" 22 | }, 23 | "--checkType": { 24 | "desc": "This parameter turns on and off whether check field type. default is false.", 25 | "type": "boolean" 26 | 27 | }, 28 | "--ignoreEmpty": { 29 | "desc": "This parameter turns on and off whether ignore empty column values while parsing. default is false", 30 | "type": "boolean" 31 | }, 32 | "--noheader": { 33 | "desc": "Indicating csv data has no header row and first row is data row. Default is false", 34 | "type": "boolean" 35 | }, 36 | "--headers": { 37 | "desc": "An array to specify the headers of CSV data. If --noheader is false, this value will override CSV header. Default: null. Example: --headers='[\"my field\",\"name\"]'", 38 | "type": "object" 39 | }, 40 | "--flatKeys": { 41 | "desc": "Don't interpret dots (.) and square brackets in header fields as nested object or array identifiers at all (treat them like regular characters for JSON field identifiers). Default: false.", 42 | "type": "boolean" 43 | }, 44 | "--maxRowLength": { 45 | "desc": "the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit \"error\" of \"row_exceed\". 
if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 10240", 46 | "type": "number" 47 | }, 48 | "--checkColumn": { 49 | "desc": "whether check column number of a row is the same as headers. If column number mismatched headers number, an error of \"mismatched_column\" will be emitted.. default: false", 50 | "type": "boolean" 51 | }, 52 | "--eol": { 53 | "desc": "Explicitly specify the end of line character to use.", 54 | "type": "string" 55 | }, 56 | "--quiet": { 57 | "desc": "If any error happens, quit the process quietly rather than log out the error. Default is false.", 58 | "type": "boolean" 59 | }, 60 | "--escape":{ 61 | "desc":"escape character used in quoted column. Default is double quote (\") according to RFC4108. Change to back slash (\\) or other chars for your own case.", 62 | "type":"string" 63 | }, 64 | "--ignoreColumns": { 65 | "desc": "RegExp matched columns to ignore from input. e.g. --ignoreColumns=/(name|age)/ ", 66 | "type": "string" 67 | }, 68 | "--includeColumns": { 69 | "desc": "RegExp matched columns to include from input. e.g. --includeColumns=/(name|age)/ ", 70 | "type": "string" 71 | }, 72 | "--colParser": { 73 | "desc": "Specific parser for columns. e.g. --colParser='{\"col1\":\"number\",\"col2\":\"string\"}'", 74 | "type": "~object" 75 | }, 76 | "--alwaysSplitAtEOL":{ 77 | "desc": "Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.", 78 | "type": "boolean" 79 | }, 80 | "--nullObject":{ 81 | "desc":"How to parse if a csv cell contains 'null'. Default false will keep 'null' as string. Change to true if a null object is needed.", 82 | "type":"boolean" 83 | }, 84 | "--downstreamFormat":{ 85 | "desc":"Option to set what JSON array format is needed by downstream. 'line' is also called ndjson format. This format will write lines of JSON (without square brackets and commas) to downstream. 'array' will write complete JSON array string to downstream (suitable for file writable stream etc). Default 'line'", 86 | "type":"string" 87 | } 88 | }, 89 | "examples": [ 90 | "csvtojson < csvfile", 91 | "csvtojson ", 92 | "cat | csvtojson", 93 | "csvtojson --checkType=false --trim=false --delimiter=#" 94 | ] 95 | } 96 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Basic Options */ 4 | "target": "es5", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017','ES2018' or 'ESNEXT'. */ 5 | "module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */ 6 | "lib": ["es5","es6"], /* Specify library files to be included in the compilation. */ 7 | // "allowJs": true, /* Allow javascript files to be compiled. */ 8 | // "checkJs": true, /* Report errors in .js files. */ 9 | // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */ 10 | "declaration": true, /* Generates corresponding '.d.ts' file. */ 11 | "sourceMap": false, /* Generates corresponding '.map' file. */ 12 | // "outFile": "./", /* Concatenate and emit output to single file. */ 13 | "outDir": "./v2", /* Redirect output structure to the directory. 
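In other words, the checked-in v2/*.js and v2/*.d.ts files elsewhere in this
repository are the compiled output of src/*.ts.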
*/ 14 | "rootDir": "./src", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ 15 | // "removeComments": true, /* Do not emit comments to output. */ 16 | // "noEmit": true, /* Do not emit outputs. */ 17 | // "importHelpers": true, /* Import emit helpers from 'tslib'. */ 18 | // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ 19 | // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ 20 | 21 | /* Strict Type-Checking Options */ 22 | "strict": true, /* Enable all strict type-checking options. */ 23 | "noImplicitAny": false, /* Raise error on expressions and declarations with an implied 'any' type. */ 24 | // "strictNullChecks": true, /* Enable strict null checks. */ 25 | // "strictFunctionTypes": true, /* Enable strict checking of function types. */ 26 | // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ 27 | // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ 28 | // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ 29 | 30 | /* Additional Checks */ 31 | // "noUnusedLocals": true, /* Report errors on unused locals. */ 32 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 33 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 34 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 35 | 36 | /* Module Resolution Options */ 37 | // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ 38 | // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ 39 | // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ 40 | // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ 41 | // "typeRoots": [], /* List of folders to include type definitions from. */ 42 | // "types": [], /* Type declaration files to be included in compilation. */ 43 | // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ 44 | "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ 45 | // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ 46 | 47 | /* Source Map Options */ 48 | // "sourceRoot": "./", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ 49 | // "mapRoot": "./", /* Specify the location where debugger should locate map files instead of generated locations. */ 50 | // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */ 51 | // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ 52 | 53 | /* Experimental Options */ 54 | // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. 
*/ 55 | // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ 56 | }, 57 | "include": [ 58 | "src/**/*.ts" 59 | ], 60 | "exclude": [ 61 | "src/**/*.test.ts" 62 | ] 63 | 64 | } -------------------------------------------------------------------------------- /src/Converter.ts: -------------------------------------------------------------------------------- 1 | import { Transform, TransformOptions, Readable } from "stream"; 2 | import { CSVParseParam, mergeParams } from "./Parameters"; 3 | import { ParseRuntime, initParseRuntime } from "./ParseRuntime"; 4 | import { Processor } from "./Processor"; 5 | import { ProcessorLocal } from "./ProcessorLocal"; 6 | import { Result } from "./Result"; 7 | import CSVError from "./CSVError"; 8 | 9 | export class Converter extends Transform implements PromiseLike { 10 | preRawData(onRawData: PreRawDataCallback): Converter { 11 | this.runtime.preRawDataHook = onRawData; 12 | return this; 13 | } 14 | preFileLine(onFileLine: PreFileLineCallback): Converter { 15 | this.runtime.preFileLineHook = onFileLine; 16 | return this; 17 | } 18 | subscribe( 19 | onNext?: (data: any, lineNumber: number) => void | PromiseLike, 20 | onError?: (err: CSVError) => void, 21 | onCompleted?: () => void): Converter { 22 | this.parseRuntime.subscribe = { 23 | onNext, 24 | onError, 25 | onCompleted 26 | } 27 | return this; 28 | } 29 | fromFile(filePath: string, options?: string | CreateReadStreamOption | undefined): Converter { 30 | const fs = require("fs"); 31 | fs.exists(filePath, (exist) => { 32 | if (exist) { 33 | const rs = fs.createReadStream(filePath, options); 34 | rs.pipe(this); 35 | } else { 36 | this.emit('error', new Error(`File does not exist at ${filePath}. Check to make sure the file path to your csv is correct.`)); 37 | } 38 | }); 39 | return this; 40 | } 41 | fromStream(readStream: Readable): Converter { 42 | readStream.pipe(this); 43 | return this; 44 | } 45 | fromString(csvString: string): Converter { 46 | const csv = csvString.toString(); 47 | const read = new Readable(); 48 | let idx = 0; 49 | read._read = function (size) { 50 | if (idx >= csvString.length) { 51 | this.push(null); 52 | } else { 53 | const str = csvString.substring(idx, idx + size); 54 | this.push(str); 55 | idx += size; 56 | } 57 | } 58 | return this.fromStream(read); 59 | } 60 | then(onfulfilled?: (value: any[]) => TResult1 | PromiseLike, onrejected?: (reason: any) => TResult2 | PromiseLike): PromiseLike { 61 | return new Promise((resolve, reject) => { 62 | this.parseRuntime.then = { 63 | onfulfilled: (value: any[]) => { 64 | if (onfulfilled) { 65 | resolve(onfulfilled(value)); 66 | } else { 67 | resolve(value as any); 68 | } 69 | }, 70 | onrejected: (err: Error) => { 71 | if (onrejected) { 72 | resolve(onrejected(err)); 73 | } else { 74 | reject(err); 75 | } 76 | } 77 | } 78 | }); 79 | } 80 | public get parseParam(): CSVParseParam { 81 | return this.params; 82 | } 83 | public get parseRuntime(): ParseRuntime { 84 | return this.runtime; 85 | } 86 | private params: CSVParseParam; 87 | private runtime: ParseRuntime; 88 | private processor: Processor; 89 | private result: Result; 90 | constructor(param?: Partial, public options: TransformOptions = {}) { 91 | super(options); 92 | this.params = mergeParams(param); 93 | this.runtime = initParseRuntime(this); 94 | this.result = new Result(this); 95 | this.processor = new ProcessorLocal(this); 96 | this.once("error", (err: any) => { 97 | setImmediate(() => { 98 | this.result.processError(err); 99 
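// Error flow, for reference: processError above hands the CSVError to any
// subscribe(..., onError) callback and to the pending then() rejection; the
// "done" event emitted next lets the processor tear itself down. A consumer
// sketch (csv is the exported factory; the file path and handlers are made up):
//   csv().fromFile("./broken.csv")
//     .on("error", err => console.error(err.message))
//     .then(rows => useRows(rows), err => handleFailure(err));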
| this.emit("done", err); 100 | }); 101 | 102 | }); 103 | this.once("done", () => { 104 | this.processor.destroy(); 105 | }) 106 | 107 | return this; 108 | } 109 | _transform(chunk: any, encoding: string, cb: Function) { 110 | this.processor.process(chunk) 111 | .then((result) => { 112 | // console.log(result); 113 | if (result.length > 0) { 114 | this.runtime.started = true; 115 | 116 | return this.result.processResult(result); 117 | } 118 | }) 119 | .then(() => { 120 | this.emit("drained"); 121 | cb(); 122 | }, (error) => { 123 | this.runtime.hasError = true; 124 | this.runtime.error = error; 125 | this.emit("error", error); 126 | cb(); 127 | }); 128 | } 129 | _flush(cb: Function) { 130 | this.processor.flush() 131 | .then((data) => { 132 | if (data.length > 0) { 133 | 134 | return this.result.processResult(data); 135 | } 136 | }) 137 | .then(() => { 138 | this.processEnd(cb); 139 | }, (err) => { 140 | this.emit("error", err); 141 | cb(); 142 | }) 143 | } 144 | private processEnd(cb) { 145 | this.result.endProcess(); 146 | this.emit("done"); 147 | cb(); 148 | } 149 | get parsedLineNumber(): number { 150 | return this.runtime.parsedLineNumber; 151 | } 152 | } 153 | export interface CreateReadStreamOption { 154 | flags?: string; 155 | encoding?: string; 156 | fd?: number; 157 | mode?: number; 158 | autoClose?: boolean; 159 | start?: number; 160 | end?: number; 161 | highWaterMark?: number; 162 | } 163 | export type CallBack = (err: Error, data: Array) => void; 164 | 165 | 166 | export type PreFileLineCallback = (line: string, lineNumber: number) => string | PromiseLike; 167 | export type PreRawDataCallback = (csvString: string) => string | PromiseLike; 168 | -------------------------------------------------------------------------------- /src/Result.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | import { ProcessLineResult } from "./Processor"; 3 | import CSVError from "./CSVError"; 4 | import { EOL } from "os"; 5 | export class Result { 6 | private get needEmitLine(): boolean { 7 | return !!this.converter.parseRuntime.subscribe && !!this.converter.parseRuntime.subscribe.onNext || this.needPushDownstream 8 | } 9 | private _needPushDownstream?: boolean; 10 | private get needPushDownstream(): boolean { 11 | if (this._needPushDownstream === undefined) { 12 | this._needPushDownstream = this.converter.listeners("data").length > 0 || this.converter.listeners("readable").length > 0; 13 | } 14 | return this._needPushDownstream; 15 | } 16 | private get needEmitAll(): boolean { 17 | return !!this.converter.parseRuntime.then && this.converter.parseParam.needEmitAll; 18 | // return !!this.converter.parseRuntime.then; 19 | } 20 | private finalResult: any[] = []; 21 | constructor(private converter: Converter) { } 22 | processResult(resultLines: ProcessLineResult[]): Promise { 23 | const startPos = this.converter.parseRuntime.parsedLineNumber; 24 | if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") { 25 | if (startPos === 0) { 26 | pushDownstream(this.converter, "[" + EOL); 27 | } 28 | } 29 | // let prom: Promise; 30 | return new Promise((resolve, reject) => { 31 | if (this.needEmitLine) { 32 | processLineByLine( 33 | resultLines, 34 | this.converter, 35 | 0, 36 | this.needPushDownstream, 37 | (err) => { 38 | if (err) { 39 | reject(err); 40 | } else { 41 | this.appendFinalResult(resultLines); 42 | resolve(undefined); 43 | } 44 | }, 45 | ) 46 | // resolve(); 47 | } else { 48 | 
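// No per-line consumer is attached (no subscribe() hook and nothing listening on
// "data"/"readable"), so the rows are only accumulated for the final .then() result.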
this.appendFinalResult(resultLines); 49 | resolve(undefined); 50 | } 51 | }) 52 | } 53 | appendFinalResult(lines: any[]) { 54 | if (this.needEmitAll) { 55 | this.finalResult = this.finalResult.concat(lines); 56 | } 57 | this.converter.parseRuntime.parsedLineNumber += lines.length; 58 | } 59 | processError(err: CSVError) { 60 | if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onError) { 61 | this.converter.parseRuntime.subscribe.onError(err); 62 | } 63 | if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onrejected) { 64 | this.converter.parseRuntime.then.onrejected(err); 65 | } 66 | } 67 | endProcess() { 68 | 69 | if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onfulfilled) { 70 | if (this.needEmitAll) { 71 | this.converter.parseRuntime.then.onfulfilled(this.finalResult); 72 | }else{ 73 | this.converter.parseRuntime.then.onfulfilled([]); 74 | } 75 | } 76 | if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onCompleted) { 77 | this.converter.parseRuntime.subscribe.onCompleted(); 78 | } 79 | if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") { 80 | pushDownstream(this.converter, "]" + EOL); 81 | } 82 | } 83 | } 84 | 85 | function processLineByLine( 86 | lines: ProcessLineResult[], 87 | 88 | conv: Converter, 89 | offset: number, 90 | needPushDownstream: boolean, 91 | cb: (err?) => void, 92 | ) { 93 | if (offset >= lines.length) { 94 | cb(); 95 | } else { 96 | if (conv.parseRuntime.subscribe && conv.parseRuntime.subscribe.onNext) { 97 | const hook = conv.parseRuntime.subscribe.onNext; 98 | const nextLine = lines[offset]; 99 | const res = hook(nextLine, conv.parseRuntime.parsedLineNumber + offset); 100 | offset++; 101 | // if (isAsync === undefined) { 102 | if (res && res.then) { 103 | res.then(function () { 104 | processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine); 105 | }, cb); 106 | } else { 107 | // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false); 108 | if (needPushDownstream) { 109 | pushDownstream(conv, nextLine); 110 | } 111 | while (offset < lines.length) { 112 | const line = lines[offset]; 113 | hook(line, conv.parseRuntime.parsedLineNumber + offset); 114 | offset++; 115 | if (needPushDownstream) { 116 | pushDownstream(conv, line); 117 | } 118 | } 119 | cb(); 120 | } 121 | // } else if (isAsync === true) { 122 | // (res as PromiseLike).then(function () { 123 | // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, true); 124 | // }, cb); 125 | // } else if (isAsync === false) { 126 | // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false); 127 | // } 128 | } else { 129 | if (needPushDownstream) { 130 | while (offset < lines.length) { 131 | const line = lines[offset++]; 132 | pushDownstream(conv, line); 133 | } 134 | 135 | } 136 | cb(); 137 | } 138 | 139 | } 140 | } 141 | 142 | function processRecursive( 143 | lines: ProcessLineResult[], 144 | hook: (data: any, lineNumber: number) => void | PromiseLike, 145 | conv: Converter, 146 | offset: number, 147 | needPushDownstream: boolean, 148 | cb: (err?) 
=> void, 149 | res: ProcessLineResult, 150 | ) { 151 | if (needPushDownstream) { 152 | pushDownstream(conv, res); 153 | } 154 | processLineByLine(lines, conv, offset, needPushDownstream, cb); 155 | } 156 | function pushDownstream(conv: Converter, res: ProcessLineResult) { 157 | if (typeof res === "object" && !conv.options.objectMode) { 158 | const data = JSON.stringify(res); 159 | conv.push(data + (conv.parseParam.downstreamFormat === "array" ? "," + EOL : EOL), "utf8"); 160 | } else { 161 | conv.push(res); 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/Parameters.ts: -------------------------------------------------------------------------------- 1 | export interface CSVParseParam { 2 | /** 3 | * delimiter used for seperating columns. Use "auto" if delimiter is unknown in advance, in this case, delimiter will be auto-detected (by best attempt). Use an array to give a list of potential delimiters e.g. [",","|","$"]. default: "," 4 | */ 5 | delimiter: string | string[]; 6 | /** 7 | * This parameter instructs the parser to ignore columns as specified by the regular expression. Example: /(name|age)/ will ignore columns whose header contains "name" or "age" 8 | */ 9 | ignoreColumns?: RegExp; 10 | /** 11 | * This parameter instructs the parser to include only those columns as specified by the regular expression. Example: /(name|age)/ will parse and include columns whose header contains "name" or "age" 12 | */ 13 | includeColumns?: RegExp; 14 | /** 15 | * If a column contains delimiter, it is able to use quote character to surround the column content. e.g. "hello, world" wont be split into two columns while parsing. Set to "off" will ignore all quotes. default: " (double quote) 16 | */ 17 | quote: string; 18 | /** 19 | * Indicate if parser trim off spaces surrounding column content. e.g. " content " will be trimmed to "content". Default: true 20 | */ 21 | trim: boolean; 22 | /** 23 | * This parameter turns on and off whether check field type. Default is false. 24 | */ 25 | checkType: boolean; 26 | /** 27 | * Ignore the empty value in CSV columns. If a column value is not given, set this to true to skip them. Default: false. 28 | */ 29 | ignoreEmpty: boolean; 30 | /** 31 | * Delegate parsing work to another process. 32 | */ 33 | // fork: boolean; 34 | /** 35 | * Indicating csv data has no header row and first row is data row. Default is false. 36 | */ 37 | noheader: boolean; 38 | /** 39 | * An array to specify the headers of CSV data. If --noheader is false, this value will override CSV header row. Default: null. Example: ["my field","name"]. 40 | */ 41 | headers?: string[]; 42 | /** 43 | * Don't interpret dots (.) and square brackets in header fields as nested object or array identifiers at all (treat them like regular characters for JSON field identifiers). Default: false. 44 | */ 45 | flatKeys: boolean; 46 | /** 47 | * the max character a csv row could have. 0 means infinite. If max number exceeded, parser will emit "error" of "row_exceed". if a possibly corrupted csv data provided, give it a number like 65535 so the parser wont consume memory. default: 0 48 | */ 49 | maxRowLength: number; 50 | /** 51 | * whether check column number of a row is the same as headers. If column number mismatched headers number, an error of "mismatched_column" will be emitted.. default: false 52 | */ 53 | checkColumn: boolean; 54 | /** 55 | * escape character used in quoted column. Default is double quote (") according to RFC4108. 
Change to back slash (\) or other chars for your own case. 56 | */ 57 | escape: string; 58 | /** 59 | * Allows override parsing logic for a specific column. It accepts a JSON object with fields like: headName: . e.g. {field1:'number'} will use built-in number parser to convert value of the field1 column to number. Another example {"name":nameProcessFunc} will use specified function to parse the value. 60 | */ 61 | colParser: { 62 | [key: string]: string | CellParser | ColumnParam 63 | }; 64 | /** 65 | * End of line character. If omitted, parser will attempt to retrieve it from the first chunks of CSV data 66 | */ 67 | eol?: string; 68 | /** 69 | * Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). Default is false. Change to true if you are confident no inline line breaks (like line break in a cell which has multi line text) 70 | */ 71 | alwaysSplitAtEOL: boolean; 72 | /** 73 | * The format to be converted to. "json" (default) -- convert csv to json. "csv" -- convert csv to csv row array. "line" -- convert csv to csv line string 74 | */ 75 | output: "json" | "csv" | "line"; 76 | 77 | /** 78 | * Convert string "null" to null object in JSON outputs. Default is false. 79 | */ 80 | nullObject:boolean; 81 | /** 82 | * Define the format required by downstream (this parameter does not work if objectMode is on). `line` -- json is emitted in a single line separated by a line breake like "json1\njson2" . `array` -- downstream requires array format like "[json1,json2]". Default is line. 83 | */ 84 | downstreamFormat: "line" | "array"; 85 | /** 86 | * Define whether .then(callback) returns all JSON data in its callback. Default is true. Change to false to save memory if subscribing json lines. 
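* A memory-flat sketch (csv is the package's exported factory; saveRow is a hypothetical sink):
*   csv({ needEmitAll: false })
*     .fromFile("./huge.csv")
*     .subscribe(row => saveRow(row))
*     .then(rows => console.log(rows.length)); // prints 0 - nothing was buffered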
87 | */ 88 | needEmitAll: boolean; 89 | } 90 | 91 | export type CellParser = (item: string, head: string, resultRow: any, row: string[], columnIndex: number) => any; 92 | 93 | export interface ColumnParam { 94 | flat?: boolean; 95 | cellParser?: string | CellParser; 96 | } 97 | 98 | export function mergeParams(params?: Partial): CSVParseParam { 99 | const defaultParam: CSVParseParam = { 100 | delimiter: ',', 101 | ignoreColumns: undefined, 102 | includeColumns: undefined, 103 | quote: '"', 104 | trim: true, 105 | checkType: false, 106 | ignoreEmpty: false, 107 | // fork: false, 108 | noheader: false, 109 | headers: undefined, 110 | flatKeys: false, 111 | maxRowLength: 0, 112 | checkColumn: false, 113 | escape: '"', 114 | colParser: {}, 115 | eol: undefined, 116 | alwaysSplitAtEOL: false, 117 | output: "json", 118 | nullObject: false, 119 | downstreamFormat:"line", 120 | needEmitAll:true 121 | } 122 | if (!params) { 123 | params = {}; 124 | } 125 | for (let key in params) { 126 | if (params.hasOwnProperty(key)) { 127 | if (Array.isArray(params[key])) { 128 | defaultParam[key] = [...params[key]]; 129 | } else { 130 | defaultParam[key] = params[key]; 131 | } 132 | } 133 | } 134 | return defaultParam; 135 | } 136 | -------------------------------------------------------------------------------- /bin/csvtojson.js: -------------------------------------------------------------------------------- 1 | function csvtojson() { 2 | var Converter = require("../v2").Converter; 3 | var fs = require("fs"); 4 | var options = require("./options.json"); 5 | var cmds = options.commands; 6 | var opts = options.options; 7 | var exps = options.examples; 8 | var pkg = require("../package.json"); 9 | var os = require("os"); 10 | /** 11 | *{ 12 | "cmd": "parse", command to run 13 | "options": {}, options to passe to the command 14 | "inputStream": process.stdin // input stream for the command. default is stdin. can be a file read stream. 15 | }; 16 | * 17 | */ 18 | var parsedCmd; 19 | 20 | function _showHelp(errno) { 21 | var key; 22 | errno = typeof errno === "number" ? 
errno : 0; 23 | console.log("csvtojson: Convert csv to JSON format"); 24 | console.log("version:", pkg.version); 25 | console.log("Usage: csvtojson [] [] filepath\n"); 26 | console.log("Commands: "); 27 | for (key in cmds) { 28 | if (cmds.hasOwnProperty(key)) { 29 | console.log("\t%s: %s", key, cmds[key]); 30 | } 31 | } 32 | console.log("Options: "); 33 | for (key in opts) { 34 | if (opts.hasOwnProperty(key)) { 35 | console.log("\t%s: %s", key, opts[key].desc); 36 | } 37 | } 38 | console.log("Examples: "); 39 | for (var i = 0; i < exps.length; i++) { 40 | console.log("\t%s", exps[i]); 41 | } 42 | process.exit(errno); 43 | } 44 | function stringToRegExp(str) { 45 | var lastSlash = str.lastIndexOf("/"); 46 | var source = str.substr(1, lastSlash); 47 | var flag = str.substr(lastSlash + 1); 48 | return new RegExp(source,flag); 49 | } 50 | function parse() { 51 | var is = parsedCmd.inputStream; 52 | if (parsedCmd.options.maxRowLength === undefined) { 53 | parsedCmd.options.maxRowLength = 10240; 54 | } 55 | if (is === process.stdin && is.isTTY) { 56 | console.log("Please specify csv file path or pipe the csv data through.\n"); 57 | _showHelp(1); 58 | } 59 | if (parsedCmd.options.delimiter === "\\t") { 60 | parsedCmd.options.delimiter = "\t"; 61 | } 62 | if (parsedCmd.options.ignoreColumns) { 63 | parsedCmd.options.ignoreColumns=stringToRegExp(parsedCmd.options.ignoreColumns); 64 | 65 | } 66 | if (parsedCmd.options.includeColumns) { 67 | parsedCmd.options.includeColumns=stringToRegExp(parsedCmd.options.includeColumns); 68 | 69 | } 70 | var conv = new Converter(parsedCmd.options); 71 | var isFirst = true; 72 | conv.on("error", function (err, pos) { 73 | if (!parsedCmd.options.quiet) { 74 | console.error("csvtojson got an error: ", err); 75 | if (pos) { 76 | console.error("The error happens at following line: "); 77 | console.log(pos); 78 | } 79 | } 80 | process.exit(1); 81 | }) 82 | .on("data",function (dataStr) { 83 | process.stdout.write((isFirst ? 
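// Output framing, for reference: "[" is printed before piping starts, every chunk
// from the converter has its trailing newline dropped, rows after the first are
// prefixed with "," + EOL, and "]" is printed on "done". So stdout is one JSON
// array, roughly (sample rows made up):
//   [
//   {"a":"1","b":"2"},
//   {"a":"3","b":"4"}
//   ]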
"" : "," + os.EOL) + dataStr.toString().substr(0,dataStr.length-1)); 84 | isFirst = false; 85 | }) 86 | .on("done", function () { 87 | console.log(os.EOL + "]"); 88 | }) 89 | console.log("["); 90 | is.pipe(conv); 91 | // is.pipe(conv); 92 | } 93 | 94 | function run(cmd, options) { 95 | if (cmd === "parse") { 96 | parse(); 97 | } else if (cmd === "version") { 98 | console.log(pkg.version); 99 | } else { 100 | console.log("unknown command %s.", cmd); 101 | _showHelp(1); 102 | } 103 | } 104 | 105 | function commandParser() { 106 | var parsedCmd = { 107 | "cmd": "parse", 108 | "options": {}, 109 | "inputStream": process.stdin 110 | }; 111 | 112 | function parseObject(val, optional) { 113 | try { 114 | return JSON.parse(val); 115 | } catch (e) { 116 | if (optional) { 117 | return val; 118 | } else { 119 | console.error(e); 120 | process.exit(1); 121 | } 122 | } 123 | } 124 | 125 | function parseBool(str, optName) { 126 | str = str.toLowerCase(); 127 | if (str === "true" || str === "y") { 128 | return true; 129 | } else if (str === "false" || str === "n") { 130 | return false; 131 | } 132 | console.log("Unknown boolean value %s for parameter %s.", str, optName); 133 | _showHelp(1); 134 | } 135 | process.argv.slice(2).forEach(function (item) { 136 | if (item.indexOf("--") > -1) { 137 | var itemArr = item.split("="); 138 | var optName = itemArr[0]; 139 | var key, val, type; 140 | if (!opts[optName]) { 141 | console.log("Option %s not supported.", optName); 142 | _showHelp(1); 143 | } 144 | key = optName.replace('--', ''); 145 | val = itemArr[1] || ''; 146 | type = opts[optName].type; 147 | if (type === "string") { 148 | parsedCmd.options[key] = val.toString(); 149 | } else if (type === "boolean") { 150 | parsedCmd.options[key] = parseBool(val, optName); 151 | } else if (type === "number") { 152 | parsedCmd.options[key] = parseFloat(val); 153 | } else if (type === "object") { 154 | parsedCmd.options[key] = parseObject(val, false); 155 | } else if (type === "~object") { 156 | parsedCmd.options[key] = parseObject(val, true); 157 | } else { 158 | throw ({ 159 | name: "UnimplementedException", 160 | message: "Option type parsing not implemented. 
See bin/options.json" 161 | }); 162 | } 163 | } else if (cmds[item]) { 164 | parsedCmd.cmd = item; 165 | } else if (fs.existsSync(item)) { 166 | parsedCmd.inputStream = fs.createReadStream(item); 167 | } else { 168 | console.log("unknown parameter %s.", item); 169 | } 170 | }); 171 | return parsedCmd; 172 | } 173 | process.stdin.setEncoding('utf8'); 174 | parsedCmd = commandParser(); 175 | run(parsedCmd.cmd, parsedCmd.options); 176 | } 177 | module.exports = csvtojson; 178 | if (!module.parent) { 179 | csvtojson(); 180 | } 181 | -------------------------------------------------------------------------------- /src/lineToJson.ts: -------------------------------------------------------------------------------- 1 | import { Converter } from "./Converter"; 2 | import CSVError from "./CSVError"; 3 | import { CellParser, ColumnParam } from "./Parameters"; 4 | import set from "lodash/set"; 5 | 6 | const numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; 7 | 8 | export default function (csvRows: string[][], conv: Converter): JSONResult[] { 9 | const res: JSONResult[] = []; 10 | for (let i = 0, len = csvRows.length; i < len; i++) { 11 | const r = processRow(csvRows[i], conv, i); 12 | if (r) { 13 | res.push(r); 14 | } 15 | } 16 | return res; 17 | }; 18 | export type JSONResult = { 19 | [key: string]: any 20 | } 21 | 22 | function processRow(row: string[], conv: Converter, index): JSONResult | null { 23 | 24 | if (conv.parseParam.checkColumn && conv.parseRuntime.headers && row.length !== conv.parseRuntime.headers.length) { 25 | throw (CSVError.column_mismatched(conv.parseRuntime.parsedLineNumber + index)) 26 | } 27 | 28 | const headRow = conv.parseRuntime.headers || []; 29 | const resultRow = convertRowToJson(row, headRow, conv); 30 | if (resultRow) { 31 | return resultRow; 32 | } else { 33 | return null; 34 | } 35 | } 36 | 37 | function convertRowToJson(row: string[], headRow: string[], conv: Converter): { [key: string]: any } | null { 38 | let hasValue = false; 39 | const resultRow = {}; 40 | 41 | for (let i = 0, len = row.length; i < len; i++) { 42 | let item = row[i]; 43 | 44 | if (conv.parseParam.ignoreEmpty && item === '') { 45 | continue; 46 | } 47 | hasValue = true; 48 | 49 | let head = headRow[i]; 50 | if (!head || head === "") { 51 | head = headRow[i] = "field" + (i + 1); 52 | } 53 | const convFunc = getConvFunc(head, i, conv); 54 | if (convFunc) { 55 | const convRes = convFunc(item, head, resultRow, row, i); 56 | if (convRes !== undefined) { 57 | setPath(resultRow, head, convRes, conv, i); 58 | } 59 | } else { 60 | if (conv.parseParam.checkType) { 61 | const convertFunc = checkType(item, head, i, conv); 62 | item = convertFunc(item); 63 | } 64 | if (item !== undefined) { 65 | setPath(resultRow, head, item, conv, i); 66 | } 67 | } 68 | } 69 | if (hasValue) { 70 | return resultRow; 71 | } else { 72 | return null; 73 | } 74 | } 75 | 76 | const builtInConv: { [key: string]: CellParser } = { 77 | "string": stringType, 78 | "number": numberType, 79 | "omit": function () { } 80 | } 81 | function getConvFunc(head: string, i: number, conv: Converter): CellParser | null { 82 | if (conv.parseRuntime.columnConv[i] !== undefined) { 83 | return conv.parseRuntime.columnConv[i]; 84 | } else { 85 | let flag = conv.parseParam.colParser[head]; 86 | if (flag === undefined) { 87 | return conv.parseRuntime.columnConv[i] = null; 88 | } 89 | if (typeof flag === "object") { 90 | flag = (flag as ColumnParam).cellParser || "string"; 91 | } 92 | if (typeof flag === "string") { 93 | flag = flag.trim().toLowerCase(); 
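// colParser resolution, illustrated (column names are made up):
//   colParser: {
//     age: "number",                         // built-in parser selected by name
//     registered: item => item === "true",   // custom CellParser function
//     "address.street": { cellParser: "string", flat: true }   // ColumnParam form
//   }
// Anything unrecognized falls through to null below and default type handling applies.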
94 | const builtInFunc = builtInConv[flag]; 95 | if (builtInFunc) { 96 | return conv.parseRuntime.columnConv[i] = builtInFunc; 97 | } else { 98 | return conv.parseRuntime.columnConv[i] = null; 99 | } 100 | } else if (typeof flag === "function") { 101 | return conv.parseRuntime.columnConv[i] = flag; 102 | } else { 103 | return conv.parseRuntime.columnConv[i] = null; 104 | } 105 | } 106 | } 107 | function setPath(resultJson: any, head: string, value: any, conv: Converter, headIdx: number) { 108 | 109 | if (!conv.parseRuntime.columnValueSetter[headIdx]) { 110 | if (conv.parseParam.flatKeys) { 111 | conv.parseRuntime.columnValueSetter[headIdx] = flatSetter; 112 | } else { 113 | 114 | if (head.indexOf(".") > -1) { 115 | const headArr = head.split("."); 116 | let jsonHead = true; 117 | while (headArr.length > 0) { 118 | const headCom = headArr.shift(); 119 | if (headCom!.length === 0) { 120 | jsonHead = false; 121 | break; 122 | } 123 | } 124 | if (!jsonHead || conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) { 125 | conv.parseRuntime.columnValueSetter[headIdx] = flatSetter; 126 | } else { 127 | conv.parseRuntime.columnValueSetter[headIdx] = jsonSetter; 128 | } 129 | } else { 130 | conv.parseRuntime.columnValueSetter[headIdx] = flatSetter; 131 | } 132 | } 133 | } 134 | if (conv.parseParam.nullObject === true && value === "null") { 135 | value = null; 136 | } 137 | conv.parseRuntime.columnValueSetter[headIdx](resultJson, head, value); 138 | // flatSetter(resultJson, head, value); 139 | 140 | } 141 | function flatSetter(resultJson: any, head: string, value: any) { 142 | resultJson[head] = value; 143 | } 144 | function jsonSetter(resultJson: any, head: string, value: any) { 145 | set(resultJson, head, value); 146 | } 147 | 148 | 149 | function checkType(item: string, head: string, headIdx: number, conv: Converter): Function { 150 | if (conv.parseRuntime.headerType[headIdx]) { 151 | return conv.parseRuntime.headerType[headIdx]; 152 | } else if (head.indexOf('number#!') > -1) { 153 | return conv.parseRuntime.headerType[headIdx] = numberType; 154 | } else if (head.indexOf('string#!') > -1) { 155 | return conv.parseRuntime.headerType[headIdx] = stringType; 156 | } else if (conv.parseParam.checkType) { 157 | return conv.parseRuntime.headerType[headIdx] = dynamicType; 158 | } else { 159 | return conv.parseRuntime.headerType[headIdx] = stringType; 160 | } 161 | } 162 | 163 | function numberType(item) { 164 | var rtn = parseFloat(item); 165 | if (isNaN(rtn)) { 166 | return item; 167 | } 168 | return rtn; 169 | } 170 | 171 | function stringType(item: string): string { 172 | return item.toString(); 173 | } 174 | 175 | function dynamicType(item) { 176 | var trimed = item.trim(); 177 | if (trimed === "") { 178 | return stringType(item); 179 | } 180 | if (numReg.test(trimed)) { 181 | return numberType(item); 182 | } else if (trimed.length === 5 && trimed.toLowerCase() === "false" || trimed.length === 4 && trimed.toLowerCase() === "true") { 183 | return booleanType(item); 184 | } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { 185 | return jsonType(item); 186 | } else { 187 | return stringType(item); 188 | } 189 | } 190 | 191 | function booleanType(item) { 192 | const trimmed = item.trim(); 193 | return !(trimmed.length === 5 && trimmed.toLowerCase() === "false"); 194 | } 195 | 196 | function jsonType(item) { 197 | try { 198 | return JSON.parse(item); 199 | } catch (e) { 200 | return item; 201 | 
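// With checkType: true, dynamicType above resolves cells roughly as:
//   "3.14"            -> 3.14            (numReg match, numberType)
//   "true" / "FALSE"  -> true / false    (booleanType, case-insensitive)
//   '{"a":1}' / "[1]" -> parsed JSON     (jsonType; falls back to the raw string on error)
//   anything else     -> left as string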
} 202 | } 203 | -------------------------------------------------------------------------------- /v2/ProcessFork.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __extends = (this && this.__extends) || (function () { 3 | var extendStatics = function (d, b) { 4 | extendStatics = Object.setPrototypeOf || 5 | ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || 6 | function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; 7 | return extendStatics(d, b); 8 | }; 9 | return function (d, b) { 10 | if (typeof b !== "function" && b !== null) 11 | throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); 12 | extendStatics(d, b); 13 | function __() { this.constructor = d; } 14 | d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __()); 15 | }; 16 | })(); 17 | var __importDefault = (this && this.__importDefault) || function (mod) { 18 | return (mod && mod.__esModule) ? mod : { "default": mod }; 19 | }; 20 | Object.defineProperty(exports, "__esModule", { value: true }); 21 | exports.EOM = exports.ProcessorFork = void 0; 22 | var Processor_1 = require("./Processor"); 23 | var Parameters_1 = require("./Parameters"); 24 | var CSVError_1 = __importDefault(require("./CSVError")); 25 | var ProcessorFork = /** @class */ (function (_super) { 26 | __extends(ProcessorFork, _super); 27 | function ProcessorFork(converter) { 28 | var _this = _super.call(this, converter) || this; 29 | _this.converter = converter; 30 | _this.inited = false; 31 | _this.resultBuf = []; 32 | _this.leftChunk = ""; 33 | _this.finalChunk = false; 34 | _this.childProcess = require("child_process").spawn(process.execPath, [__dirname + "/../v2/worker.js"], { 35 | stdio: ["pipe", "pipe", "pipe", "ipc"] 36 | }); 37 | _this.initWorker(); 38 | return _this; 39 | } 40 | ProcessorFork.prototype.flush = function () { 41 | var _this = this; 42 | return new Promise(function (resolve, reject) { 43 | var _a; 44 | // console.log("flush"); 45 | _this.finalChunk = true; 46 | _this.next = resolve; 47 | (_a = _this.childProcess.stdin) === null || _a === void 0 ? 
void 0 : _a.end(); 48 | // this.childProcess.stdout.on("end",()=>{ 49 | // // console.log("!!!!"); 50 | // this.flushResult(); 51 | // }) 52 | }); 53 | }; 54 | ProcessorFork.prototype.destroy = function () { 55 | this.childProcess.kill(); 56 | return Promise.resolve(); 57 | }; 58 | ProcessorFork.prototype.prepareParam = function (param) { 59 | var clone = (0, Parameters_1.mergeParams)(param); 60 | if (clone.ignoreColumns) { 61 | clone.ignoreColumns = { 62 | source: clone.ignoreColumns.source, 63 | flags: clone.ignoreColumns.flags 64 | }; 65 | } 66 | if (clone.includeColumns) { 67 | clone.includeColumns = { 68 | source: clone.includeColumns.source, 69 | flags: clone.includeColumns.flags 70 | }; 71 | } 72 | return clone; 73 | }; 74 | ProcessorFork.prototype.initWorker = function () { 75 | var _this = this; 76 | var _a, _b; 77 | this.childProcess.on("exit", function () { 78 | _this.flushResult(); 79 | }); 80 | this.childProcess.send({ 81 | cmd: "init", 82 | params: this.prepareParam(this.converter.parseParam) 83 | }); 84 | this.childProcess.on("message", function (msg) { 85 | if (msg.cmd === "inited") { 86 | _this.inited = true; 87 | } 88 | else if (msg.cmd === "eol") { 89 | if (_this.converter.listeners("eol").length > 0) { 90 | _this.converter.emit("eol", msg.value); 91 | } 92 | } 93 | else if (msg.cmd === "header") { 94 | if (_this.converter.listeners("header").length > 0) { 95 | _this.converter.emit("header", msg.value); 96 | } 97 | } 98 | else if (msg.cmd === "done") { 99 | // this.flushResult(); 100 | } 101 | }); 102 | (_a = this.childProcess.stdout) === null || _a === void 0 ? void 0 : _a.on("data", function (data) { 103 | // console.log("stdout", data.toString()); 104 | var res = data.toString(); 105 | // console.log(res); 106 | _this.appendBuf(res); 107 | }); 108 | (_b = this.childProcess.stderr) === null || _b === void 0 ? void 0 : _b.on("data", function (data) { 109 | // console.log("stderr", data.toString()); 110 | _this.converter.emit("error", CSVError_1.default.fromJSON(JSON.parse(data.toString()))); 111 | }); 112 | }; 113 | ProcessorFork.prototype.flushResult = function () { 114 | // console.log("flush result", this.resultBuf.length); 115 | if (this.next) { 116 | this.next(this.resultBuf); 117 | } 118 | this.resultBuf = []; 119 | }; 120 | ProcessorFork.prototype.appendBuf = function (data) { 121 | var res = this.leftChunk + data; 122 | var list = res.split("\n"); 123 | var counter = 0; 124 | var lastBit = list[list.length - 1]; 125 | if (lastBit !== "") { 126 | this.leftChunk = list.pop() || ""; 127 | } 128 | else { 129 | this.leftChunk = ""; 130 | } 131 | this.resultBuf = this.resultBuf.concat(list); 132 | // while (list.length) { 133 | // let item = list.shift() || ""; 134 | // if (item.length === 0 ) { 135 | // continue; 136 | // } 137 | // // if (this.params.output !== "line") { 138 | // // item = JSON.parse(item); 139 | // // } 140 | // this.resultBuf.push(item); 141 | // counter++; 142 | // } 143 | // console.log("buf length",this.resultBuf.length); 144 | }; 145 | ProcessorFork.prototype.process = function (chunk) { 146 | var _this = this; 147 | return new Promise(function (resolve, reject) { 148 | var _a; 149 | // console.log("chunk", chunk.length); 150 | _this.next = resolve; 151 | // this.appendReadBuf(chunk); 152 | (_a = _this.childProcess.stdin) === null || _a === void 0 ? 
void 0 : _a.write(chunk, function () { 153 | // console.log("chunk callback"); 154 | _this.flushResult(); 155 | }); 156 | }); 157 | }; 158 | return ProcessorFork; 159 | }(Processor_1.Processor)); 160 | exports.ProcessorFork = ProcessorFork; 161 | exports.EOM = "\x03"; 162 | -------------------------------------------------------------------------------- /v1/core/linesToJson.js: -------------------------------------------------------------------------------- 1 | var parserMgr = require("./parserMgr.js"); 2 | var CSVError = require("./CSVError"); 3 | var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; 4 | /** 5 | * Convert lines of csv array into json 6 | * @param {[type]} lines [[col1,col2,col3]] 7 | * @param {[type]} params Converter params with _headers field populated 8 | * @param {[type]} idx start pos of the lines 9 | * @return {[type]} [{err:null,json:obj,index:line,row:[csv row]}] 10 | */ 11 | module.exports = function (lines, params, idx) { 12 | if (params._needParseJson) { 13 | if (!params._headers || !Array.isArray(params._headers)) { 14 | params._headers = []; 15 | } 16 | if (!params.parseRules) { 17 | var row = params._headers; 18 | params.parseRules = parserMgr.initParsers(row, params); 19 | } 20 | return processRows(lines, params, idx); 21 | } else { 22 | return justReturnRows(lines, params, idx); 23 | } 24 | }; 25 | 26 | function justReturnRows(lines, params, idx) { 27 | var rtn = []; 28 | for (var i = 0, len = lines.length; i < len; i++) { 29 | rtn.push({ 30 | err: null, 31 | json: {}, 32 | index: idx++, 33 | row: lines[i] 34 | }); 35 | } 36 | return rtn; 37 | } 38 | 39 | function processRows(csvRows, params, startIndex) { 40 | var res = []; 41 | for (var i = 0, len = csvRows.length; i < len; i++) { 42 | var r = processRow(csvRows[i], params, startIndex++); 43 | if (r) { 44 | res.push(r); 45 | } 46 | } 47 | return res; 48 | } 49 | 50 | function processRow(row, param, index) { 51 | var parseRules = param.parseRules; 52 | if (param.checkColumn && row.length !== parseRules.length) { 53 | return { 54 | err: CSVError.column_mismatched(index) 55 | }; 56 | } 57 | 58 | var headRow = param._headers; 59 | var resultRow = convertRowToJson(row, headRow, param); 60 | if (resultRow) { 61 | return { 62 | json: resultRow, 63 | index: index, 64 | row: row 65 | }; 66 | } else { 67 | return null; 68 | } 69 | } 70 | 71 | function convertRowToJson(row, headRow, param) { 72 | var hasValue = false; 73 | var resultRow = {}; 74 | 75 | for (var i = 0, len = row.length; i < len; i++) { 76 | var convertFunc, head, item; 77 | item = row[i]; 78 | 79 | if (param.ignoreEmpty && item === '') { 80 | continue; 81 | } 82 | hasValue = true; 83 | 84 | head = headRow[i]; 85 | if (!head || head === "") { 86 | head = headRow[i] = "field" + (i + 1); 87 | } 88 | var convFunc = getConvFunc(head, i, param); 89 | if (convFunc) { 90 | var convRes = convFunc(item, head, resultRow,row,i); 91 | if (convRes !== undefined) { 92 | setPath(resultRow, head, convRes); 93 | } 94 | } else { 95 | var flag = getFlag(head, i, param); 96 | if (flag === 'omit') { 97 | continue; 98 | } 99 | if (param.checkType) { 100 | convertFunc = checkType(item, head, i, param); 101 | item = convertFunc(item); 102 | } 103 | var title = getTitle(head, i, param); 104 | if (flag === 'flat' || param.flatKeys) { 105 | resultRow[title] = item; 106 | } else { 107 | setPath(resultRow, title, item); 108 | } 109 | } 110 | } 111 | if (hasValue) { 112 | return resultRow; 113 | } else { 114 | return false; 115 | } 116 | } 117 | 118 | var builtInConv={ 119 | 
"string":stringType, 120 | "number":numberType, 121 | "omit":function(){} 122 | } 123 | function getConvFunc(head,i,param){ 124 | if (param._columnConv[i] !== undefined){ 125 | return param._columnConv[i]; 126 | }else{ 127 | var flag=param.colParser[head]; 128 | if (flag === undefined){ 129 | return param._columnConv[i]=false; 130 | } 131 | if (typeof flag ==="string"){ 132 | flag=flag.trim().toLowerCase(); 133 | var builtInFunc=builtInConv[flag]; 134 | if (builtInFunc){ 135 | return param._columnConv[i]=builtInFunc; 136 | }else{ 137 | return param._columnConv[i]=false; 138 | } 139 | }else if (typeof flag ==="function"){ 140 | return param._columnConv[i]=flag; 141 | }else{ 142 | return param._columnConv[i]=false; 143 | } 144 | } 145 | } 146 | function setPath(json, path, value) { 147 | var _set = require('lodash/set'); 148 | var pathArr = path.split('.'); 149 | if (pathArr.length === 1) { 150 | json[path] = value; 151 | } else { 152 | _set(json, path, value); 153 | } 154 | } 155 | 156 | function getFlag(head, i, param) { 157 | if (typeof param._headerFlag[i] === "string") { 158 | return param._headerFlag[i]; 159 | } else if (head.indexOf('*omit*') > -1) { 160 | return param._headerFlag[i] = 'omit'; 161 | } else if (head.indexOf('*flat*') > -1) { 162 | return param._headerFlag[i] = 'flat'; 163 | } else { 164 | return param._headerFlag[i] = ''; 165 | } 166 | } 167 | 168 | function getTitle(head, i, param) { 169 | if (param._headerTitle[i]) { 170 | return param._headerTitle[i]; 171 | } 172 | 173 | var flag = getFlag(head, i, param); 174 | var str = head.replace('*flat*', '').replace('string#!', '').replace('number#!', ''); 175 | return param._headerTitle[i] = str; 176 | } 177 | 178 | function checkType(item, head, headIdx, param) { 179 | if (param._headerType[headIdx]) { 180 | return param._headerType[headIdx]; 181 | } else if (head.indexOf('number#!') > -1) { 182 | return param._headerType[headIdx] = numberType; 183 | } else if (head.indexOf('string#!') > -1) { 184 | return param._headerType[headIdx] = stringType; 185 | } else if (param.checkType) { 186 | return param._headerType[headIdx] = dynamicType; 187 | } else { 188 | return param._headerType[headIdx] = stringType; 189 | } 190 | } 191 | 192 | function numberType(item) { 193 | var rtn = parseFloat(item); 194 | if (isNaN(rtn)) { 195 | return item; 196 | } 197 | return rtn; 198 | } 199 | 200 | function stringType(item) { 201 | return item.toString(); 202 | } 203 | 204 | function dynamicType(item) { 205 | var trimed = item.trim(); 206 | if (trimed === "") { 207 | return stringType(item); 208 | } 209 | if (numReg.test(trimed)) { 210 | return numberType(item); 211 | } else if (trimed.length === 5 && trimed.toLowerCase() === "false" || trimed.length === 4 && trimed.toLowerCase() === "true") { 212 | return booleanType(item); 213 | } else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { 214 | return jsonType(item); 215 | } else { 216 | return stringType(item); 217 | } 218 | } 219 | 220 | function booleanType(item) { 221 | var trimed = item.trim(); 222 | if (trimed.length === 5 && trimed.toLowerCase() === "false") { 223 | return false; 224 | } else { 225 | return true; 226 | } 227 | } 228 | 229 | function jsonType(item) { 230 | try { 231 | return JSON.parse(item); 232 | } catch (e) { 233 | return item; 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /v2/Result.js: 
-------------------------------------------------------------------------------- 1 | "use strict"; 2 | Object.defineProperty(exports, "__esModule", { value: true }); 3 | exports.Result = void 0; 4 | var os_1 = require("os"); 5 | var Result = /** @class */ (function () { 6 | function Result(converter) { 7 | this.converter = converter; 8 | this.finalResult = []; 9 | } 10 | Object.defineProperty(Result.prototype, "needEmitLine", { 11 | get: function () { 12 | return !!this.converter.parseRuntime.subscribe && !!this.converter.parseRuntime.subscribe.onNext || this.needPushDownstream; 13 | }, 14 | enumerable: false, 15 | configurable: true 16 | }); 17 | Object.defineProperty(Result.prototype, "needPushDownstream", { 18 | get: function () { 19 | if (this._needPushDownstream === undefined) { 20 | this._needPushDownstream = this.converter.listeners("data").length > 0 || this.converter.listeners("readable").length > 0; 21 | } 22 | return this._needPushDownstream; 23 | }, 24 | enumerable: false, 25 | configurable: true 26 | }); 27 | Object.defineProperty(Result.prototype, "needEmitAll", { 28 | get: function () { 29 | return !!this.converter.parseRuntime.then && this.converter.parseParam.needEmitAll; 30 | // return !!this.converter.parseRuntime.then; 31 | }, 32 | enumerable: false, 33 | configurable: true 34 | }); 35 | Result.prototype.processResult = function (resultLines) { 36 | var _this = this; 37 | var startPos = this.converter.parseRuntime.parsedLineNumber; 38 | if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") { 39 | if (startPos === 0) { 40 | pushDownstream(this.converter, "[" + os_1.EOL); 41 | } 42 | } 43 | // let prom: Promise; 44 | return new Promise(function (resolve, reject) { 45 | if (_this.needEmitLine) { 46 | processLineByLine(resultLines, _this.converter, 0, _this.needPushDownstream, function (err) { 47 | if (err) { 48 | reject(err); 49 | } 50 | else { 51 | _this.appendFinalResult(resultLines); 52 | resolve(undefined); 53 | } 54 | }); 55 | // resolve(); 56 | } 57 | else { 58 | _this.appendFinalResult(resultLines); 59 | resolve(undefined); 60 | } 61 | }); 62 | }; 63 | Result.prototype.appendFinalResult = function (lines) { 64 | if (this.needEmitAll) { 65 | this.finalResult = this.finalResult.concat(lines); 66 | } 67 | this.converter.parseRuntime.parsedLineNumber += lines.length; 68 | }; 69 | Result.prototype.processError = function (err) { 70 | if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onError) { 71 | this.converter.parseRuntime.subscribe.onError(err); 72 | } 73 | if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onrejected) { 74 | this.converter.parseRuntime.then.onrejected(err); 75 | } 76 | }; 77 | Result.prototype.endProcess = function () { 78 | if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onfulfilled) { 79 | if (this.needEmitAll) { 80 | this.converter.parseRuntime.then.onfulfilled(this.finalResult); 81 | } 82 | else { 83 | this.converter.parseRuntime.then.onfulfilled([]); 84 | } 85 | } 86 | if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onCompleted) { 87 | this.converter.parseRuntime.subscribe.onCompleted(); 88 | } 89 | if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") { 90 | pushDownstream(this.converter, "]" + os_1.EOL); 91 | } 92 | }; 93 | return Result; 94 | }()); 95 | exports.Result = Result; 96 | function processLineByLine(lines, conv, offset, needPushDownstream, cb) { 97 | if 
(offset >= lines.length) { 98 | cb(); 99 | } 100 | else { 101 | if (conv.parseRuntime.subscribe && conv.parseRuntime.subscribe.onNext) { 102 | var hook_1 = conv.parseRuntime.subscribe.onNext; 103 | var nextLine_1 = lines[offset]; 104 | var res = hook_1(nextLine_1, conv.parseRuntime.parsedLineNumber + offset); 105 | offset++; 106 | // if (isAsync === undefined) { 107 | if (res && res.then) { 108 | res.then(function () { 109 | processRecursive(lines, hook_1, conv, offset, needPushDownstream, cb, nextLine_1); 110 | }, cb); 111 | } 112 | else { 113 | // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false); 114 | if (needPushDownstream) { 115 | pushDownstream(conv, nextLine_1); 116 | } 117 | while (offset < lines.length) { 118 | var line = lines[offset]; 119 | hook_1(line, conv.parseRuntime.parsedLineNumber + offset); 120 | offset++; 121 | if (needPushDownstream) { 122 | pushDownstream(conv, line); 123 | } 124 | } 125 | cb(); 126 | } 127 | // } else if (isAsync === true) { 128 | // (res as PromiseLike).then(function () { 129 | // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, true); 130 | // }, cb); 131 | // } else if (isAsync === false) { 132 | // processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false); 133 | // } 134 | } 135 | else { 136 | if (needPushDownstream) { 137 | while (offset < lines.length) { 138 | var line = lines[offset++]; 139 | pushDownstream(conv, line); 140 | } 141 | } 142 | cb(); 143 | } 144 | } 145 | } 146 | function processRecursive(lines, hook, conv, offset, needPushDownstream, cb, res) { 147 | if (needPushDownstream) { 148 | pushDownstream(conv, res); 149 | } 150 | processLineByLine(lines, conv, offset, needPushDownstream, cb); 151 | } 152 | function pushDownstream(conv, res) { 153 | if (typeof res === "object" && !conv.options.objectMode) { 154 | var data = JSON.stringify(res); 155 | conv.push(data + (conv.parseParam.downstreamFormat === "array" ? "," + os_1.EOL : os_1.EOL), "utf8"); 156 | } 157 | else { 158 | conv.push(res); 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /v2/Converter.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __extends = (this && this.__extends) || (function () { 3 | var extendStatics = function (d, b) { 4 | extendStatics = Object.setPrototypeOf || 5 | ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) || 6 | function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; }; 7 | return extendStatics(d, b); 8 | }; 9 | return function (d, b) { 10 | if (typeof b !== "function" && b !== null) 11 | throw new TypeError("Class extends value " + String(b) + " is not a constructor or null"); 12 | extendStatics(d, b); 13 | function __() { this.constructor = d; } 14 | d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __()); 15 | }; 16 | })(); 17 | Object.defineProperty(exports, "__esModule", { value: true }); 18 | exports.Converter = void 0; 19 | var stream_1 = require("stream"); 20 | var Parameters_1 = require("./Parameters"); 21 | var ParseRuntime_1 = require("./ParseRuntime"); 22 | var ProcessorLocal_1 = require("./ProcessorLocal"); 23 | var Result_1 = require("./Result"); 24 | var Converter = /** @class */ (function (_super) { 25 | __extends(Converter, _super); 26 | function Converter(param, options) { 27 | if (options === void 0) { options = {}; } 28 | var _this = _super.call(this, options) || this; 29 | _this.options = options; 30 | _this.params = (0, Parameters_1.mergeParams)(param); 31 | _this.runtime = (0, ParseRuntime_1.initParseRuntime)(_this); 32 | _this.result = new Result_1.Result(_this); 33 | _this.processor = new ProcessorLocal_1.ProcessorLocal(_this); 34 | _this.once("error", function (err) { 35 | setImmediate(function () { 36 | _this.result.processError(err); 37 | _this.emit("done", err); 38 | }); 39 | }); 40 | _this.once("done", function () { 41 | _this.processor.destroy(); 42 | }); 43 | return _this; 44 | } 45 | Converter.prototype.preRawData = function (onRawData) { 46 | this.runtime.preRawDataHook = onRawData; 47 | return this; 48 | }; 49 | Converter.prototype.preFileLine = function (onFileLine) { 50 | this.runtime.preFileLineHook = onFileLine; 51 | return this; 52 | }; 53 | Converter.prototype.subscribe = function (onNext, onError, onCompleted) { 54 | this.parseRuntime.subscribe = { 55 | onNext: onNext, 56 | onError: onError, 57 | onCompleted: onCompleted 58 | }; 59 | return this; 60 | }; 61 | Converter.prototype.fromFile = function (filePath, options) { 62 | var _this = this; 63 | var fs = require("fs"); 64 | fs.exists(filePath, function (exist) { 65 | if (exist) { 66 | var rs = fs.createReadStream(filePath, options); 67 | rs.pipe(_this); 68 | } 69 | else { 70 | _this.emit('error', new Error("File does not exist at ".concat(filePath, ". 
Check to make sure the file path to your csv is correct."))); 71 | } 72 | }); 73 | return this; 74 | }; 75 | Converter.prototype.fromStream = function (readStream) { 76 | readStream.pipe(this); 77 | return this; 78 | }; 79 | Converter.prototype.fromString = function (csvString) { 80 | var csv = csvString.toString(); 81 | var read = new stream_1.Readable(); 82 | var idx = 0; 83 | read._read = function (size) { 84 | if (idx >= csvString.length) { 85 | this.push(null); 86 | } 87 | else { 88 | var str = csvString.substring(idx, idx + size); 89 | this.push(str); 90 | idx += size; 91 | } 92 | }; 93 | return this.fromStream(read); 94 | }; 95 | Converter.prototype.then = function (onfulfilled, onrejected) { 96 | var _this = this; 97 | return new Promise(function (resolve, reject) { 98 | _this.parseRuntime.then = { 99 | onfulfilled: function (value) { 100 | if (onfulfilled) { 101 | resolve(onfulfilled(value)); 102 | } 103 | else { 104 | resolve(value); 105 | } 106 | }, 107 | onrejected: function (err) { 108 | if (onrejected) { 109 | resolve(onrejected(err)); 110 | } 111 | else { 112 | reject(err); 113 | } 114 | } 115 | }; 116 | }); 117 | }; 118 | Object.defineProperty(Converter.prototype, "parseParam", { 119 | get: function () { 120 | return this.params; 121 | }, 122 | enumerable: false, 123 | configurable: true 124 | }); 125 | Object.defineProperty(Converter.prototype, "parseRuntime", { 126 | get: function () { 127 | return this.runtime; 128 | }, 129 | enumerable: false, 130 | configurable: true 131 | }); 132 | Converter.prototype._transform = function (chunk, encoding, cb) { 133 | var _this = this; 134 | this.processor.process(chunk) 135 | .then(function (result) { 136 | // console.log(result); 137 | if (result.length > 0) { 138 | _this.runtime.started = true; 139 | return _this.result.processResult(result); 140 | } 141 | }) 142 | .then(function () { 143 | _this.emit("drained"); 144 | cb(); 145 | }, function (error) { 146 | _this.runtime.hasError = true; 147 | _this.runtime.error = error; 148 | _this.emit("error", error); 149 | cb(); 150 | }); 151 | }; 152 | Converter.prototype._flush = function (cb) { 153 | var _this = this; 154 | this.processor.flush() 155 | .then(function (data) { 156 | if (data.length > 0) { 157 | return _this.result.processResult(data); 158 | } 159 | }) 160 | .then(function () { 161 | _this.processEnd(cb); 162 | }, function (err) { 163 | _this.emit("error", err); 164 | cb(); 165 | }); 166 | }; 167 | Converter.prototype.processEnd = function (cb) { 168 | this.result.endProcess(); 169 | this.emit("done"); 170 | cb(); 171 | }; 172 | Object.defineProperty(Converter.prototype, "parsedLineNumber", { 173 | get: function () { 174 | return this.runtime.parsedLineNumber; 175 | }, 176 | enumerable: false, 177 | configurable: true 178 | }); 179 | return Converter; 180 | }(stream_1.Transform)); 181 | exports.Converter = Converter; 182 | -------------------------------------------------------------------------------- /v2/lineToJson.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | var __importDefault = (this && this.__importDefault) || function (mod) { 3 | return (mod && mod.__esModule) ? 
mod : { "default": mod }; 4 | }; 5 | Object.defineProperty(exports, "__esModule", { value: true }); 6 | exports.default = default_1; 7 | var CSVError_1 = __importDefault(require("./CSVError")); 8 | var set_1 = __importDefault(require("lodash/set")); 9 | var numReg = /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/; 10 | function default_1(csvRows, conv) { 11 | var res = []; 12 | for (var i = 0, len = csvRows.length; i < len; i++) { 13 | var r = processRow(csvRows[i], conv, i); 14 | if (r) { 15 | res.push(r); 16 | } 17 | } 18 | return res; 19 | } 20 | ; 21 | function processRow(row, conv, index) { 22 | if (conv.parseParam.checkColumn && conv.parseRuntime.headers && row.length !== conv.parseRuntime.headers.length) { 23 | throw (CSVError_1.default.column_mismatched(conv.parseRuntime.parsedLineNumber + index)); 24 | } 25 | var headRow = conv.parseRuntime.headers || []; 26 | var resultRow = convertRowToJson(row, headRow, conv); 27 | if (resultRow) { 28 | return resultRow; 29 | } 30 | else { 31 | return null; 32 | } 33 | } 34 | function convertRowToJson(row, headRow, conv) { 35 | var hasValue = false; 36 | var resultRow = {}; 37 | for (var i = 0, len = row.length; i < len; i++) { 38 | var item = row[i]; 39 | if (conv.parseParam.ignoreEmpty && item === '') { 40 | continue; 41 | } 42 | hasValue = true; 43 | var head = headRow[i]; 44 | if (!head || head === "") { 45 | head = headRow[i] = "field" + (i + 1); 46 | } 47 | var convFunc = getConvFunc(head, i, conv); 48 | if (convFunc) { 49 | var convRes = convFunc(item, head, resultRow, row, i); 50 | if (convRes !== undefined) { 51 | setPath(resultRow, head, convRes, conv, i); 52 | } 53 | } 54 | else { 55 | if (conv.parseParam.checkType) { 56 | var convertFunc = checkType(item, head, i, conv); 57 | item = convertFunc(item); 58 | } 59 | if (item !== undefined) { 60 | setPath(resultRow, head, item, conv, i); 61 | } 62 | } 63 | } 64 | if (hasValue) { 65 | return resultRow; 66 | } 67 | else { 68 | return null; 69 | } 70 | } 71 | var builtInConv = { 72 | "string": stringType, 73 | "number": numberType, 74 | "omit": function () { } 75 | }; 76 | function getConvFunc(head, i, conv) { 77 | if (conv.parseRuntime.columnConv[i] !== undefined) { 78 | return conv.parseRuntime.columnConv[i]; 79 | } 80 | else { 81 | var flag = conv.parseParam.colParser[head]; 82 | if (flag === undefined) { 83 | return conv.parseRuntime.columnConv[i] = null; 84 | } 85 | if (typeof flag === "object") { 86 | flag = flag.cellParser || "string"; 87 | } 88 | if (typeof flag === "string") { 89 | flag = flag.trim().toLowerCase(); 90 | var builtInFunc = builtInConv[flag]; 91 | if (builtInFunc) { 92 | return conv.parseRuntime.columnConv[i] = builtInFunc; 93 | } 94 | else { 95 | return conv.parseRuntime.columnConv[i] = null; 96 | } 97 | } 98 | else if (typeof flag === "function") { 99 | return conv.parseRuntime.columnConv[i] = flag; 100 | } 101 | else { 102 | return conv.parseRuntime.columnConv[i] = null; 103 | } 104 | } 105 | } 106 | function setPath(resultJson, head, value, conv, headIdx) { 107 | if (!conv.parseRuntime.columnValueSetter[headIdx]) { 108 | if (conv.parseParam.flatKeys) { 109 | conv.parseRuntime.columnValueSetter[headIdx] = flatSetter; 110 | } 111 | else { 112 | if (head.indexOf(".") > -1) { 113 | var headArr = head.split("."); 114 | var jsonHead = true; 115 | while (headArr.length > 0) { 116 | var headCom = headArr.shift(); 117 | if (headCom.length === 0) { 118 | jsonHead = false; 119 | break; 120 | } 121 | } 122 | if (!jsonHead || conv.parseParam.colParser[head] && 
conv.parseParam.colParser[head].flat) { 123 | conv.parseRuntime.columnValueSetter[headIdx] = flatSetter; 124 | } 125 | else { 126 | conv.parseRuntime.columnValueSetter[headIdx] = jsonSetter; 127 | } 128 | } 129 | else { 130 | conv.parseRuntime.columnValueSetter[headIdx] = flatSetter; 131 | } 132 | } 133 | } 134 | if (conv.parseParam.nullObject === true && value === "null") { 135 | value = null; 136 | } 137 | conv.parseRuntime.columnValueSetter[headIdx](resultJson, head, value); 138 | // flatSetter(resultJson, head, value); 139 | } 140 | function flatSetter(resultJson, head, value) { 141 | resultJson[head] = value; 142 | } 143 | function jsonSetter(resultJson, head, value) { 144 | (0, set_1.default)(resultJson, head, value); 145 | } 146 | function checkType(item, head, headIdx, conv) { 147 | if (conv.parseRuntime.headerType[headIdx]) { 148 | return conv.parseRuntime.headerType[headIdx]; 149 | } 150 | else if (head.indexOf('number#!') > -1) { 151 | return conv.parseRuntime.headerType[headIdx] = numberType; 152 | } 153 | else if (head.indexOf('string#!') > -1) { 154 | return conv.parseRuntime.headerType[headIdx] = stringType; 155 | } 156 | else if (conv.parseParam.checkType) { 157 | return conv.parseRuntime.headerType[headIdx] = dynamicType; 158 | } 159 | else { 160 | return conv.parseRuntime.headerType[headIdx] = stringType; 161 | } 162 | } 163 | function numberType(item) { 164 | var rtn = parseFloat(item); 165 | if (isNaN(rtn)) { 166 | return item; 167 | } 168 | return rtn; 169 | } 170 | function stringType(item) { 171 | return item.toString(); 172 | } 173 | function dynamicType(item) { 174 | var trimed = item.trim(); 175 | if (trimed === "") { 176 | return stringType(item); 177 | } 178 | if (numReg.test(trimed)) { 179 | return numberType(item); 180 | } 181 | else if (trimed.length === 5 && trimed.toLowerCase() === "false" || trimed.length === 4 && trimed.toLowerCase() === "true") { 182 | return booleanType(item); 183 | } 184 | else if (trimed[0] === "{" && trimed[trimed.length - 1] === "}" || trimed[0] === "[" && trimed[trimed.length - 1] === "]") { 185 | return jsonType(item); 186 | } 187 | else { 188 | return stringType(item); 189 | } 190 | } 191 | function booleanType(item) { 192 | var trimmed = item.trim(); 193 | return !(trimmed.length === 5 && trimmed.toLowerCase() === "false"); 194 | } 195 | function jsonType(item) { 196 | try { 197 | return JSON.parse(item); 198 | } 199 | catch (e) { 200 | return item; 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/rowSplit.ts: -------------------------------------------------------------------------------- 1 | import { CSVParseParam } from "./Parameters"; 2 | import { Converter } from "./Converter"; 3 | import { Fileline } from "./fileline"; 4 | import getEol from "./getEol"; 5 | import { filterArray, trimLeft, trimRight } from "./util"; 6 | 7 | const defaultDelimiters = [",", "|", "\t", ";", ":"]; 8 | export class RowSplit { 9 | private quote: string; 10 | private trim: boolean; 11 | private escape: string; 12 | private cachedRegExp: { [key: string]: RegExp } = {}; 13 | private delimiterEmitted = false; 14 | private _needEmitDelimiter?: boolean = undefined; 15 | private get needEmitDelimiter() { 16 | if (this._needEmitDelimiter === undefined) { 17 | this._needEmitDelimiter = this.conv.listeners("delimiter").length > 0; 18 | } 19 | return this._needEmitDelimiter; 20 | } 21 | constructor(private conv: Converter) { 22 | this.quote = conv.parseParam.quote; 23 | this.trim = 
conv.parseParam.trim; 24 | this.escape = conv.parseParam.escape; 25 | } 26 | parse(fileline: Fileline): RowSplitResult { 27 | if (fileline.length === 0 || (this.conv.parseParam.ignoreEmpty && fileline.trim().length === 0)) { 28 | return { cells: [], closed: true }; 29 | } 30 | const quote = this.quote; 31 | const trim = this.trim; 32 | const escape = this.escape; 33 | if (this.conv.parseRuntime.delimiter instanceof Array || this.conv.parseRuntime.delimiter.toLowerCase() === "auto") { 34 | this.conv.parseRuntime.delimiter = this.getDelimiter(fileline); 35 | 36 | } 37 | if (this.needEmitDelimiter && !this.delimiterEmitted) { 38 | this.conv.emit("delimiter", this.conv.parseRuntime.delimiter); 39 | this.delimiterEmitted = true; 40 | } 41 | const delimiter = this.conv.parseRuntime.delimiter; 42 | const rowArr = fileline.split(delimiter); 43 | if (quote === "off") { 44 | if (trim) { 45 | for (let i = 0; i < rowArr.length; i++) { 46 | rowArr[i] = rowArr[i].trim(); 47 | } 48 | } 49 | return { cells: rowArr, closed: true }; 50 | } else { 51 | return this.toCSVRow(rowArr, trim, quote, delimiter); 52 | } 53 | 54 | } 55 | private toCSVRow(rowArr: string[], trim: boolean, quote: string, delimiter: string): RowSplitResult { 56 | const row: string[] = []; 57 | let inquote = false; 58 | let quoteBuff = ''; 59 | for (let i = 0, rowLen = rowArr.length; i < rowLen; i++) { 60 | let e = rowArr[i]; 61 | if (!inquote && trim) { 62 | e = trimLeft(e); 63 | } 64 | const len = e.length; 65 | if (!inquote) { 66 | if (len === 2 && e === this.quote + this.quote) { 67 | row.push(""); 68 | continue; 69 | } else if (this.isQuoteOpen(e)) { //quote open 70 | e = e.substr(1); 71 | if (this.isQuoteClose(e)) { //quote close 72 | e = e.substr(0, e.lastIndexOf(quote)); 73 | e = this.escapeQuote(e); 74 | row.push(e); 75 | continue; 76 | } else if (e.indexOf(quote) !== -1) { 77 | let count = 0; 78 | let prev = ""; 79 | for (const c of e) { 80 | // count quotes only if previous character is not escape char 81 | if (c === quote && prev !== this.escape) { 82 | count++; 83 | prev = ""; 84 | } else { 85 | // save previous char to temp variable 86 | prev = c; 87 | } 88 | } 89 | if (count % 2 === 1) { 90 | if (trim) { 91 | e = trimRight(e); 92 | } 93 | row.push(quote + e); 94 | continue; 95 | }else{ 96 | inquote = true; 97 | quoteBuff += e; 98 | continue; 99 | } 100 | } 101 | else { 102 | inquote = true; 103 | quoteBuff += e; 104 | continue; 105 | } 106 | } else { 107 | if (trim) { 108 | e = trimRight(e); 109 | } 110 | row.push(e); 111 | continue; 112 | } 113 | } else { //previous quote not closed 114 | if (this.isQuoteClose(e)) { //close double quote 115 | inquote = false; 116 | e = e.substr(0, len - 1); 117 | quoteBuff += delimiter + e; 118 | quoteBuff = this.escapeQuote(quoteBuff); 119 | if (trim) { 120 | quoteBuff = trimRight(quoteBuff); 121 | } 122 | row.push(quoteBuff); 123 | quoteBuff = ""; 124 | } else { 125 | quoteBuff += delimiter + e; 126 | } 127 | } 128 | } 129 | 130 | // if (!inquote && param._needFilterRow) { 131 | // row = filterRow(row, param); 132 | // } 133 | 134 | return { cells: row, closed: !inquote }; 135 | } 136 | private getDelimiter(fileline: Fileline): string { 137 | let possibleDelimiters; 138 | if (this.conv.parseParam.delimiter === "auto") { 139 | possibleDelimiters = defaultDelimiters; 140 | } else if (this.conv.parseParam.delimiter instanceof Array) { 141 | possibleDelimiters = this.conv.parseParam.delimiter; 142 | } else { 143 | return this.conv.parseParam.delimiter; 144 | } 145 | let count = 0; 146 | let 
delimiter = ","; 147 | possibleDelimiters.forEach(function (delim) { 148 | const delimCount = fileline.split(delim).length; 149 | if (delimCount > count) { 150 | delimiter = delim; 151 | count = delimCount; 152 | } 153 | }); 154 | return delimiter; 155 | } 156 | private isQuoteOpen(str: string): boolean { 157 | const quote = this.quote; 158 | const escape = this.escape; 159 | return str[0] === quote && ( 160 | str[1] !== quote || 161 | str[1] === escape && (str[2] === quote || str.length === 2)); 162 | } 163 | private isQuoteClose(str: string): boolean { 164 | const quote = this.quote; 165 | const escape = this.escape; 166 | if (this.conv.parseParam.trim) { 167 | str = trimRight(str); 168 | } 169 | let count = 0; 170 | let idx = str.length - 1; 171 | while (str[idx] === quote || str[idx] === escape) { 172 | idx--; 173 | count++; 174 | } 175 | return count % 2 !== 0; 176 | } 177 | 178 | // private twoDoubleQuote(str: string): string { 179 | // var twoQuote = this.quote + this.quote; 180 | // var curIndex = -1; 181 | // while ((curIndex = str.indexOf(twoQuote, curIndex)) > -1) { 182 | // str = str.substring(0, curIndex) + str.substring(++curIndex); 183 | // } 184 | // return str; 185 | // } 186 | 187 | 188 | private escapeQuote(segment: string): string { 189 | const key = "es|" + this.quote + "|" + this.escape; 190 | if (this.cachedRegExp[key] === undefined) { 191 | this.cachedRegExp[key] = new RegExp('\\' + this.escape + '\\' + this.quote, 'g'); 192 | } 193 | const regExp = this.cachedRegExp[key]; 194 | // console.log(regExp,segment); 195 | return segment.replace(regExp, this.quote); 196 | } 197 | parseMultiLines(lines: Fileline[]): MultipleRowResult { 198 | const csvLines: string[][] = []; 199 | let left = ""; 200 | while (lines.length) { 201 | const line = left + lines.shift(); 202 | const row = this.parse(line); 203 | if (row.cells.length === 0 && this.conv.parseParam.ignoreEmpty) { 204 | continue; 205 | } 206 | if (row.closed || this.conv.parseParam.alwaysSplitAtEOL) { 207 | if (this.conv.parseRuntime.selectedColumns) { 208 | csvLines.push(filterArray(row.cells, this.conv.parseRuntime.selectedColumns)); 209 | } else { 210 | csvLines.push(row.cells); 211 | } 212 | 213 | left = ""; 214 | } else { 215 | left = line + (getEol(line, this.conv.parseRuntime) || "\n"); 216 | } 217 | } 218 | return { rowsCells: csvLines, partial: left }; 219 | } 220 | } 221 | export interface MultipleRowResult { 222 | rowsCells: string[][]; 223 | partial: string; 224 | } 225 | export interface RowSplitResult { 226 | /** 227 | * csv row array. 
["a","b","c"] 228 | */ 229 | cells: string[], 230 | /** 231 | * if the passed fileline is a complete row 232 | */ 233 | closed: boolean 234 | } 235 | 236 | -------------------------------------------------------------------------------- /docs/csvtojson-v2.md: -------------------------------------------------------------------------------- 1 | # V2 Features / Changes 2 | 3 | [Features](#features) 4 | 5 | * [Add Promise and Async / Await support](#add-promise-and-async--await-support) 6 | * [Add asynchronous line by line processing support](#add-asynchronous-line-by-line-processing-supportt) 7 | * [Built-in TypeScript support](#built-in-typescript-support) 8 | * [Output format options](#output-format-options) 9 | * [Async Hooks Support](#async-hooks-support) 10 | * [Performance Improvement](#performance-improvement) 11 | 12 | [Upgrade to v2](#upgrade-to-csvtojson-v2) 13 | 14 | * [Dropped support to node.js<4](#dropped-support-to-nodejs4) 15 | * ['csv', 'json', 'record_parsed', 'end_parsed' events were replaced by .subscribe and .then](#csv-json-record_parsed-end_parsed-events-were-replaced-by-subscribe-and-then) 16 | * [Worker has been removed](#worker-has-been-removed) 17 | * [fromFile / fromStream / fromString will not accept callback. Use .then instead](#fromfile--fromstream--fromstring-will-not-accept-callback-use-then-instead) 18 | * [ignoreColumns and includeColumns accepts only RegExp now](#ignorecolumns-and-includecolumns-accepts-only-regexp-now) 19 | * [.transf is removed](#transf-is-removed) 20 | * [.preRawData uses Promise instead of using callback](#prerawdata-uses-promise-instead-of-using-callback) 21 | * [removed toArrayString parameter](#removed-toarraystring-parameter) 22 | * [line number now starts from 0 instead of 1](#line-number-now-starts-from-0-instead-of-1) 23 | * [Moved Converter constructor.](#moved-converter-constructor) 24 | * [end event will not emit if no downstream](#end-event-will-not-emit-if-no-downstream) 25 | 26 | # Features 27 | 28 | ## Add Promise and Async / Await support 29 | 30 | ```js 31 | // Promise 32 | csv() 33 | .fromFile(myCSVFilePath) 34 | .then((jsonArray)=>{ 35 | 36 | }, errorHandle); 37 | 38 | // async / await 39 | const jsonArray= await csv().fromFile(myCSVFilePath); 40 | 41 | // Promise chain 42 | request.get(csvUrl) 43 | .then((csvdata)=>{ 44 | return csv().fromString(csvdata) 45 | }) 46 | .then((jsonArray)=>{ 47 | 48 | }) 49 | ``` 50 | 51 | ## Add asynchronous line by line processing support 52 | 53 | ```js 54 | // async process 55 | csv() 56 | .fromFile(csvFilePath) 57 | .subscribe((json,lineNumber)=>{ 58 | return Promise((resolve,reject)=>{ 59 | // process the json line in asynchronous. 60 | }) 61 | },onError, onComplete) 62 | 63 | // sync process 64 | csv() 65 | .fromFile(csvFilePath) 66 | .subscribe((json,lineNumber)=>{ 67 | // process the json line in synchronous. 
68 | },onError, onComplete) 69 | 70 | ``` 71 | 72 | ## Built-in TypeScript support 73 | 74 | ```ts 75 | // csvtojson/index.d.ts file 76 | import csv from "csvtojson"; 77 | ``` 78 | 79 | ## Output format options 80 | 81 | ```js 82 | /** 83 | * csv data: 84 | * a,b,c 85 | * 1,2,3 86 | * const csvStr; 87 | */ 88 | 89 | let result= await csv().fromString(csvStr); 90 | /** 91 | * result is a json array: 92 | * [{ 93 | * a: "1", 94 | * b: "2", 95 | * c: "3" 96 | * }] 97 | */ 98 | result= await csv({output:"csv",noheader: true}).fromString(csvStr); 99 | /** 100 | * result is an array of csv rows: 101 | * [ 102 | * ["a","b","c"], 103 | * ["1","2","3"] 104 | * ] 105 | */ 106 | result= await csv({output:"line",noheader: true}).fromString(csvStr); 107 | /** 108 | * result is an array of csv lines as strings (including end-of-line characters inside cells if present): 109 | * [ 110 | * "a,b,c", 111 | * "1,2,3" 112 | * ] 113 | */ 114 | 115 | ``` 116 | 117 | 118 | ## Async Hooks Support 119 | 120 | ### preRawData 121 | 122 | ```js 123 | csv().fromFile(csvFile) 124 | .preRawData((data)=>{ 125 | //async 126 | return new Promise((resolve,reject)=>{ 127 | //async process 128 | }); 129 | //sync 130 | return data.replace("a","b"); 131 | }) 132 | ``` 133 | 134 | ### preFileLine 135 | 136 | ```js 137 | csv().fromFile(csvFile) 138 | .preFileLine((fileLine,lineNumber)=>{ 139 | //async 140 | return new Promise((resolve,reject)=>{ 141 | //async process 142 | }); 143 | //sync 144 | return fileLine.replace("a","b"); 145 | }) 146 | ``` 147 | 148 | ### trans 149 | 150 | `.trans` has been replaced by `.subscribe`. See below. 151 | 152 | ## Performance Improvement 153 | 154 | When converting to a `json` array, `v2` is around 8-10 times faster than `v1`. 155 | 156 | # Upgrade to csvtojson V2 157 | 158 | There are many exciting changes in csvtojson `v2`. 159 | 160 | However, as a major release, it introduces some breaking changes. 161 | 162 | ## Dropped support for node.js<4 163 | 164 | From `v2.0.0`, csvtojson only supports Node.JS >=4.0.0. 165 | 166 | 167 | ## 'csv', 'json', 'record_parsed', 'end_parsed' events were replaced by .subscribe and .then 168 | 169 | From `2.0.0`, the events above are replaced by the `.subscribe` and `.then` methods. The output format is controlled by an `output` parameter, which can be `json`, `csv`, or `line` in `v2.0.0`. 170 | 171 | Below are some examples of code changes: 172 | 173 | ```js 174 | //before -- get json object 175 | csv().fromString(myCSV).on("json",function(json){}); 176 | csv().fromString(myCSV).on("record_parsed",function(json){}); 177 | //now 178 | csv().fromString(myCSV).subscribe(function(json){}); 179 | 180 | //before -- get csv row 181 | csv().fromString(myCSV).on("csv",function(csvRow){}); 182 | //now 183 | csv({output:"csv"}).fromString(myCSV).subscribe(function(csvRow){}); 184 | 185 | //before -- get final json array 186 | csv().fromString(myCSV).on("end_parsed",function(jsonArray){}); 187 | //now 188 | csv().fromString(myCSV).then(function(jsonArray){}); // Promise 189 | const jsonArray=await csv().fromString(myCSV); // async / await 190 | ``` 191 | 192 | 193 | ## Worker has been removed 194 | 195 | The Worker feature makes sense for the command line, where it could utilize multiple CPU cores to speed up processing of large csv files. However, it does not quite work as expected, mainly because coordinating the results of multiple processes is very complex. Also, the inter-process communication adds too much overhead, which minimizes the benefit gained from spawning workers.
196 | 197 | Thus, in version `2.0.0` I decided to temporarily remove the `Worker` feature and will re-think how to better utilize multiple CPU cores. 198 | 199 | 200 | ## fromFile / fromStream / fromString will not accept callback. Use `.then` instead 201 | 202 | 203 | **Before** 204 | 205 | ```js 206 | csv().fromFile(myFile,function(err,jsonArr){}) 207 | ``` 208 | 209 | **After** 210 | 211 | ```js 212 | //Promise 213 | csv().fromFile(myFile).then(function(jsonArr){},function(err){}) 214 | 215 | // Async 216 | const jsonArr=await csv().fromFile(myFile); 217 | ``` 218 | 219 | ## ignoreColumns and includeColumns accept only RegExp now 220 | 221 | **Before** 222 | 223 | ```js 224 | csv({ 225 | ignoreColumns:["gender","age"] 226 | }) 227 | ``` 228 | 229 | **Now** 230 | 231 | ```js 232 | csv({ 233 | ignoreColumns: /gender|age/ 234 | }) 235 | ``` 236 | 237 | ## .transf is removed 238 | 239 | `.transf` was used purely for result transformation and had very poor performance. 240 | 241 | It is now recommended to use `.subscribe` instead. 242 | 243 | **Before** 244 | ```js 245 | csv() 246 | .transf((jsonObj)=>{ 247 | jsonObj.myNewKey='some value' 248 | }).pipe(downstream) 249 | ``` 250 | 251 | **After** 252 | ```js 253 | csv() 254 | .subscribe((jsonObj)=>{ 255 | jsonObj.myNewKey='some value' 256 | }).pipe(downstream) 257 | ``` 258 | 259 | ## .preRawData uses Promise instead of using callback 260 | **Before** 261 | 262 | ```js 263 | csv() 264 | .preRawData((csvRawData,cb)=>{ 265 | var newData=csvRawData.replace('some value','another value') 266 | cb(newData); 267 | }) 268 | ``` 269 | 270 | **After** 271 | 272 | ```js 273 | csv() 274 | .preRawData((csvRawData)=>{ 275 | var newData=csvRawData.replace('some value','another value') 276 | // synchronous 277 | return newData; 278 | // or asynchronously 279 | return Promise.resolve(newData); 280 | }) 281 | ``` 282 | 283 | ## removed toArrayString parameter 284 | 285 | This feature was rarely used. 286 | 287 | ## line number now starts from 0 instead of 1 288 | 289 | The first row in the csv is now always indexed as 0, whether or not it is a header row. 290 | 291 | 292 | ## end event will not emit if no downstream 293 | 294 | The definition of the [end event](https://nodejs.org/api/stream.html#stream_event_end) is that there is no more data to be consumed from the stream. Thus it will not emit if there is no downstream after the parser. To be notified when parsing has finished, use the `done` event instead.
295 | 296 | ```js 297 | // before 298 | csv().on("end",()=>{}) 299 | 300 | // now 301 | csv().on("done",()=>{}) 302 | ``` 303 | -------------------------------------------------------------------------------- /src/ProcessorLocal.ts: -------------------------------------------------------------------------------- 1 | import { Processor, ProcessLineResult } from "./Processor"; 2 | import { prepareData } from "./dataClean"; 3 | import getEol from "./getEol"; 4 | import { stringToLines } from "./fileline"; 5 | import { bufFromString, filterArray,trimLeft } from "./util"; 6 | import { RowSplit } from "./rowSplit"; 7 | import lineToJson from "./lineToJson"; 8 | import { ParseRuntime } from "./ParseRuntime"; 9 | import CSVError from "./CSVError"; 10 | 11 | 12 | 13 | export class ProcessorLocal extends Processor { 14 | flush(): Promise { 15 | if (this.runtime.csvLineBuffer && this.runtime.csvLineBuffer.length > 0) { 16 | const buf = this.runtime.csvLineBuffer; 17 | this.runtime.csvLineBuffer = undefined; 18 | return this.process(buf, true) 19 | .then((res) => { 20 | if (this.runtime.csvLineBuffer && this.runtime.csvLineBuffer.length > 0) { 21 | return Promise.reject(CSVError.unclosed_quote(this.runtime.parsedLineNumber, this.runtime.csvLineBuffer.toString())) 22 | } else { 23 | return Promise.resolve(res); 24 | } 25 | }) 26 | } else { 27 | return Promise.resolve([]); 28 | } 29 | } 30 | destroy(): Promise { 31 | return Promise.resolve(); 32 | } 33 | private rowSplit: RowSplit = new RowSplit(this.converter); 34 | private eolEmitted = false; 35 | private _needEmitEol?: boolean = undefined; 36 | private get needEmitEol() { 37 | if (this._needEmitEol === undefined) { 38 | this._needEmitEol = this.converter.listeners("eol").length > 0; 39 | } 40 | return this._needEmitEol; 41 | } 42 | private headEmitted = false; 43 | private _needEmitHead?: boolean = undefined; 44 | private get needEmitHead() { 45 | if (this._needEmitHead === undefined) { 46 | this._needEmitHead = this.converter.listeners("header").length > 0; 47 | } 48 | return this._needEmitHead; 49 | 50 | } 51 | process(chunk: Buffer, finalChunk = false): Promise { 52 | let csvString: string; 53 | if (finalChunk) { 54 | csvString = chunk.toString(); 55 | } else { 56 | csvString = prepareData(chunk, this.converter.parseRuntime); 57 | 58 | } 59 | return Promise.resolve() 60 | .then(() => { 61 | if (this.runtime.preRawDataHook) { 62 | return this.runtime.preRawDataHook(csvString); 63 | } else { 64 | return csvString; 65 | } 66 | }) 67 | .then((csv) => { 68 | if (csv && csv.length > 0) { 69 | return this.processCSV(csv, finalChunk); 70 | } else { 71 | return Promise.resolve([]); 72 | } 73 | }) 74 | } 75 | private processCSV(csv: string, finalChunk: boolean): Promise { 76 | const params = this.params; 77 | const runtime = this.runtime; 78 | if (!runtime.eol) { 79 | getEol(csv, runtime); 80 | } 81 | if (this.needEmitEol && !this.eolEmitted && runtime.eol) { 82 | this.converter.emit("eol", runtime.eol); 83 | this.eolEmitted = true; 84 | } 85 | // trim csv file has initial blank lines. 
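// this trimming only applies before the first row has been parsed (runtime.started is false) and when ignoreEmpty is set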
86 | if (params.ignoreEmpty && !runtime.started) { 87 | csv = trimLeft(csv); 88 | } 89 | const stringToLineResult = stringToLines(csv, runtime); 90 | if (!finalChunk) { 91 | this.prependLeftBuf(bufFromString(stringToLineResult.partial)); 92 | } else { 93 | stringToLineResult.lines.push(stringToLineResult.partial); 94 | stringToLineResult.partial = ""; 95 | } 96 | if (stringToLineResult.lines.length > 0) { 97 | let prom: Promise; 98 | if (runtime.preFileLineHook) { 99 | prom = this.runPreLineHook(stringToLineResult.lines); 100 | } else { 101 | prom = Promise.resolve(stringToLineResult.lines); 102 | } 103 | return prom.then((lines) => { 104 | if (!runtime.started 105 | && !this.runtime.headers 106 | ) { 107 | return this.processDataWithHead(lines); 108 | } else { 109 | return this.processCSVBody(lines); 110 | } 111 | 112 | }) 113 | 114 | } else { 115 | 116 | return Promise.resolve([]); 117 | } 118 | 119 | } 120 | private processDataWithHead(lines: string[]): ProcessLineResult[] { 121 | if (this.params.noheader) { 122 | if (this.params.headers) { 123 | this.runtime.headers = this.params.headers; 124 | } else { 125 | this.runtime.headers = []; 126 | } 127 | } else { 128 | let left = ""; 129 | let headerRow: string[] = []; 130 | while (lines.length) { 131 | const line = left + lines.shift(); 132 | const row = this.rowSplit.parse(line); 133 | if (row.closed) { 134 | headerRow = row.cells; 135 | left = ""; 136 | break; 137 | } else { 138 | left = line + getEol(line, this.runtime); 139 | } 140 | } 141 | this.prependLeftBuf(bufFromString(left)); 142 | 143 | if (headerRow.length === 0) { 144 | return []; 145 | } 146 | if (this.params.headers) { 147 | this.runtime.headers = this.params.headers; 148 | } else { 149 | this.runtime.headers = headerRow; 150 | } 151 | } 152 | if (this.runtime.needProcessIgnoreColumn || this.runtime.needProcessIncludeColumn) { 153 | this.filterHeader(); 154 | } 155 | if (this.needEmitHead && !this.headEmitted) { 156 | this.converter.emit("header", this.runtime.headers); 157 | this.headEmitted = true; 158 | } 159 | return this.processCSVBody(lines); 160 | } 161 | private filterHeader() { 162 | this.runtime.selectedColumns = []; 163 | if (this.runtime.headers) { 164 | const headers = this.runtime.headers; 165 | for (let i = 0; i < headers.length; i++) { 166 | if (this.params.ignoreColumns) { 167 | if (this.params.ignoreColumns.test(headers[i])) { 168 | if (this.params.includeColumns && this.params.includeColumns.test(headers[i])) { 169 | this.runtime.selectedColumns.push(i); 170 | } else { 171 | continue; 172 | } 173 | } else { 174 | this.runtime.selectedColumns.push(i); 175 | } 176 | } else if (this.params.includeColumns) { 177 | if (this.params.includeColumns.test(headers[i])) { 178 | this.runtime.selectedColumns.push(i); 179 | } 180 | } else { 181 | this.runtime.selectedColumns.push(i); 182 | } 183 | // if (this.params.includeColumns && this.params.includeColumns.test(headers[i])){ 184 | // this.runtime.selectedColumns.push(i); 185 | // }else{ 186 | // if (this.params.ignoreColumns && this.params.ignoreColumns.test(headers[i])){ 187 | // continue; 188 | // }else{ 189 | // if (this.params.ignoreColumns && !this.params.includeColumns){ 190 | // this.runtime.selectedColumns.push(i); 191 | // } 192 | 193 | // } 194 | // } 195 | } 196 | this.runtime.headers = filterArray(this.runtime.headers, this.runtime.selectedColumns); 197 | } 198 | 199 | } 200 | private processCSVBody(lines: string[]): ProcessLineResult[] { 201 | if (this.params.output === "line") { 202 | return lines; 203 
| } else { 204 | const result = this.rowSplit.parseMultiLines(lines); 205 | this.prependLeftBuf(bufFromString(result.partial)); 206 | if (this.params.output === "csv") { 207 | return result.rowsCells; 208 | } else { 209 | return lineToJson(result.rowsCells, this.converter); 210 | } 211 | } 212 | 213 | // var jsonArr = linesToJson(lines.lines, params, this.recordNum); 214 | // this.processResult(jsonArr); 215 | // this.lastIndex += jsonArr.length; 216 | // this.recordNum += jsonArr.length; 217 | } 218 | 219 | private prependLeftBuf(buf: Buffer) { 220 | if (buf) { 221 | if (this.runtime.csvLineBuffer) { 222 | this.runtime.csvLineBuffer = Buffer.concat([buf, this.runtime.csvLineBuffer]); 223 | } else { 224 | this.runtime.csvLineBuffer = buf; 225 | } 226 | } 227 | 228 | } 229 | private runPreLineHook(lines: string[]): Promise { 230 | return new Promise((resolve, reject) => { 231 | processLineHook(lines, this.runtime, 0, (err) => { 232 | if (err) { 233 | reject(err); 234 | } else { 235 | resolve(lines); 236 | } 237 | }) 238 | }); 239 | } 240 | } 241 | 242 | function processLineHook(lines: string[], runtime: ParseRuntime, offset: number, 243 | cb: (err?) => void 244 | ) { 245 | if (offset >= lines.length) { 246 | cb(); 247 | } else { 248 | if (runtime.preFileLineHook) { 249 | const line = lines[offset]; 250 | const res = runtime.preFileLineHook(line, runtime.parsedLineNumber + offset); 251 | offset++; 252 | if (res && (res as PromiseLike).then) { 253 | (res as PromiseLike).then((value) => { 254 | lines[offset - 1] = value; 255 | processLineHook(lines, runtime, offset, cb); 256 | }); 257 | } else { 258 | lines[offset - 1] = res as string; 259 | while (offset < lines.length) { 260 | lines[offset] = runtime.preFileLineHook(lines[offset], runtime.parsedLineNumber + offset) as string; 261 | offset++; 262 | } 263 | cb(); 264 | } 265 | } else { 266 | cb(); 267 | } 268 | } 269 | } 270 | --------------------------------------------------------------------------------