├── test ├── test.xlsx └── test.js ├── .github └── workflows │ └── main.yml ├── .gitignore ├── package.json ├── LICENSE ├── bin ├── sheetstack.js └── sheetstack.sh ├── index.js └── README.md /test/test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/sheetstack/master/test/test.xlsx -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | node-version: [12.x, 14.x, 16.x] 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Use Node.js ${{ matrix.node-version }} 15 | uses: actions/setup-node@v3 16 | with: 17 | node-version: ${{ matrix.node-version }} 18 | cache: 'npm' 19 | - run: npm ci 20 | - run: npm test 21 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | var sheetstack = require('../index'); 2 | var j = require('j'); 3 | 4 | var callback = function(err, data) { 5 | if (err) console.error(err); 6 | console.assert(data.indexOf('dogs') > -1); 7 | console.assert(data.indexOf('cats') > -1); 8 | console.assert(data.indexOf('ONE FAMILY HOMES') > -1); 9 | console.assert(data.indexOf('sdsd') > -1); 10 | }; 11 | 12 | console.assert(typeof(sheetstack) == 'function'); 13 | 14 | var contents = j.readFile(__dirname + '/test.xlsx'); 15 | 16 | sheetstack(contents, {}, callback); 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # Compiled binary addons (http://nodejs.org/api/addons.html) 20 | build/Release 21 | 22 | # Dependency directory 23 | # Commenting this out is preferred by some people, see 24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 25 | node_modules 26 | 27 | # Users Environment Variables 28 | .lock-wscript 29 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sheetstack", 3 | "version": "0.2.0", 4 | "description": "Combine multiple XLS sheets into a single CSV", 5 | "main": "./bin/sheetstack.js", 6 | "bin": { 7 | "sheetstack": "./bin/sheetstack.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "http://github.com/fitnr/sheetstack" 12 | }, 13 | "keywords": [ 14 | "csv", 15 | "xls", 16 | "converter" 17 | ], 18 | "files": [ 19 | "LICENSE", 20 | "bin", 21 | "README.md" 22 | ], 23 | "scripts": { 24 | "test": "node test/test.js" 25 | }, 26 | "author": "Neil Freeman", 27 | "license": "MIT", 28 | "bugs": { 29 | "url": "https://github.com/fitnr/sheetstack/issues" 30 | }, 31 | "homepage": "https://github.com/fitnr/sheetstack", 32 | "dependencies": { 33 | "commander": "^2.9.0", 34 | "concat-stream": "^1.5.1", 35 | "j": "^1.0.0", 36 | "xlsx": "^0.18.5" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Neil Freeman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /bin/sheetstack.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var j = require('j'), 4 | fs = require('fs'), 5 | stream = require('stream'), 6 | program = require('commander'), 7 | concat = require('concat-stream'), 8 | sheetstack = require('../index'); 9 | 10 | var version = '0.1.3'; 11 | 12 | function list(val) { return val.split(','); } 13 | 14 | program 15 | .version(version) 16 | .usage('[OPTIONS] \n Combine XLS or XLSX sheets into a single CSV') 17 | .option('-s, --sheets ', 'list of sheets to read (by default, all sheets will be read', list) 18 | .option('-g, --groups ', 'value of the field to be added at the start of each line (by default, the name of the sheet)', list) 19 | .option('-r, --rm-lines ', 'number of lines to remove from the start of every sheet (except for the first). default: 1', 1) 20 | .option('-n, --group-name ', 'name of grouping column. default: sheet', 'sheet') 21 | .option('-F, --field-sep ', 'CSV field separator', ',') 22 | .option('-R, --row-sep ', 'CSV row separator', "\n") 23 | .option('-o, --output ', 'output to specified file') 24 | .option('-q, --quiet', 'quiet mode'); 25 | 26 | // get cli args 27 | program.parse(process.argv); 28 | 29 | // set options 30 | var filename = program.args[0], 31 | writer = (program.output) ? fs.createWriteStream(program.output, {flags: 'w'}) : concat(function(data) { 32 | console.log(data); 33 | }), 34 | callback = function(err, csv) { 35 | if (err) throw err; 36 | 37 | writer.write(csv); 38 | writer.end(); 39 | }; 40 | 41 | // Allow for piping 42 | if (filename === "-") { 43 | process.stdin.pipe(concat(function(data){ 44 | w = j.read(data); 45 | sheetstack(w, program, callback); 46 | })); 47 | 48 | } else { 49 | converted = j.readFile(filename); 50 | sheetstack(converted, program, callback); 51 | } 52 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var XLSX = require('xlsx'); 2 | 3 | function grouper(g, sep) { 4 | return function(x) { 5 | return g + sep + x; 6 | }; 7 | } 8 | 9 | // ignore lines that are empty or only empty fields 10 | function goodLine(re) { 11 | return function(x) { 12 | return x.length > -1 && x.search(re) > -1; 13 | }; 14 | } 15 | 16 | function sheetstack(data, config, callback) { 17 | /* 18 | :data contents of xls/x file 19 | :config object sheets, groups, fieldSep, rowSep, groupName, rmLines 20 | :callback function. Should take err, string arguments 21 | */ 22 | try { 23 | var XL = (data[0].utils.sheet_to_csv) ? data[0] : XLSX, 24 | workbook = data[1], 25 | sheets = (config.sheets) ? config.sheets : workbook.SheetNames, 26 | groups = (config.groups) ? config.groups : sheets, 27 | fieldSep = config.fieldSep || ',', 28 | rmLines = config.rmLines || 0, 29 | rowSep = config.rowSep || "\n", 30 | re = new RegExp('[^' + fieldSep + ']'), 31 | opts = { 32 | FS: fieldSep, 33 | RS: rowSep 34 | }, 35 | add_group_name = grouper(config.groupName || 'sheet', fieldSep); 36 | 37 | var filter = goodLine(re); 38 | var csv = ''; 39 | 40 | // for each sheet 41 | for (var i = 0, s = sheets.length; i < s; i++) { 42 | // function for prefixing group 43 | var add_group = grouper(groups[i], fieldSep); 44 | // pull rows from sheet 45 | var rows = XL.utils.sheet_to_csv(workbook.Sheets[sheets[i]], opts).split('\n'); 46 | var body; 47 | 48 | // on the first sheet, add the group name 49 | if (i === 0) { 50 | var firstline = add_group_name(rows[0]); 51 | var mutated = rows.splice(1).filter(filter).map(add_group); 52 | body = [firstline].concat(mutated); 53 | 54 | } else { 55 | body = rows.splice(rmLines).filter(filter).map(add_group); 56 | } 57 | 58 | csv += body.join('\n') + '\n'; 59 | } 60 | 61 | callback(null, csv); 62 | 63 | } catch(e) { 64 | callback(e); 65 | } 66 | } 67 | 68 | module.exports = sheetstack; 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sheetstack 2 | 3 | Sheetstack is a command line utility that merges multiple XLS/X sheets into a single CSV. 4 | 5 | It's a simple extension of [J](https://www.npmjs.com/package/j) useful for processing files with the exact same layout split into several worksheets. 6 | 7 | Sheetstack adds a grouping column to the resulting CSV. By default this is the name of the sheet, but it could be anything. 8 | 9 | ## Install 10 | 11 | ```` 12 | npm install sheetstack 13 | ```` 14 | 15 | ## Usage 16 | 17 | Let's say we have an xls file with two sheets, "dogs" and "cats": 18 | 19 | ````csv 20 | name,best friend 21 | Pluto,Mickey 22 | Santa's Little Helper,Bart 23 | Scooby Doo,Shaggy 24 | ```` 25 | 26 | ````csv 27 | name,best friend 28 | Cat in the Hat,the fish 29 | Garfield,Jon 30 | Hello Kitty,you 31 | ```` 32 | 33 | The simplest use will combine all the files and output the result to stdout. 34 | 35 | ```` 36 | $ sheetstack file.xls 37 | 38 | sheet,name,best friend 39 | dogs,Pluto,Mickey 40 | dogs,Santa's Little Helper,Bart 41 | dogs,Scooby Doo,Shaggy 42 | cats,Cat in the Hat,the fish 43 | cats,Garfield,Jon 44 | cats,Hello Kitty,you 45 | ```` 46 | 47 | ### Sheets 48 | The `--sheets` option controls which sheets are included, and in what order. 49 | 50 | ```` 51 | $ sheetstack --sheets dog,cat file.xls 52 | 53 | sheet,name,best friend 54 | dogs,Pluto,Mickey 55 | dogs,Santa's Little Helper,Bart 56 | dogs,Scooby Doo,Shaggy 57 | ```` 58 | 59 | ### Groups 60 | The `--groups` option specifies custom values for the grouping column, `--group-name` sets the value for the top of the column. 61 | 62 | ```` 63 | $ sheetstack --groups canis,felis --group-name genus 64 | 65 | species,name,best friend 66 | canis,Pluto,Mickey 67 | canis,Santa's Little Helper,Bart 68 | canis,Scooby Doo,Shaggy 69 | felis,Cat in the Hat,the fish 70 | felis,Garfield,Jon 71 | felis,Hello Kitty,you 72 | ```` 73 | 74 | ### Removing leading lines 75 | 76 | By default, sheetstack removes the first line from all sheets except for the first one. The can be changed with the `--rm-lines` setting. If `--rm-lines` is set to 0, no lines will be removed. Higher values will remove more lines, but no lines will be removed from the first sheet. 77 | 78 | ### Output format 79 | 80 | You can also specify row-separator and field-separator options, which are passed through to J: 81 | 82 | ```` 83 | $ sheetstack --row-sep '\r\n' --field-sep ; 84 | ```` 85 | 86 | -------------------------------------------------------------------------------- /bin/sheetstack.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | # Join together sheets of an XLS with csvstack 4 | # Requires j 5 | usage() { 6 | echo "usage: sheetstack [OPTIONS] 7 | Combine XLS or XLSX sheets into a single CSV 8 | 9 | options: 10 | -s, --sheets list of sheets to read (by default, all sheets will be read) 11 | -g, --groups value of the field to be added at the start of each line (by default, the name of the sheet) 12 | -r, --rm-lines number of lines to remove from the start of every sheet (except for the first) 13 | -n, --group-name name of grouping column. default: sheet 14 | -F, --field-sep CSV field separator 15 | " | sed "s/^/ /" 16 | } 17 | 18 | GRPS= 19 | GROUPS_NL= 20 | SHEETS= 21 | SHEETS_NL= 22 | GROUPNAME=sheet 23 | RMLINES=1 24 | SEP=, 25 | 26 | while [ "$#" -gt 0 ]; do 27 | case $1 in 28 | -h|-\?|--help) 29 | usage 30 | exit 31 | ;; 32 | # Takes an option argument, ensuring it has been specified. 33 | -s|--sheets) 34 | if [ "$#" -gt 1 ]; then 35 | SHEETS=$2 36 | SHEETS_NL="$(sed 's/,/\ 37 | /g' <<< "$2")" 38 | shift 2 39 | continue 40 | else 41 | echo 'ERROR: Must specify a non-empty "--sheets" argument.' >&2 42 | exit 1 43 | fi 44 | ;; 45 | -g|--groups) 46 | if [ "$#" -gt 1 ]; then 47 | GRPS="$2" 48 | GROUPS_NL="$(sed 's/,/\ 49 | /g' <<< "$2")" 50 | shift 2 51 | continue 52 | else 53 | echo 'ERROR: Must specify a non-empty "--groups" argument.' >&2 54 | exit 1 55 | fi 56 | ;; 57 | -n|--group-name) 58 | if [ "$#" -gt 1 ]; then 59 | GROUPNAME="$2" 60 | shift 2 61 | continue 62 | else 63 | echo 'ERROR: Must specify a non-empty "--group-name" argument.' >&2 64 | exit 1 65 | fi 66 | ;; 67 | -r|--rm-lines) 68 | if [ "$#" -gt 1 ]; then 69 | RMLINES=$(expr $2 + 1) 70 | shift 2 71 | continue 72 | else 73 | echo 'ERROR: Must specify a non-empty "--rm-lines" argument.' >&2 74 | exit 1 75 | fi 76 | ;; 77 | -F|--field-sep) 78 | if [ "$#" -gt 1 ]; then 79 | SEP="$2" 80 | shift 2 81 | continue 82 | else 83 | echo 'ERROR: Must specify a non-empty "--field-sep" argument.' >&2 84 | exit 1 85 | fi 86 | ;; 87 | # End of all options. 88 | --) 89 | shift 90 | break 91 | ;; 92 | -?*) 93 | printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2 94 | ;; 95 | # Default case: If no more options then break out of the loop. 96 | *) 97 | XLSFILE="$1" 98 | if [ "$#" -lt 1 ]; then 99 | break 100 | fi; 101 | esac 102 | 103 | shift 104 | done 105 | 106 | if [ -z "$SHEETS" ]; then 107 | SHEETS_NL=$(j -l ${XLSFILE}) 108 | fi 109 | 110 | if [ -z "$GRPS" ]; then 111 | GROUPS_NL="$SHEETS_NL" 112 | GRPS=$(tr '\n' , <<< "$SHEETS_NL" | sed 's/,$//') 113 | fi 114 | 115 | first=yes 116 | 117 | while IFS=, read sheet group 118 | do 119 | # convert to CSV with j. 120 | j -s "$sheet" "$XLSFILE" | 121 | ( 122 | # Add group name to first line, else add sheet name 123 | [[ $first ]] && sed -e "1s/^/${GROUPNAME},/" -e "2,\$s/^/${group},/g" || 124 | sed -e "s/^/${group},/g" | tail -n+$RMLINES 125 | ) | 126 | grep -v -e '^$' -e "^${group}${SEP}\+$" 127 | first= 128 | done <<< "$(paste -d, <(echo "$SHEETS_NL") <(echo "$GROUPS_NL"))" --------------------------------------------------------------------------------