├── test
    ├── test.xlsx
    └── test.js
├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── package.json
├── LICENSE
├── bin
    ├── sheetstack.js
    └── sheetstack.sh
├── index.js
└── README.md


/test/test.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fitnr/sheetstack/master/test/test.xlsx


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on: push
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     strategy:
 9 |       matrix:
10 |         node-version: [12.x, 14.x, 16.x]
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v3
14 |     - name: Use Node.js ${{ matrix.node-version }}
15 |       uses: actions/setup-node@v3
16 |       with:
17 |         node-version: ${{ matrix.node-version }}
18 |         cache: 'npm'
19 |     - run: npm ci
20 |     - run: npm test
21 | 


--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
 1 | var sheetstack = require('../index');
 2 | var j = require('j');
 3 | 
 4 | var callback = function(err, data) {
 5 |     if (err) console.error(err);
 6 |     console.assert(data.indexOf('dogs') > -1);
 7 |     console.assert(data.indexOf('cats') > -1);
 8 |     console.assert(data.indexOf('ONE FAMILY HOMES') > -1);
 9 |     console.assert(data.indexOf('sdsd') > -1);
10 | };
11 | 
12 | console.assert(typeof(sheetstack) == 'function');
13 | 
14 | var contents = j.readFile(__dirname + '/test.xlsx');
15 | 
16 | sheetstack(contents, {}, callback);
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | 
 5 | # Runtime data
 6 | pids
 7 | *.pid
 8 | *.seed
 9 | 
10 | # Directory for instrumented libs generated by jscoverage/JSCover
11 | lib-cov
12 | 
13 | # Coverage directory used by tools like istanbul
14 | coverage
15 | 
16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
17 | .grunt
18 | 
19 | # Compiled binary addons (http://nodejs.org/api/addons.html)
20 | build/Release
21 | 
22 | # Dependency directory
23 | # Commenting this out is preferred by some people, see
24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git-
25 | node_modules
26 | 
27 | # Users Environment Variables
28 | .lock-wscript
29 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "sheetstack",
 3 |   "version": "0.2.0",
 4 |   "description": "Combine multiple XLS sheets into a single CSV",
 5 |   "main": "./bin/sheetstack.js",
 6 |   "bin": {
 7 |     "sheetstack": "./bin/sheetstack.js"
 8 |   },
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "http://github.com/fitnr/sheetstack"
12 |   },
13 |   "keywords": [
14 |     "csv",
15 |     "xls",
16 |     "converter"
17 |   ],
18 |   "files": [
19 |     "LICENSE",
20 |     "bin",
21 |     "README.md"
22 |   ],
23 |   "scripts": {
24 |     "test": "node test/test.js"
25 |   },
26 |   "author": "Neil Freeman",
27 |   "license": "MIT",
28 |   "bugs": {
29 |     "url": "https://github.com/fitnr/sheetstack/issues"
30 |   },
31 |   "homepage": "https://github.com/fitnr/sheetstack",
32 |   "dependencies": {
33 |     "commander": "^2.9.0",
34 |     "concat-stream": "^1.5.1",
35 |     "j": "^1.0.0",
36 |     "xlsx": "^0.18.5"
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Neil Freeman
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/bin/sheetstack.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | var j = require('j'),
 4 |     fs = require('fs'),
 5 |     stream = require('stream'),
 6 |     program = require('commander'),
 7 |     concat = require('concat-stream'),
 8 |     sheetstack = require('../index');
 9 | 
// Take the version from package.json so `--version` cannot drift from the
// published release.
var version = require('../package.json').version;

// Split a comma-separated option value into a list.
function list(val) { return val.split(','); }

program
    .version(version)
    .usage('[OPTIONS] <file>\n  Combine XLS or XLSX sheets into a single CSV')
    .option('-s, --sheets <items>', 'list of sheets to read (by default, all sheets will be read)', list)
    .option('-g, --groups <items>', 'value of the field to be added at the start of each line (by default, the name of the sheet)', list)
    .option('-r, --rm-lines <n>', 'number of lines to remove from the start of every sheet (except for the first). default: 1', 1)
    .option('-n, --group-name <value>', 'name of grouping column. default: sheet', 'sheet')
    .option('-F, --field-sep <sep>', 'CSV field separator', ',')
    .option('-R, --row-sep <sep>', 'CSV row separator', "\n")
    .option('-o, --output <file>', 'output to specified file')
    .option('-q, --quiet', 'quiet mode');

// get cli args
program.parse(process.argv);

var filename = program.args[0];

// Without an input there is nothing to convert: show the usage and exit with
// an error status before any output file is created.
if (!filename) {
    program.outputHelp();
    process.exit(1);
}

// Write to the requested file, or collect the output and print it to stdout.
var writer = (program.output) ? fs.createWriteStream(program.output, {flags: 'w'}) : concat(function(data) {
    console.log(data);
});

// Receives the stacked CSV (or an error) from sheetstack.
var callback = function(err, csv) {
    if (err) throw err;

    writer.write(csv);
    writer.end();
};

// Allow for piping: "-" means read the workbook from stdin
if (filename === "-") {
    process.stdin.pipe(concat(function(data) {
        var workbook = j.read(data);
        sheetstack(workbook, program, callback);
    }));

} else {
    var converted = j.readFile(filename);
    sheetstack(converted, program, callback);
}
52 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
 1 | var XLSX = require('xlsx');
 2 | 
 3 | function grouper(g, sep) {
 4 |     return function(x) {
 5 |         return g + sep + x;
 6 |     };
 7 | }
 8 | 
 9 | // ignore lines that are empty or only empty fields
10 | function goodLine(re) {
11 |     return function(x) {
12 |         return x.length > -1 && x.search(re) > -1;
13 |     };
14 | }
15 | 
/**
 * Stack the sheets of a workbook into one CSV string, prefixing every row
 * with a grouping value.
 *
 * @param {Array} data two-element array: `data[0]` is an XLSX-like module
 *     (used when it exposes `utils.sheet_to_csv`, otherwise the module-level
 *     `XLSX` is used) and `data[1]` is the workbook.
 * @param {Object} config optional keys: sheets, groups, fieldSep, rowSep,
 *     groupName, rmLines.
 * @param {Function} callback called exactly once, as `callback(err)` when
 *     building the CSV fails or `callback(null, csv)` on success. Errors
 *     thrown by the callback itself propagate to the caller.
 */
function sheetstack(data, config, callback) {
    var csv = '';

    try {
        var XL = (data[0].utils.sheet_to_csv) ? data[0] : XLSX,
            workbook = data[1],
            sheets = config.sheets || workbook.SheetNames,
            groups = config.groups || sheets,
            fieldSep = config.fieldSep || ',',
            rmLines = config.rmLines || 0,
            rowSep = config.rowSep || "\n",
            // Escape the characters that are special inside a character class
            // so separators such as "\" or "]" still build a valid pattern.
            re = new RegExp('[^' + String(fieldSep).replace(/[\\\]^-]/g, '\\$&') + ']'),
            opts = {
                FS: fieldSep,
                RS: rowSep
            },
            add_group_name = grouper(config.groupName || 'sheet', fieldSep),
            filter = goodLine(re);

        // for each sheet
        for (var i = 0, s = sheets.length; i < s; i++) {
            // Fall back to the sheet name when fewer groups than sheets were given.
            var group = (groups[i] === undefined) ? sheets[i] : groups[i];
            // function for prefixing group
            var add_group = grouper(group, fieldSep);
            // Rows come back joined by the configured row separator, so split
            // on that same separator rather than a hard-coded newline.
            var rows = XL.utils.sheet_to_csv(workbook.Sheets[sheets[i]], opts).split(rowSep);
            var body;

            if (i === 0) {
                // on the first sheet, the header line gets the group name
                body = [add_group_name(rows[0])].concat(rows.slice(1).filter(filter).map(add_group));
            } else {
                // later sheets drop their leading `rmLines` lines
                body = rows.slice(rmLines).filter(filter).map(add_group);
            }

            // A sheet that contributes no rows must not add a blank line.
            if (body.length > 0) {
                csv += body.join(rowSep) + rowSep;
            }
        }
    } catch(e) {
        callback(e);
        return;
    }

    // Deliver the result outside the try block so an exception thrown by the
    // callback is not caught here and answered with a second callback call.
    callback(null, csv);
}
67 | 
68 | module.exports = sheetstack;
69 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # sheetstack
 2 | 
 3 | Sheetstack is a command line utility that merges multiple XLS/X sheets into a single CSV.
 4 | 
 5 | It's a simple extension of [J](https://www.npmjs.com/package/j) useful for processing files with the exact same layout split into several worksheets. 
 6 | 
 7 | Sheetstack adds a grouping column to the resulting CSV. By default this is the name of the sheet, but it could be anything.
 8 | 
 9 | ## Install
10 | 
11 | ````
12 | npm install sheetstack
13 | ````
14 | 
15 | ## Usage
16 | 
17 | Let's say we have an xls file with two sheets, "dogs" and "cats":
18 | 
19 | ````csv
20 | name,best friend
21 | Pluto,Mickey
22 | Santa's Little Helper,Bart
23 | Scooby Doo,Shaggy
24 | ````
25 | 
26 | ````csv
27 | name,best friend
28 | Cat in the Hat,the fish
29 | Garfield,Jon
30 | Hello Kitty,you
31 | ````
32 | 
The simplest use will combine all the sheets and output the result to stdout.
34 | 
35 | ````
36 | $ sheetstack file.xls
37 | 
38 | sheet,name,best friend
39 | dogs,Pluto,Mickey
40 | dogs,Santa's Little Helper,Bart
41 | dogs,Scooby Doo,Shaggy
42 | cats,Cat in the Hat,the fish
43 | cats,Garfield,Jon
44 | cats,Hello Kitty,you
45 | ````
46 | 
47 | ### Sheets
48 | The `--sheets` option controls which sheets are included, and in what order.
49 | 
50 | ````
$ sheetstack --sheets dogs file.xls
52 | 
53 | sheet,name,best friend
54 | dogs,Pluto,Mickey
55 | dogs,Santa's Little Helper,Bart
56 | dogs,Scooby Doo,Shaggy
57 | ````
58 | 
59 | ### Groups
60 | The `--groups` option specifies custom values for the grouping column, `--group-name` sets the value for the top of the column.
61 | 
62 | ````
$ sheetstack --groups canis,felis --group-name genus file.xls

genus,name,best friend
66 | canis,Pluto,Mickey
67 | canis,Santa's Little Helper,Bart
68 | canis,Scooby Doo,Shaggy
69 | felis,Cat in the Hat,the fish
70 | felis,Garfield,Jon
71 | felis,Hello Kitty,you
72 | ````
73 | 
74 | ### Removing leading lines
75 | 
By default, sheetstack removes the first line from all sheets except for the first one. This can be changed with the `--rm-lines` setting. If `--rm-lines` is set to 0, no lines will be removed. Higher values will remove more lines, but no lines will be removed from the first sheet.
77 | 
78 | ### Output format
79 | 
80 | You can also specify row-separator and field-separator options, which are passed through to J:
81 | 
82 | ````
$ sheetstack --row-sep $'\r\n' --field-sep ';' file.xls
84 | ````
85 | 
86 | 


--------------------------------------------------------------------------------
/bin/sheetstack.sh:
--------------------------------------------------------------------------------
#!/bin/bash
  2 | 
  3 | # Join together sheets of an XLS with csvstack
  4 | # Requires j
  5 | usage() {
  6 | echo "usage: sheetstack [OPTIONS] <file>
  7 | Combine XLS or XLSX sheets into a single CSV
  8 | 
  9 | options:
 10 | -s, --sheets <comma-separated list>  list of sheets to read (by default, all sheets will be read)
 11 | -g, --groups <comma-separated list>  value of the field to be added at the start of each line (by default, the name of the sheet)
 12 | -r, --rm-lines <number>              number of lines to remove from the start of every sheet (except for the first)
 13 | -n, --group-name <name>              name of grouping column. default: sheet
 14 | -F, --field-sep <char>               CSV field separator
 15 | " | sed "s/^/    /"
 16 | }
 17 | 
 18 | GRPS=
 19 | GROUPS_NL=
 20 | SHEETS=
 21 | SHEETS_NL=
 22 | GROUPNAME=sheet
 23 | RMLINES=1
 24 | SEP=,
 25 | 
 26 | while [ "$#" -gt 0 ]; do
 27 |     case $1 in
 28 |         -h|-\?|--help)
 29 |             usage
 30 |             exit
 31 |             ;;
 32 |         # Takes an option argument, ensuring it has been specified.
 33 |         -s|--sheets)
 34 |             if [ "$#" -gt 1 ]; then
 35 |                 SHEETS=$2
 36 |                 SHEETS_NL="$(sed 's/,/\
 37 | /g' <<< "$2")"
 38 |                 shift 2
 39 |                 continue
 40 |             else
 41 |                 echo 'ERROR: Must specify a non-empty "--sheets" argument.' >&2
 42 |                 exit 1
 43 |             fi
 44 |             ;;
 45 |         -g|--groups)
 46 |             if [ "$#" -gt 1 ]; then
 47 |                 GRPS="$2"
 48 |                 GROUPS_NL="$(sed 's/,/\
 49 | /g' <<< "$2")"
 50 |                 shift 2
 51 |                 continue
 52 |             else
 53 |                 echo 'ERROR: Must specify a non-empty "--groups" argument.' >&2
 54 |                 exit 1
 55 |             fi
 56 |             ;;
 57 |         -n|--group-name)
 58 |             if [ "$#" -gt 1 ]; then
 59 |                 GROUPNAME="$2"
 60 |                 shift 2
 61 |                 continue
 62 |             else
 63 |                 echo 'ERROR: Must specify a non-empty "--group-name" argument.' >&2
 64 |                 exit 1
 65 |             fi
 66 |             ;;
 67 |         -r|--rm-lines)
 68 |             if [ "$#" -gt 1 ]; then
 69 |                 RMLINES=$(expr $2 + 1)
 70 |                 shift 2
 71 |                 continue
 72 |             else
 73 |                 echo 'ERROR: Must specify a non-empty "--rm-lines" argument.' >&2
 74 |                 exit 1
 75 |             fi
 76 |             ;;
 77 |         -F|--field-sep)
 78 |             if [ "$#" -gt 1 ]; then
 79 |                 SEP="$2"
 80 |                 shift 2
 81 |                 continue
 82 |             else
 83 |                 echo 'ERROR: Must specify a non-empty "--field-sep" argument.' >&2
 84 |                 exit 1
 85 |             fi
 86 |             ;;
 87 |         # End of all options.
 88 |         --)
 89 |             shift
 90 |             break
 91 |             ;;
 92 |         -?*)
 93 |             printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
 94 |             ;;
 95 |         # Default case: If no more options then break out of the loop.
 96 |         *)
 97 |             XLSFILE="$1"
 98 |             if [ "$#" -lt 1 ]; then
 99 |                 break
100 |             fi;
101 |     esac
102 | 
103 |     shift
104 | done
105 | 
106 | if [ -z "$SHEETS" ]; then
107 |     SHEETS_NL=$(j -l ${XLSFILE})
108 | fi
109 | 
110 | if [ -z "$GRPS" ]; then
111 |     GROUPS_NL="$SHEETS_NL"
112 |     GRPS=$(tr '\n' , <<< "$SHEETS_NL" | sed 's/,$//')
113 | fi
114 | 
115 | first=yes
116 | 
117 | while IFS=, read sheet group
118 |     do
119 |         # convert to CSV with j. 
120 |         j -s "$sheet" "$XLSFILE" |
121 |         (
122 |             # Add group name to first line, else add sheet name
123 |             [[ $first ]] && sed -e "1s/^/${GROUPNAME},/" -e "2,\$s/^/${group},/g" ||
124 |             sed -e "s/^/${group},/g" | tail -n+$RMLINES
125 |         ) |
126 |         grep -v -e '^$' -e "^${group}${SEP}\+$"
127 |         first=
128 |     done <<< "$(paste -d, <(echo "$SHEETS_NL") <(echo "$GROUPS_NL"))"


--------------------------------------------------------------------------------