├── .gitignore ├── README.md ├── app.js ├── converter └── converter.js ├── med.html ├── package-lock.json ├── package.json ├── word └── med.docx └── wordfolder ├── AtWork_GlobalMobility_Immigration_EntryGuidance.docx ├── Life Events See all page Intro_text_061317.docx ├── US_Benefits_Family_GrowingYourFamily.docx └── US_HealthAndWellness_1a_Medical Plan Overview_D2.docx /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | .idea 3 | /output/* 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # doc2html 2 | This is a simple command-line Node app to convert Word .docx files to HTML files. You can convert a single Word file using the `convert` command or convert a folder of Word files by using the `convertdir` command. 3 | 4 | ## Installation 5 | Download the repo to your local machine and run `npm install`: 6 | ``` 7 | npm install 8 | ``` 9 | 10 | ## Convert Single Word File 11 | Convert a single Word file using the `convert` command by passing the path and file name of the Word document with `--from` (or `-f`) and the output path and file name with `--to` (or `-t`). 12 | 13 | ``` 14 | node app.js convert --from './word/med.docx' --to './word/med.html' 15 | 16 | or 17 | 18 | node app.js convert -f './word/med.docx' -t './word/med.html' 19 | ``` 20 | (__Note:__ the sample document in the example above is included in repo `/word/med.docx`) 21 | 22 | ## Convert All Word Files Within a Folder 23 | Convert all the Word files in a given directory by using the `convertdir` command and passing the path to the folder with `--folder` (or `-d`). The converted files are written to a directory with the same name as the one provided, nested inside an `/output` folder. 
24 | 25 | ``` 26 | node app.js convertdir --folder './wordfolder' 27 | 28 | or 29 | 30 | node app.js convertdir -d './wordfolder' 31 | ``` 32 | (__Note:__ the sample folder in the example above is included in repo `/wordfolder`) 33 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | /* Command-line entry point: declares the sub-commands with yargs and forwards the parsed options to the converter module. */ /* NOTE(review): `fs` and lodash (`_`) are required below but never referenced in this script. */ const fs = require('fs'); 2 | const yargs = require('yargs'); 3 | const _ = require('lodash'); 4 | 5 | const converter = require('./converter/converter'); 6 | 7 | /* Register four sub-commands (convert, convertdir, cmsify, cmsifydir); every option is marked `demand: true` and has a one-letter alias. The parsed result is read from `.argv`. */ const argv = yargs 8 | .command('convert', 'Converts Word files to HTML files', { 9 | from: { 10 | describe: 'Path to Word file', 11 | demand: true, 12 | alias: 'f' 13 | }, 14 | to: { 15 | describe: 'Path to HTML output', 16 | demand: true, 17 | alias: 't' 18 | } 19 | }) 20 | .command('convertdir', 'Converts all Word files in a given directory to HTML', { 21 | folder: { 22 | describe: 'Path to directory', 23 | demand: true, 24 | alias: 'd' 25 | } 26 | }) 27 | .command('cmsify', 'Converts HTML to CMS tags', { 28 | file: { 29 | describe: 'Path to file and filename', 30 | demand: true, 31 | alias: 'f' 32 | } 33 | }) 34 | .command('cmsifydir', 'Converts HTML tags to CMS tags for all files in folder', { 35 | folder: { 36 | describe: 'Path to directory', 37 | demand: true, 38 | alias: 'd' 39 | } 40 | }) 41 | .help() 42 | .argv; 43 | 44 | /* The first positional argument selects the sub-command. */ let command = argv._[0]; 45 | 46 | /* Dispatch to the matching converter function; any other value (including a missing command) only logs a message. */ if (command === 'convert') { 47 | converter.convertWord(argv.from, argv.to); 48 | 49 | } else if (command === 'convertdir') { 50 | converter.convertFolder(argv.folder); 51 | 52 | } else if (command === 'cmsify') { 53 | converter.cmsify(argv.file); 54 | 55 | } else if (command === 'cmsifydir') { 56 | converter.cmsifyFolder(argv.folder); 57 | } else { 58 | console.log('Command not recognized'); 59 | } -------------------------------------------------------------------------------- /converter/converter.js: 
-------------------------------------------------------------------------------- 1 | const mammoth = require('mammoth'); 2 | const cleaner = require('clean-html'); 3 | const replacer = require('replace-in-file'); 4 | const fs = require('fs'); 5 | 6 | let options = { 7 | styleMap: [ 8 | "p[style-name='Intro'] => p.intro", 9 | "p[style-name='List Bullet'] => ul > li:fresh" 10 | ] 11 | }; 12 | 13 | const cmsify = (file) => { 14 | let cmsOptions = { 15 | files: file, 16 | 17 | from: [ /

\[Context\]<\/p>/g, 18 | /

\[End context\]<\/p>/g, 19 | /

\[Highlights\]<\/p>/g, 20 | /

\[End highlights\]<\/p>/g, 21 | /

\[Details\]<\/p>/g, 22 | /

\[End details\]<\/p>/g, 23 | /

/g, 24 | /<\/p>/g, 25 | /

/g, 26 | /<\/h1>/g, 27 | /

/g, 28 | /<\/h2>/g, 29 | /

/g, 30 | /<\/h3>/g, 31 | /

/g, 32 | /<\/h4>/g, 33 | /

/g, 34 | /<\/h5>/g, 35 | /