├── src ├── index.js ├── templates.js ├── bin.js └── convert.js ├── .prettierrc.yml ├── babel.config.js ├── jest.config.js ├── .travis.yml ├── .codeclimate.yml ├── .eslintrc.yml ├── test ├── test-cases │ ├── 2 │ │ └── index.html │ └── 3 │ │ ├── index.html │ │ └── output.tex └── unit │ └── convert.js ├── rollup.config.js ├── LICENSE ├── package.json ├── .gitignore └── README.md /src/index.js: -------------------------------------------------------------------------------- 1 | export { convertText, convertFile } from './convert'; 2 | -------------------------------------------------------------------------------- /.prettierrc.yml: -------------------------------------------------------------------------------- 1 | printWidth: 100 2 | singleQuote: true 3 | trailingComma: all 4 | tabWidth: 2 5 | useTabs: false 6 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | [ 4 | '@babel/preset-env', 5 | { 6 | targets: { 7 | node: 'current', 8 | }, 9 | }, 10 | ], 11 | ], 12 | }; 13 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | testRegex: 'test\\/.*\\.js$', 4 | testMatch: null, 5 | testURL: 'http://localhost/', 6 | testTimeout: 15000, 7 | 8 | coverageDirectory: 'coverage', 9 | collectCoverage: true, 10 | }; 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | version: ~> 1.0 2 | 3 | language: node_js 4 | 5 | node_js: 6 | - node 7 | 8 | jobs: 9 | include: 10 | - stage: lint 11 | script: 12 | - yarn lint 13 | 14 | - stage: build 15 | script: 16 | - yarn build 17 | 18 | - stage: test 19 | script: 
20 | - yarn test && yarn codecov 21 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | engines: 2 | shellcheck: 3 | enabled: true 4 | duplication: 5 | enabled: true 6 | config: 7 | languages: 8 | javascript: 9 | mass_threshold: 70 10 | count_threshold: 3 11 | 12 | ratings: 13 | paths: 14 | - '**.js' 15 | 16 | exclude_paths: 17 | - node_modules 18 | - coverage 19 | - dist 20 | -------------------------------------------------------------------------------- /.eslintrc.yml: -------------------------------------------------------------------------------- 1 | env: 2 | node: true 3 | jest/globals: true 4 | 5 | extends: 6 | - airbnb-base 7 | - plugin:prettier/recommended 8 | - plugin:jest/recommended 9 | 10 | ignorePatterns: 11 | - node_modules/ 12 | - dist/ 13 | - '**/rollup.config.js' 14 | 15 | plugins: 16 | - import 17 | 18 | rules: 19 | import/prefer-default-export: off 20 | import/extensions: 21 | - error 22 | - never 23 | - json: always 24 | 25 | settings: 26 | import/resolver: 27 | node: 28 | extensions: 29 | - .js 30 | -------------------------------------------------------------------------------- /test/test-cases/2/index.html: -------------------------------------------------------------------------------- 1 |

2 | Newton's Laws of Motion 3 |

4 | 5 |

6 | Concept of Forces 9 |

10 | 11 |

12 | 13 | 14 | Some types of forces may be
15 | (i) Contact forces, (ii) Non-contact forces
16 | Contact forces involve physical contact between two objects. 17 | 18 | 19 |

-------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import { preserveShebangs } from 'rollup-plugin-preserve-shebangs'; 2 | import pkg from "./package.json"; 3 | 4 | export default [{ 5 | input: "src/bin.js", 6 | external: [ 7 | ...Object.keys(pkg.dependencies || {}), 8 | ...Object.keys(pkg.peerDependencies || {}) 9 | ], 10 | output: [ 11 | { 12 | file: pkg.bin.html2latex, 13 | format: "cjs" 14 | } 15 | ], 16 | plugins: [ 17 | preserveShebangs() 18 | ] 19 | }, 20 | { 21 | input: "src/convert.js", 22 | external: [ 23 | ...Object.keys(pkg.dependencies || {}), 24 | ...Object.keys(pkg.peerDependencies || {}) 25 | ], 26 | output: [ 27 | { 28 | file: pkg.main, 29 | format: "cjs" 30 | } 31 | ] 32 | }]; -------------------------------------------------------------------------------- /test/test-cases/3/index.html: -------------------------------------------------------------------------------- 1 | Three concentric metal shells $A,{\rm{ }}B$ and $C$ of respective radii $a,{\rm{ }}b$ and $c$ $(a < b < c)$ have surface charge densities $ + \sigma , - \sigma $ and $ + \sigma $ respectively. The potential of shell ${\rm{B}}$ is:

2 | 3 | Net potential at ${\rm{B}}$ is due to superposition of plotential due to all shells.
${V_B} = {V_{B(I)}} + {V_{B(II)}} + {V_{B(III)}}$
$ = \cfrac{{K4\pi {a^2}\sigma }}{b} + \cfrac{{K4\pi {b^2}( - \sigma )}}{b} + \cfrac{{K4\pi {c^2}(\sigma )}}{c}$
Required potential of b
$ = \cfrac{{\sigma {a^2}}}{{{\varepsilon _0}b}} - \cfrac{{\sigma {b^2}}}{{{\varepsilon _0}b}} + \cfrac{{\sigma {c^2}}}{{{\varepsilon _0}c}}$
$ = \cfrac{\sigma }{{{\varepsilon _0}}}\left( {\cfrac{{{a^2} - {b^2}}}{b} + c} \right)$ -------------------------------------------------------------------------------- /test/test-cases/3/output.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \usepackage{amsmath} 4 | \usepackage{graphicx} 5 | 6 | 7 | \begin{document} 8 | 9 | Three concentric metal shells $A,{\rm{ }}B$ and $C$ of respective radii $a,{\rm{ }}b$ and $c$ $(a < b < c)$ have surface charge densities $ + \sigma , - \sigma $ and $ + \sigma $ respectively. The potential of shell ${\rm{B}}$ is: 10 | 11 | \begin{center} 12 | \includegraphics{images/image2.png} 13 | \end{center} 14 | 15 | Net potential at ${\rm{B}}$ is due to superposition of plotential due to all shells. 16 | 17 | ${V_B} = {V_{B(I)}} + {V_{B(II)}} + {V_{B(III)}}$ 18 | 19 | $ = \cfrac{{K4\pi {a^2}\sigma }}{b} + \cfrac{{K4\pi {b^2}( - \sigma )}}{b} + \cfrac{{K4\pi {c^2}(\sigma )}}{c}$ 20 | 21 | Required potential of b 22 | 23 | $ = \cfrac{{\sigma {a^2}}}{{{\varepsilon _0}b}} - \cfrac{{\sigma {b^2}}}{{{\varepsilon _0}b}} + \cfrac{{\sigma {c^2}}}{{{\varepsilon _0}c}}$ 24 | 25 | $ = \cfrac{\sigma }{{{\varepsilon _0}}}\left( {\cfrac{{{a^2} - {b^2}}}{b} + c} \right)$ 26 | 27 | 28 | \end{document} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 
| 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-to-latex", 3 | "description": "convert html to latex", 4 | "version": "0.8.0", 5 | "dependencies": { 6 | "entities": "^2.0.0", 7 | "fs-extra": "^9.0.0", 8 | "got": "^11.0.2", 9 | "parse5": "^6.0.0", 10 | "shortid": "^2.2.15", 11 | "sywac": "^1.3.0", 12 | "upath": "^1.2.0" 13 | }, 14 | "devDependencies": { 15 | "@babel/core": "^7.9.0", 16 | "@babel/preset-env": "^7.9.5", 17 | "babel-jest": "^25.4.0", 18 | "codecov": "^3.6.5", 19 | "cz-conventional-changelog": "3.1.0", 20 | "eslint": "^6.8.0", 21 | "eslint-config-airbnb-base": "^14.1.0", 22 | "eslint-config-prettier": "^6.11.0", 23 | "eslint-plugin-import": "^2.20.2", 24 | "eslint-plugin-jest": "^23.8.2", 25 | "eslint-plugin-prettier": "^3.1.3", 26 | "jest": "^25.4.0", 27 | "prettier": "^2.0.5", 28 | "rollup": "^2.7.3", 29 | "rollup-plugin-preserve-shebangs": "^0.2.0", 30 | "tempy": "^0.5.0" 31 | }, 32 | "files": [ 33 | "dist" 34 | ], 35 | "license": "MIT", 36 | "bin": { 37 | "html2latex": "dist/bin.js" 38 | }, 39 | "main": "dist/index.js", 40 | "scripts": { 41 | "build": "rollup -c", 42 | "commit": "npx git-cz", 43 | "test": "jest test", 44 | "lint": "prettier --check 
'{src,test}/**/*.js' && eslint '{src,test}/**/*/*.js'", 45 | "lint:fix": "prettier --write '{src,test}/**/*.js' && eslint '{src,test}/**/*/*.js' --fix" 46 | }, 47 | "config": { 48 | "commitizen": { 49 | "path": "./node_modules/cz-conventional-changelog" 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/templates.js: -------------------------------------------------------------------------------- 1 | import { normalizeSafe } from 'upath'; 2 | 3 | export const nls = (text) => `${text}\n`; 4 | export const nlp = (text) => `\n${text}`; 5 | 6 | export const centerblk = (text) => `\\begin{center}\n\t${text}\n\\end{center}`; 7 | export const centering = (text) => `\\centering{${text}}`; 8 | 9 | export const section = (text) => `\\section*{${centering(text)}}`; 10 | export const subsection = (text) => `\\subsection*{${text}}`; 11 | export const subsubsection = (text) => `\\subsubsection*{${text}}`; 12 | 13 | export const bold = (text) => `\\textbf{${text}}`; 14 | export const italic = (text) => `\\textit{${text}}`; 15 | export const underline = (text) => `\\underline{${text}}`; 16 | 17 | export const divider = nls('\\hrule'); 18 | 19 | export const enumerate = (text) => `\\begin{enumerate}\n${text}\n\\end{enumerate}`; 20 | export const itemize = (text) => `\\begin{itemize}\n${text}\n\\end{itemize}`; 21 | export const item = (text) => `\t\\item ${text}`; 22 | export function image(path, { width, height, keepRatio, center } = { center: true }) { 23 | const line = ['\\includegraphics']; 24 | const options = []; 25 | 26 | if (width) options.push(`width=${width}`); 27 | if (height) options.push(`height=${height}`); 28 | if ((width || height) && keepRatio) options.push('keepaspectratio'); 29 | if (options.length) line.push(`[${options.join(',')}]`); 30 | 31 | line.push(`{${normalizeSafe(path)}}`); 32 | 33 | return center ? 
centerblk(line.join('')) : line.join(''); 34 | } 35 | 36 | export function usePackages(packageNames) { 37 | return nls(packageNames.map((n) => `\\usepackage{${n}}`).join('\n')); 38 | } 39 | 40 | export function beginDocument({ title, includeDate = false, author } = {}) { 41 | const beginningText = []; 42 | 43 | if (title) beginningText.push(`\\title{${title}}`); 44 | if (author) beginningText.push(`\\author{${author}}`); 45 | if (includeDate) beginningText.push(`\\date{\\today}`); 46 | 47 | if (beginningText.length) beginningText.push(nlp(`\\begin{document}`)); 48 | else beginningText.push(`\\begin{document}`); 49 | 50 | if (title) beginningText.push(nlp('\\maketitle')); 51 | 52 | return beginningText.join('\n'); 53 | } 54 | 55 | export const endDocument = nlp('\\end{document}'); 56 | export const docClass = (className) => `\\documentclass{${className}}`; 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/macos,windows,linux,node 3 | 4 | ### Linux ### 5 | *~ 6 | 7 | # temporary files which can be created if a process still has a handle open of a deleted file 8 | .fuse_hidden* 9 | 10 | # KDE directory preferences 11 | .directory 12 | 13 | # Linux trash folder which might appear on any partition or disk 14 | .Trash-* 15 | 16 | # .nfs files are created when an open file is removed but is still being accessed 17 | .nfs* 18 | 19 | ### macOS ### 20 | *.DS_Store 21 | .AppleDouble 22 | .LSOverride 23 | 24 | # Icon must end with two \r 25 | Icon 26 | 27 | # Thumbnails 28 | ._* 29 | 30 | # Files that might appear in the root of a volume 31 | .DocumentRevisions-V100 32 | .fseventsd 33 | .Spotlight-V100 34 | .TemporaryItems 35 | .Trashes 36 | .VolumeIcon.icns 37 | .com.apple.timemachine.donotpresent 38 | 39 | # Directories potentially created on remote AFP share 40 | .AppleDB 41 | .AppleDesktop 42 | 
Network Trash Folder 43 | Temporary Items 44 | .apdisk 45 | 46 | ### Node ### 47 | # Logs 48 | logs 49 | *.log 50 | npm-debug.log* 51 | yarn-debug.log* 52 | yarn-error.log* 53 | 54 | # Runtime data 55 | pids 56 | *.pid 57 | *.seed 58 | *.pid.lock 59 | 60 | # Directory for instrumented libs generated by jscoverage/JSCover 61 | lib-cov 62 | 63 | # Coverage directory used by tools like istanbul 64 | coverage 65 | 66 | # nyc test coverage 67 | .nyc_output 68 | 69 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 70 | .grunt 71 | 72 | # Bower dependency directory (https://bower.io/) 73 | bower_components 74 | 75 | # node-waf configuration 76 | .lock-wscript 77 | 78 | # Compiled binary addons (http://nodejs.org/api/addons.html) 79 | build/Release 80 | 81 | # Dependency directories 82 | node_modules/ 83 | jspm_packages/ 84 | 85 | # Typescript v1 declaration files 86 | typings/ 87 | 88 | # Optional npm cache directory 89 | .npm 90 | 91 | # Optional eslint cache 92 | .eslintcache 93 | 94 | # Optional REPL history 95 | .node_repl_history 96 | 97 | # Output of 'npm pack' 98 | *.tgz 99 | 100 | # Yarn Integrity file 101 | .yarn-integrity 102 | 103 | # dotenv environment variables file 104 | .env 105 | 106 | 107 | ### Windows ### 108 | # Windows thumbnail cache files 109 | Thumbs.db 110 | ehthumbs.db 111 | ehthumbs_vista.db 112 | 113 | # Folder config file 114 | Desktop.ini 115 | 116 | # Recycle Bin used on file shares 117 | $RECYCLE.BIN/ 118 | 119 | # Windows Installer files 120 | *.cab 121 | *.msi 122 | *.msm 123 | *.msp 124 | 125 | # Windows shortcuts 126 | *.lnk 127 | 128 | # End of https://www.gitignore.io/api/macos,windows,linux,node 129 | 130 | package-lock.json 131 | yarn.lock 132 | dist 133 | *.pdf 134 | *.aux -------------------------------------------------------------------------------- /src/bin.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /* eslint no-console: 0 
*/ 3 | 4 | import program from 'sywac'; 5 | import { extname, basename } from 'path'; 6 | import { convertFile } from './convert'; 7 | 8 | // Add default settings 9 | program 10 | .version('-v, --version') 11 | .help('-h, --help') 12 | .epilogue('Copyright 2020') 13 | .command('convert-file', { 14 | desc: 'Convert HTML to Latex', 15 | setup: (args) => { 16 | args 17 | .positional('[--ifp] ', { 18 | type: 'string', 19 | }) 20 | .string('-ofp --output-file-path', { 21 | group: 'Output Options', 22 | }) 23 | .boolean('-ib --ignore-breaks', { 24 | group: 'Parsing Options', 25 | defaultValue: true, 26 | }) 27 | .boolean('-dm --prefer-dollar-inline-math', { 28 | group: 'Parsing Options', 29 | defaultValue: false, 30 | }) 31 | .boolean('-swe --skip-wrapping-equations', { 32 | group: 'Parsing Options', 33 | defaultValue: false, 34 | }) 35 | .boolean('-dw --include-document-wrapper', { 36 | group: 'Parsing Options', 37 | defaultValue: false, 38 | }) 39 | .string('-dc --document-class', { 40 | group: 'Parsing Options', 41 | defaultValue: 'article', 42 | }) 43 | .array('-ip --include-packages', { 44 | group: 'Parsing Options', 45 | }) 46 | .string('-t --title', { 47 | group: 'Parsing Options', 48 | }) 49 | .string('-a --author', { 50 | group: 'Parsing Options', 51 | }) 52 | .boolean('-d --include-date', { 53 | group: 'Parsing Options', 54 | }) 55 | .string('-cdr --compilation-dir', { 56 | group: 'Image Parsing Options', 57 | default: process.cwd(), 58 | }) 59 | .boolean('-ain --autogen-image-names', { 60 | group: 'Image Parsing Options', 61 | defaultValue: true, 62 | }) 63 | .string('-iw --image-width', { 64 | group: 'Image Parsing Options', 65 | }) 66 | .string('-ih --image-height', { 67 | group: 'Image Parsing Options', 68 | }) 69 | .boolean('-kar --keep-image-aspect-ratio', { 70 | group: 'Image Parsing Options', 71 | default: false, 72 | }) 73 | .boolean('--debug', { 74 | group: 'Image Parsing Options', 75 | defaultValue: false, 76 | }); 77 | }, 78 | run: async (args) => { 
79 | await convertFile(args.ifp, args.ofp, { 80 | autoGenImageNames: args.ain, 81 | includeDocumentWrapper: args.dw, 82 | documentClass: args.dc, 83 | includePackages: args.ip, 84 | compilationDir: args.cpr, 85 | preferDollarInlineMath: args.dm, 86 | skipWrappingEquations: args.swe, 87 | debug: args.debug, 88 | imageWidth: args.iw, 89 | imageHeight: args.ih, 90 | keepImageAspectRatio: args.kar, 91 | title: args.t, 92 | includeDate: args.d, 93 | author: args.a, 94 | ignoreBreaks: args.ib, 95 | }); 96 | }, 97 | }); 98 | 99 | // Parse input 100 | program.parse().then(({ output }) => { 101 | console.log(output); 102 | }); 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

HTML-to-Latex

2 |

Basic script to convert HTML source into Latex

3 |

4 | 5 | $Travis Pipeline Status$ 6 | 7 | 8 | $Code Climate maintainability$ 9 | 10 | 11 | $CodeCov test coverage$ 12 | 13 | 14 | $Dependency Status$ 15 | 16 | 17 | $Dev Dependency Status$ 18 | 19 | $Dependabot Badge$ 20 | 21 | 22 | 23 | 24 | 25 | $Install Size$ 26 | 27 | 28 | 29 | 30 |

31 | 32 | **[IN DEVELOPMENT]** 33 | 34 | ## Install 35 | 36 | ```bash 37 | $ npm install html-to-latex 38 | ``` 39 | 40 | ## Usage 41 | 42 | Converting html text: 43 | ```javascript 44 | import { convertText } from 'html-to-latex'; 45 | 46 | const html = `

Styled Text

`; 47 | const tex = await convertText(html); 48 | 49 | console.log(tex) 50 | //\documentclass{article} 51 | // 52 | //\begin{document} 53 | // 54 | //Styled \textbf{Text} 55 | // 56 | //\end{document} 57 | ``` 58 | 59 | Converting html file: 60 | ```javascript 61 | import { convertFile } from 'html-to-latex'; 62 | 63 | const html = 'filePath.html'; 64 | 65 | await convertFile(html); 66 | ``` 67 | 68 | ### API 69 | 70 | #### convertText(htmlText, options?) 71 | 72 | Returns: `Promise` 73 | 74 | Converts the input htmlText to a valid latex string. 75 | 76 | ##### htmlString 77 | 78 | Type: `string` 79 | 80 | ##### options 81 | 82 | Type: `object` 83 | 84 | ###### ignoreBreaks 85 | 86 | Type: `boolean` 87 | Default: `true` 88 | CLI Options: `-ib` or `--ignore-breaks` 89 | 90 | Instead of replacing `
` with //, ending the line, a simple space character is inserted instead. 91 | 92 | ###### preferDollarInlineMath 93 | 94 | Type: `boolean` 95 | Default: `false` 96 | CLI Options: `-dm` or `--prefer-dollar-inline-math` 97 | 98 | Replace `\(` and `\)` with `$`. 99 | 100 | ###### skipWrappingEquations 101 | 102 | Type: `boolean` 103 | Default: `false` 104 | CLI Options: `-swe` or `--skip-wrapping-equations` 105 | 106 | If an equation is defined in a `p` tag without any other content besides that equation, it will automatically be wrapped in `\[` and `\]`. Set this option to `true` to skip that wrapping. 107 | 108 | ###### includeDocumentWrapper 109 | 110 | Type: `boolean` 111 | Default: `false` 112 | CLI Options: `-dw` or `--include-document-wrapper` 113 | 114 | Adds a latex document wrapper around the converted text. This is required to have a valid latex file: 115 | 116 | ```latex 117 | \documentclass{article} 118 | 119 | \begin{document} 120 | %...converted text 121 | \end{document} 122 | ``` 123 | 124 | ###### documentClass 125 | 126 | Type: `string` 127 | Default: `article` 128 | CLI Options: `-dc` or `--document-class` 129 | 130 | If a document wrapper is added, the document class will be set. 131 | 132 | ```latex 133 | \documentclass{article} 134 | %... 135 | ``` 136 | 137 | ###### includePackages 138 | 139 | Type: `string[]` 140 | Default: `[]`* 141 | CLI Options: `-ip` or `--include-packages` 142 | 143 | If the document wrapper is added, a list of used packages will be set. 
144 | 145 | ```latex 146 | \documentclass{article} 147 | 148 | \usepackage{packagename} 149 | 150 | \begin{document} 151 | %...converted text 152 | \end{document} 153 | ``` 154 | 155 | \*If nothing is specified, the list of included packages will be inferred from the html: 156 | 157 | 158 | | Tag | Added Package | 159 | |--------|---------------| 160 | | \cfrac | amsmath | 161 | | \img | graphicx | 162 | | \therefore | amssymb | 163 | 164 | ###### title 165 | 166 | Type: `string` 167 | Default: `undefined` 168 | CLI Options: `-t` or `--title` 169 | 170 | If a document wrapper is added, the title will be set. 171 | 172 | ```latex 173 | \documentclass{article} 174 | 175 | \title{Altered Carbon} 176 | 177 | \begin{document} 178 | %...converted text 179 | \end{document} 180 | ``` 181 | 182 | ###### author 183 | 184 | Type: `string` 185 | Default: `undefined` 186 | CLI Options: `-a` or `--author` 187 | 188 | If a document wrapper is added, the author will be set. 189 | 190 | ```latex 191 | \documentclass{article} 192 | 193 | \author{Takashi Kovacs} 194 | 195 | \begin{document} 196 | %...converted text 197 | \end{document} 198 | ``` 199 | 200 | ###### includeDate 201 | 202 | Type: `boolean` 203 | Default: `false` 204 | CLI Options: `-d` or `--include-date` 205 | 206 | If a document wrapper is added, the current date will be set. 207 | 208 | ```latex 209 | \documentclass{article} 210 | 211 | \date{\today} 212 | 213 | \begin{document} 214 | %...converted text 215 | \end{document} 216 | ``` 217 | 218 | ###### compilationDir 219 | 220 | Type: `string` 221 | Default: `process.cwd()` 222 | CLI Options: `-cdr` or `--compilation-dir` 223 | 224 | If any images need to be downloaded for the latex compilation, they will be placed in an 'images' subdirectory inside this directory. 
225 | 226 | ###### autoGenImageNames 227 | 228 | Type: `boolean` 229 | Default: `true` 230 | CLI Options: `-ain` or `--autogen-image-names` 231 | 232 | To avoid any weird file names, image files that are downloaded are automatically given a random ID with the extension of the original file. This can be turned off by passing a `false` value. 233 | 234 | ###### imageWidth 235 | 236 | Type: `string` 237 | Default: `undefined` 238 | CLI Options: `-iw` or `--image-width` 239 | 240 | Allows you to set an image width. This would be in the form normally accepted by latex such as: `2cm` 241 | 242 | ###### imageHeight 243 | 244 | Type: `string` 245 | Default: `undefined` 246 | CLI Options: `-ih` or `--image-height` 247 | 248 | Allows you to set an image height. This would be in the form normally accepted by latex such as: `2cm` 249 | 250 | ###### keepImageAspectRatio 251 | 252 | Type: `boolean` 253 | Default: `undefined` 254 | CLI Options: `-kar` or `--keep-image-aspect-ratio` 255 | 256 | Allows you to maintain the aspect ratio of the image. This also requires either the image width property or image height property to be set. 257 | 258 | ###### debug 259 | 260 | Type: `boolean` 261 | Default: `false` 262 | CLI Options: `--debug` 263 | 264 | Prints error messages when they occur, such as when an image cannot be found at the given url. 265 | 266 | 267 | #### convertFile(filepath, options?) 268 | CLI: `available` (see options for cli option names) 269 | Returns: `Promise` 270 | 271 | Converts the input file to a valid latex file. 272 | 273 | ##### filepath 274 | 275 | Type: `string` 276 | CLI Option: Positional, or `-ifp` 277 | 278 | Path of html file 279 | 280 | ##### options 281 | 282 | Type: `object` 283 | 284 | All options included in .... 
and 285 | 286 | *includeDocumentWrapper option is defaulted to true for this function, as it would make more sense to do so* 287 | 288 | ##### outputFilepath 289 | 290 | Type: `string` 291 | Default: `filepath` (The input file path) 292 | CLI Option: `-ofp` or `--output-file-path` 293 | 294 | The output filepath of the converted file. By default it will overwrite the input file. 295 | 296 | ### CLI API 297 | The same arguments are valid for the cli. The cli is exposed under the `html2latex` executable and has the functions: `convert-file` and `convert-text`. Run `html2latex --help` for more information. 298 | 299 | ## Improving output 300 | ### Ignoring br tags 301 | Instead designate new sections/paragraphs using the proper html tag such as a `

` 302 | -------------------------------------------------------------------------------- /src/convert.js: -------------------------------------------------------------------------------- 1 | import { parseFragment } from 'parse5'; 2 | import { decodeHTML } from 'entities'; 3 | import { outputFile, readFile, pathExists, ensureDir } from 'fs-extra'; 4 | import { resolve, basename, join, dirname, extname } from 'path'; 5 | import { stream } from 'got'; 6 | import { pipeline as pipelineSync } from 'stream'; 7 | import { promisify } from 'util'; 8 | import { createWriteStream } from 'fs'; 9 | import { generate as generateId } from 'shortid'; 10 | import { 11 | docClass, 12 | usePackages, 13 | beginDocument, 14 | endDocument, 15 | section, 16 | subsection, 17 | subsubsection, 18 | bold, 19 | italic, 20 | underline, 21 | divider, 22 | itemize, 23 | enumerate, 24 | item, 25 | image, 26 | } from './templates'; 27 | 28 | const pipeline = promisify(pipelineSync); 29 | 30 | function analyzeForPackageImports(HTMLText) { 31 | const pkgs = []; 32 | 33 | if (HTMLText.includes('\\cfrac')) pkgs.push('amsmath'); 34 | if (HTMLText.includes(' name === 'src').value; 58 | const ext = extname(origPath) || '.jpg'; 59 | const base = autoGenImageNames ? 
`${generateId()}${ext}` : basename(origPath); 60 | const localPath = resolve(imagesDir, base); 61 | const localLatexPath = join('images', base); 62 | const exists = await pathExists(localPath); 63 | 64 | if (!exists) { 65 | try { 66 | const url = new URL(origPath); 67 | 68 | await ensureDir(imagesDir); 69 | 70 | await pipeline(stream(url.href), createWriteStream(localPath)); 71 | } catch (e) { 72 | if (debug) { 73 | console.debug(`URL: ${origPath}`); 74 | console.debug(e); 75 | } 76 | } 77 | } 78 | 79 | return image(localLatexPath, { 80 | width: imageWidth, 81 | height: imageHeight, 82 | keepRatio: keepImageAspectRatio, 83 | center: centerImages, 84 | }); 85 | } 86 | 87 | function convertPlainText(value, opts) { 88 | const breakReplacement = opts.ignoreBreaks ? '' : '\n\n'; 89 | const cleanText = value 90 | .replace(/(\n|\r)/g, breakReplacement) // Standardize line breaks or remove them 91 | .replace(/\t/g, '') // Remove tabs 92 | .replace(/(? bold(t)); 105 | case 'i': 106 | return convertRichText(n, opts).then((t) => italic(t)); 107 | case 'u': 108 | return convertRichText(n, opts).then((t) => underline(t)); 109 | case 'br': 110 | return opts.ignoreBreaks ? 
' ' : '\n\n'; 111 | case 'span': 112 | return convertRichText(n, opts); 113 | case '#text': 114 | return convertPlainText(n.value, opts); 115 | default: 116 | return ''; 117 | } 118 | } 119 | 120 | async function convertRichText(node, opts) { 121 | if (node.childNodes && node.childNodes.length > 0) { 122 | const converted = await Promise.all(node.childNodes.map((n) => convertRichTextSingle(n, opts))); 123 | return converted.join(''); 124 | } 125 | 126 | return convertRichTextSingle(node, opts); 127 | } 128 | 129 | async function convertUnorderedLists({ childNodes }, opts) { 130 | const filtered = await childNodes.filter(({ nodeName }) => nodeName === 'li'); 131 | const texts = await Promise.all( 132 | filtered.map((f) => convert([f], { ...opts, includeDocumentWrapper: false })), 133 | ); 134 | const listItems = texts.map(item); 135 | 136 | return itemize(listItems.join('\n')); 137 | } 138 | 139 | async function convertOrderedLists({ childNodes }, opts) { 140 | const filtered = await childNodes.filter(({ nodeName }) => nodeName === 'li'); 141 | const texts = await Promise.all( 142 | filtered.map((f) => convert([f], { ...opts, includeDocumentWrapper: false })), 143 | ); 144 | const listItems = texts.map(item); 145 | 146 | return enumerate(listItems.join('\n')); 147 | } 148 | 149 | async function convertHeading(node, opts) { 150 | const text = await convertRichText(node, opts); 151 | 152 | switch (node.nodeName) { 153 | case 'h1': 154 | return section(text); 155 | case 'h2': 156 | return subsection(text); 157 | default: 158 | return subsubsection(text); 159 | } 160 | } 161 | 162 | export async function convert( 163 | nodes, 164 | { 165 | autoGenImageNames = true, 166 | includeDocumentWrapper = false, 167 | documentClass = 'article', 168 | includePackages = [], 169 | compilationDir = process.cwd(), 170 | ignoreBreaks = true, 171 | preferDollarInlineMath = false, 172 | skipWrappingEquations = false, 173 | debug = false, 174 | imageWidth, 175 | imageHeight, 176 | 
keepImageAspectRatio, 177 | centerImages, 178 | title, 179 | includeDate, 180 | author, 181 | } = {}, 182 | ) { 183 | const blockedNodes = [ 184 | 'h1', 185 | 'h2', 186 | 'h3', 187 | 'ul', 188 | 'ol', 189 | 'img', 190 | 'hr', 191 | 'div', 192 | 'section', 193 | 'body', 194 | 'html', 195 | 'header', 196 | 'footer', 197 | 'aside', 198 | 'p', 199 | ]; 200 | const doc = []; 201 | const opts = { 202 | compilationDir, 203 | ignoreBreaks, 204 | preferDollarInlineMath, 205 | skipWrappingEquations, 206 | autoGenImageNames, 207 | debug, 208 | imageWidth, 209 | imageHeight, 210 | keepImageAspectRatio, 211 | centerImages, 212 | }; 213 | let tempInlineDoc = []; 214 | 215 | if (includeDocumentWrapper) { 216 | doc.push(docClass(documentClass)); 217 | 218 | if (includePackages.length > 0) doc.push(usePackages(includePackages)); 219 | 220 | doc.push(beginDocument({ title, includeDate, author })); 221 | } 222 | 223 | nodes.forEach(async (n) => { 224 | if (!blockedNodes.includes(n.nodeName)) { 225 | tempInlineDoc.push(convertRichText(n, opts)); 226 | return; 227 | } 228 | 229 | if (tempInlineDoc.length > 0) { 230 | doc.push(Promise.all(tempInlineDoc).then((t) => t.join('').trim())); 231 | tempInlineDoc = []; 232 | } 233 | 234 | switch (n.nodeName) { 235 | case 'h1': 236 | case 'h2': 237 | case 'h3': 238 | doc.push(convertHeading(n, opts)); 239 | break; 240 | case 'ul': 241 | doc.push(convertUnorderedLists(n, opts)); 242 | break; 243 | case 'ol': 244 | doc.push(convertOrderedLists(n, opts)); 245 | break; 246 | case 'img': 247 | doc.push(convertImage(n, opts)); 248 | break; 249 | case 'hr': 250 | doc.push(divider); 251 | break; 252 | case 'div': 253 | case 'section': 254 | case 'body': 255 | case 'html': 256 | case 'header': 257 | case 'footer': 258 | case 'aside': 259 | doc.push( 260 | convert(n.childNodes, { 261 | ...opts, 262 | includeDocumentWrapper: false, 263 | }), 264 | ); 265 | break; 266 | case 'p': 267 | doc.push( 268 | convertRichText(n, opts).then((t) => { 269 | const 
trimmed = t.trim(); 270 | 271 | // Check if text is only an equation. If so, switch  & $ $, for \[ \] 272 | if ( 273 | !opts.skipWrappingEquations && 274 | trimmed.match(/^(\$|\\$)/) && 275 | trimmed.match(/(\\$|\$)$/) 276 | ) { 277 | const rewrapped = trimmed.replace(/^(\$|\\$)/, '\\[').replace(/(\\$|\$)$/, '\\]'); 278 | 279 | // TODO: Move all of this into the above regex check 280 | if (!rewrapped.includes('$')) return rewrapped; 281 | } 282 | 283 | return trimmed; 284 | }), 285 | ); 286 | break; 287 | default: 288 | } 289 | }); 290 | 291 | // Insert any left over inline nodes 292 | if (tempInlineDoc.length > 0) { 293 | doc.push(Promise.all(tempInlineDoc).then((t) => t.join('').trim())); 294 | } 295 | 296 | // Add document wrapper if configuration is set 297 | if (includeDocumentWrapper) doc.push(endDocument); 298 | 299 | const converted = await Promise.all(doc); 300 | 301 | return converted.filter(Boolean).join('\n\n'); 302 | } 303 | 304 | export async function convertText(data, options = {}) { 305 | const root = await parseFragment(data); 306 | 307 | return convert(root.childNodes, { 308 | ...options, 309 | includePackages: options.includePackages || analyzeForPackageImports(data), 310 | }); 311 | } 312 | 313 | export async function convertFile(filepath, { outputFilepath = filepath, ...options } = {}) { 314 | const data = await readFile(filepath, 'utf-8'); 315 | const processed = await convertText(data, { includeDocumentWrapper: true, ...options }); 316 | 317 | await exportFile(processed, outputFilepath, dirname(filepath)); 318 | } 319 | -------------------------------------------------------------------------------- /test/unit/convert.js: -------------------------------------------------------------------------------- 1 | import { directory } from 'tempy'; 2 | import { pathExists, remove, readFile } from 'fs-extra'; 3 | import { resolve } from 'path'; 4 | import ShortId from 'shortid'; 5 | import { convertText, exportFile, convertFile } from 
'../../src/convert'; 6 | 7 | /* Checks that exportFile, given content, the name test and a directory obtained from tempy, leaves a file named test.tex in that directory. The directory is removed again after each case. */ describe('exportFile', () => { 8 | let dir; 9 | 10 | beforeEach(() => { 11 | dir = directory(); 12 | }); 13 | 14 | afterEach(async () => { 15 | await remove(dir); 16 | }); 17 | 18 | it('should export latex file', async () => { 19 | await exportFile('testing', 'test', dir); 20 | 21 | const exists = await pathExists(resolve(dir, 'test.tex')); 22 | 23 | expect(exists).toBeTruthy(); 24 | }); 25 | }); 26 | 27 | /* Exercises convertText against small HTML fixtures, grouped by feature: document wrapper, sectioning tags, inline styling, breaks, equations, headings, dividers, images, lists and the debug flag. NOTE(review): the HTML markup inside the fixture literals appears to have been stripped from this copy. For example the h1, h2 and h3 cases all carry the same literal text Heading yet expect different LaTeX, and several fixtures are empty while a non-empty result is expected. Restore the original tags before relying on these cases. */ describe('convertText', () => { 28 | describe('Document wrapper', () => { 29 | it('should insert the basic document wrapper and default document class of article', async () => { 30 | const html = ``; 31 | const tex = await convertText(html, { includeDocumentWrapper: true }); 32 | 33 | expect(tex).toBe('\\documentclass{article}\n\n\\begin{document}\n\n\n\\end{document}'); 34 | }); 35 | 36 | it('should insert the basic document heading with author', async () => { 37 | const html = ``; 38 | const tex = await convertText(html, { includeDocumentWrapper: true, author: 'Takashi' }); 39 | 40 | expect(tex).toBe( 41 | '\\documentclass{article}\n\n\\author{Takashi}\n\n\\begin{document}\n\n\n\\end{document}', 42 | ); 43 | }); 44 | 45 | it('should insert the basic document heading with title', async () => { 46 | const html = ``; 47 | const tex = await convertText(html, { 48 | includeDocumentWrapper: true, 49 | title: 'Altered Carbon', 50 | }); 51 | 52 | expect(tex).toBe( 53 | '\\documentclass{article}\n\n\\title{Altered Carbon}\n\n\\begin{document}\n\n\\maketitle\n\n\n\\end{document}', 54 | ); 55 | }); 56 | 57 | it('should insert the basic document heading with date', async () => { 58 | const html = ``; 59 | const tex = await convertText(html, { includeDocumentWrapper: true, includeDate: true }); 60 | 61 | expect(tex).toBe( 62 | '\\documentclass{article}\n\n\\date{\\today}\n\n\\begin{document}\n\n\n\\end{document}', 63 | ); 64 | }); 65 | }); 66 | 67 | describe('Converting embedded sectioning tags', () => { 68 | it('should properly convert section
tags', async () => { 69 | const html = `

Test

`; 70 | const tex = await convertText(html); 71 | 72 | expect(tex).toBe('Test'); 73 | }); 74 | 75 | it('should properly convert aside tags', async () => { 76 | const html = ``; 77 | const tex = await convertText(html); 78 | 79 | expect(tex).toBe('Test'); 80 | }); 81 | 82 | it('should properly convert div tags', async () => { 83 | const html = `

Test

`; 84 | const tex = await convertText(html); 85 | 86 | expect(tex).toBe('Test'); 87 | }); 88 | 89 | it('should properly convert html tags', async () => { 90 | const html = `Test`; 91 | const tex = await convertText(html); 92 | 93 | expect(tex).toBe('Test'); 94 | }); 95 | 96 | it('should properly convert header tags', async () => { 97 | const html = `

Test

`; 98 | const tex = await convertText(html); 99 | 100 | expect(tex).toBe('Test'); 101 | }); 102 | 103 | it('should properly convert footer tags', async () => { 104 | const html = ``; 105 | const tex = await convertText(html); 106 | 107 | expect(tex).toBe('Test'); 108 | }); 109 | }); 110 | 111 | describe('Converting general text', () => { 112 | it('should convert simple text tag with bold `b` styling', async () => { 113 | const html = `

Styled Text

`; 114 | const tex = await convertText(html); 115 | 116 | expect(tex).toBe('Styled \\textbf{Text}'); 117 | }); 118 | 119 | it('should convert simple text tag with bold `strong` styling', async () => { 120 | const html = `

Styled Text

`; 121 | const tex = await convertText(html); 122 | 123 | expect(tex).toBe('Styled \\textbf{Text}'); 124 | }); 125 | 126 | it('should convert simple text tag with italics styling', async () => { 127 | const html = `

Styled Text

`; 128 | const tex = await convertText(html); 129 | 130 | expect(tex).toBe('Styled \\textit{Text}'); 131 | }); 132 | 133 | it('should convert simple text tag with underline styling', async () => { 134 | const html = `

Styled Text

`; 135 | const tex = await convertText(html); 136 | 137 | expect(tex).toBe('Styled \\underline{Text}'); 138 | }); 139 | 140 | it('should convert text tag with span nesting', async () => { 141 | const html = `

Styled Text

`; 142 | const tex = await convertText(html); 143 | 144 | expect(tex).toBe('Styled Text'); 145 | }); 146 | 147 | it('should ignore `\t`', async () => { 148 | const html = `

Styled\tText

`; 149 | const tex = await convertText(html); 150 | 151 | expect(tex).toBe('StyledText'); 152 | }); 153 | 154 | it('should escape `%`', async () => { 155 | const html = `

Styled%Text

`; 156 | const tex = await convertText(html); 157 | 158 | expect(tex).toBe('Styled\\%Text'); 159 | }); 160 | 161 | it('should not escape `%` if its already escaped', async () => { 162 | const html = `

Styled\\%Text

`; 163 | const tex = await convertText(html); 164 | 165 | expect(tex).toBe('Styled\\%Text'); 166 | }); 167 | }); 168 | 169 | describe('Converting text with different types of breaks', () => { 170 | it('should convert simple `p` tag text with `br` tags. These will be ignored by default', async () => { 171 | const html = `

Styled
Text

`; 172 | const tex = await convertText(html); 173 | 174 | expect(tex).toBe('Styled Text'); 175 | }); 176 | 177 | it('should convert simple `p` tag text with `br` tags and the ignoreBreaks argument set to false', async () => { 178 | const html = `

Styled
Text

`; 179 | const tex = await convertText(html, { ignoreBreaks: false }); 180 | 181 | expect(tex).toBe('Styled\n\nText'); 182 | }); 183 | 184 | it('should convert simple text with `\n` and the ignoreBreaks argument set to false', async () => { 185 | const html = `

Styled\nText

`; 186 | const tex = await convertText(html, { ignoreBreaks: false }); 187 | 188 | expect(tex).toBe('Styled\n\nText'); 189 | }); 190 | 191 | it('should convert simple text with `\r` and the ignoreBreaks argument set to false', async () => { 192 | const html = `

Styled\rText

`; 193 | const tex = await convertText(html, { ignoreBreaks: false }); 194 | 195 | expect(tex).toBe('Styled\n\nText'); 196 | }); 197 | }); 198 | 199 | describe('Unwrapped content', () => { 200 | it('should convert simple text with `br` tags and the ignoreBreaks argument set to false', async () => { 201 | const html = `Styled
Text`; 202 | const tex = await convertText(html, { ignoreBreaks: false }); 203 | 204 | expect(tex).toBe('Styled\n\nText'); 205 | }); 206 | 207 | it('should convert complex text with `br` tags and the ignoreBreaks argument set to false', async () => { 208 | const html = `Three concentric metal shells
More text here.

Inner p tag

`; 209 | const tex = await convertText(html, { ignoreBreaks: false }); 210 | 211 | expect(tex).toBe('Three concentric metal shells\n\nMore text here.\n\nInner p tag'); 212 | }); 213 | }); 214 | 215 | describe('Converting text with equations', () => { 216 | it('should convert eq wrappers p tags with only an eq to use the \\[ wrapper instead of \\(', async () => { 217 | const html = `

\$x = 5\\Omega\$

`; 218 | const tex = await convertText(html); 219 | 220 | expect(tex).toBe('\\[x = 5\\Omega\\]'); 221 | }); 222 | 223 | it('should convert p tags with only an eq to use the \\[ wrapper instead of $', async () => { 224 | const html = `

$x = 5\\Omega$

`; 225 | const tex = await convertText(html); 226 | 227 | expect(tex).toBe('\\[x = 5\\Omega\\]'); 228 | }); 229 | 230 | it('should not convert p tags with only an eq to use the \\[ wrapper instead of \\( if skipWrappingEquations is true', async () => { 231 | const html = `

\$x = 5\\Omega\$

`; 232 | const tex = await convertText(html, { skipWrappingEquations: true }); 233 | 234 | expect(tex).toBe('\$x = 5\\Omega\$'); 235 | }); 236 | 237 | it('should not convert p tags with only an eq to use the \\[ wrapper instead of $ if skipWrappingEquations is true', async () => { 238 | const html = `

$x = 5\\Omega$

`; 239 | const tex = await convertText(html, { skipWrappingEquations: true }); 240 | 241 | expect(tex).toBe('$x = 5\\Omega$'); 242 | }); 243 | 244 | it('should not modify eq wrappers in p tags with an eq and other content', async () => { 245 | const html = `

Some content $x = 5\\Omega$

`; 246 | const tex = await convertText(html); 247 | 248 | expect(tex).toBe('Some content $x = 5\\Omega$'); 249 | }); 250 | 251 | it('should prefer $ eq wrappers if configuration is given', async () => { 252 | const html = `

Some content \$x = 5\\Omega\$

`; 253 | const tex = await convertText(html, { preferDollarInlineMath: true }); 254 | 255 | expect(tex).toBe('Some content $x = 5\\Omega$'); 256 | }); 257 | 258 | it('should handle eqs deep within text without tag wrapping', async () => { 259 | const html = 260 | 'This is some plain text \$A,{\\rm{ }}B\$ and \$C\$ with random equations \$a,{\\rm{ }}b\$ and \$c\$ \$(a < b < c)\$'; 261 | const tex = await convertText(html, { preferDollarInlineMath: true }); 262 | 263 | expect(tex).toBe( 264 | 'This is some plain text $A,{\\rm{ }}B$ and $C$ with random equations $a,{\\rm{ }}b$ and $c$ $(a < b < c)$', 265 | ); 266 | }); 267 | }); 268 | 269 | describe('Converting H tags', () => { 270 | it('should convert simple h tag without special chars', async () => { 271 | const html = `

Heading

`; 272 | const tex = await convertText(html); 273 | 274 | expect(tex).toBe('\\section*{\\centering{Heading}}'); 275 | }); 276 | 277 | it('should convert simple h2 tag without special chars', async () => { 278 | const html = `

Heading

`; 279 | const tex = await convertText(html); 280 | 281 | expect(tex).toBe('\\subsection*{Heading}'); 282 | }); 283 | 284 | it('should convert simple h3 tag without special chars', async () => { 285 | const html = `

Heading

`; 286 | const tex = await convertText(html); 287 | 288 | expect(tex).toBe('\\subsubsection*{Heading}'); 289 | }); 290 | 291 | it('should convert simple h tag with special chars', async () => { 292 | const html = `

Heading's

`; 293 | const tex = await convertText(html); 294 | 295 | expect(tex).toBe("\\section*{\\centering{Heading's}}"); 296 | }); 297 | 298 | it('should convert h tag with embedded css', async () => { 299 | const html = `

Heading's

`; 300 | const tex = await convertText(html); 301 | 302 | expect(tex).toBe("\\section*{\\centering{Heading's}}"); 303 | }); 304 | 305 | it('should convert h tag with embedded css and special characters', async () => { 306 | const html = 307 | '

Newton's Laws of Motion

'; 308 | const tex = await convertText(html); 309 | 310 | expect(tex).toBe("\\section*{\\centering{\\underline{\\textbf{Newton's Laws of Motion}}}}"); 311 | }); 312 | }); 313 | 314 | describe('Converting divider tags', () => { 315 | it('should convert simple divider tag', async () => { 316 | const html = `

Text

More Text

`; 317 | const tex = await convertText(html); 318 | 319 | expect(tex).toBe('Text\n\n\\hrule\n\n\nMore Text'); 320 | }); 321 | }); 322 | 323 | describe('Converting img tags', () => { 324 | it('should convert simple img tag', async () => { 325 | const html = `

`; 326 | const tex = await convertText(html, { autoGenImageNames: false }); 327 | 328 | expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image.png}\n\\end{center}'); 329 | }); 330 | 331 | it('should convert wrapped img tag', async () => { 332 | const spy = jest.spyOn(ShortId, 'generate'); 333 | spy.mockImplementation(() => 'image2'); 334 | 335 | const html = `

`; 336 | const tex = await convertText(html); 337 | 338 | expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image2.png}\n\\end{center}'); 339 | 340 | spy.mockClear(); 341 | }); 342 | 343 | it('should default to a jpg extension when converting img tag with a image url without a extension', async () => { 344 | const spy = jest.spyOn(ShortId, 'generate'); 345 | spy.mockImplementation(() => 'image2'); 346 | 347 | const html = `

`; 348 | const tex = await convertText(html); 349 | 350 | expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image2.jpg}\n\\end{center}'); 351 | 352 | spy.mockClear(); 353 | }); 354 | 355 | it('should add width restrictions when given', async () => { 356 | const html = `

`; 357 | const tex = await convertText(html, { autoGenImageNames: false, imageWidth: '2cm' }); 358 | 359 | expect(tex).toBe( 360 | '\\begin{center}\n\t\\includegraphics[width=2cm]{images/image.png}\n\\end{center}', 361 | ); 362 | }); 363 | 364 | it('should add height restrictions when given', async () => { 365 | const html = `

`; 366 | const tex = await convertText(html, { autoGenImageNames: false, imageHeight: '2cm' }); 367 | 368 | expect(tex).toBe( 369 | '\\begin{center}\n\t\\includegraphics[height=2cm]{images/image.png}\n\\end{center}', 370 | ); 371 | }); 372 | 373 | it('should keep aspect ratio when given and width or height are restricted', async () => { 374 | const html = `

`; 375 | const tex = await convertText(html, { 376 | autoGenImageNames: false, 377 | imageHeight: '2cm', 378 | keepImageAspectRatio: true, 379 | }); 380 | 381 | expect(tex).toBe( 382 | '\\begin{center}\n\t\\includegraphics[height=2cm,keepaspectratio]{images/image.png}\n\\end{center}', 383 | ); 384 | }); 385 | 386 | it('should ignore aspect ratio when given if width or height are not restricted', async () => { 387 | const html = `

`; 388 | const tex = await convertText(html, { autoGenImageNames: false, keepImageAspectRatio: true }); 389 | 390 | expect(tex).toBe('\\begin{center}\n\t\\includegraphics{images/image.png}\n\\end{center}'); 391 | }); 392 | 393 | it('should not center the image', async () => { 394 | const html = `

`; 395 | const tex = await convertText(html, { autoGenImageNames: false, centerImages: false }); 396 | 397 | expect(tex).toBe('\\includegraphics{images/image.png}'); 398 | }); 399 | }); 400 | 401 | describe('Converting list tags', () => { 402 | it('should convert simple ul list tag', async () => { 403 | const html = `

Angle reaction

`; 404 | const tex = await convertText(html); 405 | 406 | expect(tex).toBe('\\begin{itemize}\n\t\\item Angle reaction\n\\end{itemize}'); 407 | }); 408 | 409 | it('should convert simple ol list tag', async () => { 410 | const html = `

Angle reaction

`; 411 | const tex = await convertText(html); 412 | 413 | expect(tex).toBe('\\begin{enumerate}\n\t\\item Angle reaction\n\\end{enumerate}'); 414 | }); 415 | }); 416 | 417 | describe('Converting with debug flag', () => { 418 | it('should display errors when converting img tag with an inaccessible source url with the debug flag', async () => { 419 | const spy = jest.spyOn(console, 'debug').mockImplementation(); 420 | const html = `

`; 421 | 422 | await convertText(html, { autoGenImageNames: false, debug: true }); 423 | 424 | expect(spy).toBeCalledTimes(2); 425 | 426 | spy.mockRestore(); 427 | }); 428 | 429 | it('should not display errors when converting img tag with an inaccessible source url without the debug flag', async () => { 430 | const spy = jest.spyOn(console, 'debug').mockImplementation(); 431 | const html = `

`; 432 | 433 | await convertText(html, { autoGenImageNames: false }); 434 | 435 | expect(spy).toBeCalledTimes(0); 436 | 437 | spy.mockRestore(); 438 | }); 439 | }); 440 | }); 441 | 442 | describe('convertFile', () => { 443 | describe('Converting mixed tags', () => { 444 | it('should convert text with a mixture of nested tags', async () => { 445 | await convertFile(resolve(__dirname, '../test-cases/2/index.html'), { 446 | includeDocumentWrapper: false, 447 | }); 448 | 449 | const tex = await readFile(resolve(__dirname, '../test-cases/2/index.html.tex'), 'utf-8'); 450 | const text = [ 451 | "\\section*{\\centering{\\underline{\\textbf{Newton's Laws of Motion}}}}", 452 | '', 453 | '\\subsection*{\\textbf{Concept of Forces}}', 454 | '', 455 | 'Some types of forces may be (i) Contact forces, (ii) Non-contact forces \\textbf{Contact forces} involve physical contact between two objects.', 456 | ]; 457 | 458 | expect(tex).toBe(text.join('\n')); 459 | 460 | await remove(resolve(__dirname, '../test-cases/2/index.html.tex')); 461 | }); 462 | }); 463 | 464 | it('should convert text without tag wrapper while ignoring break tags', async () => { 465 | const spy = jest.spyOn(ShortId, 'generate'); 466 | spy.mockImplementation(() => 'image2'); 467 | 468 | await convertFile(resolve(__dirname, '../test-cases/3/index.html'), { ignoreBreaks: false }); 469 | 470 | const tex = await readFile(resolve(__dirname, '../test-cases/3/index.html.tex'), 'utf-8'); 471 | const ref = await readFile(resolve(__dirname, '../test-cases/3/output.tex'), 'utf-8'); 472 | 473 | expect(tex).toBe(ref); 474 | 475 | await remove(resolve(__dirname, '../test-cases/3/index.html.tex')); 476 | 477 | spy.mockClear(); 478 | }); 479 | }); 480 | --------------------------------------------------------------------------------