├── .gitignore ├── src ├── global.d.ts ├── config.ts ├── types.ts ├── validateOptions.ts ├── index.ts └── generate.ts ├── LICENSE ├── tsconfig.json ├── package.json └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | lib 3 | -------------------------------------------------------------------------------- /src/global.d.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Bucher + Suter. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | declare module 'html-toc'; -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Bucher + Suter. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | import fs from 'fs-extra'; 9 | import importFresh from 'import-fresh'; 10 | import {DocusaurusConfig} from '@docusaurus/types'; 11 | 12 | export default function loadConfig(configPath: string): DocusaurusConfig { 13 | 14 | const pluginLogPrefix = '[papersaurus] '; 15 | 16 | if (!fs.existsSync(configPath)) { 17 | throw new Error(`${pluginLogPrefix}Config file "${configPath}" not found`); 18 | } 19 | 20 | const loadedConfig = importFresh(configPath) as DocusaurusConfig; 21 | return loadedConfig 22 | } 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Bucher + Suter. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2017", 4 | "module": "commonjs", 5 | "lib": ["es2017","es2019.array","dom"], 6 | "declaration": true, 7 | "declarationMap": false, 8 | "types": ["node"], 9 | 10 | /* Strict Type-Checking Options */ 11 | "strict": true, 12 | "strictNullChecks": true, 13 | "strictFunctionTypes": true, 14 | "strictBindCallApply": true, 15 | "strictPropertyInitialization": true, 16 | "noImplicitThis": true, 17 | "alwaysStrict": true, 18 | 19 | /* Additional Checks */ 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": true, 22 | "noImplicitReturns": true, 23 | "noFallthroughCasesInSwitch": true, 24 | 25 | /* Module Resolution Options */ 26 | "moduleResolution": "node", 27 | "allowSyntheticDefaultImports": true, 28 | "esModuleInterop": true, 29 | 30 | /* Advanced Options */ 31 | "resolveJsonModule": true, 32 | 33 | /* Plugin*/ 34 | "incremental": true, 35 | "tsBuildInfoFile": "./lib/.tsbuildinfo", 36 | "rootDir": "src", 37 | "outDir": "lib", 38 | "skipLibCheck": true 39 | }, 40 | "exclude": ["node_modules", "**/lib/**/*"] 41 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "docusaurus-plugin-papersaurus", 3 | "version": "2.0.3", 4 | "description": "Papersaurus plugin for Docusaurus (create PDF files).", 5 | "main": "lib/index.js", 6 | "scripts": { 7 | "build": "tsc", 8 | "watch": "tsc --watch" 9 | }, 10 | "publishConfig": { 11 | "access": "public" 12 | }, 13 | "repository": { 14 | "type": "git", 15 | "url": "https://github.com/simologos/docusaurus-plugin-papersaurus.git" 16 | }, 17 | "license": "MIT", 18 | "dependencies": { 19 | "@docusaurus/core": "^2.3.0", 20 | "@docusaurus/plugin-content-docs": "^2.3.0", 
21 | "cheerio": "^1.0.0-rc.12", 22 | "easy-pdf-merge": "^0.2.6", 23 | "express": "^4.17.1", 24 | "fs-extra": "^10.0.0", 25 | "github-slugger": "^1.4.0", 26 | "html-toc": "^2.0.0", 27 | "net": "^1.0.2", 28 | "path": "^0.12.7", 29 | "pdf-parse": "^1.1.1", 30 | "puppeteer": "^22.4.0" 31 | }, 32 | "devDependencies": { 33 | "@docusaurus/types": "^2.3.0", 34 | "@types/express": "^4.17.13", 35 | "@types/fs-extra": "^9.0.13", 36 | "@types/node": "^16.11.2", 37 | "@types/puppeteer": "^7.0.4", 38 | "typescript": "^4.4.4" 39 | }, 40 | "engines": { 41 | "node": ">=12.13.0" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Bucher + Suter. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | export interface PluginOptions { 9 | addDownloadButton?: boolean; 10 | autoBuildPdfs?: boolean; 11 | downloadButtonText?: string; 12 | ignoreDocs?: string[]; 13 | stylesheets?: string[]; 14 | alwaysIncludeSiteStyles?: boolean; 15 | scripts?: string[]; 16 | coverPageHeader?: string; 17 | coverPageFooter?: string; 18 | getPdfCoverPage?: PageFunction; 19 | getPdfPageHeader?: PageFunction; 20 | getPdfPageFooter?: PageFunction; 21 | margins?: Margins; 22 | coverMargins?: Margins; 23 | author?: string; 24 | footerParser?: RegExp; 25 | keepDebugHtmls?: boolean; 26 | puppeteerTimeout?: number; 27 | sidebarNames?: string[]; 28 | versions?: string[]; 29 | productVersion?: string; 30 | subfolders?: string[]; 31 | productTitles?: string[]; 32 | useExtraPaths?: UsePath[]; 33 | ignoreCssSelectors?: string[]; 34 | jQueryUrl?: string; 35 | getPdfFileName?: FileNameFunction; 36 | } 37 | 38 | export type PapersaurusPluginOptions = Required; 39 | 40 | export type PageFunction = ( 41 | siteConfig: any, 42 | pluginConfig: 
PapersaurusPluginOptions, 43 | pageTitle: string, 44 | version: string 45 | ) => string; 46 | 47 | export type FileNameFunction = ( 48 | siteConfig: any, 49 | pluginConfig: PapersaurusPluginOptions, 50 | pageTitle: string, 51 | pageId: string, 52 | parentTitles: string[], 53 | parentIds: string[], 54 | version: string, 55 | versionPath: string 56 | ) => string; 57 | 58 | export type UsePath = { 59 | serverPath: string, 60 | localPath: string 61 | }; 62 | 63 | export type TocInfo = { 64 | link: string, 65 | href: string, 66 | text: string 67 | } 68 | 69 | export type Margins = { 70 | top: string, 71 | right: string, 72 | bottom: string, 73 | left: string 74 | } 75 | -------------------------------------------------------------------------------- /src/validateOptions.ts: -------------------------------------------------------------------------------- 1 | import { Joi } from "@docusaurus/utils-validation"; 2 | import { PluginOptions, PapersaurusPluginOptions, PageFunction, FileNameFunction, Margins, UsePath } from "./types"; 3 | const he = require('he'); 4 | 5 | const isStringOrArrayOfStrings = Joi.alternatives().try( 6 | Joi.string(), 7 | Joi.array().items(Joi.string()) 8 | ); 9 | 10 | const defaultCoverPageFunction: PageFunction = (siteConfig, _pluginConfig, pageTitle, _version) => { 11 | return ` 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |

${siteConfig.projectName}

21 |

${(pageTitle || siteConfig.tagline)}

22 | 23 |
24 |
Author:
25 |
Your name
26 |
Date:
27 |
${new Date().toISOString().substring(0,10)}
28 |
29 |

30 | 31 | 32 | 33 | `; 34 | }; 35 | 36 | const defaultPageHeaderFunction: PageFunction = (_siteConfig, pluginConfig, pageTitle, _version) => { 37 | return ` 38 |
39 | ${pluginConfig.author} 40 | ${pageTitle} 41 |
42 | `; 43 | }; 44 | 45 | const defaultPageFooterFunction: PageFunction = (_siteConfig, pluginConfig, _pageTitle, _version) => { 46 | return ` 47 |
48 | © ${pluginConfig.author} 49 | ${new Date().toISOString().substring(0,10)} 50 | Page / 51 |
52 | `; 53 | }; 54 | 55 | const defaultPdfFileNameFunction: FileNameFunction = ( 56 | _siteConfig: any, 57 | _pluginConfig: PapersaurusPluginOptions, 58 | _pageTitle: string, 59 | pageId: string, 60 | _parentTitles: string[], 61 | parentIds: string[], 62 | _version: string, 63 | _versionPath: string) => { 64 | let pdfFilename = he.decode(pageId); 65 | if (parentIds.length > 1) { 66 | pdfFilename = parentIds.slice(1).filter(id => id != "").join('-') + '-' + pdfFilename; 67 | } 68 | return pdfFilename; 69 | }; 70 | 71 | const marginsSchema = Joi.object({ 72 | top: Joi.string().required(), 73 | right: Joi.string().required(), 74 | bottom: Joi.string().required(), 75 | left: Joi.string().required(), 76 | }); 77 | 78 | const schema = Joi.object({ 79 | addDownloadButton: Joi.boolean().default(true), 80 | autoBuildPdfs: Joi.boolean().default(true), 81 | downloadButtonText: Joi.string().default("Download as PDF"), 82 | ignoreDocs: isStringOrArrayOfStrings.default([]), 83 | stylesheets: isStringOrArrayOfStrings.default([]), 84 | alwaysIncludeSiteStyles: Joi.boolean().default(false), 85 | scripts: isStringOrArrayOfStrings.default([]), 86 | coverPageHeader: Joi.string().default("..."), 87 | coverPageFooter: Joi.string().default("..."), 88 | getPdfCoverPage: Joi.func().default(() => defaultCoverPageFunction), 89 | getPdfPageHeader: Joi.func().default(() => defaultPageHeaderFunction), 90 | getPdfPageFooter: Joi.func().default(() => defaultPageFooterFunction), 91 | margins: marginsSchema.default({ 92 | top: "5cm", 93 | right: "2cm", 94 | bottom:"2.3cm", 95 | left: "2cm", 96 | }), 97 | coverMargins: marginsSchema.default({ 98 | top: "10cm", 99 | right: "0", 100 | bottom: "3cm", 101 | left: "0", 102 | }), 103 | author: Joi.string().default("").allow(""), 104 | footerParser: Joi.object().instance(RegExp), 105 | keepDebugHtmls: Joi.boolean().default(false), 106 | puppeteerTimeout: Joi.number().default(30000), 107 | sidebarNames: isStringOrArrayOfStrings.default([]), 108 | versions: 
isStringOrArrayOfStrings.default([]), 109 | productVersion: Joi.string().default("").allow(""), 110 | subfolders: isStringOrArrayOfStrings.default([]), 111 | productTitles: isStringOrArrayOfStrings.default([]), 112 | useExtraPaths: Joi.array().items(Joi.object({ 113 | serverPath: Joi.string().required(), 114 | localPath: Joi.string().required(), 115 | })).default([]), 116 | ignoreCssSelectors: isStringOrArrayOfStrings.default([]), 117 | jQueryUrl: Joi.string().allow('').default("https://code.jquery.com/jquery-3.6.0.min.js"), 118 | getPdfFileName: Joi.func().default(() => defaultPdfFileNameFunction), 119 | }); 120 | 121 | type ValidateFn = ( 122 | schema: Joi.Schema, 123 | options: PluginOptions | undefined 124 | ) => Required; 125 | 126 | export function validateOptions({ 127 | options, 128 | validate, 129 | }: { 130 | options: PluginOptions | undefined; 131 | validate: ValidateFn; 132 | }): Required { 133 | return validate(schema, options || {}); 134 | } 135 | 136 | export function processOptions( 137 | options: PluginOptions | undefined, 138 | ): PapersaurusPluginOptions { 139 | const pluginOptions = { ...options } as PapersaurusPluginOptions; 140 | if (!pluginOptions.footerParser) { 141 | pluginOptions.footerParser = RegExp(`© ${pluginOptions.author}\\d{4}-\\d{2}-\\d{2}Page \\d* \\/ \\d*`, 'g'); 142 | } 143 | 144 | ensureArray(pluginOptions, "ignoreDocs"); 145 | ensureArray(pluginOptions, "stylesheets"); 146 | ensureArray(pluginOptions, "scripts"); 147 | ensureArray(pluginOptions, "sidebarNames"); 148 | ensureArray(pluginOptions, "subfolders"); 149 | ensureArray(pluginOptions, "productTitles"); 150 | ensureArray(pluginOptions, "ignoreCssSelectors"); 151 | 152 | return pluginOptions; 153 | } 154 | 155 | function ensureArray(object: T, key: keyof T): void { 156 | if (!Array.isArray(object[key])) { 157 | (object as any)[key] = [object[key]]; 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/index.ts: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Bucher + Suter. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | import {LoadContext, Plugin, DocusaurusConfig} from '@docusaurus/types'; 9 | import {generatePdfFiles} from './generate'; 10 | import {PluginOptions, PapersaurusPluginOptions} from './types'; 11 | import {processOptions} from './validateOptions'; 12 | import importFresh from 'import-fresh'; 13 | import * as fs from "fs"; 14 | 15 | function loadConfig(configPath: string): DocusaurusConfig { 16 | if (!fs.existsSync(configPath)) { 17 | throw new Error(`Config file "${configPath}" not found`); 18 | } 19 | const loadedConfig = importFresh(configPath) as DocusaurusConfig; 20 | return loadedConfig 21 | } 22 | 23 | export default function ( 24 | _context: LoadContext, 25 | options?: PluginOptions, 26 | ): Plugin { 27 | 28 | let pluginOptions:PapersaurusPluginOptions = processOptions(options); 29 | 30 | return { 31 | 32 | name: 'docusaurus-plugin-papersaurus', 33 | 34 | injectHtmlTags() { 35 | 36 | if (!pluginOptions.addDownloadButton) { 37 | return {}; 38 | } 39 | 40 | const CWD = process.cwd(); 41 | const siteConfig = loadConfig(`${CWD}/docusaurus.config.js`); 42 | 43 | return { 44 | headTags: [` 45 | ${(pluginOptions.jQueryUrl ? 
"" : "")} 46 | ` 162 | ], 163 | }; 164 | }, 165 | 166 | async postBuild(props) { 167 | let forceBuild = process.env.BUILD_PDF || ""; 168 | if ((pluginOptions.autoBuildPdfs && !forceBuild.startsWith("0")) || forceBuild.startsWith("1")) { 169 | await generatePdfFiles(_context.outDir, pluginOptions, props); 170 | } 171 | }, 172 | 173 | }; 174 | } 175 | 176 | export { validateOptions } from "./validateOptions"; 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `docusaurus-plugin-papersaurus` 2 | 3 | Plugin for Docusaurus v2 to generate PDF files including table of contents and a cover page. 4 | 5 | The plugin is creating a PDF file for each individual document as well as a PDF file for any section and subsection found in sidebar and also one overall PDF file containing all sections. 6 | 7 | Optionally the plugin can add a download button to the documentation website which opens a menu with download links for the PDF files of current chapter, current sections and the overall PDF file. 8 | 9 | ## Introduction 10 | 11 | This plugin is ported from existing `papersaurus` project built for [Docusaurus](https://docusaurus.io/) 1.x to use in [Docusaurus](https://docusaurus.io/) 2.x. 12 | 13 | It is a [Docusaurus](https://docusaurus.io/) 2.x plugin and can be triggered automatically after docusaurus builds or on docusaurus command line. 14 | 15 | It uses [Puppeteer](https://pptr.dev/) to convert html pages to PDF. 16 | 17 | ### Please note 18 | 19 | 1. Puppeteer does not yet support individual headers / footers for the cover page. Therefore this plugin generates a PDF with just uses [easy-pdf-merge](https://www.npmjs.com/package/easy-pdf-merge) See [this SO question](https://stackoverflow.com/questions/55470714/trying-to-hide-first-footer-header-on-pdf-generated-with-puppeteer) 20 | 21 | 2. Puppeteer does not yet support the generation of TOCs. 
See [this feature request](https://github.com/puppeteer/puppeteer/issues/1778) and [this Chromium bug](https://bugs.chromium.org/p/chromium/issues/detail?id=840455). Therefore this package generates a PDF, then parses it again to update the page numbers in the TOC. This is why the `footerParser` parameter exists (see below). 22 | 23 | ## Installation 24 | 25 | ``` 26 | yarn add docusaurus-plugin-papersaurus 27 | ``` 28 | or 29 | ``` 30 | npm install docusaurus-plugin-papersaurus --save 31 | ``` 32 | 33 | ## Configuration 34 | 35 | Then adapt your `docusaurus.config.js` with the following block: 36 | 37 | ``` 38 | plugins: [ 39 | [ 40 | 'docusaurus-plugin-papersaurus', 41 | { 42 | keepDebugHtmls: false, 43 | sidebarNames: ['someSidebar'], 44 | addDownloadButton: true, 45 | autoBuildPdfs: true, 46 | ignoreDocs: ['licenses'], 47 | author: 'Author name' 48 | }, 49 | ], 50 | ], 51 | ``` 52 | 53 | ### autoBuildPdfs 54 | 55 | Set this parameter to `true` if you want the plugin to automatically build the PDF files after each docusaurus build. If it is set to `false`, PDFs are only built when the environment variable `BUILD_PDF` is set to a value starting with `1` before running `docusaurus build`. A `BUILD_PDF` value starting with `0` suppresses the PDF build even when `autoBuildPdfs` is `true`. 56 | 57 | Default: `true` 58 | 59 | ### keepDebugHtmls 60 | 61 | The plugin creates one temporary HTML file per generated PDF file that is then converted using [Puppeteer](https://pptr.dev/) to a PDF file. This HTML file also contains the table of contents including page numbers. 62 | After generating the PDF file, these temporary HTML files are deleted. You may want to keep these HTML files to debug your printing CSS file in a browser. 63 | 64 | Set this parameter to `true` to keep the files. 65 | 66 | Default: `false` 67 | 68 | ### puppeteerTimeout 69 | 70 | Large PDFs might cause a timeout error on puppeteer. Use this parameter to set the puppeteer timeout in ms. 71 | 72 | Default: `30000` 73 | 74 | ### sidebarNames 75 | 76 | The plugin is using your `sidebars.js` file to find the sections and documents.
Since the file can contain multiple sidebars, add the name(s) of the sidebar(s) that should be used to generate files. If none is specified all will be used. 77 | 78 | Default: `[]` 79 | 80 | ### versions 81 | 82 | Specify documentation versions to generate pdfs for. If none is specified pdfs will be generated for all versions. 83 | 84 | Default: `[]` 85 | 86 | ### productVersion 87 | 88 | In case the documentation is not versioned, but an external (product) version should be added to the PDFs, inject it using this parameter. 89 | 90 | Default: `""` 91 | 92 | ### subfolders 93 | 94 | If you are using multiple sidebars your files are located in different subfolders. Enter the names of the subfolders located in your docs folder. In case your main sidebar is directly in the docs directory enter an empty string and the names of the other folders. 95 | 96 | Example for different directories for the sidebars: 97 | 98 | ``` 99 | subfolders: ['mainDoc', 'otherDoc'], 100 | ``` 101 | 102 | Example if the main sidebar content is located directly in /doc: 103 | 104 | ``` 105 | subfolders: ['', 'otherDoc'], 106 | ``` 107 | 108 | Default: `[]` 109 | 110 | ### productTitles 111 | 112 | Add the product name for the different sidebars. This title will be included on the cover page as well as in the header. 113 | 114 | The following example shows the configuration if you want to display the product title for the second sidebar but not the first: 115 | 116 | ``` 117 | sidebarNames: ['someSidebar', 'otherSidebar'], 118 | productTitles: ['', 'Other'], 119 | ``` 120 | 121 | This would display the 'Other' name on the cover page and the header for all documentation downloaded from the 'otherSidebar' sidebar but not from the 'someSidebar' 122 | 123 | Default: `[]` 124 | 125 | ### addDownloadButton 126 | 127 | Set this parameter to `true`, if you want the plugin to add a PDF download button to the documentation website. 
128 | 129 | Default: `true` 130 | 131 | ### downloadButtonText 132 | 133 | Use this parameter to define the text of the download button. 134 | If you prefer to have an icon instead of a text button, you can replace the text with an icon in your CSS stylesheet. 135 | 136 | Default: `Download as PDF` 137 | 138 | ### ignoreDocs 139 | 140 | If you want to exclude some documents from the section and overall PDFs and have them available only as individual chapter PDFs, add their ids to this parameter. 141 | 142 | The parameter type is a string array. 143 | 144 | Default: `[]` 145 | 146 | ### stylesheets 147 | 148 | Add the style sheets you would use for printing here. Add the same entries as in the `stylesheets` field of your `docusaurus.config.js` if you want to use the styles used on the docusaurus web page. 149 | 150 | The parameter type is a string array. 151 | 152 | Default: `[]` 153 | 154 | ### alwaysIncludeSiteStyles 155 | 156 | Set this parameter to `true` if you want the plugin to include the styles generated by docusaurus even when you have specified your own `stylesheets`. 157 | 158 | Default: `false` 159 | 160 | ### scripts 161 | 162 | Add the scripts you would use for printing here. Add the same entries as in the `scripts` field of your `docusaurus.config.js` if you want to use the scripts used on the docusaurus web page. 163 | 164 | The parameter type is a string array. 165 | 166 | Default: `[]` 167 | 168 | ### coverPageHeader 169 | 170 | String containing HTML code which will be displayed as the header of the cover page. 171 | 172 | Default: `'...'` 173 | 174 | ### coverPageFooter 175 | 176 | String containing HTML code which will be displayed as the footer of the cover page. 177 | 178 | Default: `'...'` 179 | 180 | ### getPdfCoverPage 181 | 182 | Function which returns the Cover Page as HTML. Example: 183 | 184 | ``` 185 | getPdfCoverPage: (siteConfig, pluginConfig, pageTitle, version) => { 186 | return ` 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 |
195 |

${siteConfig.projectName}

196 |

${(pageTitle || siteConfig.tagline)}

197 | 198 |
199 |
Author:
200 |
Your name
201 |
Date:
202 |
${new Date().toISOString().substring(0,10)}
203 |
204 |

205 | 206 | 207 | 208 | `; 209 | } 210 | ``` 211 | 212 | ### getPdfPageHeader 213 | 214 | Function which returns the Header of the content pages as HTML. Example: 215 | 216 | ``` 217 | getPdfPageHeader: (siteConfig, pluginConfig, pageTitle) => { 218 | return ` 219 |
220 |
221 | 226 |
227 | ${pageTitle} 228 |
229 | `; 230 | }, 231 | ``` 232 | 233 | ### getPdfPageFooter 234 | 235 | Function which returns the Footer of the content pages as HTML. Example: 236 | 237 | ``` 238 | getPdfPageFooter: (siteConfig, pluginConfig, pageTitle) => { 239 | return ` 240 |
241 | © You 242 | ${new Date().toISOString().substring(0,10)} 243 | Page / 244 |
`; 245 | }, 246 | ``` 247 | 248 | Puppeteer uses classes to inject values at print time. See: https://pptr.dev/#?product=Puppeteer&version=v3.0.4&show=api-pagepdfoptions 249 | 250 | ### author 251 | 252 | String you would like to use as author. 253 | 254 | The value may be used in `getPdfCoverPage`, `getPdfPageHeader` or `getPdfPageFooter` with `pluginConfig.author`. 255 | 256 | ### footerParser 257 | 258 | In order to update the TOC with the correct page numbers, this package has to parse the generated PDF and then manually update the TOC. In order to split the parsed text by pages, a regular expression is used to identify the content footer text. Think of calling jQuery's ```$.text()``` on the footer wrapper. The regular expression must match this text. If this parameter is not set, a default expression matching the default page footer (built from `author`) is used. 259 | 260 | Example: 261 | 262 | ```/© Your Company\d{4}-\d{2}-\d{2}Page \d* \/ \d*/g``` 263 | 264 | ### coverMargins 265 | 266 | Margins for the cover page. 267 | 268 | Default: 269 | ``` 270 | { 271 | top: "10cm", 272 | right: "0", 273 | bottom: "3cm", 274 | left: "0", 275 | } 276 | ``` 277 | 278 | ### margins 279 | 280 | Margins for content pages. 281 | 282 | Default: 283 | ``` 284 | { 285 | top: "5cm", 286 | right: "2cm", 287 | bottom:"2.3cm", 288 | left: "2cm", 289 | } 290 | ``` 291 | 292 | ### useExtraPaths 293 | 294 | In case you have stylesheets or scripts that need to be included from a folder other than the output folder, specify them here. 295 | 296 | Example: 297 | ```useExtraPaths: [{serverPath: "/", localPath: ".."}]``` 298 | 299 | Default: `[]` 300 | 301 | ### ignoreCssSelectors 302 | 303 | A list of css selectors that can be used to remove elements of the html before rendering it to pdf. 304 | 305 | Example: 306 | ```ignoreCssSelectors: [".breadcrumbs", ".theme-doc-version-badge"]``` 307 | 308 | Default: `[]` 309 | 310 | ### jQueryUrl 311 | 312 | This plugin requires jQuery to insert a download button if `addDownloadButton` is set to `true`. Leave empty in case you provide jQuery some other way.
313 | 314 | Default: `https://code.jquery.com/jquery-3.6.0.min.js` 315 | 316 | ### getPdfFileName 317 | 318 | Override this function to customize the file name of the generated pdfs. By default the names are based on the page ids. 319 | 320 | Example: 321 | ``` 322 | getPdfFileName: (siteConfig, pluginConfig, pageTitle, pageId, parentTitles, parentIds, version, versionPath) => { 323 | let verString = version; 324 | if (verString == "current" || verString == "next") { 325 | verString = versionPath; 326 | } 327 | verString = verString.replace(".", "_"); 328 | if (parentIds.length == 0) { 329 | return "mydoc-" + verString; 330 | } 331 | let pdfFilename = he.decode(pageId); 332 | if (parentIds.length > 1) { 333 | pdfFilename = parentIds.slice(1).filter(id => id != "").join('-') + '-' + pdfFilename; 334 | } 335 | return "mydoc-" + verString + pdfFilename; 336 | } 337 | ``` 338 | 339 | ## Limitation 340 | 341 | - Just documentations are generated, no pages or blog posts 342 | - No support for translations 343 | -------------------------------------------------------------------------------- /src/generate.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Bucher + Suter. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | import { 9 | PapersaurusPluginOptions, 10 | TocInfo, 11 | } from './types'; 12 | import { Props, LoadedPlugin } from '@docusaurus/types'; 13 | import { LoadedContent, LoadedVersion, DocMetadata } from "@docusaurus/plugin-content-docs" 14 | import puppeteer = require('puppeteer'); 15 | import toc = require('html-toc'); 16 | const pdfMerge = require('easy-pdf-merge'); 17 | const pdfParse = require('pdf-parse'); 18 | const join = require('path').join; 19 | import express = require('express'); 20 | import { AddressInfo } from 'net'; 21 | import * as fs from 'fs-extra'; 22 | const GithubSlugger = require('github-slugger'); 23 | const cheerio = require('cheerio'); 24 | 25 | let slugger = new GithubSlugger(); 26 | 27 | const pluginLogPrefix = '[papersaurus] '; 28 | 29 | export async function generatePdfFiles( 30 | outDir: string, 31 | pluginOptions: PapersaurusPluginOptions, 32 | { siteConfig, plugins }: Props) { 33 | 34 | console.log(`${pluginLogPrefix}Execute generatePdfFiles...`); 35 | 36 | if (!plugins) { 37 | throw new Error(`${pluginLogPrefix}No docs plugin found.`); 38 | } 39 | 40 | const docsPlugins = plugins.filter( 41 | (item) => item.name === "docusaurus-plugin-content-docs" 42 | ); 43 | if (docsPlugins.length > 1 || docsPlugins.length == 0) { 44 | throw new Error(`${pluginLogPrefix}Too many or too few docs plugins found, only 1 is supported.`); 45 | } 46 | let docPlugin: LoadedPlugin = docsPlugins[0]; 47 | 48 | // Check if docusaurus build directory exists 49 | const docusaurusBuildDir = outDir; 50 | if (!fs.existsSync(docusaurusBuildDir) || 51 | !fs.existsSync(join(docusaurusBuildDir, 'index.html')) || 52 | !fs.existsSync(join(docusaurusBuildDir, '404.html'))) { 53 | throw new Error( 54 | `${pluginLogPrefix}Could not find a valid docusaurus build directory at "${docusaurusBuildDir}". ` + 55 | 'Did you run "docusaurus build" before?' 
56 | ); 57 | } 58 | 59 | // Check pdf build directory and clean if requested 60 | const pdfPath = 'pdfs'; 61 | const pdfBuildDir = join(docusaurusBuildDir, pdfPath); 62 | fs.ensureDirSync(pdfBuildDir); 63 | console.log(`${pluginLogPrefix}Clean pdf build folder '${pdfBuildDir}'`); 64 | fs.emptyDirSync(pdfBuildDir); 65 | 66 | // Start local webserver and host files in docusaurus build folder 67 | const app = express(); 68 | const httpServer = await app.listen(); 69 | const address = httpServer.address(); 70 | if (!address || !isAddressInfo(address)) { 71 | httpServer.close(); 72 | throw new Error(`${pluginLogPrefix}Something went wrong spinning up the express webserver.`); 73 | } 74 | app.use(siteConfig.baseUrl, express.static(docusaurusBuildDir)); 75 | for (const extraUsePath of pluginOptions.useExtraPaths) { 76 | let localPath = extraUsePath.localPath; 77 | if (localPath == '..') { 78 | localPath = join(docusaurusBuildDir, localPath); 79 | } 80 | app.use(extraUsePath.serverPath, express.static(localPath)); 81 | } 82 | const siteAddress = `http://127.0.0.1:${address.port}${siteConfig.baseUrl}`; 83 | console.log(`${pluginLogPrefix}Server started at ${siteAddress}`); 84 | 85 | // Start a puppeteer browser 86 | const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-web-security'] }); 87 | 88 | const linkToFile: { [key: string]: { label: string, file: string, type: 'root' | 'section' | 'chapter' }[] } = {}; 89 | 90 | // Loop through all found versions 91 | for (const versionInfo of (docPlugin.content as LoadedContent).loadedVersions) { 92 | if (pluginOptions.versions.length != 0 && !pluginOptions.versions.includes(versionInfo.versionName)) { 93 | // Skip this version as it was not specified in versions option. 
94 | continue; 95 | } 96 | 97 | console.log(`${pluginLogPrefix}Processing version '${versionInfo.label}'`); 98 | 99 | if (pluginOptions.sidebarNames.length == 0) { 100 | // No sidebar specified, use all of them. 101 | let allSidebarNames: string[] = []; 102 | for (const name in versionInfo.sidebars) { 103 | allSidebarNames.push(name); 104 | } 105 | pluginOptions.sidebarNames = allSidebarNames; 106 | } 107 | // Loop through all configured sidebar names 108 | for (const [i, sidebarName] of pluginOptions.sidebarNames.entries()) { 109 | 110 | slugger = new GithubSlugger(); 111 | let folderName = ''; 112 | let productTitle = ''; 113 | 114 | if (pluginOptions.productTitles && pluginOptions.productTitles.length > i) { 115 | productTitle = pluginOptions.productTitles[i]; 116 | } 117 | 118 | if (pluginOptions.subfolders && pluginOptions.subfolders.length > i) { 119 | folderName = pluginOptions.subfolders[i]; 120 | } 121 | 122 | // Create build folder for that version 123 | let versionPath = getVersionPath(versionInfo, siteConfig); 124 | const versionPdfPath = [pdfPath, versionPath, folderName].filter(str => str != "").join("/"); 125 | const versionBuildDir = join(pdfBuildDir, versionPath, folderName); 126 | fs.ensureDirSync(versionBuildDir); 127 | 128 | console.log(`${pluginLogPrefix}Start processing sidebar named '${sidebarName}' in version '${versionInfo.label}'`); 129 | 130 | let sidebar = versionInfo.sidebars[sidebarName]; 131 | if (sidebar) { 132 | let projectName = siteConfig.projectName; 133 | if (!projectName) { 134 | console.log(`${pluginLogPrefix}Docusaurus projectName not set, using placeholder...`); 135 | projectName = 'Unnamed project'; 136 | } 137 | // Create a fake category with root of sidebar 138 | const rootCategory: any = { 139 | type: 'category', 140 | label: projectName, 141 | unversionedId: projectName, 142 | items: sidebar, 143 | collapsed: true, 144 | collapsible: true 145 | }; 146 | 147 | // Browse through all documents of this sidebar 148 | 
pickHtmlArticlesRecursive(rootCategory, [], versionInfo, `${siteAddress}docs/`, docusaurusBuildDir, siteConfig); 149 | 150 | let productVersion = ""; 151 | 152 | if ((docPlugin.content as LoadedContent).loadedVersions.length === 1){ 153 | productVersion = pluginOptions.productVersion; 154 | } 155 | 156 | // Create all PDF files for this sidebar 157 | await createPdfFilesRecursive( 158 | rootCategory, 159 | [], 160 | [], 161 | versionInfo, 162 | pluginOptions, 163 | siteConfig, 164 | versionBuildDir, 165 | versionPdfPath, 166 | browser, 167 | siteAddress, 168 | productTitle, 169 | productVersion 170 | ); 171 | 172 | // Save url to filename mappings 173 | saveUrlToFileMappingsRecursive(rootCategory.items, linkToFile, [{ 174 | label: rootCategory.label, 175 | file: rootCategory.pdfFilename, 176 | type: 'root' 177 | }]) 178 | } 179 | else { 180 | console.log(`${pluginLogPrefix}Sidebar '${sidebarName}' doesn't exist in version '${versionInfo.label}', continue without it...`); 181 | } 182 | 183 | } 184 | 185 | } 186 | 187 | fs.writeFileSync(join(docusaurusBuildDir, 'pdfs.json'), JSON.stringify(linkToFile)); 188 | 189 | browser.close(); 190 | httpServer.close(); 191 | 192 | console.log(`${pluginLogPrefix}generatePdfFiles finished!`); 193 | } 194 | 195 | function stripTrailingSlash (str: string) { 196 | return str.endsWith('/') ? 
197 | str.slice(0, -1) : str; 198 | }; 199 | 200 | function saveUrlToFileMappingsRecursive( 201 | sideBarItems: any[], 202 | output: { [key: string]: { label: string, file: string, type: 'root' | 'section' | 'chapter' }[] }, 203 | parents: { label: string, file: string, type: 'root' | 'section' | 'chapter' }[] = []) { 204 | 205 | for (const item of sideBarItems) { 206 | if (item.permalink) { 207 | output[stripTrailingSlash(item.permalink)] = [...parents, { 208 | label: item.label, 209 | file: item.pdfFilename, 210 | type: 'chapter' 211 | }]; 212 | } 213 | 214 | if (item.items) { 215 | saveUrlToFileMappingsRecursive(item.items, output, [...parents, { 216 | label: item.label, 217 | file: item.pdfFilename, 218 | type: 'section' 219 | }]); 220 | } 221 | } 222 | }; 223 | 224 | function pickHtmlArticlesRecursive(sideBarItem: any, 225 | parentTitles: string[], 226 | version: LoadedVersion, 227 | rootDocUrl: string, 228 | htmlDir: string, 229 | siteConfig: any) { 230 | switch (sideBarItem.type) { 231 | case 'category': { 232 | const hasDocLink = sideBarItem.link && sideBarItem.link.type == 'doc'; 233 | if (hasDocLink) { 234 | let path = htmlDir; 235 | for (const doc of version.docs) { 236 | if (doc.id == sideBarItem.link.id) { 237 | sideBarItem.id = doc.id; 238 | sideBarItem.unversionedId = doc.unversionedId.split("/").pop(); 239 | sideBarItem.permalink = doc.permalink; 240 | path = join(path, getPermaLink(doc, siteConfig)); 241 | break; 242 | } 243 | } 244 | readHtmlForItem(sideBarItem, parentTitles, rootDocUrl, path, version, siteConfig); 245 | } 246 | else { 247 | sideBarItem.unversionedId = sideBarItem.label || "untitled"; 248 | } 249 | const newParentTitles = [...parentTitles]; 250 | newParentTitles.push(sideBarItem.label); 251 | for (const categorySubItem of sideBarItem.items) { 252 | pickHtmlArticlesRecursive(categorySubItem, newParentTitles, version, rootDocUrl, htmlDir, siteConfig); 253 | if (!hasDocLink && !sideBarItem.stylePath) { 254 | sideBarItem.stylePath = 
categorySubItem.stylePath; 255 | sideBarItem.scriptPath = categorySubItem.scriptPath; 256 | } 257 | } 258 | break; 259 | } 260 | case 'doc': { 261 | // Merge properties we need that is specified on the document. 262 | let path = htmlDir; 263 | for (const doc of version.docs) { 264 | if (doc.id == sideBarItem.id || doc.unversionedId == sideBarItem.id) { 265 | sideBarItem.label = doc.title; 266 | sideBarItem.unversionedId = doc.unversionedId.split("/").pop(); 267 | sideBarItem.permalink = doc.permalink; 268 | path = join(path, getPermaLink(doc, siteConfig)); 269 | break; 270 | } 271 | } 272 | readHtmlForItem(sideBarItem, parentTitles, rootDocUrl, path, version, siteConfig); 273 | break; 274 | } 275 | default: 276 | break; 277 | } 278 | } 279 | 280 | async function createPdfFilesRecursive(sideBarItem: any, 281 | parentTitles: string[], 282 | parentIds: string[], 283 | version: LoadedVersion, 284 | pluginOptions: PapersaurusPluginOptions, 285 | siteConfig: any, 286 | buildDir: string, 287 | pdfPath: string, 288 | browser: puppeteer.Browser, 289 | siteAddress: string, 290 | productTitle: string, 291 | productVersion: string 292 | ): Promise { 293 | 294 | let articles: any[] = []; 295 | switch (sideBarItem.type) { 296 | case 'category': { 297 | if (sideBarItem.permalink) { 298 | articles.push(sideBarItem); 299 | } 300 | const newParentTitles = [...parentTitles]; 301 | newParentTitles.push(sideBarItem.label); 302 | const newParentIds = [...parentIds]; 303 | newParentIds.push(sideBarItem.unversionedId); 304 | for (const categorySubItem of sideBarItem.items) { 305 | const subDocs = await createPdfFilesRecursive(categorySubItem, 306 | newParentTitles, 307 | newParentIds, 308 | version, 309 | pluginOptions, 310 | siteConfig, 311 | buildDir, 312 | pdfPath, 313 | browser, 314 | siteAddress, 315 | productTitle, 316 | productVersion 317 | ); 318 | articles.push(...subDocs); 319 | } 320 | break; 321 | } 322 | case 'doc': { 323 | articles.push(sideBarItem); 324 | break; 325 | } 326 
| default: 327 | break; 328 | } 329 | 330 | let pdfFilename = pluginOptions.getPdfFileName(siteConfig, pluginOptions, sideBarItem.label, sideBarItem.unversionedId, parentTitles, parentIds, version.versionName, version.path); 331 | pdfFilename = slugger.slug(pdfFilename); 332 | 333 | let documentTitle = sideBarItem.label || ''; 334 | 335 | if (parentTitles.length > 1) { 336 | documentTitle = parentTitles.slice(1).join(' / ') + ' / ' + documentTitle; 337 | } 338 | 339 | if (productTitle) { 340 | documentTitle = productTitle + ' / ' + documentTitle; 341 | } 342 | 343 | if (articles.length > 0) { 344 | await createPdfFromArticles(documentTitle, 345 | productVersion || version.label, 346 | pdfFilename, 347 | articles, 348 | pluginOptions, 349 | siteConfig, 350 | buildDir, 351 | browser, 352 | siteAddress); 353 | 354 | sideBarItem.pdfFilename = `${pdfPath}/${pdfFilename}.pdf`; 355 | } 356 | 357 | return articles; 358 | } 359 | 360 | function readHtmlForItem( 361 | item: any, 362 | parentTitles: string[], 363 | rootDocUrl: string, 364 | htmlDir: string, 365 | version: LoadedVersion, 366 | siteConfig: any) { 367 | 368 | let htmlFilePath = htmlDir; 369 | htmlFilePath = join(htmlFilePath, 'index.html'); 370 | 371 | let stylePath = ''; 372 | let scriptPath = ''; 373 | let html = ''; 374 | 375 | console.log(`${pluginLogPrefix}Reading file ${htmlFilePath}`); 376 | 377 | let htmlFileContent: string = fs.readFileSync(htmlFilePath, { encoding: 'utf8' }); 378 | 379 | const origin = (new URL(rootDocUrl)).origin; 380 | stylePath = getStylesheetPathFromHTML(htmlFileContent, origin); 381 | 382 | try { 383 | scriptPath = getScriptPathFromHTML(htmlFileContent, origin); 384 | } 385 | catch { 386 | } 387 | 388 | const articleMatch = htmlFileContent.match(/
.*<\/article>/s); 389 | if (articleMatch) { 390 | html = articleMatch[0]; 391 | const markDownDivPos = html.indexOf('
'); 392 | const footerPos = html.indexOf('
0 && footerPos > markDownDivPos) { 394 | html = html.substring(markDownDivPos, footerPos); 395 | } 396 | } 397 | html = html.replace(/loading="lazy"/g, 'loading="eager"'); 398 | 399 | // Search for title in h1 tag 400 | let titleMatch = html.match(/

.*<\/h1>/s); 401 | if (!titleMatch) { 402 | titleMatch = html.match(/

.*<\/h1>/s); 403 | } 404 | if (titleMatch) { 405 | const h1Tag = titleMatch[0]; 406 | // Save found title in item 407 | item.pageTitle = h1Tag.substring(h1Tag.indexOf('>') + 1, h1Tag.indexOf('

')); 408 | 409 | // Add parent titles in front of existing title in h1 tag 410 | let newTitle = item.pageTitle; 411 | if (parentTitles.length > 1) { 412 | newTitle = parentTitles.slice(1).join(' / ') + ' / ' + item.pageTitle; 413 | } 414 | const newH1Tag = h1Tag.substring(0, h1Tag.indexOf('>') + 1) + newTitle + h1Tag.substring(h1Tag.indexOf('')); 415 | html = html.replace(h1Tag, newH1Tag); 416 | } 417 | 418 | html = getHtmlWithAbsoluteLinks(html, version, siteConfig); 419 | 420 | item.articleHtml = html; 421 | item.scriptPath = scriptPath; 422 | item.stylePath = stylePath; 423 | item.parentTitles = parentTitles; 424 | 425 | return; 426 | } 427 | 428 | async function createPdfFromArticles( 429 | documentTitle: string, 430 | documentVersion: string, 431 | pdfName: string, 432 | articleList: any[], 433 | pluginOptions: PapersaurusPluginOptions, 434 | siteConfig: any, 435 | buildDir: string, 436 | browser: puppeteer.Browser, 437 | siteAddress: string 438 | ): Promise { 439 | 440 | console.log(`${pluginLogPrefix}Creating PDF ${buildDir}\\${pdfName}.pdf...`); 441 | 442 | const titlePdfFile = join(buildDir, `${pdfName}.title.pdf`); 443 | const contentRawPdfFile = join(buildDir, `${pdfName}.content.raw.pdf`); 444 | const contentHtmlFile = join(buildDir, `${pdfName}.content.html`); 445 | const contentPdfFile = join(buildDir, `${pdfName}.content.pdf`); 446 | const finalPdfFile = join(buildDir, `${pdfName}.pdf`); 447 | 448 | const coverPage = await browser.newPage(); 449 | await coverPage.setContent( 450 | pluginOptions.getPdfCoverPage(siteConfig, pluginOptions, documentTitle, documentVersion), 451 | { 452 | timeout: pluginOptions.puppeteerTimeout 453 | }); 454 | await coverPage.pdf({ 455 | format: 'a4', 456 | path: titlePdfFile, 457 | headerTemplate: pluginOptions.coverPageHeader, 458 | footerTemplate: pluginOptions.coverPageFooter, 459 | displayHeaderFooter: true, 460 | printBackground: true, 461 | margin: pluginOptions.coverMargins, 462 | timeout: 
pluginOptions.puppeteerTimeout 463 | }); 464 | await coverPage.close(); 465 | 466 | const page = await browser.newPage(); 467 | 468 | let stylePath = articleList[0].stylePath; 469 | let scriptPath = articleList[0].scriptPath; 470 | 471 | let fullHtml = ''; 472 | for (const article of articleList) { 473 | if (articleList.length > 1 && pluginOptions.ignoreDocs.includes(article.unversionedId || '-IdIsEmpty-')) { 474 | // Don't add ignored articles to PDF's with multiple articles (section pdf's, complete document pdf) 475 | continue; 476 | } 477 | fullHtml += article.articleHtml || ''; 478 | } 479 | 480 | // Remove header tags (around h1) 481 | fullHtml = fullHtml.replace(/
/g, ''); 482 | fullHtml = fullHtml.replace(//g, ''); 483 | 484 | // Hide hashlinks (replace visible hash with space) 485 | fullHtml = fullHtml.replace(/">#<\/a>/g, `"> `); 486 | 487 | const $ = cheerio.load(fullHtml); 488 | if (pluginOptions.ignoreCssSelectors) { 489 | for (const ignoreSelector of pluginOptions.ignoreCssSelectors) { 490 | $(ignoreSelector).remove(); 491 | } 492 | } 493 | $(".theme-doc-breadcrumbs").remove(); 494 | $(".theme-doc-version-badge").remove(); 495 | $(".theme-doc-toc-mobile").remove(); 496 | $(".buttonGroup__atx").remove(); 497 | 498 | fullHtml = $.html(); 499 | 500 | // Add table of contents 501 | fullHtml = toc('
' + fullHtml, { 502 | anchorTemplate: function (id: string) { 503 | return ``; 504 | }, 505 | selectors: 'h1,h2,h3', 506 | parentLink: false, 507 | header: '

Contents

', 508 | minLength: 0, 509 | addId: false //=default 510 | }); 511 | 512 | let htmlToc = fullHtml.substring(14, fullHtml.indexOf('
')); 513 | 514 | htmlToc = htmlToc.replace(/class="nav sidenav"/g, 'class="toc-headings"'); 515 | htmlToc = htmlToc.replace(/class="nav"/g, 'class="toc-headings"'); 516 | htmlToc = htmlToc.replace(/[\r\n]+/g, ''); 517 | 518 | const htmlArticles = fullHtml.substring(fullHtml.indexOf('') + 6); 519 | const tocLinks = htmlToc.match(/[^<>]+<\/a>/g); 520 | let tocLinksInfos = tocLinks?.map((link) => { 521 | const entry: TocInfo = { 522 | link: link, 523 | href: link.substring(link.indexOf('href="') + 6, link.indexOf('">')), 524 | text: link.substring(link.indexOf('">') + 2, link.indexOf('')), 525 | } 526 | return entry; 527 | }); 528 | tocLinksInfos = tocLinksInfos || []; 529 | 530 | for (const tocLinkInfo of tocLinksInfos) { 531 | htmlToc = htmlToc.replace(tocLinkInfo.link, 532 | `${tocLinkInfo.text}_`); 533 | } 534 | 535 | let htmlStyles = ``; 555 | const hasCustomStyles = pluginOptions.stylesheets && pluginOptions.stylesheets.length > 0; 556 | if (hasCustomStyles) { 557 | for (const stylesheet of pluginOptions.stylesheets) { 558 | htmlStyles = `${htmlStyles}`; 559 | } 560 | } 561 | 562 | if (!hasCustomStyles || pluginOptions.alwaysIncludeSiteStyles) { 563 | if (stylePath) { 564 | htmlStyles = `${htmlStyles}`; 565 | } 566 | } 567 | 568 | let htmlScripts = ''; 569 | if (pluginOptions.scripts && pluginOptions.scripts.length > 0) { 570 | for (const script of pluginOptions.scripts) { 571 | htmlScripts = `${htmlScripts}`; 572 | } 573 | } 574 | else { 575 | if (scriptPath) { 576 | htmlScripts = `${htmlScripts}`; 577 | } 578 | } 579 | 580 | let htmlContent = ` 581 | 582 | 583 | 584 | 585 | 586 | 587 | ${htmlStyles} 588 | ${htmlScripts} 589 | 590 | 591 | ${htmlToc}${htmlArticles} 592 | 593 | `; 594 | 595 | await generateContentPdf(contentRawPdfFile); 596 | 597 | const dataBuffer = fs.readFileSync(contentRawPdfFile); 598 | const parsedData = await pdfParse(dataBuffer); 599 | 600 | htmlContent = getPageWithFixedToc(pluginOptions.footerParser, tocLinksInfos, parsedData.text, 
htmlContent); 601 | 602 | await generateContentPdf(contentPdfFile); 603 | 604 | htmlContent = await page.content(); 605 | fs.writeFileSync(contentHtmlFile, htmlContent); 606 | 607 | await page.close(); 608 | 609 | await mergeMultiplePDF([titlePdfFile, contentPdfFile], finalPdfFile); 610 | 611 | fs.unlinkSync(titlePdfFile); 612 | fs.unlinkSync(contentRawPdfFile); 613 | fs.unlinkSync(contentPdfFile); 614 | if (!pluginOptions.keepDebugHtmls) { 615 | fs.unlinkSync(contentHtmlFile); 616 | } 617 | 618 | async function generateContentPdf(targetFile: string) { 619 | await page.goto(siteAddress); 620 | await page.setContent(htmlContent, { 621 | timeout: pluginOptions.puppeteerTimeout 622 | }); 623 | await page.pdf({ 624 | path: targetFile, 625 | format: 'a4', 626 | headerTemplate: pluginOptions.getPdfPageHeader(siteConfig, pluginOptions, documentTitle, documentVersion), 627 | footerTemplate: pluginOptions.getPdfPageFooter(siteConfig, pluginOptions, documentTitle, documentVersion), 628 | displayHeaderFooter: true, 629 | printBackground: true, 630 | scale: 1, 631 | margin: pluginOptions.margins, 632 | timeout: pluginOptions.puppeteerTimeout 633 | }); 634 | 635 | } 636 | } 637 | 638 | const mergeMultiplePDF = (pdfFiles: string[], name: string) => { 639 | return new Promise((resolve, reject) => { 640 | pdfMerge(pdfFiles, name, function (err: any) { 641 | 642 | if (err) { 643 | console.log(err); 644 | reject(err) 645 | } 646 | 647 | resolve('') 648 | }); 649 | }); 650 | }; 651 | 652 | const escapeHeaderRegex = (header: string) => { 653 | return header 654 | // escape all regex reserved characters 655 | .replace(/[-[\]/{}()*+?.\\^$|]/g, '\\$&') 656 | // replace white-spaces to allow line breaks 657 | .replace(/\s/g, '(\\s|\\s\\n)'); 658 | } 659 | 660 | const pdfHeaderRegex = [ 661 | (h1: string) => new RegExp(`^\\d+\\s{2}${escapeHeaderRegex(h1)}(\\s|\\s\\n)?$`, 'gm'), 662 | (h2: string) => new RegExp(`^\\d+\\.\\d+\\s{2}${escapeHeaderRegex(h2)}(\\s|\\s\\n)?$`, 'gm'), 663 | (h3: 
string) => new RegExp(`^\\d+\\.\\d+.\\d+\\s{2}${escapeHeaderRegex(h3)}(\\s|\\s\\n)?$`, 'gm'), 664 | (unnumbered: string) => new RegExp(`^${escapeHeaderRegex(unnumbered)}$`, 'gm') 665 | ]; 666 | 667 | const getHtmlWithAbsoluteLinks = (html: string, version: LoadedVersion, siteConfig: any) => { 668 | let versionPath = ''; 669 | if (!version.isLast) { 670 | versionPath = `${getVersionPath(version, siteConfig)}/`; 671 | } 672 | 673 | return html.replace(/]*?\s+)?href=(["'])(.*?)\1/g, function (matched, _p1, p2) { 674 | if (p2.indexOf('http') === 0) { 675 | // ignore already external links 676 | return matched; 677 | } 678 | 679 | if (p2.indexOf('#') === 0) { 680 | // ignore anchor links. because we don't know in which file 681 | // they are. Plus they will allways work (but can have multiple targets when merging) 682 | return matched; 683 | } 684 | 685 | if (p2.indexOf('.') === 0) { 686 | // this is some kind of a manually created link. 687 | return matched; 688 | } 689 | 690 | if (p2.indexOf(siteConfig.baseUrl) === 0) { 691 | return matched.replace(p2, `${siteConfig.url}${p2}`); 692 | } 693 | 694 | return matched.replace(p2, `${siteConfig.url}${siteConfig.baseUrl}docs/${versionPath}${p2}`); 695 | }); 696 | }; 697 | 698 | const getVersionPath = (version: LoadedVersion, siteConfig: any) => { 699 | let versionPath = version.path; 700 | return versionPath.substring(siteConfig.baseUrl.length, versionPath.length); 701 | }; 702 | 703 | const getPermaLink = (doc: DocMetadata, siteConfig: any) => { 704 | let link = doc.permalink; 705 | return link.substring(siteConfig.baseUrl.length, link.length); 706 | }; 707 | 708 | const decodeHtml = (str: string) => { 709 | // Taken from here: https://stackoverflow.com/a/39243641 710 | const htmlEntities: { [key: string]: string } = { 711 | nbsp: ' ', 712 | cent: '¢', 713 | pound: '£', 714 | yen: '¥', 715 | euro: '€', 716 | copy: '©', 717 | reg: '®', 718 | lt: '<', 719 | gt: '>', 720 | quot: '"', 721 | amp: '&', 722 | apos: '\'' 723 | }; 
724 | 725 | return str.replace(/\&([^;]+);/g, function (entity, entityCode) { 726 | var match; 727 | 728 | if (entityCode in htmlEntities) { 729 | return htmlEntities[entityCode]; 730 | /*eslint no-cond-assign: 0*/ 731 | } else if (match = entityCode.match(/^#x([\da-fA-F]+)$/)) { 732 | return String.fromCharCode(parseInt(match[1], 16)); 733 | /*eslint no-cond-assign: 0*/ 734 | } else if (match = entityCode.match(/^#(\d+)$/)) { 735 | return String.fromCharCode(~~match[1]); 736 | } else { 737 | return entity; 738 | } 739 | }) 740 | // taken from here: https://stackoverflow.com/a/11305926 741 | .replace(/[\u200B-\u200D\uFEFF]/g, ''); 742 | } 743 | 744 | const getPageWithFixedToc = (footerRegEx: RegExp, tocList: TocInfo[], pdfContent: string, htmlContent: string) => { 745 | 746 | const pdfPages = pdfContent.split(footerRegEx); 747 | if (!pdfPages.length) { 748 | return htmlContent; 749 | } 750 | 751 | let pageIndex = 0; 752 | tocList.forEach(e => { 753 | for (; pageIndex < pdfPages.length; pageIndex++) { 754 | let page = pdfPages[pageIndex]; 755 | let found = false; 756 | for (let i = 0; i < pdfHeaderRegex.length; ++i) { 757 | if (pdfHeaderRegex[i](decodeHtml(e.text)).test(page)) { 758 | htmlContent = htmlContent.replace( 759 | '_', 760 | `${pageIndex}` 761 | ); 762 | found = true; 763 | break; 764 | } 765 | } 766 | if (found) { 767 | break; 768 | } 769 | } 770 | }); 771 | 772 | return htmlContent; 773 | } 774 | 775 | const getURL = (origin: string, filePath: string) => { 776 | return origin + '/' + filePath.substring(filePath.startsWith('/') ? 1 : 0); 777 | }; 778 | 779 | const getStylesheetPathFromHTML = (html: string, origin: string) => { 780 | const regExp = /(?:|]*){1}href="([^<>]*styles[^<>]*?\.css){1}"/g; 781 | let filePath = ''; 782 | try { 783 | filePath = getFirstCapturingGroup(regExp, html); 784 | } catch { 785 | throw new Error( 786 | "The href attribute of the 'styles*.css' file could not be found!" 
787 | ); 788 | } 789 | return getURL(origin, filePath); 790 | }; 791 | 792 | const getScriptPathFromHTML = (html: string, origin: string) => { 793 | const regExp = /(?:|]*){1}src="([^<>]*styles[^<>]*?\.js){1}"/g; 794 | let filePath = ''; 795 | try { 796 | filePath = getFirstCapturingGroup(regExp, html); 797 | } catch { 798 | throw new Error( 799 | "The src attribute of the 'styles*.js' file could not be found!" 800 | ); 801 | } 802 | return getURL(origin, filePath); 803 | }; 804 | 805 | const getFirstCapturingGroup = (regExp: RegExp, text: string) => { 806 | const match = regExp.exec(text); 807 | if (match && match[1]) { 808 | return match[1]; 809 | } else { 810 | throw new ReferenceError('No capture group found in the provided text.'); 811 | } 812 | }; 813 | 814 | function isObject(x: unknown): x is Record { 815 | return x !== null && typeof x === 'object'; 816 | } 817 | 818 | function hasOwnProperty< 819 | X extends Record, 820 | Y extends PropertyKey 821 | >(obj: X, prop: Y): obj is X & Record { 822 | return Object.prototype.hasOwnProperty.call(obj, prop); 823 | } 824 | 825 | const isAddressInfo = (arg: unknown): arg is AddressInfo => { 826 | return ( 827 | isObject(arg) && 828 | hasOwnProperty(arg, 'address') && 829 | typeof arg.address == 'string' && 830 | hasOwnProperty(arg, 'family') && 831 | typeof arg.family == 'string' && 832 | hasOwnProperty(arg, 'port') && 833 | typeof arg.port == 'number' 834 | ); 835 | }; 836 | --------------------------------------------------------------------------------