├── .clang-format
├── .github
└── workflows
│ └── tests.yaml
├── .gitignore
├── .npmignore
├── .prettierrc.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── dist
├── index.d.ts
└── index.js
├── jest.config.js
├── package-lock.json
├── package.json
├── src
└── index.ts
├── test
├── files
│ ├── index.ts
│ ├── out2.txt
│ ├── pdf1.pdf
│ ├── pdf2.pdf
│ ├── pdf3.pdf
│ ├── renderPDFPagesToPNG-rotated-output.png
│ └── renderPDFPagesToPNG-rotated-output2.png
└── index.test.ts
└── tsconfig.json
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: google
2 | ColumnLimit: 120
3 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yaml:
--------------------------------------------------------------------------------
1 | name: Unit tests
2 | on:
3 | push:
4 | branches: [ main ]
5 | pull_request:
6 | branches: [ main ]
7 | jobs:
8 | tests:
9 | runs-on: ubuntu-latest
10 | steps:
11 | # Checkout the Repo
12 | - uses: actions/checkout@v2
13 |
14 | # Install Node 16
15 | - name: Setup Node
16 | uses: actions/setup-node@v1
17 | with:
18 | node-version: 16
19 |
20 | # Install APT dependencies
21 | - run: sudo apt update && sudo apt install -y python2.7 python3 build-essential ghostscript libjpeg-dev libpng-dev libcurl4-openssl-dev mupdf-tools libfreetype6-dev qpdf
22 |
23 | # Install NPM dependencies
24 | - run: npm install
25 |
26 | # Run tests
27 | - run: npm test
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | lerna-debug.log*
8 |
9 | # Diagnostic reports (https://nodejs.org/api/report.html)
10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
11 |
12 | # Runtime data
13 | pids
14 | *.pid
15 | *.seed
16 | *.pid.lock
17 |
18 | # Directory for instrumented libs generated by jscoverage/JSCover
19 | lib-cov
20 |
21 | # Coverage directory used by tools like istanbul
22 | coverage
23 | *.lcov
24 |
25 | # nyc test coverage
26 | .nyc_output
27 |
28 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
29 | .grunt
30 |
31 | # Bower dependency directory (https://bower.io/)
32 | bower_components
33 |
34 | # node-waf configuration
35 | .lock-wscript
36 |
37 | # Compiled binary addons (https://nodejs.org/api/addons.html)
38 | build/Release
39 |
40 | # Dependency directories
41 | node_modules/
42 | jspm_packages/
43 |
44 | # TypeScript v1 declaration files
45 | typings/
46 |
47 | # TypeScript cache
48 | *.tsbuildinfo
49 |
50 | # Optional npm cache directory
51 | .npm
52 |
53 | # Optional eslint cache
54 | .eslintcache
55 |
56 | # Microbundle cache
57 | .rpt2_cache/
58 | .rts2_cache_cjs/
59 | .rts2_cache_es/
60 | .rts2_cache_umd/
61 |
62 | # Optional REPL history
63 | .node_repl_history
64 |
65 | # Output of 'npm pack'
66 | *.tgz
67 |
68 | # Yarn Integrity file
69 | .yarn-integrity
70 |
71 | # dotenv environment variables file
72 | .env
73 | .env.test
74 |
75 | # parcel-bundler cache (https://parceljs.org/)
76 | .cache
77 |
78 | # Next.js build output
79 | .next
80 |
81 | # Nuxt.js build / generate output
82 | .nuxt
83 |
84 | # Gatsby files
85 | .cache/
86 | # Comment in the public line in if your project uses Gatsby and *not* Next.js
87 | # https://nextjs.org/blog/next-9-1#public-directory-support
88 | # public
89 |
90 | # vuepress build output
91 | .vuepress/dist
92 |
93 | # Serverless directories
94 | .serverless/
95 |
96 | # FuseBox cache
97 | .fusebox/
98 |
99 | # DynamoDB Local files
100 | .dynamodb/
101 |
102 | # TernJS port file
103 | .tern-port
104 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | test/*
2 |
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "printWidth": 120,
3 | "trailingComma": "all",
4 | "singleQuote": true,
5 | "ignorePath": ".gitignore",
6 | "arrowParens": "avoid"
7 | }
8 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # 1.5.0 (2023-09-06)
2 |
3 | - add `compressPDF` (gabolera) [#4](https://github.com/sigalor/ghostscript-node/pull/4)
4 | - add `convertToPDFA` (MedAzizKhayati) [#6](https://github.com/sigalor/ghostscript-node/pull/6)
5 |
6 | # 1.4.1 (2023-01-26)
7 |
8 | - updated NPM dependencies
9 |
10 | # 1.4.0 (2022-06-07)
11 |
12 | - transformed entire library to use TypeScript
13 | - fix `countPDFPages` to ignore Ghostscript warnings (e.g. due to a damaged XREF table)
14 |
15 | # 1.3.2 (2022-02-16)
16 |
17 | - escape input in countPDFPages for Windows, fix redundant await (Loupi) [#2](https://github.com/sigalor/ghostscript-node/pull/2)
18 |
19 | # 1.3.1 (2022-01-24)
20 |
21 | - fix handling of rotated PDFs
22 | - improve internal structure of unit tests
23 |
24 | # 1.3.0 (2022-01-06)
25 |
26 | - added `isValidPDF` function
27 |
28 | # 1.2.0 (2021-07-27)
29 |
30 | - added `renderPDFPagesToPNG` function
31 |
32 | # 1.1.2 (2021-01-28)
33 |
34 | - use qpdf instead of PDFtk for rotating PDFs
35 |
36 | # 1.1.1 (2021-01-28)
37 |
38 | - fix TypeScript definition file
39 |
40 | # 1.1.0 (2021-01-28)
41 |
42 | - added `rotatePDF` function, based on PDFtk
43 |
44 | # 1.0.0 (2021-01-25)
45 |
46 | - initial release
47 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 sigalor
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ghostscript-node
2 |
3 | [License](https://github.com/sigalor/ghostscript-node/blob/master/LICENSE) [npm package](https://www.npmjs.com/package/ghostscript-node) [Unit tests](https://github.com/sigalor/ghostscript-node/actions/workflows/tests.yaml)
4 |
5 | ## Introduction
6 |
7 | A fully promise-based Node.js library which can work with PDFs, based on Ghostscript and qpdf.
8 |
9 | All PDF files are handled by this library in the form of `Buffer` objects, i.e. to a user of this library it looks like everything happens purely in memory. In the background, though, file system access (via [tempy](https://www.npmjs.com/package/tempy)) is needed.
10 |
11 | Additionally, this library requires the `gs` command (Ghostscript) as well as `qpdf` to be available. The required apt dependencies on Ubuntu 20.04 can be installed via the following command:
12 |
13 | ```
14 | sudo apt install -y python python3 build-essential ghostscript libjpeg-dev libpng-dev libcurl4-openssl-dev mupdf-tools libfreetype6-dev qpdf
15 | ```
16 |
17 | ## Getting started
18 |
19 | ```
20 | npm install ghostscript-node
21 | ```
22 |
23 | ## Usage
24 |
25 | ```javascript
26 | const gs = require("ghostscript-node");
27 |
28 | (async () => {
29 | const pdf1 = /* load first PDF as Buffer, e.g. from database */;
30 | const pdf2 = /* load second PDF as Buffer, e.g. from database */;
31 |
32 | // get Buffer object with bytes of combined PDF
33 | const combinedPDF = await gs.combinePDFs([pdf1, pdf2]);
34 |
35 | // get number of pages of pdf1
36 | const numPagesPDF1 = await gs.countPDFPages(pdf1);
37 |
38 | // get Buffer object with bytes of specified set of pages
39 | // page numbers begin with 1, last page is included
40 | const partOfPDF1 = await gs.extractPDFPages(pdf1, 3, 5);
41 |
42 | // rotate all pages of pdf1 by 90 degrees clockwise
43 | const rotatedPDF = await gs.rotatePDF(pdf1, "90");
44 |
45 | // returns an array of buffers containing PNG images of the desired pages
46 | const renderedPages = await gs.renderPDFPagesToPNG(pdf1);
47 |
48 | // checks if pdf1 is a valid PDF file
49 | const isPDF1Valid = await gs.isValidPDF(pdf1);
50 | })();
51 | ```
52 |
53 | ## License
54 |
55 | MIT
56 |
--------------------------------------------------------------------------------
/dist/index.d.ts:
--------------------------------------------------------------------------------
1 | ///
2 | export declare function combinePDFs(pdfBuffers: Buffer[]): Promise;
3 | export declare function countPDFPages(pdfBuffer: Buffer): Promise;
4 | export declare function extractPDFPages(pdfBuffer: Buffer, firstPage: number, lastPage: number): Promise;
5 | export declare function rotatePDF(pdfBuffer: Buffer, direction: '90' | '180' | '270'): Promise;
6 | /**
7 | * Converts a PDF to PDF/A.
8 | *
9 | * @param pdfBuffer - Buffer of the PDF to convert
10 | * @param options - Options for the conversion
11 | * @param options.version - PDF/A version to convert to. Defaults to 1.
12 | * @returns
13 | */
14 | export declare function convertToPDFA(pdfBuffer: Buffer, options?: {
15 | version: number;
16 | }): Promise;
17 | /**
18 | * If `firstPage` is not given, 1 is used.
19 | * If `lastPage` is not given, the document's last page is used.
20 | * If `firstPage` is negative (e.g. -n), this refers to the last n pages and `lastPage` must be undefined.
21 | * All page numbers start at 1.
22 | */
23 | export declare function renderPDFPagesToPNG(pdfBuffer: Buffer, firstPage?: number, lastPage?: number, resolution?: number): Promise;
24 | export declare function isValidPDF(pdfBuffer: Buffer): Promise;
25 | /**
26 | * This function try, reduce size of your PDF not destroying quality
27 | * @param pdfBuffer Buffer
28 | * @returns Buffer
29 | */
30 | export declare function compressPDF(pdfBuffer: Buffer | string, encoding?: BufferEncoding): Promise;
31 |
--------------------------------------------------------------------------------
/dist/index.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | var __importDefault = (this && this.__importDefault) || function (mod) {
3 | return (mod && mod.__esModule) ? mod : { "default": mod };
4 | };
5 | Object.defineProperty(exports, "__esModule", { value: true });
6 | exports.compressPDF = exports.isValidPDF = exports.renderPDFPagesToPNG = exports.convertToPDFA = exports.rotatePDF = exports.extractPDFPages = exports.countPDFPages = exports.combinePDFs = void 0;
7 | const child_process_1 = __importDefault(require("child_process"));
8 | const fs_extra_1 = __importDefault(require("fs-extra"));
9 | const tempy_1 = __importDefault(require("tempy"));
10 | const util_1 = __importDefault(require("util"));
11 | const exec = util_1.default.promisify(child_process_1.default.exec);
12 | async function useTempFiles(filenameSets, fn) {
13 | const filenames = {};
14 | // create all desired temporary files (either empty files or write data buffers to them)
15 | for (const [k, config] of Object.entries(filenameSets)) {
16 | const { numFiles, writeBuffers, tempyConfig } = config;
17 | if (numFiles !== undefined) {
18 | filenames[k] = Array(numFiles)
19 | .fill(0)
20 | .map(() => tempy_1.default.file(tempyConfig));
21 | }
22 | else if (writeBuffers !== undefined) {
23 | filenames[k] = await Promise.all(writeBuffers.map(b => tempy_1.default.write(b, tempyConfig)));
24 | }
25 | }
26 | // execute the worker function which uses these files
27 | const ret = await fn(filenames);
28 | // remove all the temporary files again
29 | await Promise.all([].concat(...Object.values(filenames)).map(f => fs_extra_1.default.unlink(f)));
30 | // return the result of the worker function
31 | return ret;
32 | }
33 | async function useTempFilesPDF(filenameSets, fn) {
34 | Object.values(filenameSets).forEach(v => (v.tempyConfig = { extension: '.pdf' }));
35 | return useTempFiles(filenameSets, fn);
36 | }
37 | // writes inputBuffer to one temporary file, creates an empty output file, calls the worker function and returns the output file contents as Buffer
38 | async function useTempFilesPDFInOut(inputBuffer, fn) {
39 | return useTempFilesPDF({ input: { writeBuffers: [inputBuffer] }, output: { numFiles: 1 } }, async ({ input, output }) => {
40 | await fn(input[0], output[0]);
41 | return fs_extra_1.default.readFile(output[0]);
42 | });
43 | }
44 | async function useTempFilesPDFIn(inputBuffer, fn) {
45 | return useTempFilesPDF({ input: { writeBuffers: [inputBuffer] } }, async ({ input }) => fn(input[0]));
46 | }
47 | async function combinePDFs(pdfBuffers) {
48 | if (pdfBuffers.length === 0)
49 | return Buffer.alloc(0);
50 | if (pdfBuffers.length === 1)
51 | return pdfBuffers[0];
52 | try {
53 | return await useTempFilesPDF({ inputs: { writeBuffers: pdfBuffers }, output: { numFiles: 1 } }, async ({ inputs, output }) => {
54 | await exec(`gs -q -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=${output[0]} -dBATCH -dAutoRotatePages=/None ${inputs.join(' ')} -c "[ /Creator () /Producer () /DOCINFO pdfmark"`);
55 | return fs_extra_1.default.readFile(output[0]);
56 | });
57 | }
58 | catch (e) {
59 | throw new Error('Failed to combine PDFs: ' + e.message);
60 | }
61 | }
62 | exports.combinePDFs = combinePDFs;
63 | async function countPDFPages(pdfBuffer) {
64 | try {
65 | return await useTempFilesPDFIn(pdfBuffer, async (input) => {
66 | const escapedInput = input.replace(/\\/g, '\\\\');
67 | let { stdout } = await exec(`gs -q -dNOPAUSE -dBATCH -dNOSAFER -dNODISPLAY -c "(${escapedInput}) (r) file runpdfbegin pdfpagecount = quit"`);
68 | /**
69 | * Necessary for when Ghostscript detects a damaged but repairable PDF file, because then it outputs
70 | * the following before the number of pages (three spaces before each line starting with "****"):
71 | *
72 | * **** Error: An error occurred while reading an XREF table.
73 | * **** The file has been damaged. This may have been caused
74 | * **** by a problem while converting or transfering the file.
75 | * **** Ghostscript will attempt to recover the data.
76 | * **** However, the output may be incorrect.
77 | */
78 | stdout = stdout
79 | .split('\n')
80 | .filter(l => !l.startsWith(' **** ') && l.length > 0)
81 | .join('\n')
82 | .trim();
83 | const ret = parseInt(stdout);
84 | if (isNaN(ret))
85 | throw new Error('parsing failed: ' + stdout);
86 | return ret;
87 | });
88 | }
89 | catch (e) {
90 | throw new Error('Failed to determine number of pages in PDF: ' + e.message);
91 | }
92 | }
93 | exports.countPDFPages = countPDFPages;
94 | async function extractPDFPages(pdfBuffer, firstPage, lastPage) {
95 | try {
96 | return await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
97 | await exec(`gs -q -dNOPAUSE -sDEVICE=pdfwrite -dBATCH -dNOSAFER -dFirstPage=${firstPage} -dLastPage=${lastPage} -dAutoRotatePages=/None -sOutputFile=${output} ${input}`);
98 | });
99 | }
100 | catch (e) {
101 | throw new Error('Failed to extract PDF pages: ' + e.message);
102 | }
103 | }
104 | exports.extractPDFPages = extractPDFPages;
105 | async function rotatePDF(pdfBuffer, direction) {
106 | if (!['90', '180', '270'].includes(direction))
107 | throw new Error('Invalid rotation direction: ' + direction);
108 | try {
109 | return await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
110 | await exec(`qpdf ${input} ${output} --rotate=${direction}`);
111 | });
112 | }
113 | catch (e) {
114 | throw new Error('Failed to rotate PDF: ' + e.message);
115 | }
116 | }
117 | exports.rotatePDF = rotatePDF;
118 | /**
119 | * Converts a PDF to PDF/A.
120 | *
121 | * @param pdfBuffer - Buffer of the PDF to convert
122 | * @param options - Options for the conversion
123 | * @param options.version - PDF/A version to convert to. Defaults to 1.
124 | * @returns
125 | */
126 | async function convertToPDFA(pdfBuffer, options = {
127 | version: 1,
128 | }) {
129 | try {
130 | return await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
131 | await exec(`gs -dPDFA -dBATCH -dNOPAUSE -sColorConversionStrategy=UseDeviceIndependentColor -sDEVICE=pdfwrite -dPDFACompatibilityPolicy=${options.version} -sOutputFile=${output} ${input}`);
132 | });
133 | }
134 | catch (e) {
135 | throw new Error('Failed to convert PDF to PDF/A: ' + e.message);
136 | }
137 | }
138 | exports.convertToPDFA = convertToPDFA;
139 | /**
140 | * If `firstPage` is not given, 1 is used.
141 | * If `lastPage` is not given, the document's last page is used.
142 | * If `firstPage` is negative (e.g. -n), this refers to the last n pages and `lastPage` must be undefined.
143 | * All page numbers start at 1.
144 | */
145 | async function renderPDFPagesToPNG(pdfBuffer, firstPage, lastPage, resolution = 300) {
146 | const numPages = await countPDFPages(pdfBuffer);
147 | if (firstPage === undefined)
148 | firstPage = 1;
149 | else if (firstPage === 0 || (firstPage < 0 && firstPage < -numPages))
150 | throw new Error('First page number out of range: ' + firstPage);
151 | if (firstPage < 0) {
152 | if (lastPage !== undefined)
153 | throw new Error('Last page must be undefined when first page is negative');
154 | firstPage = numPages + firstPage + 1;
155 | lastPage = numPages;
156 | }
157 | if (lastPage === undefined)
158 | lastPage = numPages;
159 | else if (lastPage > numPages)
160 | throw new Error('Last page number out of range: ' + lastPage);
161 | if (firstPage > lastPage)
162 | throw new Error('Invalid page range: ' + firstPage + '-' + lastPage);
163 | try {
164 | return await useTempFilesPDFIn(pdfBuffer, async (input) => {
165 | const outDir = tempy_1.default.directory();
166 | await exec(`gs -q -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dAlignToPixels=0 -dGridFitTT=2 -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r${resolution} -sOutputFile=${outDir}/%d.png -dFirstPage=${firstPage} -dLastPage=${lastPage} ${input}`);
167 | const outFiles = [];
168 | for (let i = 1; i <= lastPage - firstPage + 1; i++) {
169 | outFiles.push(await fs_extra_1.default.readFile(outDir + '/' + i + '.png'));
170 | }
171 | await fs_extra_1.default.rmdir(outDir, { recursive: true });
172 | return outFiles;
173 | });
174 | }
175 | catch (e) {
176 | throw new Error('Failed to render PDF pages to PNG: ' + e.message);
177 | }
178 | }
179 | exports.renderPDFPagesToPNG = renderPDFPagesToPNG;
180 | async function isValidPDF(pdfBuffer) {
181 | try {
182 | await countPDFPages(pdfBuffer);
183 | return true;
184 | }
185 | catch (e) {
186 | return false;
187 | }
188 | }
189 | exports.isValidPDF = isValidPDF;
190 | /**
191 | * This function try, reduce size of your PDF not destroying quality
192 | * @param pdfBuffer Buffer
193 | * @returns Buffer
194 | */
195 | async function compressPDF(pdfBuffer, encoding) {
196 | try {
197 | if (typeof pdfBuffer === 'string') {
198 | pdfBuffer = Buffer.from(pdfBuffer, encoding !== null && encoding !== void 0 ? encoding : 'base64');
199 | }
200 | const compressedPdf = await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
201 | await exec(`gs -q -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dEmbedAllFonts=true -dSubsetFonts=true -dColorImageDownsampleType=/Bicubic -dColorImageResolution=144 -dGrayImageDownsampleType=/Bicubic -dGrayImageResolution=144 -dMonoImageDownsampleType=/Bicubic -dMonoImageResolution=144 -sOutputFile=${output} ${input}`);
202 | });
203 | if (pdfBuffer.length < compressedPdf.length) {
204 | return pdfBuffer;
205 | }
206 | return compressedPdf;
207 | }
208 | catch (e) {
209 | throw new Error('Failed optimize PDF: ' + e.message);
210 | }
211 | }
212 | exports.compressPDF = compressPDF;
213 |
--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
2 | module.exports = {
3 | preset: 'ts-jest',
4 | testEnvironment: 'node',
5 | };
6 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ghostscript-node",
3 | "version": "1.5.0",
4 | "description": "A Node.js library which can work with PDFs, based on Ghostscript.",
5 | "main": "dist/index.js",
6 | "scripts": {
7 | "build": "rimraf dist; tsc",
8 | "test": "jest"
9 | },
10 | "repository": {
11 | "type": "git",
12 | "url": "git+https://github.com/sigalor/ghostscript-node.git"
13 | },
14 | "keywords": [
15 | "pdf",
16 | "ghostscript"
17 | ],
18 | "author": "sigalor",
19 | "license": "MIT",
20 | "bugs": {
21 | "url": "https://github.com/sigalor/ghostscript-node/issues"
22 | },
23 | "homepage": "https://github.com/sigalor/ghostscript-node#readme",
24 | "dependencies": {
25 | "fs-extra": "^11.1.0",
26 | "tempy": "^1.0.1"
27 | },
28 | "devDependencies": {
29 | "@types/fs-extra": "^11.0.1",
30 | "@types/jest": "^29.4.0",
31 | "jest": "^29.4.0",
32 | "rimraf": "^4.1.2",
33 | "ts-jest": "^29.0.5",
34 | "typescript": "^4.9.4"
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | import childProcess from 'child_process';
2 | import fs from 'fs-extra';
3 | import tempy, { FileOptions } from 'tempy';
4 | import util from 'util';
5 |
6 | const exec = util.promisify(childProcess.exec);
7 |
/** Describes, per named set, which temporary files `useTempFiles` should create. */
interface TempFilenameSetDefs {
  [setName: string]: {
    /** Buffers to write; one temporary file is created per buffer. Ignored when `numFiles` is set. */
    writeBuffers?: Buffer[];
    /** Number of temporary file paths to generate for this set. Takes precedence over `writeBuffers`. */
    numFiles?: number;
    /** Options handed to tempy when the files of this set are created. */
    tempyConfig?: FileOptions;
  };
}

/** The temporary file paths that were created, keyed by the same set names as the definitions. */
type TempFilenameSets = {
  [setName: string]: string[];
};

// The type parameter defaults to `any` on purpose: it keeps a bare `TempFileFnSingle` / `TempFileFnMany`
// reference valid and assignable wherever these aliases are used without an explicit result type.
/* eslint-disable @typescript-eslint/no-explicit-any */
/** Worker operating on one input file and, optionally, one output file. */
type TempFileFnSingle<T = any> = (input: string, output?: string) => Promise<T>;
/** Worker operating on arbitrary named sets of temporary files. */
type TempFileFnMany<T = any> = (filenames: TempFilenameSets) => Promise<T>;
/* eslint-enable @typescript-eslint/no-explicit-any */
22 |
/**
 * Creates the temporary files described by `filenameSets`, passes their paths to `fn` and removes them again.
 *
 * For every set, `numFiles` generates that many temporary file paths; otherwise `writeBuffers` writes each buffer
 * to its own temporary file. Cleanup runs in a `finally` block, so the files are also removed when `fn` (or the
 * creation of a later set) fails.
 *
 * @param filenameSets - definition of the temporary files to create, keyed by set name
 * @param fn - worker that receives the created paths under the same set names
 * @returns whatever `fn` resolves to
 */
async function useTempFiles<T>(
  filenameSets: TempFilenameSetDefs,
  fn: (filenames: TempFilenameSets) => Promise<T>,
): Promise<T> {
  const filenames: TempFilenameSets = {};

  try {
    // create all desired temporary files (either generated paths or data buffers written to disk)
    for (const [setName, config] of Object.entries(filenameSets)) {
      const { numFiles, writeBuffers, tempyConfig } = config;

      if (numFiles !== undefined) {
        filenames[setName] = Array.from({ length: numFiles }, () => tempy.file(tempyConfig));
      } else if (writeBuffers !== undefined) {
        filenames[setName] = await Promise.all(writeBuffers.map(b => tempy.write(b, tempyConfig)));
      }
    }

    // execute the worker function which uses these files
    return await fn(filenames);
  } finally {
    // fs-extra's `remove` is used instead of `unlink` because it does not reject for a path that does not
    // exist (e.g. an output file the worker never got to write), which would otherwise hide the real error.
    const allPaths = ([] as string[]).concat(...Object.values(filenames));
    await Promise.all(allPaths.map(f => fs.remove(f)));
  }
}
48 |
/**
 * Same as `useTempFiles`, but every temporary file gets the `.pdf` extension.
 *
 * The definitions are copied instead of being modified in place, so the caller's `filenameSets` object is left
 * untouched. Any `tempyConfig` given by the caller is replaced.
 *
 * @param filenameSets - definition of the temporary files to create, keyed by set name
 * @param fn - worker that receives the created paths under the same set names
 * @returns whatever `fn` resolves to
 */
async function useTempFilesPDF<T>(
  filenameSets: TempFilenameSetDefs,
  fn: (filenames: TempFilenameSets) => Promise<T>,
): Promise<T> {
  const pdfSets: TempFilenameSetDefs = {};
  for (const [setName, config] of Object.entries(filenameSets)) {
    pdfSets[setName] = { ...config, tempyConfig: { extension: '.pdf' } };
  }
  return useTempFiles(pdfSets, fn);
}
53 |
/**
 * Writes `inputBuffer` to one temporary PDF file, reserves a second temporary path for the output, calls the
 * worker with both paths and returns the contents of the output file.
 *
 * @param inputBuffer - data written to the temporary input file
 * @param fn - worker that is expected to produce the output file from the input file
 * @returns the bytes found in the output file after `fn` has finished
 */
async function useTempFilesPDFInOut(
  inputBuffer: Buffer,
  fn: (input: string, output: string) => Promise<unknown>,
): Promise<Buffer> {
  return useTempFilesPDF(
    { input: { writeBuffers: [inputBuffer] }, output: { numFiles: 1 } },
    async ({ input, output }) => {
      const [inputPath] = input;
      const [outputPath] = output;
      await fn(inputPath, outputPath);
      return fs.readFile(outputPath);
    },
  );
}
64 |
/**
 * Writes `inputBuffer` to one temporary PDF file and calls the worker with its path.
 *
 * @param inputBuffer - data written to the temporary input file
 * @param fn - worker that reads the temporary input file
 * @returns whatever `fn` resolves to
 */
async function useTempFilesPDFIn<T>(inputBuffer: Buffer, fn: (input: string) => Promise<T>): Promise<T> {
  return useTempFilesPDF({ input: { writeBuffers: [inputBuffer] } }, async ({ input }) => fn(input[0]));
}
68 |
/**
 * Concatenates several PDFs into one, in the order given.
 *
 * @param pdfBuffers - the PDFs to combine
 * @returns an empty buffer for an empty list, the single buffer itself for a one-element list, otherwise the
 *   combined PDF produced by Ghostscript
 * @throws Error prefixed with "Failed to combine PDFs: " when Ghostscript or the file handling fails
 */
export async function combinePDFs(pdfBuffers: Buffer[]): Promise<Buffer> {
  // nothing for Ghostscript to do in these two cases
  if (pdfBuffers.length === 0) return Buffer.alloc(0);
  if (pdfBuffers.length === 1) return pdfBuffers[0];

  try {
    return await useTempFilesPDF(
      { inputs: { writeBuffers: pdfBuffers }, output: { numFiles: 1 } },
      async ({ inputs, output }) => {
        const inputList = inputs.join(' ');
        await exec(
          `gs -q -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=${output[0]} -dBATCH -dAutoRotatePages=/None ${inputList} -c "[ /Creator () /Producer () /DOCINFO pdfmark"`,
        );
        return fs.readFile(output[0]);
      },
    );
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed to combine PDFs: ' + reason);
  }
}
89 |
/**
 * Determines the number of pages of a PDF by asking Ghostscript.
 *
 * @param pdfBuffer - the PDF to inspect
 * @returns the page count reported by Ghostscript
 * @throws Error prefixed with "Failed to determine number of pages in PDF: " when Ghostscript fails or its
 *   output does not contain a number
 */
export async function countPDFPages(pdfBuffer: Buffer): Promise<number> {
  try {
    return await useTempFilesPDFIn(pdfBuffer, async input => {
      // backslashes (Windows paths) have to be doubled inside the PostScript string literal
      const escapedInput = input.replace(/\\/g, '\\\\');
      const { stdout } = await exec(
        `gs -q -dNOPAUSE -dBATCH -dNOSAFER -dNODISPLAY -c "(${escapedInput}) (r) file runpdfbegin pdfpagecount = quit"`,
      );

      /**
       * Necessary for when Ghostscript detects a damaged but repairable PDF file, because then it outputs
       * the following before the number of pages (each line indented and starting with "****"):
       *
       *    **** Error: An error occurred while reading an XREF table.
       *    **** The file has been damaged. This may have been caused
       *    **** by a problem while converting or transfering the file.
       *    **** Ghostscript will attempt to recover the data.
       *    **** However, the output may be incorrect.
       *
       * The filter ignores the amount of leading whitespace, so it does not depend on the exact indentation
       * Ghostscript uses, and it also copes with CRLF line endings.
       */
      const cleaned = stdout
        .split(/\r?\n/)
        .filter(line => line.length > 0 && !/^\s*\*{4}/.test(line))
        .join('\n')
        .trim();

      const pageCount = parseInt(cleaned, 10);
      if (isNaN(pageCount)) throw new Error('parsing failed: ' + cleaned);
      return pageCount;
    });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed to determine number of pages in PDF: ' + reason);
  }
}
122 |
/**
 * Extracts a contiguous range of pages from a PDF into a new PDF.
 *
 * @param pdfBuffer - the source PDF
 * @param firstPage - first page to include; page numbers begin with 1
 * @param lastPage - last page to include (inclusive)
 * @returns a PDF containing only the requested pages
 * @throws Error prefixed with "Failed to extract PDF pages: " when Ghostscript or the file handling fails
 */
export async function extractPDFPages(pdfBuffer: Buffer, firstPage: number, lastPage: number): Promise<Buffer> {
  try {
    return await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
      await exec(
        `gs -q -dNOPAUSE -sDEVICE=pdfwrite -dBATCH -dNOSAFER -dFirstPage=${firstPage} -dLastPage=${lastPage} -dAutoRotatePages=/None -sOutputFile=${output} ${input}`,
      );
    });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed to extract PDF pages: ' + reason);
  }
}
134 |
/**
 * Rotates all pages of a PDF using qpdf.
 *
 * @param pdfBuffer - the PDF to rotate
 * @param direction - rotation angle in degrees, passed to qpdf's `--rotate` option
 * @returns the rotated PDF
 * @throws Error "Invalid rotation direction: …" for any other `direction` value (checked at runtime as well,
 *   for callers that bypass the type system)
 * @throws Error prefixed with "Failed to rotate PDF: " when qpdf or the file handling fails
 */
export async function rotatePDF(pdfBuffer: Buffer, direction: '90' | '180' | '270'): Promise<Buffer> {
  const allowedDirections: string[] = ['90', '180', '270'];
  if (!allowedDirections.includes(direction)) throw new Error('Invalid rotation direction: ' + direction);

  try {
    return await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
      await exec(`qpdf ${input} ${output} --rotate=${direction}`);
    });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed to rotate PDF: ' + reason);
  }
}
146 |
/**
 * Converts a PDF to PDF/A.
 *
 * The requested version is passed to Ghostscript's `-dPDFA` switch. `-dPDFACompatibilityPolicy` is a separate
 * setting (how to treat content that cannot be represented in PDF/A) and is kept at 1, the value that was
 * effectively used for the default options before.
 *
 * @param pdfBuffer - Buffer of the PDF to convert
 * @param options - Options for the conversion
 * @param options.version - PDF/A version to convert to (1, 2 or 3). Defaults to 1.
 * @returns Buffer of the converted PDF
 * @throws Error "Invalid PDF/A version: …" when `options.version` is not 1, 2 or 3
 * @throws Error prefixed with "Failed to convert PDF to PDF/A: " when Ghostscript or the file handling fails
 */
export async function convertToPDFA(pdfBuffer: Buffer, options: { version?: number } = {}): Promise<Buffer> {
  const version = options.version ?? 1;
  // only a known version number may reach the command line
  if (![1, 2, 3].includes(version)) throw new Error('Invalid PDF/A version: ' + version);

  try {
    return await useTempFilesPDFInOut(pdfBuffer, async (input, output) => {
      await exec(
        `gs -dPDFA=${version} -dBATCH -dNOPAUSE -sColorConversionStrategy=UseDeviceIndependentColor -sDEVICE=pdfwrite -dPDFACompatibilityPolicy=1 -sOutputFile=${output} ${input}`,
      );
    });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed to convert PDF to PDF/A: ' + reason);
  }
}
171 |
/**
 * Renders a range of pages of a PDF to PNG images.
 *
 * If `firstPage` is not given, 1 is used.
 * If `lastPage` is not given, the document's last page is used.
 * If `firstPage` is negative (e.g. -n), this refers to the last n pages and `lastPage` must be undefined.
 * All page numbers start at 1.
 *
 * @param pdfBuffer - the PDF to render
 * @param firstPage - first page to render, or a negative number -n for "the last n pages"
 * @param lastPage - last page to render (inclusive)
 * @param resolution - value passed to Ghostscript's `-r` option; defaults to 300
 * @returns one PNG buffer per rendered page, in page order
 * @throws Error when the page range is invalid for this document
 * @throws Error prefixed with "Failed to render PDF pages to PNG: " when Ghostscript or the file handling fails
 */
export async function renderPDFPagesToPNG(
  pdfBuffer: Buffer,
  firstPage?: number,
  lastPage?: number,
  resolution = 300,
): Promise<Buffer[]> {
  const numPages = await countPDFPages(pdfBuffer);

  let first = firstPage ?? 1;
  if (first === 0 || (first < 0 && first < -numPages)) throw new Error('First page number out of range: ' + first);

  let last = lastPage;
  if (first < 0) {
    // negative first page: count from the end of the document
    if (last !== undefined) throw new Error('Last page must be undefined when first page is negative');
    first = numPages + first + 1;
    last = numPages;
  }

  if (last === undefined) last = numPages;
  else if (last > numPages) throw new Error('Last page number out of range: ' + last);

  if (first > last) throw new Error('Invalid page range: ' + first + '-' + last);

  // constants so that the narrowed types are kept inside the callback below
  const rangeStart = first;
  const rangeEnd = last;

  try {
    return await useTempFilesPDFIn(pdfBuffer, async input => {
      const outDir = tempy.directory();
      try {
        await exec(
          `gs -q -dQUIET -dSAFER -dBATCH -dNOPAUSE -dNOPROMPT -dMaxBitmap=500000000 -dAlignToPixels=0 -dGridFitTT=2 -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r${resolution} -sOutputFile=${outDir}/%d.png -dFirstPage=${rangeStart} -dLastPage=${rangeEnd} ${input}`,
        );

        // the output files are read as 1.png, 2.png, … regardless of the page number the range starts at
        const outFiles: Buffer[] = [];
        for (let i = 1; i <= rangeEnd - rangeStart + 1; i++) {
          outFiles.push(await fs.readFile(outDir + '/' + i + '.png'));
        }
        return outFiles;
      } finally {
        // remove the output directory even if rendering or reading failed
        await fs.remove(outDir);
      }
    });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed to render PDF pages to PNG: ' + reason);
  }
}
220 |
/**
 * Checks whether a buffer is a PDF whose page count can be determined.
 *
 * @param pdfBuffer - the data to check
 * @returns `true` when `countPDFPages` succeeds for the buffer, `false` when it throws
 */
export async function isValidPDF(pdfBuffer: Buffer): Promise<boolean> {
  try {
    await countPDFPages(pdfBuffer);
  } catch (e) {
    // any failure to count the pages is treated as "not a valid PDF"
    return false;
  }
  return true;
}
229 |
/**
 * Tries to reduce the size of a PDF by re-writing it with Ghostscript (`/screen` settings, images downsampled
 * to 144 dpi).
 *
 * @param pdfBuffer - the PDF as a Buffer, or as a string that is decoded with `encoding`
 * @param encoding - encoding of `pdfBuffer` when it is a string; defaults to 'base64'
 * @returns the re-written PDF, or the (decoded) input when the input is smaller than Ghostscript's output
 * @throws Error prefixed with "Failed optimize PDF: " when decoding, Ghostscript or the file handling fails
 */
export async function compressPDF(pdfBuffer: Buffer | string, encoding?: BufferEncoding): Promise<Buffer> {
  try {
    const inputPdf = typeof pdfBuffer === 'string' ? Buffer.from(pdfBuffer, encoding ?? 'base64') : pdfBuffer;

    const compressedPdf = await useTempFilesPDFInOut(inputPdf, async (input, output) => {
      await exec(
        `gs -q -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dEmbedAllFonts=true -dSubsetFonts=true -dColorImageDownsampleType=/Bicubic -dColorImageResolution=144 -dGrayImageDownsampleType=/Bicubic -dGrayImageResolution=144 -dMonoImageDownsampleType=/Bicubic -dMonoImageResolution=144 -sOutputFile=${output} ${input}`,
      );
    });

    // never hand back something larger than what the caller gave us
    return inputPdf.length < compressedPdf.length ? inputPdf : compressedPdf;
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error('Failed optimize PDF: ' + reason);
  }
}
253 |
--------------------------------------------------------------------------------
/test/files/index.ts:
--------------------------------------------------------------------------------
1 | import fs from 'fs-extra';
2 | import path from 'path';
3 |
4 | export type FilesMap = { [filename: string]: Buffer };
5 |
/**
 * Loads every fixture file located next to this module into memory.
 *
 * The module's own file is skipped under both of its possible names
 * (`index.ts` when run from source, `index.js` when run from compiled output),
 * so it never shows up as a fixture.
 *
 * @returns A map from bare file name (no directory part) to file contents.
 */
export async function getFiles(): Promise<FilesMap> {
  const ownModuleNames = new Set(['index.ts', 'index.js']);
  const fileNames = (await fs.readdir(__dirname)).filter(f => !ownModuleNames.has(f));

  // The reads are independent of each other, so issue them concurrently.
  const contents = await Promise.all(fileNames.map(fileName => fs.readFile(path.join(__dirname, fileName))));

  const files: FilesMap = {};
  fileNames.forEach((fileName, i) => {
    files[fileName] = contents[i];
  });
  return files;
}
16 |
--------------------------------------------------------------------------------
/test/files/out2.txt:
--------------------------------------------------------------------------------
1 | 
2 |
--------------------------------------------------------------------------------
/test/files/pdf1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sigalor/ghostscript-node/1a7cb8a98de0d6a8b081d224cdb637ee5a8fe633/test/files/pdf1.pdf
--------------------------------------------------------------------------------
/test/files/pdf2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sigalor/ghostscript-node/1a7cb8a98de0d6a8b081d224cdb637ee5a8fe633/test/files/pdf2.pdf
--------------------------------------------------------------------------------
/test/files/pdf3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sigalor/ghostscript-node/1a7cb8a98de0d6a8b081d224cdb637ee5a8fe633/test/files/pdf3.pdf
--------------------------------------------------------------------------------
/test/files/renderPDFPagesToPNG-rotated-output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sigalor/ghostscript-node/1a7cb8a98de0d6a8b081d224cdb637ee5a8fe633/test/files/renderPDFPagesToPNG-rotated-output.png
--------------------------------------------------------------------------------
/test/files/renderPDFPagesToPNG-rotated-output2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sigalor/ghostscript-node/1a7cb8a98de0d6a8b081d224cdb637ee5a8fe633/test/files/renderPDFPagesToPNG-rotated-output2.png
--------------------------------------------------------------------------------
/test/index.test.ts:
--------------------------------------------------------------------------------
1 | import * as gs from '../src/index';
2 | import { FilesMap, getFiles } from './files';
3 |
// Fixture contents keyed by file name; populated once before any test runs.
let files: FilesMap;
beforeAll(() =>
  getFiles().then(loaded => {
    files = loaded;
  }),
);
8 |
// Merging pdf1.pdf and pdf2.pdf must produce a document with 4 pages.
describe('combinePDFs', () => {
  test('works', async () => {
    const inputs = [files['pdf1.pdf'], files['pdf2.pdf']];
    const merged = await gs.combinePDFs(inputs);
    expect(await gs.countPDFPages(merged)).toBe(4);
  });
});
15 |
// Page counting: known fixtures report their page counts, garbage input rejects.
describe('countPDFPages', () => {
  test('works', async () => {
    expect(await gs.countPDFPages(files['pdf1.pdf'])).toBe(1);
    expect(await gs.countPDFPages(files['pdf2.pdf'])).toBe(3);
  });

  test('fails for invalid PDF', async () => {
    const notAPdf = Buffer.from([1, 2, 3]);
    const counting = gs.countPDFPages(notAPdf);
    await expect(counting).rejects.toThrow(/^Failed to determine number of pages in PDF: Command failed/);
  });
});
28 |
// Extracting pages 2..3 of pdf2.pdf must leave exactly two pages.
describe('extractPDFPages', () => {
  test('works', async () => {
    const extracted = await gs.extractPDFPages(files['pdf2.pdf'], 2, 3);
    expect(await gs.countPDFPages(extracted)).toBe(2);
  });
});
35 |
// Rotating pdf2.pdf must not change its page count.
describe('rotatePDF', () => {
  test('works', async () => {
    const rotated = await gs.rotatePDF(files['pdf2.pdf'], '90');
    expect(await gs.countPDFPages(rotated)).toBe(3);
  });
});
42 |
// Converting pdf2.pdf with convertToPDFA must keep all three pages.
describe('convertToPDFA', () => {
  test('works', async () => {
    const converted = await gs.convertToPDFA(files['pdf2.pdf']);
    expect(await gs.countPDFPages(converted)).toBe(3);
  });
});
49 |
// PNG rendering: page-range selection, argument validation and rotated output.
describe('renderPDFPagesToPNG', () => {
  test('works for single page PDF', async () => {
    const pages = await gs.renderPDFPagesToPNG(files['pdf1.pdf']);
    expect(pages).toHaveLength(1);
  });

  test('works for three page PDF', async () => {
    const pages = await gs.renderPDFPagesToPNG(files['pdf2.pdf']);
    expect(pages).toHaveLength(3);
  });

  test('works for second page of a PDF', async () => {
    const pages = await gs.renderPDFPagesToPNG(files['pdf2.pdf'], 2, 2);
    expect(pages).toHaveLength(1);
  });

  test('works for the last two pages of a PDF', async () => {
    const pages = await gs.renderPDFPagesToPNG(files['pdf2.pdf'], -2);
    expect(pages).toHaveLength(2);
  });

  test('fails when first page is 0', async () => {
    const rendering = gs.renderPDFPagesToPNG(files['pdf1.pdf'], 0);
    await expect(rendering).rejects.toThrow('First page number out of range: 0');
  });

  test('fails when last page is out of range', async () => {
    const rendering = gs.renderPDFPagesToPNG(files['pdf1.pdf'], undefined, 3);
    await expect(rendering).rejects.toThrow('Last page number out of range: 3');
  });

  test('fails when last page number is given when first page number is negative', async () => {
    const rendering = gs.renderPDFPagesToPNG(files['pdf1.pdf'], -1, 2);
    await expect(rendering).rejects.toThrow('Last page must be undefined when first page is negative');
  });

  test('fails for invalid PDF', async () => {
    const notAPdf = Buffer.from([1, 2, 3]);
    const rendering = gs.renderPDFPagesToPNG(notAPdf);
    await expect(rendering).rejects.toThrow(/^Failed to determine number of pages in PDF: Command failed/);
  });

  test('correctly renders rotated PDF', async () => {
    const rotatedPdf = await gs.rotatePDF(files['pdf1.pdf'], '90');
    const rendered = await gs.renderPDFPagesToPNG(rotatedPdf);
    const firstPage = rendered[0];

    // Two reference renderings are accepted; the output must match one of them exactly.
    const acceptedOutputs = [
      'renderPDFPagesToPNG-rotated-output.png',
      'renderPDFPagesToPNG-rotated-output2.png',
    ].map(name => files[name].toString('base64'));

    expect(acceptedOutputs).toContain(firstPage.toString('base64'));
  });
});
102 |
// Validity check: real PDFs (including re-processed ones) pass, garbage bytes do not.
describe('isValidPDF', () => {
  test('returns true for valid PDF', async () => {
    await expect(gs.isValidPDF(files['pdf1.pdf'])).resolves.toBe(true);
  });

  test('returns true for another valid PDF', async () => {
    await expect(gs.isValidPDF(files['pdf3.pdf'])).resolves.toBe(true);
  });

  test('returns true for PDF that has been extracted and combined again', async () => {
    const firstPage = await gs.extractPDFPages(files['pdf3.pdf'], 1, 1);
    const recombined = await gs.combinePDFs([firstPage]);
    await expect(gs.isValidPDF(recombined)).resolves.toBe(true);
  });

  test('returns false for invalid PDF', async () => {
    const notAPdf = Buffer.from([1, 2, 3]);
    await expect(gs.isValidPDF(notAPdf)).resolves.toBe(false);
  });
});
122 |
123 |
// compressPDF must never return something larger than its input, and what it
// returns must still be a PDF. A size-only check would also pass for an empty
// or corrupt result, so validity is asserted as well.
describe('compressPDF', () => {
  test('returns PDF reduce size send buffer file', async () => {
    const original = files['pdf3.pdf'];
    const optimizedPDF = await gs.compressPDF(original);
    expect(optimizedPDF.length).toBeLessThanOrEqual(original.length);
    expect(await gs.isValidPDF(optimizedPDF)).toBe(true);
  });

  test('returns PDF reduce size send string encoded file', async () => {
    const original = files['pdf3.pdf'];
    // No encoding argument is passed, so compressPDF falls back to base64 decoding.
    const optimizedPDF = await gs.compressPDF(original.toString('base64'));
    expect(optimizedPDF.length).toBeLessThanOrEqual(original.length);
    expect(await gs.isValidPDF(optimizedPDF)).toBe(true);
  });
});
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "incremental": true,
4 | "target": "ES2018",
5 | "module": "commonjs",
6 | "declaration": true,
7 | "outDir": "./dist",
8 | "esModuleInterop": true,
9 | "forceConsistentCasingInFileNames": true,
10 | "strict": true,
11 | "skipLibCheck": true
12 | },
13 | "exclude": [
14 | "test/*"
15 | ]
16 | }
17 |
--------------------------------------------------------------------------------