├── .gitignore ├── .npmignore ├── .travis.yml ├── LICENSE ├── README.md ├── bin └── pdf-text-extract.js ├── index.js ├── package.json └── test ├── buffered-extract-test.js ├── data ├── huge.pdf ├── multipage.pdf ├── multipage.txt └── pdf with space in name.pdf ├── extract-test.js ├── promise-buffered-extract-test.js └── promise-extract-test.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log 3 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | test/ 2 | .travis.yml -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | before_install: 2 | - sudo apt-get update -qq 3 | - sudo apt-get install -qq poppler-utils 4 | 5 | language: node_js 6 | node_js: 7 | - "stable" 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, ftorto 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDF Text Extract 2 | 3 | Extract text from pdfs that contain searchable pdf text. The module is a wrapper that calls the `pdftotext` command to perform the actual extraction. 4 | 5 | [![Build Status](https://travis-ci.org/nisaacson/pdf-text-extract.png?branch=master)](https://travis-ci.org/nisaacson/pdf-text-extract) [![Dependency Status](https://david-dm.org/nisaacson/pdf-text-extract.png)](https://david-dm.org/nisaacson/pdf-text-extract) 6 | 7 | # Installation 8 | ```bash 9 | npm install --save pdf-text-extract 10 | ``` 11 | 12 | 13 | You will need the `pdftotext` binary available on your path. There are packages available for many different operating systems. 14 | 15 | See [https://github.com/nisaacson/pdf-extract#osx](https://github.com/nisaacson/pdf-extract#osx) for how to install the `pdftotext` command 16 | 17 | 18 | # Usage 19 | 20 | ## As a module 21 | 22 | `extract(filePath, [options], [pdftotextcommand], callback)` 23 | 24 | Options and pdftotextcommand are not required. 
25 | 26 | 27 | ```javascript 28 | var path = require('path') 29 | var filePath = path.join(__dirname, 'test/data/multipage.pdf') 30 | var extract = require('pdf-text-extract') 31 | extract(filePath, function (err, pages) { 32 | if (err) { 33 | console.dir(err) 34 | return 35 | } 36 | console.dir(pages) 37 | }) 38 | ``` 39 | The output will be an array where each entry is a page of text. If you want just a string of all pages you can set the option to `splitPages: false`. 40 | 41 | ```javascript 42 | var filePath = path.join(__dirname, 'test/data/multipage.pdf') 43 | var extract = require('pdf-text-extract') 44 | extract(filePath, { splitPages: false }, function (err, text) { 45 | if (err) { 46 | console.dir(err) 47 | return 48 | } 49 | console.dir(text) 50 | }) 51 | ``` 52 | 53 | You can set the following options: 54 | - `firstPage`: First page to extract 55 | - `lastPage`: Last page to extract 56 | - `resolution`: in dpi, as is specified by pdftotext -r 57 | - `crop`: Should be an object { x:x, y:y, w:w, h:h } 58 | - `layout`: Should be either `layout`, `raw` or `htmlmeta`. Default: `layout` 59 | - `encoding`: Should be either `UCS-2`, `ASCII7`, `Latin1`, `UTF-8`, `ZapfDingbats` or `Symbol`. Default: `UTF-8` 60 | - `eol`: End of line convention. One of either: `unix`, `dos` or `mac` 61 | - `ownerPassword`: Owner password (for encrypted files) 62 | - `userPassword`: User password (for encrypted files) 63 | - `splitPages`: If true, the result will be an array of pages. Default: true. 64 | 65 | 66 | If needed you can pass optional arguments to the extract function. These will be passed to the `child_process.spawn` call. 
67 | 68 | ```javascript 69 | var filePath = path.join(__dirname, 'test/data/multipage.pdf') 70 | var extract = require('pdf-text-extract') 71 | var options = { 72 | cwd: "./" 73 | } 74 | extract(filePath, options, function (err, pages) { 75 | if (err) { 76 | console.dir(err) 77 | return 78 | } 79 | console.log('extracted pages', pages) 80 | }) 81 | ``` 82 | 83 | You can also override the command for `pdftotext` if it is installed in a location that is not available in the `PATH` environment variable. 84 | 85 | 86 | ```javascript 87 | var filePath = path.join(__dirname, 'test/data/multipage.pdf') 88 | var pdfToTextCommand = '/opt/bin/pdftotext' 89 | var extract = require('pdf-text-extract') 90 | var options = { 91 | cwd: "./" 92 | } 93 | extract(filePath, options, pdfToTextCommand, function (err, pages) { 94 | if (err) { 95 | console.dir(err) 96 | return 97 | } 98 | console.log('extracted pages', pages) 99 | }) 100 | ``` 101 | 102 | 103 | ES6 promises are supported. Omit the callback, create the extractor with `new`, and call `.then(onFulfilled[, onRejected])` on it: 104 | 105 | ```javascript 106 | var filePath = path.join(__dirname, 'test/data/multipage.pdf') 107 | var Extract = require('pdf-text-extract') 108 | var extract = new Extract(filePath) 109 | 110 | extract.then(function (pages) { 111 | console.log('extracted pages', pages) 112 | }).catch(function (err) { 113 | console.error('error:', err) 114 | }) 115 | ``` 116 | 117 | 118 | ## As a command line tool 119 | 120 | ```bash 121 | npm install -g pdf-text-extract 122 | ``` 123 | 124 | Execute with the filePath as an argument. 
Output will be json-formatted array of pages 125 | 126 | ```bash 127 | pdf-text-extract ./test/data/multipage.pdf 128 | # outputs 129 | # ['', ''] 130 | ``` 131 | 132 | # Test 133 | 134 | ```bash 135 | # install dev dependencies 136 | npm install 137 | # run tests 138 | npm test 139 | -------------------------------------------------------------------------------- /bin/pdf-text-extract.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var extract = require('../index') 4 | 5 | var path = require('path') 6 | var fileName = process.argv[2] 7 | if (!fileName) { 8 | throw new Error('file path must be specified as the argument like "pdf-text-extract /path/to/file"') 9 | } 10 | var filePath = path.resolve(fileName) 11 | extract(filePath, cb) 12 | 13 | function cb (err, pages) { 14 | if (err) { 15 | throw err 16 | } 17 | console.dir(pages) 18 | } 19 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | var spawn = require('child_process').spawn 3 | 4 | function pdfTextExtract (filePath, options, pdfToTextCommand, cb) { 5 | if (!cb) { 6 | cb = pdfToTextCommand 7 | } 8 | if (!pdfToTextCommand) { 9 | cb = options 10 | } 11 | // options is optional 12 | if (typeof (options) === 'function') { 13 | cb = options 14 | options = {} 15 | } 16 | if (typeof (pdfToTextCommand) === 'function') { 17 | cb = pdfToTextCommand 18 | pdfToTextCommand = 'pdftotext' 19 | } 20 | if (!pdfToTextCommand) { 21 | pdfToTextCommand = 'pdftotext' 22 | } 23 | 24 | filePath = path.resolve(filePath) 25 | 26 | // [feat-promise] if cb is not a function, then it's probably a promise-typed call 27 | if (typeof (cb) !== 'function') { 28 | cb = null 29 | } 30 | 31 | // [feat-promise] options have to be not null 32 | if (!options) { 33 | options = {} 34 | } 35 | 36 | // default options 37 | 
options.encoding = options.encoding || 'UTF-8' 38 | options.layout = options.layout || 'layout' 39 | options.splitPages = (options.splitPages !== false) 40 | 41 | // Build args based on options 42 | var args = [] 43 | 44 | // First and last page to convert 45 | if (options.firstPage) { args.push('-f'); args.push(options.firstPage) } 46 | if (options.lastPage) { args.push('-l'); args.push(options.lastPage) } 47 | 48 | // Resolution, in dpi. (null is pdftotext default = 72) 49 | if (options.resolution) { args.push('-r'); args.push(options.resolution) } 50 | 51 | // If defined, should be an object { x:x, y:y, w:w, h:h } 52 | if (typeof (options.crop) === 'object') { 53 | if (options.crop.x) { args.push('-x'); args.push(options.crop.x) } 54 | if (options.crop.y) { args.push('-y'); args.push(options.crop.y) } 55 | if (options.crop.w) { args.push('-W'); args.push(options.crop.w) } 56 | if (options.crop.h) { args.push('-H'); args.push(options.crop.h) } 57 | } 58 | 59 | // One of either 'layout', 'raw' or 'htmlmeta' 60 | if (options.layout === 'layout') { args.push('-layout') } 61 | if (options.layout === 'raw') { args.push('-raw') } 62 | if (options.layout === 'htmlmeta') { args.push('-htmlmeta') } 63 | 64 | // Output text encoding (UCS-2, ASCII7, Latin1, UTF-8, ZapfDingbats or Symbol) 65 | if (options.encoding) { args.push('-enc'); args.push(options.encoding) } 66 | 67 | // Output end of line convention (unix, dos or mac) 68 | if (options.eol) { args.push('-eol'); args.push(options.eol) } 69 | 70 | // Owner and User password (for encrypted files) 71 | if (options.ownerPassword) { args.push('-opw'); args.push(options.ownerPassword) } 72 | if (options.userPassword) { args.push('-upw'); args.push(options.userPassword) } 73 | 74 | // finish up arguments 75 | args.push(filePath) 76 | args.push('-') 77 | 78 | function splitPages (err, content) { 79 | if (err) { 80 | return cb(err) 81 | } 82 | var pages = content.split(/\f/) 83 | if (!pages) { 84 | return cb({ 85 | message: 
'pdf-text-extract failed', 86 | error: 'no text returned from the pdftotext command', 87 | filePath: filePath, 88 | stack: new Error().stack 89 | }) 90 | } 91 | // sometimes there can be an extract blank page on the end 92 | var lastPage = pages[pages.length - 1] 93 | if (!lastPage) { 94 | pages.pop() 95 | } 96 | cb(null, pages) 97 | } 98 | // [feat-promise] 99 | // if cb is not defined, then it's probably a promise-typed call 100 | // in order to use promise, instantiation is required 101 | if (!cb) { 102 | this.pdfToTextCommand = pdfToTextCommand 103 | this.args = args 104 | this.options = options 105 | this.splitPages = splitPages 106 | this.filePath = filePath 107 | } else { 108 | streamResults(pdfToTextCommand, args, options, options.splitPages ? splitPages : cb) 109 | } 110 | } 111 | 112 | /** 113 | * spawns pdftotext and returns its output 114 | */ 115 | function streamResults (command, args, options, cb) { 116 | var output = '' 117 | var stderr = '' 118 | var child = spawn(command, args, options) 119 | child.stdout.setEncoding('utf8') 120 | child.stderr.setEncoding('utf8') 121 | child.stdout.on('data', stdoutHandler) 122 | child.stderr.on('data', stderrHandler) 123 | child.on('close', closeHandler) 124 | 125 | function stdoutHandler (data) { 126 | output += data 127 | } 128 | 129 | function stderrHandler (data) { 130 | stderr += data 131 | } 132 | 133 | function closeHandler (code) { 134 | if (code !== 0) { 135 | return cb(new Error('pdf-text-extract command failed: ' + stderr)) 136 | } 137 | cb(null, output) 138 | } 139 | } 140 | 141 | /** 142 | * [feat-promise] 143 | * Promise support 144 | * 145 | * @param {Function} resolve 146 | * @param {Function} [reject] 147 | * @return {Request} 148 | */ 149 | pdfTextExtract.prototype.then = function (resolve, reject) { 150 | if (!this._fullfilledPromise) { 151 | var self = this 152 | this._fullfilledPromise = new Promise(function (innerResolve, innerReject) { 153 | streamResultsPromise(self.pdfToTextCommand, 
self.args, self.options, self.options.splitPages ? splitPagesPromise : resolve) 154 | }) 155 | } 156 | 157 | /** 158 | * Duplicated from function splitPages of pdfTextExtract 159 | */ 160 | function splitPagesPromise (content) { 161 | var pages = content.split(/\f/) 162 | if (!pages) { 163 | var ex = { 164 | message: 'pdf-text-extract failed', 165 | error: 'no text returned from the pdftotext command', 166 | filePath: this.filePath, 167 | stack: new Error().stack 168 | } 169 | throw ex 170 | } 171 | // sometimes there can be an extract blank page on the end 172 | var lastPage = pages[pages.length - 1] 173 | if (!lastPage) { 174 | pages.pop() 175 | } 176 | resolve(pages) 177 | } 178 | 179 | /** 180 | * Duplicated from function splitPages of streamResults 181 | */ 182 | function streamResultsPromise (command, args, options, cb) { 183 | var output = '' 184 | var stderr = '' 185 | var child = spawn(command, args, options) 186 | child.stdout.setEncoding('utf8') 187 | child.stderr.setEncoding('utf8') 188 | child.stdout.on('data', stdoutHandler) 189 | child.stderr.on('data', stderrHandler) 190 | child.on('close', closeHandler) 191 | 192 | function stdoutHandler (data) { 193 | output += data 194 | } 195 | 196 | function stderrHandler (data) { 197 | stderr += data 198 | } 199 | 200 | function closeHandler (code) { 201 | if (code !== 0) { 202 | var ex = new Error('pdf-text-extract command failed: ' + stderr) 203 | throw ex 204 | } 205 | cb(output) 206 | } 207 | } 208 | 209 | return this._fullfilledPromise.then(resolve, reject) 210 | } 211 | 212 | module.exports = pdfTextExtract 213 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pdf-text-extract", 3 | "version": "1.5.0", 4 | "description": "Extract text from pdfs that contain searchable pdf text", 5 | "main": "index.js", 6 | "bin": "./bin/pdf-text-extract.js", 7 | 
"directories": { 8 | "test": "test" 9 | }, 10 | "scripts": { 11 | "test": "node_modules/.bin/mocha --reporter spec", 12 | "pretest": "standard | snazzy" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "git://github.com/nisaacson/pdf-text-extract.git" 17 | }, 18 | "keywords": [ 19 | "pdf", 20 | "extract", 21 | "pdftotext", 22 | "text", 23 | "extract" 24 | ], 25 | "author": "Noah Isaacson", 26 | "license": "BSD", 27 | "readmeFilename": "README.md", 28 | "devDependencies": { 29 | "mocha": "~1.8.2", 30 | "should": "~1.2.2", 31 | "snazzy": "^2.0.1", 32 | "standard": "^5.3.1" 33 | }, 34 | "dependencies": { 35 | "yargs": "^1.2.5" 36 | }, 37 | "standard": { 38 | "globals": [ 39 | "describe", 40 | "before", 41 | "beforeEach", 42 | "after", 43 | "afterEach", 44 | "it" 45 | ] 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /test/buffered-extract-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var extract = require('../index.js') 5 | var should = require('should') 6 | describe('Buffered Extract', function () { 7 | it('should extract text', function (done) { 8 | var desiredNumPages = 8 9 | var filePath = path.join(__dirname, 'data', 'multipage.pdf') 10 | assert.ok(fs.existsSync(filePath), 'pdf file not found at path: ' + filePath) 11 | extract(filePath, function (err, pages) { 12 | should.not.exist(err) 13 | should.exist(pages, 'no pages extracted') 14 | pages.length.should.eql(desiredNumPages) 15 | pages.map(function (page) { 16 | should.exist(page, 'page text content should exist') 17 | page.length.should.be.above(0) 18 | }) 19 | done() 20 | }) 21 | }) 22 | }) 23 | -------------------------------------------------------------------------------- /test/data/huge.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nisaacson/pdf-text-extract/d21ead42859aae859d3f20b79ebd5c801b21837d/test/data/huge.pdf -------------------------------------------------------------------------------- /test/data/multipage.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nisaacson/pdf-text-extract/d21ead42859aae859d3f20b79ebd5c801b21837d/test/data/multipage.pdf -------------------------------------------------------------------------------- /test/data/multipage.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nisaacson/pdf-text-extract/d21ead42859aae859d3f20b79ebd5c801b21837d/test/data/multipage.txt -------------------------------------------------------------------------------- /test/data/pdf with space in name.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nisaacson/pdf-text-extract/d21ead42859aae859d3f20b79ebd5c801b21837d/test/data/pdf with space in name.pdf -------------------------------------------------------------------------------- /test/extract-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var extract = require('../index.js') 5 | var should = require('should') 6 | 7 | describe('Pdf extract', function () { 8 | it('should return output and no error when everything is ok', function (done) { 9 | var filePath = path.join(__dirname, 'data', 'multipage.pdf') 10 | 11 | extract(filePath, function (err, pages) { 12 | should.not.exist(err) 13 | should.exists(pages) 14 | done() 15 | }) 16 | }) 17 | 18 | it('should accept files with space in name', function (done) { 19 | var filePath = path.join(__dirname, 'data', 'pdf with space in name.pdf') 20 | assert.ok(fs.existsSync(filePath), 'pdf file not found at path: ' + 
filePath) 21 | 22 | extract(filePath, function (err, pages) { 23 | should.not.exist(err) 24 | should.exist(pages) 25 | 26 | done() 27 | }) 28 | }) 29 | 30 | it('should work with parallel data streams', function (done) { 31 | var filePath = path.join(__dirname, 'data', 'pdf with space in name.pdf') 32 | 33 | var streams = 10 34 | var complete = 0 35 | for (var i = 0; i < streams; i++) { 36 | extract(filePath, function (err, pages) { 37 | should.not.exist(err) 38 | should.exists(pages[0]) 39 | complete++ 40 | if (complete === streams) { 41 | done() 42 | } 43 | }) 44 | } 45 | }) 46 | 47 | it('should allow large files', function (done) { 48 | this.timeout(5000) 49 | this.slow('4s') 50 | var filePath = path.join(__dirname, 'data', 'huge.pdf') 51 | 52 | var options = { 53 | cwd: null 54 | } 55 | extract(filePath, options, function (err, pages) { 56 | should.not.exists(err) 57 | should.exists(pages) 58 | done() 59 | }) 60 | }) 61 | 62 | it('should support custom pdftotext command undefined err when everything is ok', function (done) { 63 | var filePath = path.join(__dirname, 'data', 'multipage.pdf') 64 | var options = {} 65 | var pdfToTextCommand = 'pdftotext' 66 | 67 | extract(filePath, options, pdfToTextCommand, function (err, pages) { 68 | should.not.exist(err) 69 | should.exists(pages) 70 | done() 71 | }) 72 | }) 73 | }) 74 | -------------------------------------------------------------------------------- /test/promise-buffered-extract-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var Extract = require('../index.js') 5 | var should = require('should') 6 | 7 | describe('Buffered Extract Promise', function () { 8 | it('should extract text', function (done) { 9 | var desiredNumPages = 8 10 | var filePath = path.join(__dirname, 'data', 'multipage.pdf') 11 | assert.ok(fs.existsSync(filePath), 'pdf file not found at path: ' + filePath) 
12 | var extractor = new Extract(filePath) 13 | extractor.then(function (pages) { 14 | should.exist(pages, 'no pages extracted') 15 | pages.length.should.eql(desiredNumPages) 16 | pages.map(function (page) { 17 | should.exist(page, 'page text content should exist') 18 | page.length.should.be.above(0) 19 | }) 20 | done() 21 | }).catch(function (err) { 22 | console.error('error:', err) 23 | }) 24 | }) 25 | }) 26 | -------------------------------------------------------------------------------- /test/promise-extract-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var Extract = require('../index.js') 5 | var should = require('should') 6 | 7 | describe('Pdf extract', function () { 8 | it('should return output and no error when everything is ok', function (done) { 9 | var filePath = path.join(__dirname, 'data', 'multipage.pdf') 10 | var extractor = new Extract(filePath) 11 | extractor.then(function (pages) { 12 | should.exists(pages) 13 | done() 14 | }).catch(function (err) { 15 | console.error('error:', err) 16 | }) 17 | }) 18 | 19 | it('should accept files with space in name', function (done) { 20 | var filePath = path.join(__dirname, 'data', 'pdf with space in name.pdf') 21 | assert.ok(fs.existsSync(filePath), 'pdf file not found at path: ' + filePath) 22 | 23 | var extractor = new Extract(filePath) 24 | extractor.then(function (pages) { 25 | should.exist(pages) 26 | done() 27 | }).catch(function (err) { 28 | console.error('error:', err) 29 | }) 30 | }) 31 | 32 | it('should work with parallel data streams', function (done) { 33 | var filePath = path.join(__dirname, 'data', 'pdf with space in name.pdf') 34 | 35 | var streams = 10 36 | var complete = 0 37 | for (var i = 0; i < streams; i++) { 38 | var extractor = new Extract(filePath) 39 | extractor.then(function (pages) { 40 | should.exists(pages[0]) 41 | complete++ 42 | if 
(complete === streams) { 43 | done() 44 | } 45 | }).catch(function (err) { 46 | console.error('error:', err) 47 | }) 48 | } 49 | }) 50 | 51 | it('should allow large files', function (done) { 52 | this.timeout(5000) 53 | this.slow('4s') 54 | var filePath = path.join(__dirname, 'data', 'huge.pdf') 55 | 56 | var options = { 57 | cwd: null 58 | } 59 | var extractor = new Extract(filePath, options) 60 | extractor.then(function (pages) { 61 | should.exists(pages) 62 | done() 63 | }).catch(function (err) { 64 | console.error('error:', err) 65 | }) 66 | }) 67 | 68 | it('should support custom pdftotext command undefined err when everything is ok', function (done) { 69 | var filePath = path.join(__dirname, 'data', 'multipage.pdf') 70 | var options = {} 71 | var pdfToTextCommand = 'pdftotext' 72 | 73 | var extractor = new Extract(filePath, options, pdfToTextCommand) 74 | extractor.then(function (pages) { 75 | should.exists(pages) 76 | done() 77 | }).catch(function (err) { 78 | console.error('error:', err) 79 | }) 80 | }) 81 | }) 82 | --------------------------------------------------------------------------------