├── .gitignore ├── test ├── test_results │ └── .gitignore ├── test_data │ ├── test.docx │ ├── test.pdf │ ├── pagesizes.json │ ├── propertystream.json │ ├── textstream.txt │ └── contentstream.json ├── testfile.js ├── streams.js ├── images.js └── pages.js ├── example ├── test.pdf └── test.js ├── .github └── workflows │ └── run_tests.yml ├── package.json ├── LICENSE-MIT ├── bin ├── simple_rasterize.js ├── rasterize.js └── crop.js ├── README.md ├── .eslintrc ├── LICENSE-APACHE ├── scissors.js └── contrib └── ps2ascii.ps /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | doc 3 | .idea 4 | -------------------------------------------------------------------------------- /test/test_results/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /example/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcr/scissors/HEAD/example/test.pdf -------------------------------------------------------------------------------- /test/test_data/test.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcr/scissors/HEAD/test/test_data/test.docx -------------------------------------------------------------------------------- /test/test_data/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcr/scissors/HEAD/test/test_data/test.pdf -------------------------------------------------------------------------------- /test/test_data/pagesizes.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "width": "612", 4 | "height": "828", 5 | "unit": "pt" 6 | }, 7 | { 8 | "width": "612", 9 | "height": "828", 10 | "unit": "pt" 11 | } 12 | ] -------------------------------------------------------------------------------- /example/test.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'); 2 | var scissors = require('..'); 3 | 4 | var pdf = scissors(__dirname + '/test.pdf'); 5 | var page = pdf.pages(2); 6 | 7 | // Streams 8 | page.pdfStream().pipe(fs.createWriteStream(__dirname + '/test-page.pdf')); 9 | page.pngStream(300).pipe(fs.createWriteStream(__dirname + '/test-page.png')); 10 | 11 | // All content 12 | pdf.contentStream().on('data', function (item) { 13 | if (item.type == 'string') { 14 | console.log(item.string); 15 | } else if (item.type == 'image') { 16 | console.log(item); 17 | } 18 | }); 19 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Scissor Tests 2 | on: 3 | - pull_request 4 | - push 5 | 6 | jobs: 7 | run_tests: 8 | name: Run Scissors tests 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | node-version: [10.x, 12.x, 14.x] 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Install node.js ${{ matrix.node-version }} 16 | uses: actions/setup-node@v1 17 | with: 18 | node-version: ${{ matrix.node-version }} 19 | - name: Install Ubuntu package dependencies 20 | run: | 21 | sudo apt-get install -y pdftk ghostscript imagemagick poppler-utils 22 | npm install --no-optional 23 | - name: Run tests 24 | run: npm test 25 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scissors", 3 | "version": "0.2.3", 4 | "description": "PDF manipulation in Node.js, based on PDFTK.", 5 | "main": "scissors.js", 6 | "dependencies": { 7 | "any-promise": "^1.3.0", 8 | "async": "^2.4.0", 9 | "bufferjs": "^3.0.1", 10 | "bufferstream": "^0.6.2", 11 | "rimraf": "^2.6.2", 12 | "temp": "^0.8.1" 13 | }, 14 | "scripts": { 15 | "test": "mocha" 16 | }, 17 | "devDependencies": { 18 | "assert-diff": "^1.2.0", 19 | "mocha": "^10.4.0", 20 | "stream-to-promise": "^2.2.0" 21 | }, 22 | "repository": { 23 | "type": "git", 24 | "url": "https://github.com/tcr/scissors.git" 25 | }, 26 | "author": "Tim Cameron Ryan (@tcr) and others", 27 | "license": "MIT", 28 | "readmeFilename": "README.md", 29 | "keywords": [ 30 | "pdf", 31 | "manipulation", 32 | "postscript", 33 | "ghostscript", 34 | "document", 35 | "split", 36 | "join", 37 | "crop", 38 | "PDFTK" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2014 Technical Machine, Inc 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /bin/simple_rasterize.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var fs = require('fs'); 4 | var spawn = require('child_process').spawn; 5 | 6 | require('bufferjs/indexOf'); 7 | 8 | function debug () { 9 | //console.error.apply(console, arguments); 10 | } 11 | 12 | function rasterizeImage (ins, page, dpi, format, useCropBox) { 13 | var device; 14 | if (format == 'png') { 15 | device = 'png16m'; 16 | } 17 | else { 18 | device = 'jpeg'; 19 | } 20 | 21 | var gsArgs = [ 22 | '-q', 23 | '-sDEVICE=' + device, 24 | '-sOutputFile=-', 25 | '-r' + dpi, 26 | '-dNOPAUSE', 27 | '-dBATCH', 28 | '-dFirstPage=' + page, 29 | '-dLastPage=' + page, 30 | '-f', 31 | '-' 32 | ]; 33 | 34 | if (useCropBox) { 35 | gsArgs.unshift('-dUseCropBox'); 36 | } 37 | 38 | var gs = spawn('gs', gsArgs); 39 | 40 | ins.pipe(gs.stdin); 41 | 42 | gs.stderr.on('data', function (data) { 43 | console.error('gs encountered an error:\n', String(data)); 44 | }); 45 | 46 | gs.on('exit', function (code) { 47 | if (code) { 48 | console.error('gs exited with failure code:', code); 49 | } 50 | debug('Finished writing image.'); 51 | }); 52 | 53 | return gs.stdout; 54 | } 55 | 56 | if (process.argv.length < 5) { 57 | console.error('Invalid number of arguments.'); 58 | process.exit(1); 59 | } 60 | 61 | var input = process.argv[2]; 62 | var format = process.argv[3]; 63 | var page = Number(process.argv[4]) || 1; 64 | var dpi = Number(process.argv[5]) || 72; 65 | var useCropBox = process.argv[6] == 'true'; 66 | 67 | var inputStream = input == '-' ? process.stdin : fs.createReadStream(input); 68 | rasterizeImage(inputStream, page, dpi, format, useCropBox).pipe(process.stdout); 69 | -------------------------------------------------------------------------------- /test/test_data/propertystream.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "event": "PdfID0", 4 | "value": 6962940 5 | }, 6 | { 7 | "event": "PdfID1", 8 | "value": 6962940 9 | }, 10 | { 11 | "event": "NumberOfPages", 12 | "value": 10 13 | }, 14 | { 15 | "event": "PageMediaNumber", 16 | "value": 1 17 | }, 18 | { 19 | "event": "PageMediaDimensions", 20 | "value": 595 21 | }, 22 | { 23 | "event": "PageMediaNumber", 24 | "value": 2 25 | }, 26 | { 27 | "event": "PageMediaDimensions", 28 | "value": 595 29 | }, 30 | { 31 | "event": "PageMediaNumber", 32 | "value": 3 33 | }, 34 | { 35 | "event": "PageMediaDimensions", 36 | "value": 595 37 | }, 38 | { 39 | "event": "PageMediaNumber", 40 | "value": 4 41 | }, 42 | { 43 | "event": "PageMediaDimensions", 44 | "value": 595 45 | }, 46 | { 47 | "event": "PageMediaNumber", 48 | "value": 5 49 | }, 50 | { 51 | "event": "PageMediaDimensions", 52 | "value": 595 53 | }, 54 | { 55 | "event": "PageMediaNumber", 56 | "value": 6 57 | }, 58 | { 59 | "event": "PageMediaDimensions", 60 | "value": 595 61 | }, 62 | { 63 | "event": "PageMediaNumber", 64 | "value": 7 65 | }, 66 | { 67 | "event": "PageMediaDimensions", 68 | "value": 595 69 | }, 70 | { 71 | "event": "PageMediaNumber", 72 | "value": 8 73 | }, 74 | { 75 | "event": "PageMediaDimensions", 76 | "value": 595 77 | }, 78 | { 79 | "event": "PageMediaNumber", 80 | "value": 9 81 | }, 82 | { 83 | "event": "PageMediaDimensions", 84 | "value": 595 85 | }, 86 | { 87 | "event": "PageMediaNumber", 88 | "value": 10 89 | }, 90 | { 91 | "event": "PageMediaDimensions", 92 | "value": 595 93 | } 94 | ] -------------------------------------------------------------------------------- /test/test_data/textstream.txt: -------------------------------------------------------------------------------- 1 | 1 This is page 1 containing an image. Source: https://commons.wikimedia.org/wiki/File:Test_card.png 3 vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, At accusam aliquyam diam diam dolore dolores duo eirmod eos erat, et nonumy sed tempor et et invidunt justo labore Stet clita ea et gubergren, kasd magna no rebum. sanctus sea sed takimata ut vero voluptua. est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat. Consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. -------------------------------------------------------------------------------- /test/testfile.js: -------------------------------------------------------------------------------- 1 | /** @module test.testfile */ 2 | 3 | var fs = require('fs'); 4 | var assert = require('assert-diff'); 5 | var scissors = require('../scissors'); 6 | 7 | /** 8 | * Represents a test result file 9 | * @class 10 | * @param {String} name Name of the test (will be used as filename) 11 | * @param {String} ext (optional) file extension (without dot). Defaults to 'pdf' 12 | * @return {Testfile} An instance of this class 13 | */ 14 | var Testfile = function(name,ext){ 15 | this.name = name; 16 | this.ext = ext||'tmp'; 17 | this.path = __dirname + '/test_results/' + name + '.' + (ext||'pdf'); 18 | this.remove(); 19 | }; 20 | 21 | /** 22 | * Returns the path to the file 23 | * @return {string} 24 | */ 25 | Testfile.prototype.getPath = function(){ 26 | return this.path; 27 | }; 28 | 29 | /** 30 | * Throws an assertion error if file does not exist or is of size 0 31 | * @return {Testfile} The testfile instance 32 | */ 33 | Testfile.prototype.assertExists = function(){ 34 | assert.equal(true,fs.existsSync(this.getPath()), 'File does not exist'); 35 | assert.equal(true,fs.statSync(this.getPath()).size > 0, 'File size is 0'); 36 | return this; 37 | }; 38 | 39 | /** 40 | * write data to file as JSON 41 | * @return {Testfile} The testfile instance 42 | */ 43 | Testfile.prototype.writeJSON = function(data){ 44 | fs.writeFileSync(this.getPath(),JSON.stringify(data,null,2),'utf-8'); 45 | return this; 46 | }; 47 | 48 | /** 49 | * Throws an error if file does not have the specified number of pages 50 | * @return {Promise} 51 | */ 52 | Testfile.prototype.assertHasLength = function(length){ 53 | return scissors(fs.createReadStream(this.getPath())) 54 | .getNumPages() 55 | .then(function(computedLength){ 56 | assert.equal(computedLength,length,'Page number does not match.'); 57 | }) 58 | .catch(function(err){ 59 | throw err; 60 | }); 61 | }; 62 | 63 | /** 64 | * Compares with a reference result and throws an error if file is not the same 65 | * @return {Testfile} The testfile instance 66 | */ 67 | Testfile.prototype.compareWithReferenceFile = function(){ 68 | var content = fs.readFileSync(this.getPath(),'utf-8'); 69 | var referenceFile = __dirname + '/test_data/' + this.name + '.' + this.ext; 70 | var referenceContent = fs.readFileSync(referenceFile); 71 | if( this.ext == 'json'){ 72 | content = JSON.parse(content); 73 | referenceContent = JSON.parse(referenceContent,'utf-8'); 74 | assert.deepEqual(content, referenceContent, 'Output does not match reference content'); 75 | } else { 76 | for( var i=0; i++; i> setpagedevice', 62 | '-f', '-']); 63 | ins.pipe(gs.stdin); 64 | gs.stderr.on('data', function (data) { 65 | console.error('gs encountered an error:\n', String(data)); 66 | }); 67 | gs.on('exit', function (code) { 68 | if (code) { 69 | console.error('gs exited with failure code:', code); 70 | } 71 | debug('Finished writing image.'); 72 | }); 73 | return gs.stdout; 74 | } 75 | 76 | function createTempFile (next) { 77 | debug('opening temp file'); 78 | temp.open('scissors', function (err, info) { 79 | debug('closing temp file', info.path); 80 | fs.close(info.fd, function () { 81 | debug('closed.'); 82 | next(info.path); 83 | }); 84 | }); 85 | } 86 | 87 | //stripCropbox(process.stdin, fs) 88 | 89 | if (process.argv.length < 5) { 90 | console.error('Invalid number of arguments.'); 91 | process.exit(1); 92 | } 93 | 94 | var input = process.argv[2]; 95 | var format = process.argv[3]; 96 | var page = Number(process.argv[4]) || 1; 97 | var dpi = Number(process.argv[5]) || 72; 98 | 99 | createTempFile(function (path) { 100 | var inputStream = input == '-' ? process.stdin : fs.createReadStream(input); 101 | readBoundingBox(inputStream, page, function (err, boundingbox) { 102 | if (err) { 103 | rimraf(path); 104 | return console.error(err); 105 | } 106 | var stream = fs.createReadStream(path); 107 | stream.on('close', function () { 108 | rimraf(path); 109 | }).on('error', function () { 110 | rimraf(path); 111 | }); 112 | rasterizeImage(stream, page, dpi, format, boundingbox) 113 | .pipe(process.stdout); 114 | }); 115 | inputStream.resume(); 116 | inputStream.pipe(fs.createWriteStream(path)); 117 | }); 118 | -------------------------------------------------------------------------------- /bin/crop.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var fs = require('fs'); 4 | var spawn = require('child_process').spawn; 5 | var rimraf = require('rimraf').sync; 6 | 7 | var temp = require('temp'); 8 | require('bufferjs/indexOf'); 9 | 10 | // take stdin, write to random access file 11 | // strip cropbox tthen reapply cropbox and write to stdout 12 | // doesn't work with all PDFs yet, see 13 | // https://github.com/tcr/scissors/issues/21 14 | // http://stackoverflow.com/questions/6183479/cropping-a-pdf-using-ghostscript-9-01?rq=1 15 | 16 | function debug () { 17 | //console.error.apply(console, arguments); 18 | } 19 | 20 | function repairPDF (outs) { 21 | var pdftk = spawn('pdftk', ['-', 'output', '-']); 22 | pdftk.stderr.on('data', function (data) { 23 | throw new Error('pdftk encountered an error:\n', String(data)); 24 | }); 25 | pdftk.on('exit', function (code) { 26 | if (code) { 27 | throw new Error('pdftk exited with failure code:', code); 28 | } 29 | }); 30 | pdftk.stdout.pipe(outs); 31 | return pdftk.stdin; 32 | } 33 | 34 | function stripCropbox (ins, outs, next) { 35 | var cropbox = [0, 0, 0, 0]; 36 | var repair = repairPDF(outs); 37 | ins.on('data', function (data) { 38 | var i; 39 | if ((i = data.indexOf('/CropBox')) != -1) { 40 | repair.write(data.slice(0, i)); 41 | for (var j = i; data[j] != '\n'.charCodeAt(0); ) { 42 | j++; 43 | } 44 | debug('Scissors: found cropbox', String(data.slice(i, j))); 45 | cropbox = String(data.slice(i, j)) 46 | .match(/\/CropBox\s+\[([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\]/) 47 | .slice(1, 5).map(Number); 48 | repair.write(data.slice(j + 1)); 49 | } else { 50 | repair.write(data); 51 | } 52 | }); 53 | ins.on('end', function () { 54 | debug('Scissors: Finished stripping cropbox.'); 55 | repair.end(); 56 | }) 57 | outs.on('close', function (err) { 58 | next(err, cropbox); 59 | }); 60 | } 61 | 62 | function combineCropboxes (a, b) { 63 | return [a[0] + b[0], a[1] + b[1], Math.min(a[0] + b[2], a[2]), Math.min(a[1] + b[3], a[3])]; 64 | } 65 | 66 | function writeCropbox (ins, cropbox) { 67 | var gs = spawn('gs', [ 68 | '-sDEVICE=pdfwrite', 69 | '-sOutputFile=-', 70 | '-q', 71 | //'-sstdout=/dev/null', 72 | '-dNOPAUSE', '-dBATCH', 73 | '-c', '[/CropBox [' + cropbox.join(' ') + '] /PAGES pdfmark', 74 | '-f', '-']); 75 | ins.pipe(gs.stdin); 76 | gs.stderr.on('data', function (data) { 77 | throw new Error('gs encountered an error:\n', String(data)); 78 | }); 79 | gs.on('exit', function (code) { 80 | if (code) { 81 | throw new Error('gs exited with failure code:', code); 82 | } 83 | debug('Scissors: Finished writing cropbox.'); 84 | }); 85 | return gs.stdout; 86 | } 87 | 88 | //stripCropbox(process.stdin, fs) 89 | 90 | if (process.argv.length < 6) { 91 | throw new Error('Invalid number of arguments.'); 92 | process.exit(1); 93 | } 94 | 95 | var modcropbox = process.argv.slice(2, 6).map(Number); 96 | 97 | debug('Scissors: opening temp file'); 98 | temp.open('stripCropbox', function (err, info) { 99 | debug('Scissors: closing temp file', info.path); 100 | fs.close(info.fd, function () { 101 | debug('Scissors: closed.'); 102 | stripCropbox(process.stdin, fs.createWriteStream(info.path), function (err, cropbox) { 103 | if (err) { 104 | rimraf(info.path); 105 | throw new Error(err); 106 | } 107 | var stream = fs.createReadStream(info.path); 108 | stream.on('close', function () { 109 | rimraf(info.path); 110 | }).on('error', function () { 111 | rimraf(info.path); 112 | }); 113 | //fs.createReadStream(info.path).pipe(process.stdout); 114 | writeCropbox(stream, combineCropboxes(cropbox, modcropbox)) 115 | .pipe(process.stdout); 116 | }); 117 | process.stdin.resume(); 118 | }); 119 | }); 120 | -------------------------------------------------------------------------------- /test/streams.js: -------------------------------------------------------------------------------- 1 | /* global describe, it */ 2 | var scissors = require('../scissors'); 3 | var promisify = require('stream-to-promise'); 4 | var fs = require('fs'); 5 | var util = require('util'); 6 | var Testfile = require('./testfile'); 7 | 8 | var pdf = () => fs.createReadStream(__dirname + '/test_data/test.pdf'); 9 | 10 | // TODO: better result checks 11 | 12 | describe('Test Scissor streams', function() { 13 | this.timeout(20000); 14 | 15 | 16 | // _commandStream() 17 | describe('#_commandStream()', function() { 18 | var counter = 0; 19 | it('should produce a stream with JSON data parsed from the raw PDF data', function(done) { 20 | var testfile = new Testfile('commandstream','json'); 21 | var result = []; 22 | scissors(pdf()) 23 | .pages(1,3) 24 | ._commandStream() 25 | .on('data',function(data){ 26 | result.push(data); 27 | }) 28 | .on('end', function(){ 29 | if( counter++ == 0 ){ 30 | fs.writeFileSync(testfile.getPath(), JSON.stringify(result,null,2)); 31 | testfile.compareWithReferenceFile(); 32 | testfile.remove(); 33 | done(); 34 | } else { 35 | // this addresses a weird, hard to reproduce error 36 | throw new Error('"end" event has been emitted twice!'); 37 | } 38 | }) 39 | .on('error', function(err){ 40 | throw err; 41 | }); 42 | }); 43 | }); 44 | 45 | 46 | // contentStream() 47 | describe('#contentStream()', function() { 48 | it('should output metadata about the PDF', function(done) { 49 | var testfile = new Testfile('contentstream','json'); 50 | var result = []; 51 | scissors(pdf()) 52 | .pages(1,3) 53 | .contentStream() 54 | .on('data',function(data){ 55 | result.push(data); 56 | }) 57 | .on('end', function(){ 58 | fs.writeFileSync(testfile.getPath(), JSON.stringify(result,null,2)); 59 | testfile.compareWithReferenceFile(); 60 | testfile.remove(); 61 | done(); 62 | }) 63 | .on('error', function(err){ 64 | throw err; 65 | }); 66 | }); 67 | }); 68 | 69 | // propertyStream() 70 | describe('#propertyStream()', function() { 71 | it('should stream json data with metadata about the PDF', function(done) { 72 | var testfile = new Testfile('propertystream','json'); 73 | var result = []; 74 | scissors(pdf()) 75 | .propertyStream() 76 | .on('data',function(data){ 77 | if( data.value ){ 78 | result.push(data); 79 | } 80 | }) 81 | .on('end', function(){ 82 | fs.writeFileSync(testfile.getPath(), JSON.stringify(result,null,2)); 83 | testfile.compareWithReferenceFile(); 84 | testfile.remove(); 85 | done(); 86 | }) 87 | .on('error', function(err){ 88 | throw err; 89 | }); 90 | }); 91 | }); 92 | 93 | // textStream() 94 | describe('#textStream()', function() { 95 | it('should output text that is contained in the PDF', function() { 96 | var testfile = new Testfile('textstream','txt'); 97 | return promisify( 98 | scissors(pdf()).pages(1,3) 99 | .textStream() 100 | .on('error', e => {throw e;}) 101 | .pipe(fs.createWriteStream(testfile.getPath())) 102 | ) 103 | .then(function(){ 104 | testfile.assertExists(); 105 | // disabled because result is different on different platforms 106 | //testfile.compareWithReferenceFile(); 107 | testfile.remove(); 108 | }) 109 | .catch(function(err){ 110 | throw err; 111 | }); 112 | }); 113 | }); 114 | }); 115 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scissors 2 | [![.github/workflows/run_tests.yml](https://github.com/tcr/scissors/actions/workflows/run_tests.yml/badge.svg)](https://github.com/tcr/scissors/actions/workflows/run_tests.yml) 3 | 4 | PDF manipulation in Node.js, based on PDFTK! Split, join, crop, read, extract, 5 | boil, mash, stick them in a stew. 6 | 7 | > This project is no longer actively maintained and we cannot respond to issues. 8 | > Consider alternatives such as https://github.com/jjwilly16/node-pdftk 9 | > 10 | > Bug fixes are always welcome. 11 | 12 | ## Example 13 | 14 | ```javascript 15 | var scissors = require('scissors'); 16 | 17 | // Use and chain any of these commands... 18 | var pdf = scissors('in.pdf') 19 | .pages(4, 5, 6, 1, 12) // select or reorder individual pages 20 | .range(1, 10) // pages 1-10 21 | .even() // select even pages, 22 | .odd() // or odd, 23 | .rotate(90) // 90, 180, 270, 360 degrees 24 | .reverse() // reverse the page order 25 | .crop(100, 100, 300, 200) // offset in points from left, bottom, right, top (doesn't work reliably yet) 26 | .pdfStream()... // output stream, see below 27 | 28 | // Join multiple files... 29 | var pdfA = scissors('1.pdf'), pdfB = scissors('2.pdf'), pdfC = scissors('3.pdf') 30 | scissors.join(pdfA.pages(1), pdfB, pdfC.pages(5, 10)).pdfStream()... 31 | 32 | // And output data as streams 33 | pdf.pdfStream() 34 | .pipe(fs.createWriteStream('out.pdf')) 35 | .on('finish', function(){ 36 | console.log("We're done!"); 37 | }).on('error',function(err){ 38 | throw err; 39 | }); 40 | 41 | // or use promises: 42 | require('stream-to-promise')( 43 | scissors(pdf) 44 | .pages(1,3) 45 | .pdfStream().pipe(fs.createWriteStream(...) 46 | ) 47 | .then(function(){ 48 | console.log("We're done!"); 49 | }) 50 | .catch(function(e){ 51 | console.error("Something went wrong:" + e); 52 | }); 53 | 54 | pdf.pngStream(300).pipe(fs.createWriteStream('out-page1.png')); // PNG of first page at 300 dpi 55 | pdf.textStream().pipe(process.stdout) // Stream of individual text strings 56 | pdf.propertyStream().pipe(process.stdout) // Stream of PDF meta data 57 | 58 | // Extract content as text or images: 59 | pdf.contentStream().on('data', console.log) 60 | // { type: 'string', x: 1750, y: 594, 61 | // string: 'Reinhold Messner', 62 | // font: { height: 112, width: 116, font: 'ZSVUGH+Imago-Book' }, 63 | // color: { r: 137, g: 123, b: 126 } } 64 | // { type: 'image', x: 3049, y: 5680, width: 655, height: 810, index: 4 } 65 | 66 | // Use the 'index' property of an image element to extract an image: 67 | // Calls `pdfimages -j`, so the result format is dependent on the 68 | // format of the embedded image (see http://linuxcommand.org/man_pages/pdfimages1.html) 69 | pdf.extractImageStream(0).pipe(s.createWriteStream('firstImage.jpg')); 70 | 71 | // Promise-based output: 72 | pdf.getPageSizes().then(console.dir); // requires imagemagick 73 | // [ 74 | // { 75 | // "width": "595", 76 | // "height": "842", 77 | // "unit": "pt" 78 | // }, 79 | // ... 80 | pdf.getNumPages().then(console.log); // prints the number of pages of the PDF 81 | 82 | ``` 83 | 84 | ## Requirements 85 | 86 | Scissors is a wrapper around command line utilities (mainly PDFTK) that have to 87 | be separately installed. 88 | 89 | * Install [PDFTK](http://www.pdflabs.com/docs/install-pdftk/). For MacOS, see below. 90 | * Ensure you have Ghostscript installed (check by running `gs --version`). 91 | * To use the `getPageSizes` method, you need the imagemagick library, which provides the `identify` executable. 92 | * *(optional)* To extract individual images from a page with the 93 | `extractImageStream()` method, install `pdfimages` with `brew install xpdf` or 94 | `apt-get install poppler-utils`. 95 | 96 | ## MacOS 97 | 98 | PDFTK does not run out-of-the box on Mac OS >=10.11. A patched build is 99 | available 100 | [here](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/pdftk_server-2.02-mac_osx-10.11-setup.pkg) 101 | as per [this 102 | thread](http://stackoverflow.com/questions/32505951/pdftk-server-on-os-x-10-11). 103 | Alternatively, use a dockerized executable such as 104 | https://hub.docker.com/r/jottr/alpine-pdftk. Remember that, in this case, you 105 | need to pass read streams to the executable instead of file paths unless you 106 | mount the directories containing these paths to make them accessible for the 107 | docker image. 108 | 109 | ## Testing 110 | 111 | The tests sometimes and unpredictably fail for unknown reasons, try to run them again to see whether the 112 | problem goes away. 113 | 114 | ## Dev resources 115 | - https://www.pdflabs.com/docs/pdftk-man-page/ 116 | 117 | ## Known issues 118 | - `.crop()` doesn't work reliably, if at all. 119 | -------------------------------------------------------------------------------- /test/images.js: -------------------------------------------------------------------------------- 1 | /* global describe, it */ 2 | var scissors = require('../scissors'); 3 | var fs = require('fs'); 4 | var Testfile = require('./testfile'); 5 | var Promise = require('any-promise'); 6 | var promisify = require('stream-to-promise'); 7 | var pdf = () => fs.createReadStream(__dirname + '/test_data/test.pdf'); 8 | 9 | describe('Test Scissors image extraction methods', function() { 10 | 11 | this.timeout(50000); 12 | 13 | //return; // skip time-consuming image tests 14 | describe('#jpgStream() - slow rasterize', function() { 15 | it('should extract a single jpg page (using default rasterize)', function(done) { 16 | var testfile = new Testfile('page1_default','jpg'); 17 | var dpi = 300; 18 | var pageNum = 1; 19 | var useSimpleRasterize = false; 20 | scissors(pdf()) 21 | .jpgStream(dpi, pageNum, useSimpleRasterize).pipe(fs.createWriteStream(testfile.getPath())) 22 | .on('error', err => {throw err;}) 23 | .on('finish', function(){ 24 | testfile.assertExists(); 25 | testfile.remove(); 26 | done(); 27 | }); 28 | }); 29 | }); 30 | 31 | describe('#pngStream() - slow rasterize', function() { 32 | it('should extract a single png page (using default rasterize)', function(done) { 33 | var testfile = new Testfile('page1_default','png'); 34 | var dpi = 300; 35 | var pageNum = 1; 36 | var useSimpleRasterize = false; 37 | scissors(pdf()) 38 | .pngStream(dpi, pageNum, useSimpleRasterize).pipe(fs.createWriteStream(testfile.getPath())) 39 | .on('error', err => {throw err;}) 40 | .on('finish', function(){ 41 | testfile.assertExists(); 42 | testfile.remove(); 43 | done(); 44 | }); 45 | }); 46 | }); 47 | 48 | describe('#jpgStream()', function() { 49 | it('should extract a single jpg page (using simple rasterize)', function(done) { 50 | var testfile = new Testfile('page1_simple','jpg'); 51 | var dpi = 300; 52 | var pageNum = 1; 53 | var useSimpleRasterize = true; 54 | scissors(pdf()) 55 | .jpgStream(dpi, pageNum, useSimpleRasterize).pipe(fs.createWriteStream(testfile.getPath())) 56 | .on('error', err => {throw err;}) 57 | .on('finish', function(){ 58 | testfile.assertExists(); 59 | testfile.remove(); 60 | done(); 61 | }); 62 | }); 63 | }); 64 | 65 | describe('#pngStream()', function() { 66 | it('should extract a single png page (using simple rasterize)', function(done) { 67 | var testfile = new Testfile('page1_simple','png'); 68 | var dpi = 300; 69 | var pageNum = 1; 70 | var useSimpleRasterize = true; 71 | scissors(pdf()) 72 | .pngStream(dpi, pageNum, useSimpleRasterize).pipe(fs.createWriteStream(testfile.getPath())) 73 | .on('error', err => {throw err;}) 74 | .on('finish', function(){ 75 | testfile.assertExists(); 76 | testfile.remove(); 77 | done(); 78 | }); 79 | }); 80 | }); 81 | 82 | describe('#jpgStream()', function() { 83 | it('should extract a single jpg page using crop box (using simple rasterize)', function(done) { 84 | var testfile = new Testfile('page1_simple_crop_box','jpg'); 85 | var dpi = 300; 86 | var pageNum = 1; 87 | var useSimpleRasterize = true; 88 | var useCropBox = true; 89 | scissors(pdf()) 90 | .jpgStream(dpi, pageNum, useSimpleRasterize, useCropBox).pipe(fs.createWriteStream(testfile.getPath())) 91 | .on('error', err => {throw err;}) 92 | .on('finish', function(){ 93 | testfile.assertExists(); 94 | testfile.remove(); 95 | done(); 96 | }); 97 | }); 98 | }); 99 | 100 | describe('#pngStream()', function() { 101 | it('should extract a single png page using crop box (using simple rasterize)', function(done) { 102 | var testfile = new Testfile('page1_simple_crop_box','png'); 103 | var dpi = 300; 104 | var pageNum = 1; 105 | var useSimpleRasterize = true; 106 | var useCropBox = true; 107 | scissors(pdf()) 108 | .pngStream(dpi, pageNum, useSimpleRasterize, useCropBox) 109 | .on('error', err => {throw err;}) 110 | .pipe(fs.createWriteStream(testfile.getPath())) 111 | .on('error', err => {throw err;}) 112 | .on('finish', function(){ 113 | testfile.assertExists(); 114 | testfile.remove(); 115 | done(); 116 | }); 117 | }); 118 | }); 119 | 120 | describe('#extractImageStream()', function() { 121 | it('should extract a single image from the pdf (only checks file creation)', function(done) { 122 | var testfile = new Testfile('image0','jpg'); 123 | scissors(pdf()) 124 | .extractImageStream(0) 125 | .on('error', err => {throw err;}) 126 | .pipe(fs.createWriteStream(testfile.getPath())) 127 | .on('error', err => {throw err;}) 128 | .on('finish', function(){ 129 | testfile.assertExists(); 130 | testfile.remove(); 131 | done(); 132 | }); 133 | }); 134 | }); 135 | 136 | describe('Save several pages as images', function() { 137 | it('should save a range of pages as png images', function() { 138 | var files = []; 139 | return Promise.all( 140 | [1,2,3].map(function(page){ 141 | var file = new Testfile('page_'+page,'png'); 142 | files.push(file); 143 | return promisify( 144 | scissors(pdf()) 145 | .pngStream(300,page,true) 146 | .on('error', err => {throw err;}) 147 | .pipe(fs.createWriteStream(file.getPath())) 148 | ); 149 | }) 150 | ) 151 | .then(function(){ 152 | files.forEach(function(file){ 153 | file.assertExists(); 154 | file.remove(); 155 | }); 156 | }) 157 | .catch(function (err) { 158 | throw err; 159 | }); 160 | }); 161 | }); 162 | }); 163 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "node": true, 4 | "browser": true, 5 | "es6": false 6 | }, 7 | "ecmaFeatures": { 8 | "modules": true, 9 | "templateStrings": false 10 | }, 11 | "rules": { 12 | "no-alert": "off", 13 | "no-array-constructor": "off", 14 | "no-bitwise": "off", 15 | "no-caller": "off", 16 | "no-case-declarations": "error", 17 | "no-catch-shadow": "off", 18 | "no-class-assign": "error", 19 | "no-cond-assign": "error", 20 | "no-confusing-arrow": "off", 21 | "no-console": "off", 22 | "no-const-assign": "error", 23 | "no-constant-condition": "error", 24 | "no-continue": "off", 25 | "no-control-regex": "error", 26 | "no-debugger": "error", 27 | "no-delete-var": "error", 28 | "no-div-regex": "off", 29 | "no-dupe-class-members": "error", 30 | "no-dupe-keys": "error", 31 | "no-dupe-args": "error", 32 | "no-duplicate-case": "error", 33 | "no-duplicate-imports": "off", 34 | "no-else-return": "off", 35 | "no-empty": "error", 36 | "no-empty-character-class": "error", 37 | "no-empty-function": "off", 38 | "no-empty-pattern": "error", 39 | "no-eq-null": "off", 40 | "no-eval": "off", 41 | "no-ex-assign": "error", 42 | "no-extend-native": "off", 43 | "no-extra-bind": "off", 44 | "no-extra-boolean-cast": "error", 45 | "no-extra-label": "off", 46 | "no-extra-parens": "off", 47 | "no-extra-semi": "error", 48 | "no-fallthrough": "error", 49 | "no-floating-decimal": "off", 50 | "no-func-assign": "error", 51 | "no-implicit-coercion": "off", 52 | "no-implicit-globals": "off", 53 | "no-implied-eval": "off", 54 | "no-inline-comments": "off", 55 | "no-inner-declarations": "error", 56 | "no-invalid-regexp": "error", 57 | "no-invalid-this": "off", 58 | "no-irregular-whitespace": "error", 59 | "no-iterator": "off", 60 | "no-label-var": "off", 61 | "no-labels": "off", 62 | "no-lone-blocks": "off", 63 | "no-lonely-if": "off", 64 | "no-loop-func": "off", 65 | "no-mixed-requires": "off", 66 | "no-mixed-spaces-and-tabs": "error", 67 | "linebreak-style": "off", 68 | "no-multi-spaces": "off", 69 | "no-multi-str": "off", 70 | "no-multiple-empty-lines": "off", 71 | "no-native-reassign": "off", 72 | "no-negated-condition": "off", 73 | "no-negated-in-lhs": "error", 74 | "no-nested-ternary": "off", 75 | "no-new": "off", 76 | "no-new-func": "off", 77 | "no-new-object": "off", 78 | "no-new-require": "off", 79 | "no-new-symbol": "error", 80 | "no-new-wrappers": "off", 81 | "no-obj-calls": "error", 82 | "no-octal": "error", 83 | "no-octal-escape": "off", 84 | "no-param-reassign": "off", 85 | "no-path-concat": "off", 86 | "no-plusplus": "off", 87 | "no-process-env": "off", 88 | "no-process-exit": "off", 89 | "no-proto": "off", 90 | "no-redeclare": "error", 91 | "no-regex-spaces": "error", 92 | "no-restricted-globals": "off", 93 | "no-restricted-imports": "off", 94 | "no-restricted-modules": "off", 95 | "no-restricted-syntax": "off", 96 | "no-return-assign": "off", 97 | "no-script-url": "off", 98 | "no-self-assign": "error", 99 | "no-self-compare": "off", 100 | "no-sequences": "off", 101 | "no-shadow": "off", 102 | "no-shadow-restricted-names": "off", 103 | "no-whitespace-before-property": "off", 104 | "no-spaced-func": "off", 105 | "no-sparse-arrays": "error", 106 | "no-sync": "off", 107 | "no-ternary": "off", 108 | "no-trailing-spaces": "off", 109 | "no-this-before-super": "error", 110 | "no-throw-literal": "off", 111 | "no-undef": "error", 112 | "no-undef-init": "off", 113 | "no-undefined": "off", 114 | "no-unexpected-multiline": "error", 115 | "no-underscore-dangle": "off", 116 | "no-unmodified-loop-condition": "off", 117 | "no-unneeded-ternary": "off", 118 | "no-unreachable": "error", 119 | "no-unused-expressions": "off", 120 | "no-unused-labels": "error", 121 | "no-unused-vars": "error", 122 | "no-use-before-define": "off", 123 | "no-useless-call": "off", 124 | "no-useless-concat": "off", 125 | "no-useless-constructor": "off", 126 | "no-useless-escape": "off", 127 | "no-void": "off", 128 | "no-var": "off", 129 | "no-warning-comments": "off", 130 | "no-with": "off", 131 | "no-magic-numbers": "off", 132 | "array-bracket-spacing": "off", 133 | "array-callback-return": "off", 134 | "arrow-body-style": "off", 135 | "arrow-parens": "off", 136 | "arrow-spacing": "off", 137 | "accessor-pairs": "off", 138 | "block-scoped-var": "off", 139 | "block-spacing": "off", 140 | "brace-style": "off", 141 | "callback-return": "off", 142 | "camelcase": "off", 143 | "comma-dangle": "error", 144 | "comma-spacing": "off", 145 | "comma-style": "off", 146 | "complexity": [ 147 | "off", 148 | 11 149 | ], 150 | "computed-property-spacing": "off", 151 | "consistent-return": "off", 152 | "consistent-this": "off", 153 | "constructor-super": "error", 154 | "curly": "off", 155 | "default-case": "off", 156 | "dot-location": "off", 157 | "dot-notation": "off", 158 | "eol-last": "off", 159 | "eqeqeq": "off", 160 | "func-names": "off", 161 | "func-style": "off", 162 | "generator-star-spacing": "off", 163 | "global-require": "off", 164 | "guard-for-in": "off", 165 | "handle-callback-err": "off", 166 | "id-length": "off", 167 | "indent": "off", 168 | "init-declarations": "off", 169 | "jsx-quotes": "off", 170 | "key-spacing": "off", 171 | "keyword-spacing": "off", 172 | "lines-around-comment": "off", 173 | "max-depth": "off", 174 | "max-len": "off", 175 | "max-nested-callbacks": "off", 176 | "max-params": "off", 177 | "max-statements": "off", 178 | "max-statements-per-line": "off", 179 | "new-cap": "off", 180 | "new-parens": "off", 181 | "newline-after-var": "off", 182 | "newline-before-return": "off", 183 | "newline-per-chained-call": "off", 184 | "object-curly-spacing": [ 185 | "off", 186 | "never" 187 | ], 188 | "object-shorthand": "off", 189 | "one-var": "off", 190 | "one-var-declaration-per-line": "off", 191 | "operator-assignment": "off", 192 | "operator-linebreak": "off", 193 | "padded-blocks": "off", 194 | "prefer-arrow-callback": "off", 195 | "prefer-const": "off", 196 | "prefer-reflect": "off", 197 | "prefer-rest-params": "off", 198 | "prefer-spread": "off", 199 | "prefer-template": "off", 200 | "quote-props": "off", 201 | "quotes": [ 202 | 2, 203 | "single", 204 | "avoid-escape" 205 | ], 206 | "radix": "off", 207 | "id-match": "off", 208 | "id-blacklist": "off", 209 | "require-jsdoc": "off", 210 | "require-yield": "off", 211 | "semi": "off", 212 | "semi-spacing": "off", 213 | "sort-vars": "off", 214 | "sort-imports": "off", 215 | "space-before-blocks": "off", 216 | "space-before-function-paren": "off", 217 | "space-in-parens": "off", 218 | "space-infix-ops": "off", 219 | "space-unary-ops": "off", 220 | "spaced-comment": "off", 221 | "strict": "off", 222 | "template-curly-spacing": "off", 223 | "use-isnan": "error", 224 | "valid-jsdoc": "off", 225 | "valid-typeof": "error", 226 | "vars-on-top": "off", 227 | "wrap-iife": "off", 228 | "wrap-regex": "off", 229 | "yield-star-spacing": "off", 230 | "yoda": "off" 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /test/pages.js: -------------------------------------------------------------------------------- 1 | /* global describe, it */ 2 | var scissors = require('../scissors'); 3 | var promisify = require('stream-to-promise'); 4 | var fs = require('fs'); 5 | var Testfile = require('./testfile'); 6 | var assert = require('assert-diff'); 7 | var pdf = () => fs.createReadStream(__dirname + '/test_data/test.pdf'); 8 | 9 | // TODO: better result checks 10 | 11 | describe('Test Scissors page manipulation methods', function() { 12 | 13 | this.timeout(20000); 14 | 15 | // getNumPages() 16 | describe('#getNumPages()', function() { 17 | it('should retrieve the number of pages of the PDF document', function() { 18 | return scissors(pdf()) 19 | .getNumPages() 20 | .then(function(length){ 21 | assert.equal(length,10,'Incorrect page number'); 22 | }) 23 | .catch(function (err) { 24 | throw err; 25 | }); 26 | }); 27 | }); 28 | 29 | // range() using stream events for async continuation 30 | describe('#range()', function() { 31 | it('should extract a range of pdf pages', function(done) { 32 | var testfile = new Testfile('range'); 33 | scissors(pdf()) 34 | .range(1,3) 35 | .pdfStream() 36 | .on('error', err => {throw err;}) 37 | .pipe(fs.createWriteStream(testfile.getPath())) 38 | .on('error', err => {throw err;}) 39 | .on('finish', function(){ 40 | testfile.assertExists(); 41 | testfile.assertHasLength(3) 42 | .then(function(){ 43 | testfile.remove(); 44 | done(); 45 | }); 46 | }); 47 | }); 48 | }); 49 | 50 | // pages() with Promise 51 | describe('#pages()', function() { 52 | it('should extract pdf pages', function() { 53 | var testfile = new Testfile('pages'); 54 | return promisify( 55 | scissors(pdf()) 56 | .pages(1,3) 57 | .pdfStream().pipe(fs.createWriteStream(testfile.getPath())) 58 | ) 59 | .then(function(){ 60 | testfile.assertExists(); 61 | return testfile.assertHasLength(2); 62 | }) 63 | .then(function(){ 64 | testfile.remove(); 65 | }) 66 | .catch(function(err){ 67 | throw err; 68 | }); 69 | }); 70 | }); 71 | 72 | // odd() with Promise 73 | describe('#odd()', function() { 74 | it('should extract all odd pages', function() { 75 | var testfile = new Testfile('odd'); 76 | return promisify( 77 | scissors(pdf()) 78 | .odd() 79 | .pdfStream() 80 | .on('error', err => {throw err;}) 81 | .pipe(fs.createWriteStream(testfile.getPath())) 82 | ) 83 | .then(function(){ 84 | testfile.assertExists(); 85 | return testfile.assertHasLength(5); 86 | }) 87 | .then(function(){ 88 | testfile.remove(); 89 | }) 90 | .catch(function(err){ 91 | throw err; 92 | }); 93 | }); 94 | }); 95 | 96 | // odd() with Promise 97 | describe('#even()', function() { 98 | it('should extract all odd pages', function() { 99 | var testfile = new Testfile('even'); 100 | return promisify( 101 | scissors(pdf()) 102 | .even() 103 | .pdfStream() 104 | .on('error', err => {throw err;}) 105 | .pipe(fs.createWriteStream(testfile.getPath())) 106 | ) 107 | .then(function(){ 108 | testfile.assertExists(); 109 | testfile.remove(); 110 | }).catch(function(err){ 111 | throw err; 112 | }); 113 | }); 114 | }); 115 | 116 | // reverse() with Promise 117 | describe('#reverse()', function() { 118 | it('should reverse the page order', function() { 119 | var testfile = new Testfile('reverse'); 120 | return promisify( 121 | scissors(pdf()) 122 | .reverse() 123 | .pdfStream() 124 | .on('error', err => {throw err;}) 125 | .pipe(fs.createWriteStream(testfile.getPath())) 126 | ) 127 | .then(function(){ 128 | testfile.assertExists(); 129 | testfile.remove(); 130 | }) 131 | }); 132 | }); 133 | 134 | // chained commands 135 | describe('(chained commands)', function() { 136 | it('should execute a couple of chained commands', function() { 137 | var testfile = new Testfile('odd'); 138 | return promisify( 139 | scissors(pdf()) 140 | .reverse() 141 | .odd() 142 | .range(2,3) 143 | .pages(1) 144 | .pdfStream() 145 | .on('error', err => {throw err;}) 146 | .pipe(fs.createWriteStream(testfile.getPath())) 147 | ) 148 | .then(function(){ 149 | testfile.assertExists(); 150 | testfile.remove(); 151 | }).catch(function(err){ 152 | throw err; 153 | }); 154 | }); 155 | }); 156 | 157 | // rotate 158 | describe('#rotate()', function() { 159 | it('should rotate the selected pages', function() { 160 | var testfile = new Testfile('rotate'); 161 | return promisify( 162 | scissors(pdf()) 163 | .range(1,3) 164 | .rotate(90) 165 | .pdfStream() 166 | .on('error', err => {throw err;}) 167 | .pipe(fs.createWriteStream(testfile.getPath())) 168 | ) 169 | .then(function(){ 170 | testfile.assertExists(); 171 | testfile.remove(); 172 | }) 173 | }); 174 | }); 175 | 176 | // compress 177 | describe('#compress()', function() { 178 | it('should compress the selected pages', function() { 179 | var testfile = new Testfile('compress'); 180 | return promisify( 181 | scissors(pdf()) 182 | .compress() 183 | .pdfStream() 184 | .on('error', err => {throw err;}) 185 | .pipe(fs.createWriteStream(testfile.getPath())) 186 | ) 187 | .then(function(){ 188 | testfile.assertExists(); 189 | testfile.remove(); 190 | }).catch(function(err){ 191 | throw err; 192 | }); 193 | }); 194 | }); 195 | 196 | // decompress 197 | // describe('#uncompress()', function() { 198 | // it('should uncompress the selected pages', function() { 199 | // var infile = new Testfile('compress'); 200 | // var outfile = new Testfile('uncompress'); 201 | // return promisify(scissors(infile.getPath()) 202 | // .uncompress() 203 | // .pdfStream().pipe(fs.createWriteStream(outfile.getPath()))) 204 | // .then(function(){ 205 | // outfile.assertExists(); 206 | // //outfile.remove(); 207 | // }).catch(function(err){ 208 | // throw err; 209 | // }); 210 | // }); 211 | // }); 212 | 213 | // crop 214 | describe('#crop()', function() { 215 | it('should crop the selected pages (checks only execution, not result)', function() { 216 | var testfile = new Testfile('crop'); 217 | return promisify( 218 | scissors(pdf()) 219 | .pages(1,2) 220 | .crop(0,0,100,100) 221 | .pdfStream() 222 | .pipe(fs.createWriteStream(testfile.getPath())) 223 | ) 224 | .then(function(){ 225 | testfile.assertExists(); 226 | testfile.remove(); 227 | }).catch(function(err){ 228 | throw err; 229 | }); 230 | }); 231 | }); 232 | 233 | // getPageSizes() 234 | // describe('#getPageSizes()', function() { 235 | // it('should retrieve information on the size of the PDF pages', function() { 236 | // var testfile = new Testfile('pagesizes','json'); 237 | // return scissors(pdf()) 238 | // .range(1,3) 239 | // .getPageSizes() 240 | // .then(function(result){ 241 | // testfile 242 | // .writeJSON(result) 243 | // // .compareWithReferenceFile() // result is platform-dependent 244 | // .remove(); 245 | // }) 246 | // .catch(function (err) { 247 | // throw err; 248 | // }); 249 | // }); 250 | // }); 251 | 252 | 253 | }); 254 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /test/test_data/contentstream.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "type": "string", 4 | "x": 2942, 5 | "y": 391, 6 | "string": " ", 7 | "font": { 8 | "height": 204, 9 | "width": 89, 10 | "font": "AFPSDA+CourierNewPSMT" 11 | }, 12 | "color": { 13 | "r": 0, 14 | "g": 0, 15 | "b": 0 16 | } 17 | }, 18 | { 19 | "type": "string", 20 | "x": 2942, 21 | "y": 391, 22 | "string": "1 ", 23 | "font": { 24 | "height": 204, 25 | "width": 89, 26 | "font": "AFPSDA+CourierNewPSMT" 27 | }, 28 | "color": { 29 | "r": 0, 30 | "g": 0, 31 | "b": 0 32 | } 33 | }, 34 | { 35 | "type": "string", 36 | "x": 710, 37 | "y": 7612, 38 | "string": "This is page 1 containing an image. ", 39 | "font": { 40 | "height": 204, 41 | "width": 89, 42 | "font": "AFPSDA+CourierNewPSMT" 43 | }, 44 | "color": { 45 | "r": 0, 46 | "g": 0, 47 | "b": 0 48 | } 49 | }, 50 | { 51 | "type": "string", 52 | "x": 710, 53 | "y": 7476, 54 | "string": " ", 55 | "font": { 56 | "height": 204, 57 | "width": 89, 58 | "font": "AFPSDA+CourierNewPSMT" 59 | }, 60 | "color": { 61 | "r": 0, 62 | "g": 0, 63 | "b": 0 64 | } 65 | }, 66 | { 67 | "type": "string", 68 | "x": 710, 69 | "y": 7341, 70 | "string": " ", 71 | "font": { 72 | "height": 204, 73 | "width": 89, 74 | "font": "AFPSDA+CourierNewPSMT" 75 | }, 76 | "color": { 77 | "r": 0, 78 | "g": 0, 79 | "b": 0 80 | } 81 | }, 82 | { 83 | "type": "string", 84 | "x": 5270, 85 | "y": 4754, 86 | "string": " ", 87 | "font": { 88 | "height": 204, 89 | "width": 89, 90 | "font": "AFPSDA+CourierNewPSMT" 91 | }, 92 | "color": { 93 | "r": 0, 94 | "g": 0, 95 | "b": 0 96 | } 97 | }, 98 | { 99 | "type": "string", 100 | "x": 710, 101 | "y": 4653, 102 | "string": " ", 103 | "font": { 104 | "height": 204, 105 | "width": 89, 106 | "font": "AFPSDA+CourierNewPSMT" 107 | }, 108 | "color": { 109 | "r": 0, 110 | "g": 0, 111 | "b": 0 112 | } 113 | }, 114 | { 115 | "type": "image", 116 | "x": 730, 117 | "y": 4752, 118 | "width": 4533, 119 | "height": 2550, 120 | "index": 0 121 | }, 122 | { 123 | "type": "image", 124 | "x": 730, 125 | "y": 4752, 126 | "width": 4533, 127 | "height": 2550, 128 | "index": 1 129 | }, 130 | { 131 | "type": "string", 132 | "x": 710, 133 | "y": 4519, 134 | "string": "Source: https://commons.wikimedia.org/wiki/File:Test_card.png ", 135 | "font": { 136 | "height": 204, 137 | "width": 89, 138 | "font": "AFPSDA+CourierNewPSMT" 139 | }, 140 | "color": { 141 | "r": 0, 142 | "g": 0, 143 | "b": 0 144 | } 145 | }, 146 | { 147 | "type": "string", 148 | "x": 710, 149 | "y": 391, 150 | "string": " ", 151 | "font": { 152 | "height": 204, 153 | "width": 89, 154 | "font": "AFPSDA+CourierNewPSMT" 155 | }, 156 | "color": { 157 | "r": 0, 158 | "g": 0, 159 | "b": 0 160 | } 161 | }, 162 | { 163 | "type": "string", 164 | "x": 2942, 165 | "y": 391, 166 | "string": "3 ", 167 | "font": { 168 | "height": 204, 169 | "width": 89, 170 | "font": "AFPSDA+CourierNewPSMT" 171 | }, 172 | "color": { 173 | "r": 0, 174 | "g": 0, 175 | "b": 0 176 | } 177 | }, 178 | { 179 | "type": "string", 180 | "x": 710, 181 | "y": 7612, 182 | "string": "vero eos et accusam et justo duo dolores et ea rebum. Stet ", 183 | "font": { 184 | "height": 204, 185 | "width": 89, 186 | "font": "AFPSDA+CourierNewPSMT" 187 | }, 188 | "color": { 189 | "r": 0, 190 | "g": 0, 191 | "b": 0 192 | } 193 | }, 194 | { 195 | "type": "string", 196 | "x": 710, 197 | "y": 7476, 198 | "string": "clita kasd gubergren, no sea takimata sanctus est Lorem ipsum ", 199 | "font": { 200 | "height": 204, 201 | "width": 89, 202 | "font": "AFPSDA+CourierNewPSMT" 203 | }, 204 | "color": { 205 | "r": 0, 206 | "g": 0, 207 | "b": 0 208 | } 209 | }, 210 | { 211 | "type": "string", 212 | "x": 710, 213 | "y": 7341, 214 | "string": "dolor sit amet. Lorem ipsum dolor sit amet, consetetur ", 215 | "font": { 216 | "height": 204, 217 | "width": 89, 218 | "font": "AFPSDA+CourierNewPSMT" 219 | }, 220 | "color": { 221 | "r": 0, 222 | "g": 0, 223 | "b": 0 224 | } 225 | }, 226 | { 227 | "type": "string", 228 | "x": 710, 229 | "y": 7204, 230 | "string": "sadipscing elitr, At accusam aliquyam diam diam dolore dolores ", 231 | "font": { 232 | "height": 204, 233 | "width": 89, 234 | "font": "AFPSDA+CourierNewPSMT" 235 | }, 236 | "color": { 237 | "r": 0, 238 | "g": 0, 239 | "b": 0 240 | } 241 | }, 242 | { 243 | "type": "string", 244 | "x": 710, 245 | "y": 7068, 246 | "string": "duo eirmod eos erat, et nonumy sed tempor et et invidunt justo ", 247 | "font": { 248 | "height": 204, 249 | "width": 89, 250 | "font": "AFPSDA+CourierNewPSMT" 251 | }, 252 | "color": { 253 | "r": 0, 254 | "g": 0, 255 | "b": 0 256 | } 257 | }, 258 | { 259 | "type": "string", 260 | "x": 710, 261 | "y": 6933, 262 | "string": "labore Stet clita ea et gubergren, kasd magna no rebum. ", 263 | "font": { 264 | "height": 204, 265 | "width": 89, 266 | "font": "AFPSDA+CourierNewPSMT" 267 | }, 268 | "color": { 269 | "r": 0, 270 | "g": 0, 271 | "b": 0 272 | } 273 | }, 274 | { 275 | "type": "string", 276 | "x": 710, 277 | "y": 6796, 278 | "string": "sanctus sea sed takimata ut vero voluptua. est Lorem ipsum ", 279 | "font": { 280 | "height": 204, 281 | "width": 89, 282 | "font": "AFPSDA+CourierNewPSMT" 283 | }, 284 | "color": { 285 | "r": 0, 286 | "g": 0, 287 | "b": 0 288 | } 289 | }, 290 | { 291 | "type": "string", 292 | "x": 710, 293 | "y": 6660, 294 | "string": "dolor sit amet. Lorem ipsum dolor sit amet, consetetur ", 295 | "font": { 296 | "height": 204, 297 | "width": 89, 298 | "font": "AFPSDA+CourierNewPSMT" 299 | }, 300 | "color": { 301 | "r": 0, 302 | "g": 0, 303 | "b": 0 304 | } 305 | }, 306 | { 307 | "type": "string", 308 | "x": 710, 309 | "y": 6525, 310 | "string": "sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut ", 311 | "font": { 312 | "height": 204, 313 | "width": 89, 314 | "font": "AFPSDA+CourierNewPSMT" 315 | }, 316 | "color": { 317 | "r": 0, 318 | "g": 0, 319 | "b": 0 320 | } 321 | }, 322 | { 323 | "type": "string", 324 | "x": 710, 325 | "y": 6388, 326 | "string": "labore et dolore magna aliquyam erat. ", 327 | "font": { 328 | "height": 204, 329 | "width": 89, 330 | "font": "AFPSDA+CourierNewPSMT" 331 | }, 332 | "color": { 333 | "r": 0, 334 | "g": 0, 335 | "b": 0 336 | } 337 | }, 338 | { 339 | "type": "string", 340 | "x": 710, 341 | "y": 6252, 342 | "string": " ", 343 | "font": { 344 | "height": 204, 345 | "width": 89, 346 | "font": "AFPSDA+CourierNewPSMT" 347 | }, 348 | "color": { 349 | "r": 0, 350 | "g": 0, 351 | "b": 0 352 | } 353 | }, 354 | { 355 | "type": "string", 356 | "x": 710, 357 | "y": 6117, 358 | "string": "Consetetur sadipscing elitr, sed diam nonumy eirmod tempor ", 359 | "font": { 360 | "height": 204, 361 | "width": 89, 362 | "font": "AFPSDA+CourierNewPSMT" 363 | }, 364 | "color": { 365 | "r": 0, 366 | "g": 0, 367 | "b": 0 368 | } 369 | }, 370 | { 371 | "type": "string", 372 | "x": 710, 373 | "y": 5980, 374 | "string": "invidunt ut labore et dolore magna aliquyam erat, sed diam ", 375 | "font": { 376 | "height": 204, 377 | "width": 89, 378 | "font": "AFPSDA+CourierNewPSMT" 379 | }, 380 | "color": { 381 | "r": 0, 382 | "g": 0, 383 | "b": 0 384 | } 385 | }, 386 | { 387 | "type": "string", 388 | "x": 710, 389 | "y": 5844, 390 | "string": "voluptua. At vero eos et accusam et justo duo dolores et ea ", 391 | "font": { 392 | "height": 204, 393 | "width": 89, 394 | "font": "AFPSDA+CourierNewPSMT" 395 | }, 396 | "color": { 397 | "r": 0, 398 | "g": 0, 399 | "b": 0 400 | } 401 | }, 402 | { 403 | "type": "string", 404 | "x": 710, 405 | "y": 5709, 406 | "string": "rebum. Stet clita kasd gubergren, no sea takimata sanctus est ", 407 | "font": { 408 | "height": 204, 409 | "width": 89, 410 | "font": "AFPSDA+CourierNewPSMT" 411 | }, 412 | "color": { 413 | "r": 0, 414 | "g": 0, 415 | "b": 0 416 | } 417 | }, 418 | { 419 | "type": "string", 420 | "x": 710, 421 | "y": 5572, 422 | "string": "Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, ", 423 | "font": { 424 | "height": 204, 425 | "width": 89, 426 | "font": "AFPSDA+CourierNewPSMT" 427 | }, 428 | "color": { 429 | "r": 0, 430 | "g": 0, 431 | "b": 0 432 | } 433 | }, 434 | { 435 | "type": "string", 436 | "x": 710, 437 | "y": 5438, 438 | "string": "consetetur sadipscing elitr, sed diam nonumy eirmod tempor ", 439 | "font": { 440 | "height": 204, 441 | "width": 89, 442 | "font": "AFPSDA+CourierNewPSMT" 443 | }, 444 | "color": { 445 | "r": 0, 446 | "g": 0, 447 | "b": 0 448 | } 449 | }, 450 | { 451 | "type": "string", 452 | "x": 710, 453 | "y": 5301, 454 | "string": "invidunt ut labore et dolore magna aliquyam erat, sed diam ", 455 | "font": { 456 | "height": 204, 457 | "width": 89, 458 | "font": "AFPSDA+CourierNewPSMT" 459 | }, 460 | "color": { 461 | "r": 0, 462 | "g": 0, 463 | "b": 0 464 | } 465 | }, 466 | { 467 | "type": "string", 468 | "x": 710, 469 | "y": 5164, 470 | "string": "voluptua. At vero eos et accusam et justo duo dolores et ea ", 471 | "font": { 472 | "height": 204, 473 | "width": 89, 474 | "font": "AFPSDA+CourierNewPSMT" 475 | }, 476 | "color": { 477 | "r": 0, 478 | "g": 0, 479 | "b": 0 480 | } 481 | }, 482 | { 483 | "type": "string", 484 | "x": 710, 485 | "y": 5030, 486 | "string": "rebum. Stet clita kasd gubergren, no sea takimata sanctus est ", 487 | "font": { 488 | "height": 204, 489 | "width": 89, 490 | "font": "AFPSDA+CourierNewPSMT" 491 | }, 492 | "color": { 493 | "r": 0, 494 | "g": 0, 495 | "b": 0 496 | } 497 | }, 498 | { 499 | "type": "string", 500 | "x": 710, 501 | "y": 4893, 502 | "string": "Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, ", 503 | "font": { 504 | "height": 204, 505 | "width": 89, 506 | "font": "AFPSDA+CourierNewPSMT" 507 | }, 508 | "color": { 509 | "r": 0, 510 | "g": 0, 511 | "b": 0 512 | } 513 | }, 514 | { 515 | "type": "string", 516 | "x": 710, 517 | "y": 4756, 518 | "string": "consetetur sadipscing elitr, sed diam nonumy eirmod tempor ", 519 | "font": { 520 | "height": 204, 521 | "width": 89, 522 | "font": "AFPSDA+CourierNewPSMT" 523 | }, 524 | "color": { 525 | "r": 0, 526 | "g": 0, 527 | "b": 0 528 | } 529 | }, 530 | { 531 | "type": "string", 532 | "x": 710, 533 | "y": 4622, 534 | "string": "invidunt ut labore et dolore magna aliquyam erat, sed diam ", 535 | "font": { 536 | "height": 204, 537 | "width": 89, 538 | "font": "AFPSDA+CourierNewPSMT" 539 | }, 540 | "color": { 541 | "r": 0, 542 | "g": 0, 543 | "b": 0 544 | } 545 | }, 546 | { 547 | "type": "string", 548 | "x": 710, 549 | "y": 4485, 550 | "string": "voluptua. At vero eos et accusam et justo duo dolores et ea ", 551 | "font": { 552 | "height": 204, 553 | "width": 89, 554 | "font": "AFPSDA+CourierNewPSMT" 555 | }, 556 | "color": { 557 | "r": 0, 558 | "g": 0, 559 | "b": 0 560 | } 561 | }, 562 | { 563 | "type": "string", 564 | "x": 710, 565 | "y": 4348, 566 | "string": "rebum. Stet clita kasd gubergren, no sea takimata sanctus. ", 567 | "font": { 568 | "height": 204, 569 | "width": 89, 570 | "font": "AFPSDA+CourierNewPSMT" 571 | }, 572 | "color": { 573 | "r": 0, 574 | "g": 0, 575 | "b": 0 576 | } 577 | }, 578 | { 579 | "type": "string", 580 | "x": 710, 581 | "y": 4214, 582 | "string": " ", 583 | "font": { 584 | "height": 204, 585 | "width": 89, 586 | "font": "AFPSDA+CourierNewPSMT" 587 | }, 588 | "color": { 589 | "r": 0, 590 | "g": 0, 591 | "b": 0 592 | } 593 | }, 594 | { 595 | "type": "string", 596 | "x": 710, 597 | "y": 4077, 598 | "string": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed ", 599 | "font": { 600 | "height": 204, 601 | "width": 89, 602 | "font": "AFPSDA+CourierNewPSMT" 603 | }, 604 | "color": { 605 | "r": 0, 606 | "g": 0, 607 | "b": 0 608 | } 609 | }, 610 | { 611 | "type": "string", 612 | "x": 710, 613 | "y": 3940, 614 | "string": "diam nonumy eirmod tempor invidunt ut labore et dolore magna ", 615 | "font": { 616 | "height": 204, 617 | "width": 89, 618 | "font": "AFPSDA+CourierNewPSMT" 619 | }, 620 | "color": { 621 | "r": 0, 622 | "g": 0, 623 | "b": 0 624 | } 625 | }, 626 | { 627 | "type": "string", 628 | "x": 710, 629 | "y": 3806, 630 | "string": "aliquyam erat, sed diam voluptua. At vero eos et accusam et ", 631 | "font": { 632 | "height": 204, 633 | "width": 89, 634 | "font": "AFPSDA+CourierNewPSMT" 635 | }, 636 | "color": { 637 | "r": 0, 638 | "g": 0, 639 | "b": 0 640 | } 641 | }, 642 | { 643 | "type": "string", 644 | "x": 710, 645 | "y": 3669, 646 | "string": "justo duo dolores et ea rebum. Stet clita kasd gubergren, no ", 647 | "font": { 648 | "height": 204, 649 | "width": 89, 650 | "font": "AFPSDA+CourierNewPSMT" 651 | }, 652 | "color": { 653 | "r": 0, 654 | "g": 0, 655 | "b": 0 656 | } 657 | }, 658 | { 659 | "type": "string", 660 | "x": 710, 661 | "y": 3535, 662 | "string": "sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ", 663 | "font": { 664 | "height": 204, 665 | "width": 89, 666 | "font": "AFPSDA+CourierNewPSMT" 667 | }, 668 | "color": { 669 | "r": 0, 670 | "g": 0, 671 | "b": 0 672 | } 673 | }, 674 | { 675 | "type": "string", 676 | "x": 710, 677 | "y": 3398, 678 | "string": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam ", 679 | "font": { 680 | "height": 204, 681 | "width": 89, 682 | "font": "AFPSDA+CourierNewPSMT" 683 | }, 684 | "color": { 685 | "r": 0, 686 | "g": 0, 687 | "b": 0 688 | } 689 | }, 690 | { 691 | "type": "string", 692 | "x": 710, 693 | "y": 3261, 694 | "string": "nonumy eirmod tempor invidunt ut labore et dolore magna ", 695 | "font": { 696 | "height": 204, 697 | "width": 89, 698 | "font": "AFPSDA+CourierNewPSMT" 699 | }, 700 | "color": { 701 | "r": 0, 702 | "g": 0, 703 | "b": 0 704 | } 705 | }, 706 | { 707 | "type": "string", 708 | "x": 710, 709 | "y": 3127, 710 | "string": "aliquyam erat, sed diam voluptua. At vero eos et accusam et ", 711 | "font": { 712 | "height": 204, 713 | "width": 89, 714 | "font": "AFPSDA+CourierNewPSMT" 715 | }, 716 | "color": { 717 | "r": 0, 718 | "g": 0, 719 | "b": 0 720 | } 721 | }, 722 | { 723 | "type": "string", 724 | "x": 710, 725 | "y": 2990, 726 | "string": "justo duo dolores et ea rebum. Stet clita kasd gubergren, no ", 727 | "font": { 728 | "height": 204, 729 | "width": 89, 730 | "font": "AFPSDA+CourierNewPSMT" 731 | }, 732 | "color": { 733 | "r": 0, 734 | "g": 0, 735 | "b": 0 736 | } 737 | }, 738 | { 739 | "type": "string", 740 | "x": 710, 741 | "y": 2853, 742 | "string": "sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ", 743 | "font": { 744 | "height": 204, 745 | "width": 89, 746 | "font": "AFPSDA+CourierNewPSMT" 747 | }, 748 | "color": { 749 | "r": 0, 750 | "g": 0, 751 | "b": 0 752 | } 753 | }, 754 | { 755 | "type": "string", 756 | "x": 710, 757 | "y": 2719, 758 | "string": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam ", 759 | "font": { 760 | "height": 204, 761 | "width": 89, 762 | "font": "AFPSDA+CourierNewPSMT" 763 | }, 764 | "color": { 765 | "r": 0, 766 | "g": 0, 767 | "b": 0 768 | } 769 | }, 770 | { 771 | "type": "string", 772 | "x": 710, 773 | "y": 2582, 774 | "string": "nonumy eirmod tempor invidunt ut labore et dolore magna ", 775 | "font": { 776 | "height": 204, 777 | "width": 89, 778 | "font": "AFPSDA+CourierNewPSMT" 779 | }, 780 | "color": { 781 | "r": 0, 782 | "g": 0, 783 | "b": 0 784 | } 785 | }, 786 | { 787 | "type": "string", 788 | "x": 710, 789 | "y": 2445, 790 | "string": "aliquyam erat, sed diam voluptua. At vero eos et accusam et ", 791 | "font": { 792 | "height": 204, 793 | "width": 89, 794 | "font": "AFPSDA+CourierNewPSMT" 795 | }, 796 | "color": { 797 | "r": 0, 798 | "g": 0, 799 | "b": 0 800 | } 801 | }, 802 | { 803 | "type": "string", 804 | "x": 710, 805 | "y": 2311, 806 | "string": "justo duo dolores et ea rebum. Stet clita kasd gubergren, no ", 807 | "font": { 808 | "height": 204, 809 | "width": 89, 810 | "font": "AFPSDA+CourierNewPSMT" 811 | }, 812 | "color": { 813 | "r": 0, 814 | "g": 0, 815 | "b": 0 816 | } 817 | }, 818 | { 819 | "type": "string", 820 | "x": 710, 821 | "y": 2174, 822 | "string": "sea takimata sanctus est Lorem ipsum dolor sit amet. ", 823 | "font": { 824 | "height": 204, 825 | "width": 89, 826 | "font": "AFPSDA+CourierNewPSMT" 827 | }, 828 | "color": { 829 | "r": 0, 830 | "g": 0, 831 | "b": 0 832 | } 833 | }, 834 | { 835 | "type": "string", 836 | "x": 710, 837 | "y": 2040, 838 | "string": " ", 839 | "font": { 840 | "height": 204, 841 | "width": 89, 842 | "font": "AFPSDA+CourierNewPSMT" 843 | }, 844 | "color": { 845 | "r": 0, 846 | "g": 0, 847 | "b": 0 848 | } 849 | }, 850 | { 851 | "type": "string", 852 | "x": 710, 853 | "y": 1903, 854 | "string": "Duis autem vel eum iriure dolor in hendrerit in vulputate ", 855 | "font": { 856 | "height": 204, 857 | "width": 89, 858 | "font": "AFPSDA+CourierNewPSMT" 859 | }, 860 | "color": { 861 | "r": 0, 862 | "g": 0, 863 | "b": 0 864 | } 865 | }, 866 | { 867 | "type": "string", 868 | "x": 710, 869 | "y": 1766, 870 | "string": "velit esse molestie consequat, vel illum dolore eu feugiat ", 871 | "font": { 872 | "height": 204, 873 | "width": 89, 874 | "font": "AFPSDA+CourierNewPSMT" 875 | }, 876 | "color": { 877 | "r": 0, 878 | "g": 0, 879 | "b": 0 880 | } 881 | }, 882 | { 883 | "type": "string", 884 | "x": 710, 885 | "y": 1632, 886 | "string": "nulla facilisis at vero eros et accumsan et iusto odio ", 887 | "font": { 888 | "height": 204, 889 | "width": 89, 890 | "font": "AFPSDA+CourierNewPSMT" 891 | }, 892 | "color": { 893 | "r": 0, 894 | "g": 0, 895 | "b": 0 896 | } 897 | }, 898 | { 899 | "type": "string", 900 | "x": 710, 901 | "y": 1495, 902 | "string": "dignissim qui blandit praesent luptatum zzril delenit augue ", 903 | "font": { 904 | "height": 204, 905 | "width": 89, 906 | "font": "AFPSDA+CourierNewPSMT" 907 | }, 908 | "color": { 909 | "r": 0, 910 | "g": 0, 911 | "b": 0 912 | } 913 | }, 914 | { 915 | "type": "string", 916 | "x": 710, 917 | "y": 1358, 918 | "string": "duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit ", 919 | "font": { 920 | "height": 204, 921 | "width": 89, 922 | "font": "AFPSDA+CourierNewPSMT" 923 | }, 924 | "color": { 925 | "r": 0, 926 | "g": 0, 927 | "b": 0 928 | } 929 | }, 930 | { 931 | "type": "string", 932 | "x": 710, 933 | "y": 1224, 934 | "string": "amet, consectetuer adipiscing elit, sed diam nonummy nibh ", 935 | "font": { 936 | "height": 204, 937 | "width": 89, 938 | "font": "AFPSDA+CourierNewPSMT" 939 | }, 940 | "color": { 941 | "r": 0, 942 | "g": 0, 943 | "b": 0 944 | } 945 | }, 946 | { 947 | "type": "string", 948 | "x": 710, 949 | "y": 1087, 950 | "string": "euismod tincidunt ut laoreet dolore magna aliquam erat ", 951 | "font": { 952 | "height": 204, 953 | "width": 89, 954 | "font": "AFPSDA+CourierNewPSMT" 955 | }, 956 | "color": { 957 | "r": 0, 958 | "g": 0, 959 | "b": 0 960 | } 961 | }, 962 | { 963 | "type": "string", 964 | "x": 710, 965 | "y": 950, 966 | "string": "volutpat. ", 967 | "font": { 968 | "height": 204, 969 | "width": 89, 970 | "font": "AFPSDA+CourierNewPSMT" 971 | }, 972 | "color": { 973 | "r": 0, 974 | "g": 0, 975 | "b": 0 976 | } 977 | }, 978 | { 979 | "type": "string", 980 | "x": 710, 981 | "y": 816, 982 | "string": " ", 983 | "font": { 984 | "height": 204, 985 | "width": 89, 986 | "font": "AFPSDA+CourierNewPSMT" 987 | }, 988 | "color": { 989 | "r": 0, 990 | "g": 0, 991 | "b": 0 992 | } 993 | } 994 | ] -------------------------------------------------------------------------------- /scissors.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @module scissors 3 | */ 4 | 5 | // imports 6 | var fs = require('fs'); 7 | var spawn = require('child_process').spawn; 8 | var path = require('path'); 9 | var Stream = require('stream').Stream; 10 | var BufferStream = require('bufferstream'); 11 | var temp = require('temp').track(); 12 | var async = require('async'); 13 | var Promise = require('any-promise'); 14 | var rimraf = require('rimraf').sync; 15 | 16 | /* 17 | Internal functions 18 | */ 19 | 20 | /** 21 | * Non-standard lightweight internal promise implementation with a simple callback 22 | * Queue functions by using promise(yourCallback); Deliver the promise using 23 | * promise.deliver(). Once the promise has been delivered, promise(yourCallback) 24 | * immediately calls. 25 | * @ignore 26 | * @return {Function} 27 | */ 28 | function promise () { 29 | var queue = [], args = null; 30 | var promise = function (fn) { 31 | if (promise.delivered) { 32 | process.nextTick(function () { 33 | fn.apply(null, args); 34 | }); 35 | } else { 36 | queue.push(fn); 37 | } 38 | } 39 | promise.deliver = function () { 40 | args = arguments, promise.delivered = true; 41 | queue.splice(0, queue.length).forEach(function (fn) { 42 | process.nextTick(function () { 43 | fn.apply(null, args); 44 | }); 45 | }); 46 | } 47 | return promise; 48 | } 49 | 50 | /** 51 | * Forwards stream events "data", "end" and "error" from 52 | * stream a to stream b 53 | * @ignore 54 | * @param {Stream} a The source stream 55 | * @param {Stream} b The target stream 56 | */ 57 | function proxyStream (a, b) { 58 | if (a && b) { 59 | a 60 | .on('data', b.emit.bind(b, 'data')) 61 | .on('end', b.emit.bind(b, 'end')) 62 | .on('error', b.emit.bind(b, 'error')); 63 | } 64 | } 65 | 66 | /** 67 | * Constructor of Command instance 68 | * @inner 69 | * @constructor 70 | * @param {mixed} input Should be either a filename (string) or a pipe. If it's 71 | * a pipe, this.stream is set to that value, otherwise null. 72 | 73 | * @param {Boolean} ready Whether the command has been fully executed 74 | */ 75 | function Command (input, ready) { 76 | this.input = input; 77 | // is input stream? 78 | if (typeof this.input !== 'string' && this.input && this.input.pipe) { 79 | this.stream = this.input; 80 | } else { 81 | this.stream = null; 82 | } 83 | this.commands = []; 84 | this.onready = promise(); 85 | if (ready !== false) { 86 | this.onready.deliver(); 87 | } 88 | } 89 | 90 | /** 91 | * Makes a copy of the commands in the queue and adds the input 92 | * @return {Command} A chainable Command instance 93 | */ 94 | Command.prototype._copy = function () { 95 | var cmd = new Command(); 96 | cmd.input = this.input; 97 | cmd.stream = this.stream; 98 | cmd.commands = this.commands.slice(); 99 | cmd.onready = this.onready; 100 | return cmd; 101 | } 102 | 103 | /** 104 | * Pushes a command to the queue 105 | * @param {Array} command 106 | * @return {Command} A chainable Command instance 107 | */ 108 | Command.prototype._push = function (command) { 109 | this.commands.push(command); 110 | this.input = command; 111 | return this; 112 | } 113 | 114 | /** 115 | * Returns what the command line expects to receive, i.e. either the filename 116 | * or - (i.e. stdin) 117 | * @return {string} 118 | */ 119 | Command.prototype._input = function () { 120 | // Non-existant files will throw an error, assume full paths. 121 | try { 122 | return typeof this.input == 'string' ? fs.realpathSync(this.input) : '-'; 123 | } catch (e) { 124 | return this.input; 125 | } 126 | }; 127 | 128 | /** 129 | * Marks a folder to be deleted on cleanup 130 | * @param {String} folder 131 | * @return {Command} A chainable Command instance 132 | */ 133 | Command.prototype._markCleanupFolder = function (folder) { 134 | this._cleanupFolders = this._cleanupFolders || []; 135 | this._cleanupFolders.push(folder); 136 | return this; 137 | }; 138 | 139 | /* 140 | Chainable instance methods, return a Command instance 141 | */ 142 | 143 | /** 144 | * Creates a copy of a page range 145 | * @param {number} min First page 146 | * @param {number} max Last page. If omitted, all pages starting with 147 | * first page are used. 148 | * @return {Command} A chainable Command instance 149 | */ 150 | Command.prototype.range = function (min, max) { 151 | var cmd = this._copy(); 152 | return cmd._push([ 153 | 'pdftk', cmd._input(), 154 | 'cat', min + (max ? '-' + max : '-end'), 155 | 'output', '-' 156 | ]); 157 | }; 158 | 159 | /** 160 | * Creates a copy of the pages with the given numbers 161 | * @param {(...Number|Array)} Page number, either as an array or as arguments 162 | * @return {Command} A chainable Command instance 163 | */ 164 | Command.prototype.pages = function () { 165 | var args = (Array.isArray(arguments[0])) ? 166 | arguments[0] : Array.prototype.slice.call(arguments); 167 | var cmd = this._copy(); 168 | return cmd._push([ 169 | 'pdftk', cmd._input(), 170 | 'cat'].concat(args.map(Number), [ 171 | 'output', '-' 172 | ])); 173 | }; 174 | 175 | /** 176 | * Creates a copy of all pages with an odd page number 177 | * @return {Command} A chainable Command instance 178 | */ 179 | Command.prototype.odd = function (/*min, max*/) { 180 | var cmd = this._copy(); 181 | return cmd._push([ 182 | 'pdftk', cmd._input(), 183 | 'cat', 'odd', 184 | 'output', '-' 185 | ]); 186 | }; 187 | 188 | /** 189 | * Creates a copy of all pages with an even page number 190 | * @return {Command} A chainable Command instance 191 | */ 192 | Command.prototype.even = function (/*min, max*/) { 193 | var cmd = this._copy(); 194 | return cmd._push([ 195 | 'pdftk', cmd._input(), 196 | 'cat', 'even', 197 | 'output', '-' 198 | ]); 199 | }; 200 | 201 | /** 202 | * Creates a copy of the input in reverse order 203 | * @return {Command} A chainable Command instance 204 | */ 205 | Command.prototype.reverse = function (/*min, max*/) { 206 | var cmd = this._copy(); 207 | return cmd._push([ 208 | 'pdftk', cmd._input(), 209 | 'cat', 'end-1', 210 | 'output', '-' 211 | ]); 212 | }; 213 | 214 | /** 215 | * Rotates a copy of the input with the given degree 216 | * @param {number} amount 217 | * @return {Command} A chainable Command instance 218 | */ 219 | Command.prototype.rotate = function (amount) { 220 | var cmd = this._copy(); 221 | amount = Number(amount) % 360; 222 | var dir = null; 223 | switch (amount) { 224 | case 90: case -270: dir = 'EAST'; break; 225 | case 180: case -180: dir = 'SOUTH'; break; 226 | case -90: case 270: dir = 'WEST'; break; 227 | case 0: return this; 228 | default: 229 | throw new Error("Invalid rotation angle: " + amount); 230 | } 231 | return cmd._push([ 232 | 'pdftk', cmd._input(), 233 | 'cat', '1-end' + dir, 234 | 'output', '-' 235 | ]); 236 | }; 237 | 238 | 239 | /** 240 | * Compresses the input 241 | * @return {Command} A chainable Command instance 242 | */ 243 | Command.prototype.compress = function () { 244 | var cmd = this._copy(); 245 | return cmd._push([ 246 | 'pdftk', cmd._input(), 'output', '-', 247 | 'compress' 248 | ]); 249 | }; 250 | 251 | /** 252 | * Uncompresses the input 253 | * @return {Command} A chainable Command instance 254 | */ 255 | Command.prototype.uncompress = function () { 256 | var cmd = this._copy(); 257 | return cmd._push([ 258 | 'pdftk', cmd._input(), 'output', '-', 259 | 'uncompress' 260 | ]); 261 | }; 262 | 263 | /** 264 | * Repairs the input 265 | * @return {Command} A chainable Command instance 266 | */ 267 | Command.prototype.repair = function () { 268 | // pdftk extraction of a single page causes issues for some reason. 269 | // "repairing" using pdftk fixes this. 270 | var cmd = this._copy(); 271 | var args = [ 272 | 'pdftk', this._input(), 'output', '-' 273 | ]; 274 | // Don't double-repair. 275 | if (JSON.stringify(this.commands[this.commands.length - 1]) != JSON.stringify(args)) { 276 | cmd._push(args); 277 | } 278 | return cmd; 279 | }; 280 | 281 | /** 282 | * Crops the input to a box defined by two x-y coordinates (left bottom / 283 | * right top) in pt (72 points == 1 inch == 25.4 millimeters, 1mm = 2,8pt), 284 | * measured from the bottom left (coordinates 0,0). 285 | * Doesn't work with all PDFs yet, see // https://github.com/tcr/scissors/issues/21 286 | * 287 | * @param {number} l Left x coordinate in pt 288 | * @param {number} b Bottom y coordinate in pt 289 | * @param {number} r Right x coordinate in pt 290 | * @param {number} t Top y coordinate in pt 291 | * @return {Command} A chainable Command instance 292 | */ 293 | Command.prototype.crop = function (l, b, r, t) { 294 | var cmd = this.uncompress(); 295 | return cmd._push([path.join(__dirname, 'bin/crop.js'), l, b, r, t]); 296 | }; 297 | 298 | /* 299 | Instance methods returning a stream 300 | */ 301 | 302 | /** 303 | * Returns a stream with the output of `pdftk infile dump_data` (a report on PDF 304 | * document metadata and bookmarks). Used by {@link Command#getNumPages}. Might 305 | * be removed or turned into internal function, since it is very similar to 306 | * {@link Command#propertyStream} 307 | * @return {Stream} 308 | */ 309 | Command.prototype.dumpData = function () { 310 | var cmd = this._copy(); 311 | cmd._push([ 312 | 'pdftk', cmd._input(), 313 | 'dump_data' 314 | ]); 315 | return cmd._exec(); 316 | }; 317 | 318 | /** 319 | * Returns a stream with the PDF data 320 | * @return {Stream} 321 | */ 322 | Command.prototype.pdfStream = function () { 323 | var cmd = this.repair(); 324 | return cmd._exec(); 325 | }; 326 | 327 | /** 328 | * Returns a stream with the PNG data in the given resolution 329 | * @param {number} dpi DPI resolution 330 | * @return {Stream} 331 | */ 332 | Command.prototype.pngStream = function (dpi, page, useSimpleRasterize, useCropBox) { 333 | return this.imageStream(dpi, 'png', page, useSimpleRasterize, useCropBox); 334 | }; 335 | 336 | /** 337 | * Returns a stream with the JPG data in the given resolution 338 | * @param {number} dpi DPI resolution 339 | * @return {Stream} 340 | */ 341 | Command.prototype.jpgStream = function (dpi, page, useSimpleRasterize, useCropBox) { 342 | return this.imageStream(dpi, 'jpg', page, useSimpleRasterize, useCropBox); 343 | }; 344 | 345 | /** 346 | * Returns a stream with the image data in the given resolution 347 | * @param {number} dpi DPI resolution 348 | * @return {Stream} 349 | */ 350 | Command.prototype.imageStream = function (dpi, format, page, useSimpleRasterize, useCropBox) { 351 | var cmd = this.repair(); 352 | var rasterizer = useSimpleRasterize ? 'bin/simple_rasterize.js' : 'bin/rasterize.js'; 353 | cmd._push([path.join(__dirname, rasterizer), this._input(), format || 'png', page || 1, dpi || 72, useCropBox ? 'true' : 'false']); 354 | var stream = cmd._exec(); 355 | return stream; 356 | }; 357 | 358 | /** 359 | * (Internal) Returns a stream with JSON data parsed from the raw PDF data. 360 | * Consumes this.pdfStream() 361 | * @return {BufferStream} 362 | */ 363 | Command.prototype._commandStream = function () { 364 | var stream = new BufferStream({ 365 | size: 'flexible' 366 | }); 367 | // var buf = []; 368 | stream.split('\n', function (line) { 369 | var tokens = String(line).split(/[ ](?=[^\)]*?(?:\(|$))/); 370 | var data = (function () { 371 | switch (tokens[0]) { 372 | case 'S': return {type: 'string', x: +tokens[1], y: +tokens[2], string: tokens[3].replace(/^.|.$/g, '')}; 373 | case 'F': return {type: 'font', height: +tokens[1], width: +tokens[2], font: (tokens[3] || '').replace(/^.|.$/g, '')}; 374 | case 'P': return {type: 'endpage'}; 375 | case 'C': return {type: 'color', r: +tokens[1], g: +tokens[2], b: +tokens[3]}; 376 | case 'I': return {type: 'image', x: +tokens[1], y: +tokens[2], width: +tokens[3], height: +tokens[4]}; 377 | case 'R': return {type: 'rectangle', x: +tokens[1], y: +tokens[2], width: +tokens[3], height: +tokens[4]}; 378 | } 379 | })(); 380 | if (data) { 381 | stream.emit('data', data); 382 | } 383 | }); 384 | 385 | var gs = spawn('gs', [ 386 | '-q', '-dNODISPLAY', 387 | '-P-', 388 | '-dSAFER', 389 | '-dDELAYBIND', 390 | '-dWRITESYSTEMDICT', 391 | '-dCOMPLEX', path.join(__dirname, 'contrib/ps2ascii.ps'), 392 | '-', '-c', 'quit']); 393 | this.pdfStream().pipe(gs.stdin); 394 | var end = false; 395 | gs.stdout 396 | .pipe(stream) 397 | .on('end', function(){ 398 | end = true; 399 | }) 400 | gs.stderr.on('data', function (data) { 401 | console.error('gs encountered an error:\n', String(data)); 402 | }); 403 | gs.on('exit', function (/*code*/) { 404 | if (!end) { 405 | end = true; 406 | stream.emit('end'); 407 | } 408 | }); 409 | return stream; 410 | }; 411 | 412 | /** 413 | * Returns a stream with JSON content data aggregated from this._commandStream() 414 | * @return {Stream} 415 | */ 416 | Command.prototype.contentStream = function () { 417 | function isNextStringPartOfLastString (b, a, font) { 418 | // NOTE: This is a completely arbitrary heuristic. 419 | // I wouldn't trust it to not break. 420 | return Math.abs(a.y - b.y) < 50 && Math.abs((a.x + (a.string.length*(font.width / 3))) - b.x) < (font.width + 10); 421 | } 422 | 423 | function decode (str) { 424 | return String(str).replace(/\\(\d{3}|.)/g, function (str, esc) { 425 | if (esc.length == 3) { 426 | return String.fromCharCode(parseInt(esc, 8)); 427 | } else { 428 | try { 429 | return JSON.parse('"' + str + '"'); 430 | } catch (e) { 431 | return esc; 432 | } 433 | } 434 | }); 435 | } 436 | 437 | var stream = new Stream(), str = '', first = null, last = null, font = null, color = null, imgindex = 0; 438 | this._commandStream() 439 | .on('data', function (cmd) { 440 | if (cmd.type == 'string') { 441 | if (!last || isNextStringPartOfLastString(cmd, last, font)) { 442 | str += decode(cmd.string); 443 | } else { 444 | stream.emit('data', { 445 | type: 'string', x: (first || cmd).x, y: (first || cmd).y, 446 | string: str, font: font, color: color 447 | }); 448 | str = decode(cmd.string); 449 | first = cmd; 450 | } 451 | last = cmd; 452 | } else if (cmd.type == 'image') { 453 | cmd.index = imgindex++; 454 | stream.emit('data', cmd); 455 | } else if (cmd.type == 'font') { 456 | delete cmd.type; 457 | font = cmd; 458 | } else if (cmd.type == 'color') { 459 | delete cmd.type; 460 | color = cmd; 461 | } 462 | }) 463 | .on('end', function () { 464 | if (str) { 465 | stream.emit('data', { 466 | type: 'string', 467 | x: first ? first.x : 0, 468 | y: first ? first.y : 0, 469 | string: str, font: font, color: color 470 | }); 471 | str = ''; 472 | process.nextTick(function() { 473 | stream.emit('end'); 474 | }); 475 | } 476 | }); 477 | return stream; 478 | }; 479 | 480 | /** 481 | * Returns a Stream with text content data aggregated from this._commandStream() 482 | * @return {Stream} 483 | */ 484 | Command.prototype.textStream = function () { 485 | var stream = new Stream(); 486 | this.contentStream().on('data', function (cmd) { 487 | if (cmd.type == 'string') { 488 | stream.emit('data', cmd.string); 489 | } 490 | }); 491 | this.contentStream().on('end', function () { 492 | stream.emit('end'); 493 | }); 494 | return stream; 495 | }; 496 | 497 | /** 498 | * Returns a stream of image data, via the `pdfimages` command (called with `-j`). 499 | * The output format cannot be guaranteed. As per pdfimages documentation 500 | * (http://linuxcommand.org/man_pages/pdfimages1.html), images in DCT format 501 | * are saved as JPEG format. All non-DCT images are saved are written as PBM 502 | * (for monochrome images) or PPM (for non-monochrome images) files. 503 | * NOTE: The current implementation is pretty costly and is dependent on an additional 504 | * dependency (pdfimages). Preferrably, this would be done in Ghostscript. 505 | * @param {Number=} [0] i The number of the image to be extracted, defaults to 0. 506 | * @return {Stream} Stream of image data in PPM, PBM or JPG format 507 | */ 508 | Command.prototype.extractImageStream = function (i) { 509 | i = i || 0; 510 | var stream = new Stream(); 511 | if (!this._pdfimages) { 512 | var callback = this._pdfimages = promise(); 513 | temp.mkdir('pdfimages', function (err, dirPath) { 514 | this._markCleanupFolder(dirPath); 515 | this.pdfStream() 516 | .pipe(fs.createWriteStream(path.join(dirPath, 'file.pdf'))) 517 | .on('error', function () { 518 | callback.deliver([]); 519 | }) 520 | .on('close', function () { 521 | var prog = spawn('pdfimages', ['-j', dirPath + '/file.pdf', dirPath + '/A']); 522 | prog.stderr.on('data', function (data) { 523 | process.stderr.write('pdfimages: ' + String(data)); 524 | }); 525 | prog.on('exit', function (code) { 526 | if (code) { 527 | console.error('pdfimages exited with failure code:', code); 528 | throw new Error('pdfimages failed.'); 529 | } 530 | var files = fs.readdirSync(dirPath).slice(0, -1).map(function (file) { 531 | return dirPath + '/' + file; 532 | }); 533 | callback.deliver(files); 534 | }); 535 | }.bind(this)) 536 | }.bind(this)); 537 | } 538 | 539 | // Add callback to promise. 540 | this._pdfimages(function (pdfimages) { 541 | if (!pdfimages[i]) { 542 | stream.emit('error', new Error('Image ' + i + ' out of bounds.')); 543 | return; 544 | } 545 | proxyStream(fs.createReadStream(pdfimages[i]), stream); 546 | }); 547 | 548 | return stream; 549 | }; 550 | 551 | /** 552 | * Returns a stream of property data, in UTF-8 encoding 553 | * @return {Stream} 554 | */ 555 | Command.prototype.propertyStream = function () { 556 | var stream = new BufferStream({ 557 | size: 'flexible' 558 | }); 559 | stream.split('\n', function (buffer) { 560 | var line = String(buffer); 561 | var index = line.indexOf(':'); 562 | if(index > -1) { 563 | stream.emit('data', { 564 | event: line.slice(0, index), 565 | value: parseInt(line.slice(index + 1)) 566 | }) 567 | } else { 568 | stream.emit('data', {event: line}); 569 | } 570 | }); 571 | 572 | var cmd = this._copy(); 573 | var property_stream = cmd._push([ 574 | 'pdftk', cmd._input(), 575 | 'dump_data_utf8', 576 | 'output', '-' 577 | ])._exec().pipe(stream); 578 | 579 | property_stream.on('exit', function () { 580 | stream.emit('end'); 581 | }); 582 | 583 | return stream; 584 | } 585 | 586 | /** 587 | * Executes the commands in order and returns a stream with the data of the 588 | * result document 589 | * @return {Stream} 590 | */ 591 | Command.prototype._exec = function () { 592 | var stream = new Stream(), commands = this.commands.slice(); 593 | 594 | stream.on('error', function (err) { 595 | console.error(err.message); 596 | }) 597 | 598 | // Note: this.stream is either a pipe or null. If it's a pipe, it's piped into the 599 | // object as stdin. (Otherwise the command would receive no stdin) And _input 600 | // is used as the input argument to the command, either the filename or - to 601 | // mean stdin, accordingly. 602 | var initialValue = this.stream; 603 | this.onready(function () { 604 | // use result of one command as input for next command 605 | var commandStream = commands.reduce(function (input, command) { 606 | var prog = spawn(command[0], command.slice(1)); 607 | if (input) { 608 | input.pipe(prog.stdin); 609 | } 610 | prog.stderr.on('data', function (data) { 611 | process.stderr.write(command[0].match(/[^\/]*$/)[0] + ': ' + String(data)); 612 | }); 613 | prog.on('exit', function (code) { 614 | if (code) { 615 | var err = new Error(command[0] + ' exited with failure code: ' + code); 616 | err.code = code; 617 | stream.emit('error', err ); 618 | console.error(err.message); // TODO Deprecated, will be removed 619 | } 620 | }); 621 | return prog.stdout; 622 | }, initialValue); 623 | proxyStream(commandStream, stream); 624 | }); 625 | return stream; 626 | } 627 | 628 | /* 629 | Instance methods returning Promises 630 | */ 631 | 632 | /** 633 | * Returns the number of pages in the document. 634 | * @return {Promise} 635 | */ 636 | Command.prototype.getNumPages = function() { 637 | var self = this; 638 | return new Promise(function(resolve, reject) { 639 | self.propertyStream() 640 | .on('data',function(data){ 641 | if( data.event === 'NumberOfPages' ){ 642 | resolve(parseInt(data.value)); 643 | } 644 | }) 645 | .on('end', function() { 646 | reject(new Error("PDF does not contain page number data.")); 647 | }) 648 | .on('error', reject); 649 | }); 650 | }; 651 | 652 | /** 653 | * Cleans all temporary folders created during usage. 654 | * Use this method if your process is running for a long time 655 | * and you want to clean up temporary folders. 656 | */ 657 | Command.prototype.cleanup = function() { 658 | if (this._cleanupFolders) { 659 | this._cleanupFolders.forEach(function (dir) { 660 | rimraf(dir) 661 | }); 662 | this._cleanupFolders = []; 663 | } 664 | }; 665 | 666 | /** 667 | * Returns an array of objects containing the dimension of the page. 668 | * Requires the imagemagick package, containing the `identify` command line 669 | * utility 670 | * @return {Promise} Promise that resolves with an array of objects, each 671 | * containing the properties 'width', 'height' and 'unit' unit being 'pt'. 672 | */ 673 | Command.prototype.getPageSizes = function() { 674 | var self = this; 675 | return new Promise(function(resolve, reject) { 676 | temp.open({suffix: '.pdf'}, function(err, info) { 677 | if (err) reject(err); 678 | fs.close(info.fd, function(err) { 679 | if (err) reject(err); 680 | self 681 | .pdfStream() 682 | .on('error', reject) 683 | .pipe(fs.createWriteStream(info.path)) 684 | .on('finish',function(){ 685 | var identify = spawn('identify', [info.path]); 686 | var result =""; 687 | identify.stderr.on('data', function (data) { 688 | if (data && data.toString().trim()) { 689 | throw new Error('identify encountered an error:\n', String(data)); 690 | } 691 | }); 692 | identify.stdout.on('data', function(data){ 693 | result+=data.toString(); 694 | }); 695 | identify.on('exit', function (code) { 696 | rimraf(info.path); 697 | if (code) { 698 | throw new Error('identify exited with failure code:', code); 699 | } 700 | dimensions=[]; 701 | var re = /\[([0-9]+)\] PDF ([0-9]+)x([0-9]+)/ig; 702 | result.split(/\n/).map(function(line){ 703 | var matches = re.exec(line); 704 | if(matches instanceof Array){ 705 | dimensions.push({ 706 | width : matches[2], 707 | height : matches[3], 708 | unit : 'pt' 709 | }); 710 | } 711 | }); 712 | resolve(dimensions); 713 | }); 714 | }) 715 | .on('error',function(err){ 716 | rimraf(info.path); 717 | reject(err); 718 | }); 719 | }); 720 | }); 721 | }); 722 | }; 723 | 724 | /** 725 | * Entry function 726 | * @function 727 | * @param {string} path Path to the source PDF 728 | * @return {Command} A Command instance 729 | */ 730 | var scissors = function (path) { 731 | var cmd = new Command(path); 732 | return cmd; 733 | } 734 | 735 | /** 736 | * Joins the given pages into one document and returnes a 737 | * @return {Command} A chainable Command instance 738 | */ 739 | scissors.join = function () { 740 | var joinTemp = temp.mkdirSync('pdfimages'), joinindex = 0; 741 | var args = Array.prototype.slice.call(arguments); 742 | 743 | var outfile = joinTemp + '/' + (joinindex++) + '.pdf'; 744 | var pdf = new Command(outfile, false); 745 | pdf._markCleanupFolder(joinTemp); 746 | 747 | async.map(args, function (arg, next) { 748 | var file = joinTemp + '/' + (joinindex++) + '.pdf'; 749 | arg.pdfStream() 750 | .pipe(fs.createWriteStream(file)) 751 | .on('close', function () { 752 | next(null, file); 753 | }); 754 | }, function (err, files) { 755 | var command = ['pdftk'].concat(files, ['output', outfile]); 756 | var prog = spawn(command[0], command.slice(1)); 757 | prog.stderr.on('data', function (data) { 758 | process.stderr.write(command[0].match(/[^\/]*$/)[0] + ': ' + String(data)); 759 | }); 760 | prog.on('exit', function (code) { 761 | if (code) { 762 | console.error(command[0], 'exited with failure code:', code); 763 | } 764 | // PDF is now ready. 765 | pdf.onready.deliver(); 766 | }); 767 | }); 768 | 769 | return pdf; 770 | } 771 | 772 | 773 | /** 774 | * Exports the scissors function 775 | */ 776 | module.exports = scissors; 777 | 778 | /* 779 | 780 | References 781 | 782 | * http://hzqtc.github.com/2012/04/pdf-tools-merging-extracting-and-cropping.html 783 | * http://documentcloud.github.com/docsplit/ 784 | * http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/ 785 | * http://segfault.in/2010/07/pdf-manipulations-and-conversions-from-linux-command-prompt/ 786 | * http://www.maths.ox.ac.uk/help/faqs/files/manipulating-pdf-files 787 | * http://stackoverflow.com/questions/11754556/ghostscript-convert-a-pdf-and-output-in-a-textfile 788 | * http://right-sock.net/linux/better-convert-pdf-to-jpg-using-ghost-script/ 789 | * http://stackoverflow.com/questions/12484353/how-to-crop-a-section-of-a-pdf-file-to-png-using-ghostscript?lq=1 790 | 791 | */ 792 | -------------------------------------------------------------------------------- /contrib/ps2ascii.ps: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2001-2012 Artifex Software, Inc. 2 | % All Rights Reserved. 3 | % 4 | % This software is provided AS-IS with no warranty, either express or 5 | % implied. 6 | % 7 | % This software is distributed under license and may not be copied, 8 | % modified or distributed except as expressly authorized under the terms 9 | % of the license contained in the file LICENSE in this distribution. 10 | % 11 | % Refer to licensing information at http://www.artifex.com or contact 12 | % Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134, San Rafael, 13 | % CA 94903, U.S.A., +1(415)492-9861, for further information. 14 | % 15 | 16 | % Extract the ASCII text from a PostScript file. Nothing is displayed. 17 | % Instead, ASCII information is written to stdout. The idea is similar to 18 | % Glenn Reid's `distillery', only a lot more simple-minded, and less robust. 19 | 20 | % If SIMPLE is defined, just the text is written, with a guess at line 21 | % breaks and word spacing. If SIMPLE is not defined, lines are written 22 | % to stdout as follows: 23 | % 24 | % F () 25 | % Indicate the font height and the width of a space. 26 | % 27 | % P 28 | % Indicate the end of the page. 29 | % 30 | % S () 31 | % Display a string. 32 | % 33 | % and are integer dimensions in units of 1/720". 34 | % and are integer coordinates, in units of 1/720", with the origin 35 | % at the lower left. 36 | % and are strings represented with the standard 37 | % PostScript escape conventions. 38 | 39 | % If COMPLEX is defined, the following additional types of lines are 40 | % written to stdout. 41 | % 42 | % C 43 | % Indicate the current color. 44 | % 45 | % I 46 | % Note the presence of an image. 47 | % 48 | % R 49 | % Fill a rectangle. 50 | % 51 | % , , and are RGB values expressed as integers between 0 and 1000. 52 | % 53 | % Note that future versions of this program (in COMPLEX mode) may add 54 | % other output elements, so programs parsing the output should be 55 | % prepared to ignore elements that they do not recognize. 56 | 57 | % Note that this code will only work in all cases if systemdict is writable 58 | % and if `binding' the definitions of operators defined as procedures 59 | % is deferred. For this reason, it is normally invoked with 60 | % gs -q -dNODISPLAY -dDELAYBIND -dWRITESYSTEMDICT ps2ascii.ps 61 | 62 | % Thanks to: 63 | % J Greely for improvements to this code; 64 | % Jerry Whelan for motivating other improvements; 65 | % David M. Jones for improvements noted below. 66 | 67 | %% Additional modifications by David M. Jones 68 | %% (dmjones@theory.lcs.mit.edu), December 23, 1997 69 | %% 70 | %% (a) Rewrote forall loop at the end of .show.write. This fixes a 71 | %% stack leakage problem, but the changes are more significant 72 | %% than that. 73 | %% 74 | %% .char.map includes the names of all characters in the 75 | %% StandardEncoding, ISOLatin1Encoding, OT1Encoding and 76 | %% T1Encoding vectors. Thus, if the Encoding vector for the 77 | %% current font contains a name that is not in .char.map, it's 78 | %% redundant to check if the Encoding vector is equal to one of 79 | %% the known vectors. Previous versions of ps2ascii would give 80 | %% up at this point, and substitute an asterisk (*) for the 81 | %% character. I've taken the liberty of instead using the 82 | %% OT1Encoding vector to translate the character, on the grounds 83 | %% that in the cases I'm most interested in, a font without a 84 | %% useful Encoding vector was most likely created by a DVI to PS 85 | %% converter such as dvips or DVILASER (and OT1Encoding is 86 | %% largely compatible with StandardEncoding anyway). [Note that 87 | %% this does not make my earlier changes to support dvips (see 88 | %% fix (a) under my 1996 changes) completely obsolete, since 89 | %% there's additional useful information I can extract in that 90 | %% case.] 91 | %% 92 | %% Overall, this should provide better support for some documents 93 | %% (e.g, DVILASER documents will no longer be translated into a 94 | %% series of *'s) without breaking any other documents any worse 95 | %% than they already were broken. 96 | %% 97 | %% (b) Fixed two bugs in dvips.df-tail: (1) changed "dup 127" to "dup 98 | %% 128" to fix fencepost error, and (2) gave each font it's own 99 | %% FontName rather than having all fonts share the same name. 100 | %% 101 | %% (c) Added one further refinement to the heuristic for detecting 102 | %% paragraph breaks: do not ever start a new paragraph after a 103 | %% line ending in a hyphen. 104 | %% 105 | %% (d) Added a bunch of missing letters from the T1Encoding, 106 | %% OT1Encoding and ISOLatin1Encoding vectors to .letter.chars to 107 | %% improve hyphen-elimination algorithm. This still won't help 108 | %% if there's no useful Encoding vector. 109 | %% 110 | %% NOTE: A better solution to the problem of missing Encoding vectors 111 | %% might be to redefine definefont to check whether the Encoding 112 | %% vector is sensible and, if not, replace it by a default. This 113 | %% would alleviate the need for constant tests in the .show.write 114 | %% loop, as well as automatically solving the problem noted in fix 115 | %% (d) above, and the similar problem with .break.chars. This should 116 | %% be investigated. Also, the hyphen-elimination algorithm really 117 | %% needs to be looked at carefully and rethought. 118 | 119 | %%* Modifications to ps2ascii.ps by David M. Jones 120 | %%* (dmjones@theory.lcs.mit.edu), June 25-July 8, 1996 121 | 122 | %%* Modifications: 123 | %%* 124 | %%* (a) added code to give better support for dvips files by providing 125 | %%* FontBBox's, FontName's and Encoding vectors for downloaded 126 | %%* bitmap fonts. This is done by using dvips's start-hook to 127 | %%* overwrite the df-tail and D procedures that dvips uses to 128 | %%* define its Type 3 bitmap fonts. Thus, this change should 129 | %%* provide better support for dvips-generated PS files without 130 | %%* affecting the handling of other documents. 131 | %%* 132 | %%* (b) Fixed two bugs that could potentially affect any PS file, not 133 | %%* just those created by dvips: (1) added missing "get" operator 134 | %%* in .show.write and (2) fixed bug that caused a hyphen at the 135 | %%* end of a line to be replaced by a space rather than begin 136 | %%* deleted. Note that the first bug was a source of stack 137 | %%* leakage, causing ps2ascii to run out of operand stack space 138 | %%* occasionally. 139 | %%* 140 | %%* Search for "%%* BF" to find these modifications. 141 | %%* 142 | %%* (c) Improved the heuristic for determining whether a line break 143 | %%* has occurred and whether a line break represents a paragraph 144 | %%* break. Previously, any change in the vertical position caused 145 | %%* a line break; now a line break is only registered if the 146 | %%* change is larger than the height of the current font. This 147 | %%* means that superscripts, subscripts, and such things as 148 | %%* shifted accents generated by TeX won't cause line breaks. 149 | %%* Paragraph-recognition is now done by comparing the indentation 150 | %%* of the new line to the indentation of the previous line and by 151 | %%* comparing the vertical distance between the new line and the 152 | %%* previous line to the vertical distance between the previous 153 | %%* line and its predecessor. 154 | %%* 155 | %%* (d) Added a hook for renaming the files where stdout and stderr 156 | %%* go. 157 | %%* 158 | %%* In general, my additions or changes to the code are described in 159 | %%* comments beginning with "%%*". However, there are numerous other 160 | %%* places where I have either re-formatted code or added comments to 161 | %%* the code while I was trying to understand it. These are usually 162 | %%* not specially marked. 163 | %%* 164 | 165 | /QUIET true def 166 | systemdict wcheck { systemdict } { userdict } ifelse begin 167 | /.max where { pop } { /.max { 2 copy lt { exch } if pop } bind def } ifelse 168 | /COMPLEX dup where { pop true } { false } ifelse def 169 | /SIMPLE dup where { pop true } { false } ifelse def 170 | /setglobal where 171 | { pop currentglobal /setglobal load true setglobal } 172 | { { } } 173 | ifelse 174 | 175 | % Define a way to store and retrieve integers that survives save/restore. 176 | /.i.string0 (0 ) def 177 | /.i.string .i.string0 length string def 178 | /.iget { cvi } bind def 179 | /.iput { exch //.i.string exch copy cvs pop } bind def 180 | /.inew { //.i.string0 dup length string copy } bind def 181 | 182 | % We only want to redefine operators if they are defined already. 183 | 184 | /codef { 1 index where { pop def } { pop pop } ifelse } def 185 | 186 | % Redefine the end-of-page operators. 187 | 188 | /erasepage { } codef 189 | /copypage { SIMPLE { (\014) } { (P\n) } ifelse //print } codef 190 | /showpage { copypage erasepage initgraphics } codef 191 | 192 | % Redefine the fill operators to detect rectangles. 193 | 194 | /.orderrect % .orderrect 195 | { % Ensure llx <= urx, lly <= ury. 196 | 1 index 4 index lt { 4 2 roll } if 197 | dup 3 index lt { 3 1 roll exch } if 198 | exch 3 index sub exch 2 index sub 199 | } odef 200 | /.fillcomplex 201 | { % Do a first pass to see if the path is all rectangles in 202 | % the output coordinate system. We don't worry about overlapping 203 | % rectangles that might be partially not filled. 204 | % Stack: mark llx0 lly0 urx0 ury0 ... true mark x0 y0 ... 205 | mark true mark 206 | % Add a final moveto so we pick up any trailing unclosed subpath. 207 | 0 0 itransform moveto 208 | { .coord counttomark 2 gt 209 | { counttomark 4 gt { .fillcheckrect } { 4 2 roll pop pop } ifelse } 210 | if 211 | } 212 | { .coord } 213 | { cleartomark not mark exit } 214 | { counttomark -2 roll 2 copy counttomark 2 roll .fillcheckrect } 215 | pathforall cleartomark 216 | { .showcolor counttomark 4 idiv 217 | { counttomark -4 roll .orderrect 218 | (R ) //print .show==4 219 | } 220 | repeat pop 221 | } 222 | { cleartomark 223 | } 224 | ifelse 225 | } odef 226 | /.fillcheckrect 227 | { % Check whether the current subpath is a rectangle. 228 | % If it is, add it to the list of rectangles being accumulated; 229 | % if not exit the .fillcomplex loop. 230 | % The subpath has not been closed. 231 | % Stack: as in .fillcomplex, + newx newy 232 | counttomark 10 eq { 9 index 9 index 4 2 roll } if 233 | counttomark 12 ne { cleartomark not mark exit } if 234 | 12 2 roll 235 | % Check for the two possible forms of rectangles: 236 | % x0 y0 x0 y1 x1 y1 x1 y0 x0 y0 237 | % x0 y0 x1 y0 x1 y1 x0 y1 x0 y0 238 | 9 index 2 index eq 9 index 2 index eq and 239 | 10 index 9 index eq 240 | { % Check for first form. 241 | 7 index 6 index eq and 6 index 5 index eq and 3 index 2 index eq and 242 | } 243 | { % Check for second form. 244 | 9 index 8 index eq and 245 | 8 index 7 index eq and 5 index 4 index eq and 4 index 3 index eq and 246 | } 247 | ifelse not { cleartomark not mark exit } if 248 | % We have a rectangle. 249 | pop pop pop pop 4 2 roll pop pop 8 4 roll 250 | } odef 251 | /eofill { COMPLEX { .fillcomplex } if newpath } codef 252 | /fill { COMPLEX { .fillcomplex } if newpath } codef 253 | /rectfill { gsave newpath .rectappend fill grestore } codef 254 | /ueofill { gsave newpath uappend eofill grestore } codef 255 | /ufill { gsave newpath uappend fill grestore } codef 256 | 257 | % Redefine the stroke operators to detect rectangles. 258 | 259 | /rectstroke 260 | { gsave newpath 261 | dup type dup /arraytype eq exch /packedarraytype eq or 262 | { dup length 6 eq { exch .rectappend concat } { .rectappend } ifelse } 263 | { .rectappend } 264 | ifelse stroke grestore 265 | } codef 266 | /.strokeline % .strokeline 267 | % Note: fromx and fromy are in output coordinates; 268 | % tox and toy are in user coordinates. 269 | { .coord 2 copy 6 2 roll .orderrect 270 | % Add in the line width. Assume square or round caps. 271 | currentlinewidth 2 div dup .dcoord add abs 1 .max 5 1 roll 272 | 4 index add 4 1 roll 4 index add 4 1 roll 273 | 4 index sub 4 1 roll 5 -1 roll sub 4 1 roll 274 | (R ) //print .show==4 275 | } odef 276 | /.strokecomplex 277 | { % Do a first pass to see if the path is all horizontal and vertical 278 | % lines in the output coordinate system. 279 | % Stack: true mark origx origy curx cury 280 | true mark null null null null 281 | { .coord 6 2 roll pop pop pop pop 2 copy } 282 | { .coord 1 index 4 index eq 1 index 4 index eq or 283 | { 4 2 roll pop pop } 284 | { cleartomark not mark exit } 285 | ifelse 286 | } 287 | { cleartomark not mark exit } 288 | { counttomark -2 roll 2 copy counttomark 2 roll 289 | 1 index 4 index eq 1 index 4 index eq or 290 | { pop pop 2 copy } 291 | { cleartomark not mark exit } 292 | ifelse 293 | } 294 | pathforall cleartomark 295 | 0 currentlinewidth .dcoord 0 eq exch 0 eq or and 296 | % Do the second pass to write out the rectangles. 297 | % Stack: origx origy curx cury 298 | { .showcolor null null null null 299 | { 6 2 roll pop pop pop pop 2 copy .coord } 300 | { .strokeline } 301 | { } 302 | { 3 index 3 index .strokeline } 303 | pathforall pop pop pop pop 304 | } 305 | if 306 | } odef 307 | /stroke { COMPLEX { .strokecomplex } if newpath } codef 308 | /ustroke 309 | { gsave newpath 310 | dup length 6 eq { exch uappend concat } { uappend } ifelse 311 | stroke grestore 312 | } codef 313 | 314 | % The image operators must read the input and note the dimensions. 315 | % Eventually we should redefine these to detect 1-bit-high all-black images, 316 | % since this is how dvips does underlining (!). 317 | 318 | /.noteimagerect % .noteimagerect - 319 | { COMPLEX 320 | { gsave setmatrix itransform 0 0 itransform 321 | grestore .coord 4 2 roll .coord .orderrect 322 | (I ) //print .show==4 323 | } 324 | { pop pop pop 325 | } 326 | ifelse 327 | } odef 328 | /colorimage where 329 | { pop /colorimage 330 | { 1 index 331 | { dup 6 add index 1 index 6 add index 2 index 5 add index } 332 | { 6 index 6 index 5 index } 333 | ifelse .noteimagerect gsave nulldevice //colorimage grestore 334 | } codef 335 | } if 336 | /.noteimage % Arguments as for image[mask] 337 | { dup type /dicttype eq 338 | { dup /Width get 1 index /Height get 2 index /ImageMatrix get } 339 | { 4 index 4 index 3 index } 340 | ifelse .noteimagerect 341 | } odef 342 | /image { .noteimage gsave nulldevice //image grestore } codef 343 | /imagemask { .noteimage gsave nulldevice //imagemask grestore } codef 344 | 345 | % Output the current color if necessary. 346 | /.color.r .inew def 347 | .color.r -1 .iput % make sure we write the color at the beginning 348 | /.color.g .inew def 349 | /.color.b .inew def 350 | /.showcolor 351 | { COMPLEX 352 | { currentrgbcolor 353 | 1000 mul round cvi 354 | 3 1 roll 1000 mul round cvi 355 | exch 1000 mul round cvi 356 | % Stack: b g r 357 | dup //.color.r .iget eq 358 | 2 index //.color.g .iget eq and 359 | 3 index //.color.b .iget eq and 360 | { pop pop pop 361 | } 362 | { (C ) //print 363 | dup //.color.r exch .iput .show==only 364 | ( ) //print dup //.color.g exch .iput .show==only 365 | ( ) //print dup //.color.b exch .iput .show==only 366 | (\n) //print 367 | } 368 | ifelse 369 | } 370 | if 371 | } bind def 372 | 373 | % Redefine `show'. 374 | 375 | % Set things up so our output will be in tenths of a point, with origin at 376 | % lower left. This isolates us from the peculiarities of individual devices. 377 | 378 | /.show.ident.matrix matrix def 379 | /.show.ident { % - .show.ident 380 | % //.show.ident.matrix defaultmatrix 381 | % % Assume the original transformation is well-behaved. 382 | % 0.1 0 2 index dtransform abs exch abs .max /.show.scale exch def 383 | % 0.1 dup 3 -1 roll scale 384 | gsave initmatrix 385 | % Assume the original transformation is well-behaved... 386 | 0.1 0 dtransform abs exch abs .max 387 | 0.1 dup scale .show.ident.matrix currentmatrix 388 | % ... but undo any rotation into landscape orientation. 389 | dup 0 get 0 eq { 390 | 1 get dup abs div 90 mul rotate 391 | .show.ident.matrix currentmatrix 392 | } if 393 | grestore 394 | } bind def 395 | 396 | /.coord { % .coord 397 | transform .show.ident exch pop itransform 398 | exch round cvi exch round cvi 399 | } odef 400 | 401 | /.dcoord { % .coord 402 | % Transforming distances is trickier, because 403 | % the coordinate system might be rotated. 404 | .show.ident pop 3 1 roll 405 | exch 0 dtransform 406 | dup mul exch dup mul add sqrt 407 | 2 index div round cvi 408 | exch 0 exch dtransform 409 | dup mul exch dup mul add sqrt 410 | 3 -1 roll div round cvi 411 | } odef 412 | 413 | % Remember the current X, Y, and height. 414 | /.show.x .inew def 415 | /.show.y .inew def 416 | /.show.height .inew def 417 | 418 | % Remember the last character of the previous string; if it was a 419 | % hyphen preceded by a letter, we didn't output the hyphen. 420 | 421 | /.show.last (\000) def 422 | 423 | % Remember the current font. 424 | /.font.name 130 string def 425 | /.font.name.length .inew def 426 | /.font.height .inew def 427 | /.font.width .inew def 428 | 429 | %%* Also remember indentation of current line and previous vertical 430 | %%* skip 431 | 432 | /.show.indent .inew def 433 | /.show.dy .inew def 434 | 435 | % We have to redirect stdout somehow.... 436 | 437 | /.show.stdout { (%stdout) (w) file } bind def 438 | 439 | % Make sure writing will work even if a program uses =string. 440 | /.show.string =string length string def 441 | /.show.=string =string length string def 442 | /.show==only 443 | { //=string //.show.=string copy pop 444 | dup type /stringtype eq 445 | { dup length //.show.string length le 446 | { dup rcheck { //.show.string copy } if 447 | } if 448 | } if 449 | .show.stdout exch write==only 450 | //.show.=string //=string copy pop 451 | } odef 452 | /.show==4 453 | { 4 -1 roll .show==only ( ) //print 454 | 3 -1 roll .show==only ( ) //print 455 | exch .show==only ( ) //print 456 | .show==only (\n) //print 457 | } odef 458 | 459 | /.showwidth % Same as stringwidth, but disable COMPLEX so that 460 | % we don't try to detect rectangles during BuildChar. 461 | { COMPLEX 462 | { /COMPLEX false def stringwidth /COMPLEX true def } 463 | { stringwidth } 464 | ifelse 465 | } odef 466 | 467 | /.showfont % .showfont 468 | { gsave 469 | % Try getting the height and width of the font from the FontBBox. 470 | currentfont /FontBBox .knownget not { {0 0 0 0} } if 471 | aload pop % llx lly urx ury 472 | exch 4 -1 roll % lly ury urx llx 473 | sub % lly ury dx 474 | 3 1 roll exch % dx ury lly 475 | sub % dx dy 476 | 2 copy .max 0 ne 477 | { currentfont /FontMatrix get dtransform 478 | } 479 | { pop pop 480 | % Fonts produced by dvips, among other applications, have 481 | % BuildChar procedures that bomb out when given unexpected 482 | % characters, and there is no way to determine whether a given 483 | % character will do this. So for Type 1 fonts, we measure a 484 | % typical character ('X'); for others, we punt. 485 | currentfont /FontType get 1 eq 486 | { (X) .showwidth pop dup 1.3 mul 487 | } 488 | { % No safe way to get the character size. Punt. 489 | 0 0 490 | } 491 | ifelse 492 | } 493 | ifelse .dcoord exch 494 | currentfont /FontName .knownget not { () } if 495 | dup type /stringtype ne { //.show.string cvs } if 496 | grestore 497 | % Stack: height width fontname 498 | SIMPLE 499 | { pop pop //.show.height exch .iput } 500 | { 2 index //.font.height .iget eq 501 | 2 index //.font.width .iget eq and 502 | 1 index //.font.name 0 //.font.name.length .iget getinterval eq and 503 | { pop pop pop 504 | } 505 | { (F ) //print 506 | 3 -1 roll dup //.font.height exch .iput .show==only ( ) //print 507 | exch dup //.font.width exch .iput .show==only ( ) //print 508 | dup length //.font.name.length exch .iput 509 | //.font.name cvs .show==only (\n) //print 510 | } 511 | ifelse 512 | } 513 | ifelse 514 | } odef 515 | 516 | % Define the letters -- characters which, if they occur followed by a hyphen 517 | % at the end of a line, cause the hyphen and line break to be ignored. 518 | /.letter.chars 100 dict def 519 | mark 520 | 65 1 90 { dup 32 add } for 521 | counttomark 522 | { StandardEncoding exch get .letter.chars exch dup put } 523 | repeat 524 | pop 525 | 526 | %%* Add the rest of the letters from the [O]T1Encoding and 527 | %%* ISOLatin1Encoding vectors 528 | 529 | mark 530 | /AE 531 | /Aacute 532 | /Abreve 533 | /Acircumflex 534 | /Adieresis 535 | /Agrave 536 | /Aogonek 537 | /Aring 538 | /Atilde 539 | /Cacute 540 | /Ccaron 541 | /Ccedilla 542 | /Dcaron 543 | /Eacute 544 | /Ecaron 545 | /Ecircumflex 546 | /Edieresis 547 | /Egrave 548 | /Eng 549 | /Eogonek 550 | /Eth 551 | /Gbreve 552 | /Germandbls 553 | /IJ 554 | /Iacute 555 | /Icircumflex 556 | /Idieresis 557 | /Idot 558 | /Igrave 559 | /Lacute 560 | /Lcaron 561 | /Lslash 562 | /Nacute 563 | /Ncaron 564 | /Ntilde 565 | /OE 566 | /Oacute 567 | /Ocircumflex 568 | /Odieresis 569 | /Ograve 570 | /Ohungarumlaut 571 | /Oslash 572 | /Otilde 573 | /Racute 574 | /Rcaron 575 | /Sacute 576 | /Scaron 577 | /Scedilla 578 | /Tcaron 579 | /Tcedilla 580 | /Thorn 581 | /Uacute 582 | /Ucircumflex 583 | /Udieresis 584 | /Ugrave 585 | /Uhungarumlaut 586 | /Uring 587 | /Yacute 588 | /Ydieresis 589 | /Zacute 590 | /Zcaron 591 | /Zdot 592 | /aacute 593 | /abreve 594 | /acircumflex 595 | /adieresis 596 | /ae 597 | /agrave 598 | /aogonek 599 | /aring 600 | /atilde 601 | /cacute 602 | /ccaron 603 | /ccedilla 604 | /dbar 605 | /dcaron 606 | /dotlessi 607 | /dotlessj 608 | /eacute 609 | /ecaron 610 | /ecircumflex 611 | /edieresis 612 | /egrave 613 | /eng 614 | /eogonek 615 | /eth 616 | /exclamdown 617 | /ff 618 | /ffi 619 | /ffl 620 | /fi 621 | /fl 622 | /gbreve 623 | /germandbls 624 | /iacute 625 | /icircumflex 626 | /idieresis 627 | /igrave 628 | /ij 629 | /lacute 630 | /lcaron 631 | /lslash 632 | /nacute 633 | /ncaron 634 | /ntilde 635 | /oacute 636 | /ocircumflex 637 | /odieresis 638 | /oe 639 | /ograve 640 | /ohungarumlaut 641 | /oslash 642 | /otilde 643 | /questiondown 644 | /racute 645 | /rcaron 646 | /sacute 647 | /scaron 648 | /scedilla 649 | /section 650 | /sterling 651 | /tcaron 652 | /tcedilla 653 | /thorn 654 | /uacute 655 | /ucircumflex 656 | /udieresis 657 | /ugrave 658 | /uhungarumlaut 659 | /uring 660 | /yacute 661 | /ydieresis 662 | /zacute 663 | /zcaron 664 | /zdot 665 | counttomark 666 | { .letter.chars exch dup put } 667 | repeat 668 | pop 669 | 670 | % Define a set of characters which, if they occur at the start of a line, 671 | % are taken as indicating a paragraph break. 672 | /.break.chars 50 dict def 673 | mark 674 | /bullet /dagger /daggerdbl /periodcentered /section 675 | counttomark 676 | { .break.chars exch dup put } 677 | repeat 678 | pop 679 | 680 | % Define character translation to ASCII. 681 | % We have to do this for the entire character set. 682 | 683 | /.char.map 500 dict def 684 | 685 | /.chars.def { counttomark 2 idiv { .char.map 3 1 roll put } repeat pop } def 686 | 687 | % Encode the printable ASCII characters. 688 | 689 | mark 32 1 126 690 | { 1 string dup 0 4 -1 roll put 691 | dup 0 get StandardEncoding exch get exch 692 | } 693 | for .chars.def 694 | 695 | % Encode accents. 696 | mark 697 | /acute (') 698 | /caron (^) 699 | /cedilla (,) 700 | /circumflex (^) 701 | /dieresis (") 702 | /grave (`) 703 | /ring (*) 704 | /tilde (~) 705 | .chars.def 706 | 707 | % Encode the ISO accented characters. 708 | mark 192 1 255 709 | { ISOLatin1Encoding exch get =string cvs 710 | dup 0 1 getinterval 1 index dup length 1 sub 1 exch getinterval 711 | .char.map 2 index known .char.map 2 index known and 712 | { .char.map 3 -1 roll get .char.map 3 -1 roll get concatstrings 713 | .char.map 3 1 roll put 714 | } 715 | { pop pop pop 716 | } 717 | ifelse 718 | } 719 | for .chars.def 720 | 721 | % Encode the remaining standard and ISO alphabetic characters. 722 | 723 | mark 724 | /AE (AE) /Eth (DH) /OE (OE) /Thorn (Th) 725 | /ae (ae) /eth (dh) 726 | /ffi (ffi) /ffl (ffl) /fi (fi) /fl (fl) 727 | /germandbls (ss) /oe (oe) /thorn (th) 728 | .chars.def 729 | 730 | % Encode the other standard and ISO characters. 731 | 732 | mark 733 | /brokenbar (|) /bullet (*) /copyright ((C)) /currency (#) 734 | /dagger (#) /daggerdbl (##) /degree (o) /divide (/) /dotaccent (.) 735 | /dotlessi (i) 736 | /ellipsis (...) /emdash (--) /endash (-) /exclamdown (!) 737 | /florin (f) /fraction (/) 738 | /guillemotleft (<<) /guillemotright (>>) 739 | /guilsinglleft (<) /guilsinglright (>) /hungarumlaut ("") /logicalnot (~) 740 | /macron (_) /minus (-) /mu (u) /multiply (*) 741 | /ogonek (,) /onehalf (1/2) /onequarter (1/4) /onesuperior (1) 742 | /ordfeminine (-a) /ordmasculine (-o) 743 | /paragraph (||) /periodcentered (*) /perthousand (o/oo) /plusminus (+-) 744 | /questiondown (?) /quotedblbase (") /quotedblleft (") /quotedblright (") 745 | /quotesinglbase (,) /quotesingle (') /registered ((R)) 746 | /section ($) /sterling (#) 747 | /threequarters (3/4) /threesuperior (3) /trademark ((TM)) /twosuperior (2) 748 | /yen (Y) 749 | .chars.def 750 | 751 | % Encode a few common Symbol characters. 752 | 753 | mark 754 | /asteriskmath (*) /copyrightsans ((C)) /copyrightserif ((C)) 755 | /greaterequal (>=) /lessequal (<=) /registersans ((R)) /registerserif ((R)) 756 | /trademarksans ((TM)) /trademarkserif ((TM)) 757 | .chars.def 758 | 759 | %%* Add a few characters from StandardEncoding and ISOLatin1Encoding 760 | %%* that were missing. 761 | 762 | mark 763 | /cent (c) 764 | /guilsinglleft (<) 765 | /guilsinglright (>) 766 | /breve (*) 767 | /Lslash (L/) 768 | /lslash (l/) 769 | .chars.def 770 | 771 | %%* Define the OT1Encoding and T1Encoding vectors for use with dvips 772 | %%* files. Unfortunately, there's no way of telling what font is 773 | %%* really being used within a dvips document, so we can't provide an 774 | %%* appropriate encoding for each individual font. Instead, we'll 775 | %%* just provide support for the two most popular text encodings, the 776 | %%* OT1 and T1 encodings, and just accept the fact that any font not 777 | %%* using one of those encodings will be rendered as gibberish. 778 | %%* 779 | %%* OT1 is Knuth's 7-bit encoding for the CMR text fonts, while T1 780 | %%* (aka the Cork encoding) is the 8-bit encoding used by the DC 781 | %%* fonts, a preliminary version of the proposed Extended Computer 782 | %%* Modern fonts. Unfortunately, T1 is not a strict extension of OT1; 783 | %%* they differ in positions 8#000 through 8#040, 8#074, 8#076, 8#134, 784 | %%* 8#137, 8#173, 8#174, 8#175 and 8#177, so we can't use the same 785 | %%* vector for both. 786 | %%* 787 | %%* Of course, we also can't reliably tell the difference between an 788 | %%* OT1-encoded font and a T1-encoded font based on the information in 789 | %%* a dvips-created PostScript file. As a best-guess solution, we'll 790 | %%* use the T1 encoding if the font contains any characters in 791 | %%* positions above 8#177 and the OT1 encoding if it doesn't. 792 | 793 | /T1Encoding 256 array def 794 | 795 | /OT1Encoding 256 array def 796 | 797 | %%* T1Encoding shares a lot with StandardEncoding, so let's start 798 | %%* there. 799 | 800 | StandardEncoding T1Encoding copy pop 801 | 802 | /OT1.encode { 803 | counttomark 804 | 2 idiv 805 | { OT1Encoding 3 1 roll put } 806 | repeat 807 | cleartomark 808 | } def 809 | 810 | /T1.encode { 811 | counttomark 812 | 2 idiv 813 | { T1Encoding 3 1 roll put } 814 | repeat 815 | cleartomark 816 | } def 817 | 818 | mark 819 | 8#000 /grave 820 | 8#001 /acute 821 | 8#002 /circumflex 822 | 8#003 /tilde 823 | 8#004 /dieresis 824 | 8#005 /hungarumlaut 825 | 8#006 /ring 826 | 8#007 /caron 827 | 828 | 8#010 /breve 829 | 8#011 /macron 830 | 8#012 /dotaccent 831 | 8#013 /cedilla 832 | 8#014 /ogonek 833 | 8#015 /quotesinglbase 834 | 8#016 /guilsinglleft 835 | 8#017 /guilsinglright 836 | 837 | 8#020 /quotedblleft 838 | 8#021 /quotedblright 839 | 8#022 /quotedblbase 840 | 8#023 /guillemotleft 841 | 8#024 /guillemotright 842 | 8#025 /endash 843 | 8#026 /emdash 844 | 8#027 /cwm 845 | 846 | 8#030 /perthousandzero 847 | 8#031 /dotlessi 848 | 8#032 /dotlessj 849 | 8#033 /ff 850 | 8#034 /fi 851 | 8#035 /fl 852 | 8#036 /ffi 853 | 8#037 /ffl 854 | 855 | %% 8#040 through 8#176 follow StandardEncoding 856 | 857 | 8#177 /hyphen 858 | T1.encode 859 | 860 | mark 861 | 8#200 /Abreve 862 | 8#201 /Aogonek 863 | 8#202 /Cacute 864 | 8#203 /Ccaron 865 | 8#204 /Dcaron 866 | 8#205 /Ecaron 867 | 8#206 /Eogonek 868 | 8#207 /Gbreve 869 | 8#210 /Lacute 870 | 8#211 /Lcaron 871 | 8#212 /Lslash 872 | 8#213 /Nacute 873 | 8#214 /Ncaron 874 | 8#215 /Eng 875 | 8#216 /Ohungarumlaut 876 | 8#217 /Racute 877 | 8#220 /Rcaron 878 | 8#221 /Sacute 879 | 8#222 /Scaron 880 | 8#223 /Scedilla 881 | 8#224 /Tcaron 882 | 8#225 /Tcedilla 883 | 8#226 /Uhungarumlaut 884 | 8#227 /Uring 885 | 8#230 /Ydieresis 886 | 8#231 /Zacute 887 | 8#232 /Zcaron 888 | 8#233 /Zdot 889 | 8#234 /IJ 890 | 8#235 /Idot 891 | 8#236 /dbar 892 | 8#237 /section 893 | 8#240 /abreve 894 | 8#241 /aogonek 895 | 8#242 /cacute 896 | 8#243 /ccaron 897 | 8#244 /dcaron 898 | 8#245 /ecaron 899 | 8#246 /eogonek 900 | 8#247 /gbreve 901 | 8#250 /lacute 902 | 8#251 /lcaron 903 | 8#252 /lslash 904 | 8#253 /nacute 905 | 8#254 /ncaron 906 | 8#255 /eng 907 | 8#256 /ohungarumlaut 908 | 8#257 /racute 909 | 8#260 /rcaron 910 | 8#261 /sacute 911 | 8#262 /scaron 912 | 8#263 /scedilla 913 | 8#264 /tcaron 914 | 8#265 /tcedilla 915 | 8#266 /uhungarumlaut 916 | 8#267 /uring 917 | 8#270 /ydieresis 918 | 8#271 /zacute 919 | 8#272 /zcaron 920 | 8#273 /zdot 921 | 8#274 /ij 922 | 8#275 /exclamdown 923 | 8#276 /questiondown 924 | 8#277 /sterling 925 | 926 | 8#300 /Agrave 927 | 8#301 /Aacute 928 | 8#302 /Acircumflex 929 | 8#303 /Atilde 930 | 8#304 /Adieresis 931 | 8#305 /Aring 932 | 8#306 /AE 933 | 8#307 /Ccedilla 934 | 8#310 /Egrave 935 | 8#311 /Eacute 936 | 8#312 /Ecircumflex 937 | 8#313 /Edieresis 938 | 8#314 /Igrave 939 | 8#315 /Iacute 940 | 8#316 /Icircumflex 941 | 8#317 /Idieresis 942 | 8#320 /Eth 943 | 8#321 /Ntilde 944 | 8#322 /Ograve 945 | 8#323 /Oacute 946 | 8#324 /Ocircumflex 947 | 8#325 /Otilde 948 | 8#326 /Odieresis 949 | 8#327 /OE 950 | 8#330 /Oslash 951 | 8#331 /Ugrave 952 | 8#332 /Uacute 953 | 8#333 /Ucircumflex 954 | 8#334 /Udieresis 955 | 8#335 /Yacute 956 | 8#336 /Thorn 957 | 8#337 /Germandbls 958 | 959 | 8#340 /agrave 960 | 8#341 /aacute 961 | 8#342 /acircumflex 962 | 8#343 /atilde 963 | 8#344 /adieresis 964 | 8#345 /aring 965 | 8#346 /ae 966 | 8#347 /ccedilla 967 | 8#350 /egrave 968 | 8#351 /eacute 969 | 8#352 /ecircumflex 970 | 8#353 /edieresis 971 | 8#354 /igrave 972 | 8#355 /iacute 973 | 8#356 /icircumflex 974 | 8#357 /idieresis 975 | 8#360 /eth 976 | 8#361 /ntilde 977 | 8#362 /ograve 978 | 8#363 /oacute 979 | 8#364 /ocircumflex 980 | 8#365 /otilde 981 | 8#366 /odieresis 982 | 8#367 /oe 983 | 8#370 /oslash 984 | 8#371 /ugrave 985 | 8#372 /uacute 986 | 8#373 /ucircumflex 987 | 8#374 /udieresis 988 | 8#375 /yacute 989 | 8#376 /thorn 990 | 8#377 /germandbls 991 | 992 | T1.encode 993 | 994 | %%* Now copy OT1Encoding into T1Encoding and make a few changes. 995 | 996 | T1Encoding OT1Encoding copy pop 997 | 998 | mark 999 | 8#000 /Gamma 1000 | 8#001 /Delta 1001 | 8#002 /Theta 1002 | 8#003 /Lambda 1003 | 8#004 /Xi 1004 | 8#005 /Pi 1005 | 8#006 /Sigma 1006 | 8#007 /Upsilon 1007 | 1008 | 8#010 /Phi 1009 | 8#011 /Psi 1010 | 8#012 /Omega 1011 | 8#013 /ff 1012 | 8#014 /fi 1013 | 8#015 /fl 1014 | 8#016 /ffi 1015 | 8#017 /ffl 1016 | 1017 | 8#020 /dotlessi 1018 | 8#021 /dotlessj 1019 | 8#022 /grave 1020 | 8#023 /acute 1021 | 8#024 /caron 1022 | 8#025 /breve 1023 | 8#026 /macron 1024 | 8#027 /ring 1025 | 1026 | 8#030 /cedilla 1027 | 8#031 /germandbls 1028 | 8#032 /ae 1029 | 8#033 /oe 1030 | 8#034 /oslash 1031 | 8#035 /AE 1032 | 8#036 /OE 1033 | 8#037 /Oslash 1034 | 1035 | 8#040 /polishslash 1036 | 1037 | 8#042 /quotedblright 1038 | 1039 | 8#074 /exclamdown 1040 | 8#076 /questiondown 1041 | 1042 | 8#134 /quotedblleft 1043 | 8#137 /dotaccent 1044 | 1045 | 8#173 /endash 1046 | 8#174 /emdash 1047 | 8#175 /hungarumlaut 1048 | 8#177 /dieresis 1049 | OT1.encode 1050 | 1051 | %%* And add a few characters from the OT1Encoding 1052 | 1053 | mark 1054 | /Gamma (\\Gamma ) 1055 | /Delta (\\Delta ) 1056 | /Theta (\\Theta ) 1057 | /Lambda (\\Lambda ) 1058 | /Xi (\\Xi ) 1059 | /Pi (\\Pi ) 1060 | /Sigma (\\Sigma ) 1061 | /Upsilon (\\Upsilon ) 1062 | 1063 | /Phi (\\Phi ) 1064 | /Psi (\\Psi ) 1065 | /Omega (\\Omega ) 1066 | 1067 | /dotlessj (j) 1068 | /ff (ff) 1069 | 1070 | /cwm () 1071 | 1072 | /perthousandzero (0) 1073 | 1074 | /polishslash () 1075 | 1076 | /Abreve (A*) 1077 | /Aogonek (A,) 1078 | /Cacute (C') 1079 | /Ccaron (C^) 1080 | /Dcaron (D^) 1081 | /Ecaron (E^) 1082 | /Eogonek (E,) 1083 | /Gbreve (G*) 1084 | /Lacute (L') 1085 | /Lcaron (L^) 1086 | /Nacute (N') 1087 | /Ncaron (N^) 1088 | /Eng (NG) 1089 | /Ohungarumlaut (O"") 1090 | /Racute (R') 1091 | /Rcaron (R^) 1092 | /Sacute (S') 1093 | /Scaron (S^) 1094 | /Scedilla (S,) 1095 | /Tcaron (T^) 1096 | /Tcedilla (T,) 1097 | /Uhungarumlaut (U"") 1098 | /Uring (U*) 1099 | /Ydieresis (Y") 1100 | /Zacute (Z') 1101 | /Zcaron (Z^) 1102 | /Zdot (Z.) 1103 | /IJ (IJ) 1104 | /Idot (I.) 1105 | /dbar (d-) 1106 | /abreve (a*) 1107 | /aogonek (a,) 1108 | /cacute (c') 1109 | /ccaron (c^) 1110 | /dcaron (d^) 1111 | /ecaron (e^) 1112 | /eogonek (e,) 1113 | /gbreve (g*) 1114 | /lacute (l') 1115 | /lcaron (l^) 1116 | /nacute (n') 1117 | /ncaron (n^) 1118 | /eng (ng) 1119 | /ohungarumlaut (o"") 1120 | /racute (r') 1121 | /rcaron (r^) 1122 | /sacute (s') 1123 | /scaron (s^) 1124 | /scedilla (s,) 1125 | /tcaron (t^) 1126 | /tcedilla (t,) 1127 | /uhungarumlaut (u"") 1128 | /uring (u*) 1129 | /zacute (z') 1130 | /zcaron (z^) 1131 | /zdot (z.) 1132 | /ij (ij) 1133 | /Germandbls (SS) 1134 | .chars.def 1135 | 1136 | %%* We extend the df-tail command to stick in an Encoding vector (see 1137 | %%* above for a discussion of the T1 and OT1 encodings), put in a 1138 | %%* FontName (which will just be dvips's name for the font, i.e., Fa, 1139 | %%* Fb, etc.) and give each font a separate FontBBox instead of 1140 | %%* letting them all share a single one. 1141 | 1142 | /dvips.df-tail % id numcc maxcc df-tail 1143 | { 1144 | /nn 9 dict N 1145 | nn begin 1146 | %% 1147 | %% Choose an encoding based on the highest position occupied. 1148 | %% 1149 | dup 128 gt { T1Encoding } { OT1Encoding } ifelse 1150 | /Encoding X 1151 | /FontType 3 N 1152 | %% 1153 | %% It's ok for all the fonts to share a FontMatrix, but they 1154 | %% need to have separate FontBBoxes 1155 | %% 1156 | /FontMatrix fntrx N 1157 | /FontBBox [0 0 0 0] N 1158 | string /base X 1159 | array /BitMaps X 1160 | %% 1161 | %% And let's throw in a FontName for good measure 1162 | %% 1163 | dup ( ) cvs 1164 | %% 1165 | %% Make sure each font gets it own private FontName. -- dmj, 1166 | %% 12/23/97 1167 | %% 1168 | dup length string copy 1169 | /FontName X 1170 | /BuildChar {CharBuilder} N 1171 | end 1172 | dup { /foo setfont } 1173 | 2 array copy cvx N 1174 | load 1175 | 0 nn put 1176 | /ctr 0 N 1177 | [ 1178 | } def 1179 | 1180 | %%* This is functionally equivalent to dvips's /D procedure, but it 1181 | %%* also calculates the Font Bounding Box while defining the 1182 | %%* characters. 1183 | 1184 | /dvips.D % char-data ch D - : define character bitmap in current font 1185 | { 1186 | /cc X % char-data 1187 | dup type /stringtype ne {]} if % char-data 1188 | 1189 | /ch-xoff where 1190 | { pop } 1191 | { dup /Cd exch def 1192 | /ch-width { Cw } def 1193 | /ch-height { Ch } def 1194 | /ch-xoff { Cx } def 1195 | /ch-yoff { Cy } def 1196 | /ch-dx { Cdx } def 1197 | } ifelse 1198 | /ch-data X 1199 | nn /base get cc ctr put % (adds ctr to cc'th position of BASE) 1200 | nn /BitMaps get 1201 | ctr 1202 | ch-data % BitMaps ctr char-data 1203 | sf 1 ne { 1204 | dup dup length 1 sub dup 2 index S get sf div put 1205 | } if 1206 | put % puts char-data into BitMaps at index ctr 1207 | /ctr ctr 1 add N 1208 | %% 1209 | %% Make sure the Font Bounding Box encloses the Bounding Box of the 1210 | %% current character 1211 | %% 1212 | nn /FontBBox get % BB 1213 | 1214 | dup % calculate new llx 1215 | dup 0 get 1216 | ch-xoff 1217 | .min 1218 | 0 exch put 1219 | 1220 | dup % calculate new lly 1221 | dup 1 get 1222 | ch-yoff ch-height sub 1223 | .min 1224 | 1 exch put 1225 | 1226 | dup % calculate new urx 1227 | dup 2 get 1228 | ch-dx ch-width add 1229 | .max 1230 | 2 exch put 1231 | 1232 | dup 3 get % calculate new ury 1233 | ch-yoff 1234 | .max 1235 | 3 exch put 1236 | 1237 | } def 1238 | 1239 | %%* Define start-hook to replace df-tail and D by our versions. 1240 | %%* Unfortunately, the user can redefine start-hook and thus bypass 1241 | %%* these changes, but I don't see an obvious way around that. 1242 | 1243 | userdict /start-hook { 1244 | TeXDict /df-tail /dvips.df-tail load bind put 1245 | TeXDict /D /dvips.D load bind put 1246 | } put 1247 | 1248 | %%* Introduce a symbolic constant for hyphens. (Need to make 1249 | %%* allowance for hyphen being in different place?) 1250 | 1251 | /.hyphen 45 def 1252 | 1253 | % Write out a string. If it ends in a letter and a hyphen, 1254 | % don't write the hyphen, and set .show.last to a hyphen; 1255 | % otherwise, set .show.last to the character (or \000 if it was a hyphen). 1256 | /.show.write % 1257 | { 1258 | dup length 1 ge 1259 | { dup dup length 1 sub get % string last_char 1260 | dup .hyphen eq % string last_char hyphen? 1261 | { % string last_char 1262 | 1 index length 1 gt 1263 | { 1 index dup length 2 sub get } 1264 | { //.show.last 0 get } 1265 | ifelse % string last_char prev-char 1266 | currentfont /Encoding get exch get % look up prev-char 1267 | //.letter.chars exch known % is it a letter? 1268 | { % Remove the hyphen % string last_char 1269 | exch % last_char string 1270 | dup length 1 sub % last_char string len-1 1271 | 0 exch getinterval % last_char string-1 1272 | exch % string-1 last_char 1273 | } 1274 | { pop 0 } % string 0 1275 | ifelse 1276 | } 1277 | if 1278 | //.show.last 0 3 -1 roll put % store last_char 1279 | % in .show.last 1280 | % If .show.last == 1281 | % hyphen, then 1282 | % last char of 1283 | % previous string 1284 | % was a hyphen 1285 | } 1286 | if % string 1287 | currentfont /FontType get 0 ne 1288 | { 1289 | { % begin forall % c 1290 | dup % c c 1291 | currentfont /Encoding get % c c vec 1292 | exch get % c name 1293 | dup //.char.map exch known % c name bool 1294 | { exch pop } 1295 | { pop OT1Encoding exch get } 1296 | ifelse % name 1297 | //.char.map exch get % translation 1298 | .show.stdout exch writestring 1299 | } 1300 | forall 1301 | } 1302 | { (\0) dup 0 get 0 eq 1303 | { 0 1 put 1304 | (%stderr) (w) file dup 1305 | (*** Warning: composite font characters dumped without decoding.\n) writestring 1306 | closefile 1307 | } 1308 | { pop 1309 | } 1310 | ifelse 1311 | .show.stdout exch writestring 1312 | } 1313 | ifelse 1314 | } odef 1315 | 1316 | /.showstring1 { % string 1317 | currentpoint .coord % string x y 1318 | 3 -1 roll dup .showwidth % x y string dx dy 1319 | 1 index % x y string dx dy dx 1320 | 0 rmoveto % x y string dx dy 1321 | .dcoord pop % x y string width 1322 | SIMPLE 1323 | { % x y string width 1324 | 2 index % x y string width y 1325 | //.show.y .iget % x y string width y old.y 1326 | %%* 1327 | %%* Replaced test "has y changed" by "has y changed by more 1328 | %%* than the current font height" so that subscripts and 1329 | %%* superscripts won't cause line/paragraph breaks 1330 | %%* 1331 | sub abs dup % x y string width dy dy 1332 | //.show.height .iget 1333 | gt 1334 | { % x y string width dy 1335 | 1336 | %%* Vertical position has changed by more than the font 1337 | %%* height, so we now try to figure out whether we've 1338 | %%* started a new paragraph or merely a new line, using a 1339 | %%* variety of heuristics. 1340 | 1341 | %%* If any of the following is true, we start a new 1342 | %%* paragraph: 1343 | 1344 | %%* (a) the current vertical shift is more than 1.1 times 1345 | %%* the previous vertical shift, where 1.1 is an 1346 | %%* arbitrarily chosen factor that could probably be 1347 | %%* refined. 1348 | 1349 | dup % x y string width dy dy 1350 | //.show.dy .iget 1.1 mul 1351 | gt 1352 | exch 1353 | 1354 | %%* Save the new vertical shift 1355 | 1356 | //.show.dy exch .iput 1357 | 1358 | %%* (b) The vertical shift is more than 1.3 times the 1359 | %%* "size" of the current font. I've removed this 1360 | %%* test since it's not really very useful. 1361 | 1362 | %%* //.show.dy .iget 1363 | %%* //.show.height .iget 1.4 mul 1364 | %%* gt % x y string width bool 1365 | %%* .show.height .iget 0 gt and % only perform test if font 1366 | %%* % height is nonzero 1367 | %%* or 1368 | 1369 | %%* (c) the first character of the new line is one of the 1370 | %%* .break.chars 1371 | 1372 | 2 index length % x y string width newpar? len 1373 | 0 gt % x y string width newpar? len>0? 1374 | { 1375 | 2 index 0 get % x y string width newpar? s 1376 | currentfont /Encoding get 1377 | exch get % x y string width newpar? s_enc 1378 | //.break.chars exch known { pop true } if 1379 | } 1380 | if % x y string width newpar? 1381 | 1382 | %%* (d) The indentation of the new line is greater than 1383 | %%* the indentation of the previous line. 1384 | 1385 | 4 index 1386 | //.show.indent .iget 1387 | gt 1388 | or 1389 | 1390 | %%* HOWEVER, if the line ends in a hyphen, we do NOT begin 1391 | %%* a new paragraph (cf. comment at end of BF2). --dmj, 1392 | %%* 12/23/97 1393 | 1394 | //.show.last 0 get .hyphen ne 1395 | and 1396 | 1397 | % newpar? 1398 | { (\n\n) } % Paragraph 1399 | { % Line 1400 | %%* 1401 | %%* BF2: If last character on a line is 1402 | %%* a hyphen, we omit the hyphen and 1403 | %%* run the lines together. Of 1404 | %%* course, this will fail if a word 1405 | %%* with an explicit hyphen (e.g., 1406 | %%* X-ray) is split across two lines. 1407 | %%* Oh, well. (What should we do 1408 | %%* about a hyphen that ends a 1409 | %%* "paragraph"? Perhaps that should 1410 | %%* inhibit a paragraph break.) 1411 | %%* 1412 | //.show.last 0 get .hyphen eq 1413 | { () } 1414 | { (\n) } 1415 | ifelse % x y string width char 1416 | } 1417 | ifelse 1418 | //print 1419 | 1420 | //.show.y 3 index .iput % x y string width 1421 | //.show.x 4 index .iput % x y string width 1422 | //.show.indent 4 index .iput 1423 | } 1424 | { % x y string width dy 1425 | % If the word processor split a hyphenated word within 1426 | % the same line, put out the hyphen now. 1427 | pop 1428 | //.show.last 0 get .hyphen eq { (-) //print } if 1429 | } 1430 | ifelse 1431 | %%* 1432 | %%* If have moved more than 1 point to 1433 | %%* the right, interpret it as a 1434 | %%* space? This need to be looked at 1435 | %%* more closely. 1436 | %%* 1437 | 3 index % x y string width x 1438 | //.show.x .iget 10 add gt % x y string width bool 1439 | { ( ) //print } 1440 | if 1441 | % x y string width 1442 | 4 1 roll % width x y string 1443 | .show.write pop % width x 1444 | add //.show.x exch .iput % 1445 | } 1446 | { (S ) //print .show==4 } 1447 | ifelse 1448 | } odef 1449 | 1450 | /.showstring 1451 | { dup () eq { pop } { .showstring1 } ifelse 1452 | } bind def 1453 | 1454 | % Redefine all the string display operators. 1455 | 1456 | /show { 1457 | .showfont 1458 | .showcolor 1459 | .showstring 1460 | } codef 1461 | 1462 | % We define all the other operators in terms of .show1. 1463 | 1464 | /.show1.string ( ) def 1465 | /.show1 { //.show1.string exch 0 exch put //.show1.string .showstring } odef 1466 | /ashow 1467 | { .showfont .showcolor 1468 | { .show1 2 copy rmoveto } forall 1469 | pop pop 1470 | } codef 1471 | /awidthshow 1472 | { .showfont .showcolor 1473 | { dup .show1 4 index eq { 4 index 4 index rmoveto } if 1474 | 2 copy rmoveto 1475 | } 1476 | forall 1477 | pop pop pop pop pop 1478 | } codef 1479 | /widthshow 1480 | { .showfont .showcolor 1481 | //.show1.string 0 4 -1 roll put 1482 | { //.show1.string search not { exit } if 1483 | .showstring .showstring 1484 | 2 index 2 index rmoveto 1485 | } loop 1486 | .showstring pop pop 1487 | } codef 1488 | /kshow 1489 | { .showfont .showcolor 1490 | %**************** Should construct a closure, in case the procedure 1491 | %**************** affects the o-stack. 1492 | { .show1 dup exec } forall pop 1493 | } codef 1494 | 1495 | % We don't really do the right thing with the Level 2 show operators, 1496 | % but we do something semi-reasonable. 1497 | /xshow { pop show } codef 1498 | /yshow { pop show } codef 1499 | /xyshow { pop show } codef 1500 | /glyphshow 1501 | { currentfont /Encoding .knownget not { {} } if 1502 | 0 1 2 index length 1 sub 1503 | { % Stack: glyph encoding index 1504 | 2 copy get 3 index eq { exch pop exch pop null exit } if 1505 | pop 1506 | } 1507 | for null eq { (X) dup 0 4 -1 roll put show } { pop } ifelse 1508 | } codef 1509 | 1510 | end 1511 | 1512 | % Bind the operators we just defined, and all the others if we didn't 1513 | % do it before. 1514 | 1515 | DELAYBIND { .bindnow } if 1516 | 1517 | % Make systemdict read-only if it wasn't already. 1518 | 1519 | systemdict wcheck { systemdict readonly pop } if 1520 | 1521 | % Restore the current local/global VM mode. 1522 | 1523 | exec 1524 | --------------------------------------------------------------------------------