├── .editorconfig ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── bin └── pdf-bot.js ├── examples ├── pdf-bot.config.js └── receiving-api.js ├── package.json ├── production ├── README.md ├── nginx.conf └── pm2.config.js ├── src ├── api.js ├── db │ ├── lowdb.js │ └── pgsql.js ├── error.js ├── pdfGenerator.js ├── queue.js ├── storage │ └── s3.js ├── utils.js └── webhook.js ├── storage ├── db │ └── .gitignore └── pdf │ └── .gitignore └── test ├── api.test.js ├── error.test.js ├── pdfGenerator.test.js ├── queue.test.js ├── storage └── s3.test.js └── webhook.test.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # This file is for unifying the coding style for different editors and IDEs 2 | # editorconfig.org 3 | 4 | root = true 5 | 6 | [*] 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | 12 | # Spaces in coffee 13 | [**.coffee] 14 | indent_style = space 15 | indent_size = 2 16 | 17 | [**.js] 18 | indent_style = space 19 | indent_size = 2 20 | 21 | [**.jsx] 22 | indent_style = space 23 | indent_size = 2 24 | 25 | # Tabs in less 26 | [**.less] 27 | indent_style = tab 28 | indent_size = 2 29 | 30 | [**.css] 31 | indent_style = tab 32 | indent_size = 2 33 | 34 | [**.php] 35 | indent_style = space 36 | indent_size = 4 37 | 38 | [**.html] 39 | indent_style = tab 40 | indent_size = 2 41 | 42 | [Makefile] 43 | indent_style = tab 44 | indent_size = 4 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # Typescript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | .DS_Store 61 | package-lock.json 62 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "node" 4 | script: 5 | - npm test 6 | after_success: 7 | - cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Esben Petersen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤖 pdf-bot 2 | 3 | [![npm](https://img.shields.io/npm/v/pdf-bot.svg)](https://www.npmjs.com/package/pdf-bot) [![Build Status](https://travis-ci.org/esbenp/pdf-bot.svg?branch=master)](https://travis-ci.org/esbenp/pdf-bot) [![Coverage Status](https://coveralls.io/repos/github/esbenp/pdf-bot/badge.svg?branch=master)](https://coveralls.io/github/esbenp/pdf-bot?branch=master) 4 | 5 | Easily create a microservice for generating PDFs using headless Chrome. 6 | 7 | `pdf-bot` is installed on a server and will receive URLs to turn into PDFs through its API or CLI. `pdf-bot` will manage a queue of PDF jobs. Once a PDF job has run it will notify you using a webhook so you can fetch the API. `pdf-bot` supports storing PDFs on S3 out of the box. Failed PDF generations and Webhook pings will be retried after a configurable decaying schedule. 8 | 9 | ![How to use the pdf-bot CLI](http://imgur.com/aRHye2l.gif) 10 | 11 | `pdf-bot` uses [`html-pdf-chrome`](https://github.com/westy92/html-pdf-chrome) under the hood and supports all the settings that it supports. Major thanks to [@westy92](https://github.com/westy92/html-pdf-chrome) for making this possible. 12 | 13 | ## How does it work? 14 | 15 | Imagine you have an app that creates invoices. You want to save those invoices as PDF. You install `pdf-bot` on a server as an API. Your app server sends the URL of the invoice to the `pdf-bot` server. A cronjob on the `pdf-bot` server keeps checking for new jobs, generates a PDF using headless Chrome and sends the location back to the application server using a webhook. 16 | 17 | ## Prerequisites 18 | 19 | * Node.js v6 or later 20 | 21 | ## Installation 22 | 23 | ```bash 24 | $ npm install -g pdf-bot 25 | $ pdf-bot install 26 | ``` 27 | 28 | > Make sure the node path is in your $PATH 29 | 30 | `pdf-bot install` will prompt for some basic configurations and then create a storage folder where your database and pdf files will be saved. 31 | 32 | ### Configuration 33 | 34 | `pdf-bot` comes packaged with sensible defaults. At the very minimum you must have a config file in the same folder from which you are executing `pdf-bot` with a `storagePath` given. 
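A config that only satisfies this minimum could look something like the sketch below (`storage` is simply the default folder name used elsewhere in this README; adjust the path to your setup):

```js
// Minimal pdf-bot.config.js sketch: only a storage path, everything else falls back to pdf-bot's defaults
module.exports = {
  storagePath: 'storage'
}
```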
However, in reality what you probably want to do is use the `pdf-bot install` command to generate a configuration file and then use an alias `ALIAS pdf-bot = "pdf-bot -c /home/pdf-bot.config.js"` 35 | 36 | `pdf-bot.config.js` 37 | ```js 38 | var htmlPdf = require('html-pdf-chrome') 39 | 40 | module.exports = { 41 | api: { 42 | token: 'crazy-secret' 43 | }, 44 | generator: { 45 | completionTrigger: new htmlPdf.CompletionTrigger.Timer(1000) // 1 sec timeout 46 | }, 47 | storagePath: 'storage' 48 | } 49 | ``` 50 | 51 | ```bash 52 | $ pdf-bot -c ./pdf-bot.config.js push https://esbenp.github.io 53 | ``` 54 | 55 | [See a full list of the available configuration options.](#options) 56 | 57 | ## Usage guide 58 | 59 | ### Structure and concept 60 | 61 | `pdf-bot` is meant to be a microservice that runs a server to generate PDFs for you. That usually means you will send requests from your application server to the PDF server to request an url to be generated as a PDF. `pdf-bot` will manage a queue and retry failed generations. Once a job is successfully generated a path to it will be sent back to your application server. 62 | 63 | Let us check out the flow for an app that generates PDF invoices. 64 | 65 | ``` 66 | 1. (App server): An invoice is created ----> Send URL to invoice to pdf-bot server 67 | 2. (pdf-bot server): Put the URL in the queue 68 | 3. (pdf-bot server): PDF is generated using headless Chrome 69 | 4. (pdf-bot server): (if failed try again using 1 min, 3 min, 10 min, 30 min, 60 min delay) 70 | 5. (pdf-bot server): Upload PDF to storage (e.g. Amazon S3) 71 | 6. (pdf-bot server): Send S3 location of PDF back to the app server 72 | 7. (App server): Receive S3 location of PDF -> Check signature sum matches for security 73 | 8. (App server): Handle PDF however you see fit (move it, download it, save it etc.) 74 | ``` 75 | 76 | You can send meta data to the `pdf-bot` server that will be sent back to the application. This can help you identify what PDF you are receiving. 77 | 78 | ### Setup 79 | 80 | On your `pdf-bot` server start by creating a config file `pdf-bot.config.js`. [You can see an example file here](https://github.com/esbenp/pdf-bot/blob/master/examples/pdf-bot.config.js) 81 | 82 | `pdf-bot.config.js` 83 | ```js 84 | module.exports = { 85 | api: { 86 | port: 3000, 87 | token: 'api-token' 88 | }, 89 | storage: { 90 | 's3': createS3Config({ 91 | bucket: '', 92 | accessKeyId: '', 93 | region: '', 94 | secretAccessKey: '' 95 | }) 96 | }, 97 | webhook: { 98 | secret: '1234', 99 | url: 'http://localhost:3000/webhooks/pdf' 100 | } 101 | } 102 | ``` 103 | 104 | As a minimum you should configure an access token for your API. This will be used to authenticate jobs sent to your `pdf-bot` server. You also need to add a `webhook` configuration to have pdf notifications sent back to your application server. You should add a `secret` that will be used to generate a signature used to check that the request has not been tampered with during transfer. 105 | 106 | Start your API using 107 | 108 | `pdf-bot -c ./pdf-bot.config.js api` 109 | 110 | This will start an [express server](http://expressjs.com) that listens for new jobs on port `3000`. 111 | 112 | #### Setting up Chrome 113 | 114 | `pdf-bot` uses [html-pdf-chrome](https://github.com/westy92/html-pdf-chrome) which in turns uses [chrome-launcher](https://github.com/GoogleChrome/lighthouse/tree/master/chrome-launcher) to launch chrome. You should check out those two resources on how to properly setup Chrome. 
However, with `chrome-launcher` Chrome should be started automatically. Otherwise, `html-pdf-chrome` has a small guide on how to have it running as a process using `pm2`. 115 | 116 | You can install chrome on Ubuntu using 117 | 118 | ``` 119 | sudo apt-get update && apt-get install chromium-browser 120 | ``` 121 | 122 | If you are testing things on OSX or similar, `chrome-launcher` should be able to find and automatically startup Chrome for you. 123 | 124 | #### Setting up the receiving API 125 | 126 | In the [examples folder](https://github.com/esbenp/pdf-bot/blob/master/examples/receiving-api.js) there is a small example on how the application API could look. Basically, you just have to define an endpoint that will receive the webhook and check that the signature matches. 127 | 128 | ```javascript 129 | api.post('/hook', function (req, res) { 130 | var signature = req.get('X-PDF-Signature', 'sha1=') 131 | 132 | var bodyCrypted = require('crypto') 133 | .createHmac('sha1', '12345') 134 | .update(JSON.stringify(req.body)) 135 | .digest('hex') 136 | 137 | if (bodyCrypted !== signature) { 138 | res.status(401).send() 139 | return 140 | } 141 | 142 | console.log('PDF webhook received', JSON.stringify(req.body)) 143 | 144 | res.status(204).send() 145 | }) 146 | ``` 147 | 148 | ### Setup production environment 149 | 150 | [Follow the guide under `production/` to see how to setup `pdf-bot` using `pm2` and `nginx`](https://github.com/esbenp/pdf-bot/blob/master/production/README.md) 151 | 152 | ### Setup crontab 153 | 154 | We setup our crontab to continuously look for jobs that have not yet been completed. 155 | 156 | ```bash 157 | * * * * * node $(npm bin -g)/pdf-bot -c ./pdf-bot.config.js shift:all >> /var/log/pdfbot.log 2>&1 158 | * * * * * node $(npm bin -g)/pdf-bot -c ./pdf-bot.config.js ping:retry-failed >> /var/log/pdfbot.log 2>&1 159 | ``` 160 | 161 | ### Quick example using the CLI 162 | 163 | Let us assume I want to generate a PDF for `https://esbenp.github.io`. I can add the job using the `pdf-bot` CLI. 164 | 165 | ```bash 166 | $ pdf-bot -c ./pdf-bot.config.js push https://esbenp.github.io --meta '{"id":1}' 167 | ``` 168 | 169 | Next, if my crontab is not setup to run it automatically I can run it using the `shift:all` command 170 | 171 | ```bash 172 | $ pdf-bot -c ./pdf-bot.config.js shift:all 173 | ``` 174 | 175 | This will look for the oldest uncompleted job and run it. 176 | 177 | ### How can I generate PDFs for sites that use Javascript? 178 | 179 | This is a common issue with PDF generation. Luckily, `html-pdf-chrome` has a really awesome API for dealing with Javascript. You can specify a timeout in milliseconds, wait for elements or custom events. To add a wait simply configure the `generator` key in your configuration. Below are a few examples. 
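**Wait for an element (sketch)**

`html-pdf-chrome` also exposes an `Element` completion trigger that waits for a DOM node matching a CSS selector to appear. The config below is a hedged sketch: it assumes your installed version provides `CompletionTrigger.Element`, and `#invoice-rendered` is just a placeholder selector for your own page.

```javascript
var htmlPdf = require('html-pdf-chrome')

module.exports = {
  api: {
    token: 'api-token'
  },
  // html-pdf-chrome options
  generator: {
    completionTrigger: new htmlPdf.CompletionTrigger.Element(
      '#invoice-rendered', // CSS selector of the element to wait for
      5000 // optional timeout (milliseconds)
    )
  },
  webhook: {
    secret: '1234',
    url: 'http://localhost:3000/webhooks/pdf'
  }
}
```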
180 | 181 | **Wait for 5 seconds** 182 | 183 | ```javascript 184 | var htmlPdf = require('html-pdf-chrome') 185 | 186 | module.exports = { 187 | api: { 188 | token: 'api-token' 189 | }, 190 | // html-pdf-chrome options 191 | generator: { 192 | completionTrigger: new htmlPdf.CompletionTrigger.Timer(5000), // waits for 5 sec 193 | }, 194 | webhook: { 195 | secret: '1234', 196 | url: 'http://localhost:3000/webhooks/pdf' 197 | } 198 | } 199 | ``` 200 | 201 | **Wait for event** 202 | 203 | ```javascript 204 | var htmlPdf = require('html-pdf-chrome') 205 | 206 | module.exports = { 207 | api: { 208 | token: 'api-token' 209 | }, 210 | // html-pdf-chrome options 211 | generator: { 212 | completionTrigger: new htmlPdf.CompletionTrigger.Event( 213 | 'myEvent', // name of the event to listen for 214 | '#myElement', // optional DOM element CSS selector to listen on, defaults to body 215 | 5000 // optional timeout (milliseconds) 216 | ) 217 | }, 218 | webhook: { 219 | secret: '1234', 220 | url: 'http://localhost:3000/webhooks/pdf' 221 | } 222 | } 223 | ``` 224 | 225 | In your Javascript trigger the event when rendering is complete 226 | 227 | ```javascript 228 | document.getElementById('myElement').dispatchEvent(new CustomEvent('myEvent')); 229 | ``` 230 | 231 | **Wait for variable** 232 | 233 | ```javascript 234 | var htmlPdf = require('html-pdf-chrome') 235 | 236 | module.exports = { 237 | api: { 238 | token: 'api-token' 239 | }, 240 | // html-pdf-chrome options 241 | generator: { 242 | completionTrigger: new htmlPdf.CompletionTrigger.Variable( 243 | 'myVarName', // optional, name of the variable to wait for. Defaults to 'htmlPdfDone' 244 | 5000 // optional, timeout (milliseconds) 245 | ) 246 | }, 247 | webhook: { 248 | secret: '1234', 249 | url: 'http://localhost:3000/webhooks/pdf' 250 | } 251 | } 252 | ``` 253 | 254 | In your Javascript set the variable when the rendering is complete 255 | 256 | ```javascript 257 | window.myVarName = true; 258 | ``` 259 | 260 | [You can find more completion triggers in html-pdf-chrome's documentation](https://github.com/westy92/html-pdf-chrome#trigger-render-completion) 261 | 262 | ## API 263 | 264 | Below are given the endpoints that are exposed by `pdf-server`'s REST API 265 | 266 | ### Push URL to queue: POST / 267 | 268 | key | type | required | description 269 | --- | ---- | -------- | ----------- 270 | url | string | yes | The URL to generate a PDF from 271 | meta | object | | Optional meta data object to send back to the webhook url 272 | 273 | #### Example 274 | 275 | ```bash 276 | curl -X POST -H 'Authorization: Bearer api-token' -H 'Content-Type: application/json' http://pdf-bot.com/ -d ' 277 | { 278 | "url":"https://esbenp.github.io", 279 | "meta":{ 280 | "type":"invoice", 281 | "id":1 282 | } 283 | }' 284 | ``` 285 | 286 | ## Database 287 | 288 | ### LowDB (file-database) (default) 289 | 290 | If you have low conurrency (run a job every now and then) you can use the default database driver that uses LowDB. 
291 | 292 | ```javascript 293 | var LowDB = require('pdf-bot/src/db/lowdb') 294 | 295 | module.exports = { 296 | api: { 297 | token: 'api-token' 298 | }, 299 | db: LowDB({ 300 | lowDbOptions: {}, 301 | path: '' // defaults to $storagePath/db/db.json 302 | }), 303 | webhook: { 304 | secret: '1234', 305 | url: 'http://localhost:3000/webhooks/pdf' 306 | } 307 | } 308 | ``` 309 | 310 | ### PostgreSQL 311 | 312 | ```javascript 313 | var pgsql = require('pdf-bot/src/db/pgsql') 314 | 315 | module.exports = { 316 | api: { 317 | token: 'api-token' 318 | }, 319 | db: pgsql({ 320 | database: 'pdfbot', 321 | username: 'pdfbot', 322 | password: 'pdfbot', 323 | port: 5432 324 | }), 325 | webhook: { 326 | secret: '1234', 327 | url: 'http://localhost:3000/webhooks/pdf' 328 | } 329 | } 330 | ``` 331 | 332 | Optionally, you can specify a database url by specifying a `connectionString`. 333 | 334 | To install the necessary database tables, run `db:migrate`. You can also destroy the database by running `db:destroy`. 335 | 336 | ## Storage 337 | 338 | Currently `pdf-bot` comes bundled with build-in support for storing PDFs on Amazon S3. 339 | 340 | [Feel free to contribute a PR if you want to see other storage plugins in `pdf-bot`](https://github.com/esbenp/pdf-bot/compare)! 341 | 342 | ### Amazon S3 343 | 344 | To install S3 storage add a key to the `storage` configuration. Notice, you can add as many different locations you want by giving them different keys. 345 | 346 | ```javascript 347 | var createS3Config = require('pdf-bot/src/storage/s3') 348 | 349 | module.exports = { 350 | api: { 351 | token: 'api-token' 352 | }, 353 | storage: { 354 | 'my_s3': createS3Config({ 355 | bucket: '[YOUR BUCKET NAME]', 356 | accessKeyId: '[YOUR ACCESS KEY ID]', 357 | region: '[YOUR REGION]', 358 | secretAccessKey: '[YOUR SECRET ACCESS KEY]' 359 | }) 360 | }, 361 | webhook: { 362 | secret: '1234', 363 | url: 'http://localhost:3000/webhooks/pdf' 364 | } 365 | } 366 | 367 | ``` 368 | 369 | ## Options 370 | 371 | ```javascript 372 | var decaySchedule = [ 373 | 1000 * 60, // 1 minute 374 | 1000 * 60 * 3, // 3 minutes 375 | 1000 * 60 * 10, // 10 minutes 376 | 1000 * 60 * 30, // 30 minutes 377 | 1000 * 60 * 60 // 1 hour 378 | ]; 379 | 380 | module.exports = { 381 | // The settings of the API 382 | api: { 383 | // The port your express.js instance listens to requests from. (default: 3000) 384 | port: 3000, 385 | // Spawn command when a job has been pushed to the API 386 | postPushCommand: ['/home/user/.npm-global/bin/pdf-bot', ['-c', './pdf-bot.config.js', 'shift:all']], 387 | // The token used to validate requests to your API. Not required, but 100% recommended. 388 | token: 'api-token' 389 | }, 390 | db: LowDB(), // see other drivers under Database 391 | // html-pdf-chrome 392 | generator: { 393 | // Triggers that specify when the PDF should be generated 394 | completionTrigger: new htmlPdf.CompletionTrigger.Timer(1000), // waits for 1 sec 395 | // The port to listen for Chrome (default: 9222) 396 | port: 9222 397 | }, 398 | queue: { 399 | // How frequent should pdf-bot retry failed generations? 400 | // (default: 1 min, 3 min, 10 min, 30 min, 60 min) 401 | generationRetryStrategy: function(job, retries) { 402 | return decaySchedule[retries - 1] ? decaySchedule[retries - 1] : 0 403 | }, 404 | // How many times should pdf-bot try to generate a PDF? 
405 | // (default: 5) 406 | generationMaxTries: 5, 407 | // How many generations to run at the same time when using shift:all 408 | parallelism: 4, 409 | // How frequent should pdf-bot retry failed webhook pings? 410 | // (default: 1 min, 3 min, 10 min, 30 min, 60 min) 411 | webhookRetryStrategy: function(job, retries) { 412 | return decaySchedule[retries - 1] ? decaySchedule[retries - 1] : 0 413 | }, 414 | // How many times should pdf-bot try to ping a webhook? 415 | // (default: 5) 416 | webhookMaxTries: 5 417 | }, 418 | storage: { 419 | 's3': createS3Config({ 420 | bucket: '', 421 | accessKeyId: '', 422 | region: '', 423 | secretAccessKey: '' 424 | }) 425 | }, 426 | webhook: { 427 | // The prefix to add to all pdf-bot headers on the webhook response. 428 | // I.e. X-PDF-Transaction and X-PDF-Signature. (default: X-PDF-) 429 | headerNamespace: 'X-PDF-', 430 | // Extra request options to add to the Webhook ping. 431 | requestOptions: { 432 | 433 | }, 434 | // The secret used to generate the hmac-sha1 signature hash. 435 | // !Not required, but should definitely be included! 436 | secret: '1234', 437 | // The endpoint to send PDF messages to. 438 | url: 'http://localhost:3000/webhooks/pdf' 439 | } 440 | } 441 | ``` 442 | 443 | ## CLI 444 | 445 | `pdf-bot` comes with a full CLI included! Use `-c` to pass a configuration to `pdf-bot`. You can also use `--help` to get a list of all commands. An example is given below. 446 | 447 | ```bash 448 | $ pdf-bot.js --config ./examples/pdf-bot.config.js --help 449 | 450 | 451 | Usage: pdf-bot [options] [command] 452 | 453 | 454 | Options: 455 | 456 | -V, --version output the version number 457 | -c, --config Path to configuration file 458 | -h, --help output usage information 459 | 460 | 461 | Commands: 462 | 463 | api Start the API 464 | db:migrate 465 | db:destroy 466 | install 467 | generate [jobID] Generate PDF for job 468 | jobs [options] List all completed jobs 469 | ping [jobID] Attempt to ping webhook for job 470 | ping:retry-failed 471 | pings [jobId] List pings for a job 472 | purge [options] Will remove all completed jobs 473 | push [options] [url] Push new job to the queue 474 | shift Run the next job in the queue 475 | shift:all Run all unfinished jobs in the queue 476 | ``` 477 | 478 | ## Debug mode 479 | 480 | `pdf-bot` uses `debug` for debug messages. You can turn on debugging by setting the environment variable `DEBUG=pdf:*` like so 481 | 482 | ```bash 483 | DEBUG=pdf:* pdf-bot jobs 484 | ``` 485 | 486 | ## Tests 487 | 488 | ```bash 489 | $ npm run test 490 | ``` 491 | 492 | ## Issues 493 | 494 | [Please report issues to the issue tracker](https://github.com/esbenp/pdf-bot/issues/new) 495 | 496 | ## License 497 | 498 | The MIT License (MIT). Please see [License File](https://github.com/esbenp/pdf-bot/blob/master/LICENSE) for more information. 
499 | -------------------------------------------------------------------------------- /bin/pdf-bot.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var fs = require('fs') 4 | var path = require('path') 5 | var debug = require('debug')('pdf:cli') 6 | var Table = require('cli-table') 7 | var program = require('commander'); 8 | var merge = require('lodash.merge') 9 | var chunk = require('lodash.chunk') 10 | var clone = require('lodash.clonedeep'); 11 | var createPdfGenerator = require('../src/pdfGenerator') 12 | var createApi = require('../src/api') 13 | var error = require('../src/error') 14 | var createQueue = require('../src/queue') 15 | var webhook = require('../src/webhook') 16 | var pjson = require('../package.json') 17 | var execSync = require('child_process').execSync 18 | var prompt = require('prompt') 19 | var lowDb = require('../src/db/lowdb') 20 | 21 | program 22 | .version(pjson.version) 23 | .option('-c, --config ', 'Path to configuration file') 24 | 25 | var decaySchedule = [ 26 | 1000 * 60, // 1 minute 27 | 1000 * 60 * 3, // 3 minutes 28 | 1000 * 60 * 10, // 10 minutes 29 | 1000 * 60 * 30, // 30 minutes 30 | 1000 * 60 * 60 // 1 hour 31 | ]; 32 | 33 | var configuration, queue 34 | var defaultConfig = { 35 | api: { 36 | port: 3000, 37 | //postPushCommand: '', 38 | //token: 'api-token' 39 | }, 40 | db: lowDb(), 41 | // html-pdf-chrome options 42 | generator: { 43 | 44 | }, 45 | queue: { 46 | generationRetryStrategy: function(job, retries) { 47 | return decaySchedule[retries - 1] ? decaySchedule[retries - 1] : 0 48 | }, 49 | generationMaxTries: 5, 50 | parallelism: 4, 51 | webhookRetryStrategy: function(job, retries) { 52 | return decaySchedule[retries - 1] ? decaySchedule[retries - 1] : 0 53 | }, 54 | webhookMaxTries: 5, 55 | lowDbOptions: { 56 | 57 | } 58 | }, 59 | storage: { 60 | /* 61 | 's3': createS3Config({ 62 | bucket: '', 63 | accessKeyId: '', 64 | region: '', 65 | secretAccessKey: '' 66 | }) 67 | */ 68 | }, 69 | storagePath: 'storage', 70 | /*webhook: { 71 | headerNamespace: 'X-PDF-', 72 | requestOptions: { 73 | 74 | }, 75 | secret: '12345', 76 | url: 'http://localhost:3001/hook' 77 | }*/ 78 | } 79 | 80 | program 81 | .command('api') 82 | .description('Start the API') 83 | .action(function (options) { 84 | // We delay initiation of queue. This is because the API will load the DB in memory as 85 | // copy A. When we make changes through the CLI this creates copy B. But next time the 86 | // user pushes to the queue using the API copy A will be persisted again. 
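  // To avoid that, openConfig(true) returns a factory rather than a queue instance;
  // src/api.js calls the factory on every request so each push reads a fresh copy of the database.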
87 | var initiateQueue = openConfig(true) 88 | 89 | var apiOptions = configuration.api 90 | var port = apiOptions.port 91 | 92 | createApi(initiateQueue, { 93 | port: port, 94 | postPushCommand: apiOptions.postPushCommand, 95 | token: apiOptions.token 96 | }).listen(port, function() { 97 | debug('Listening to port %d', port) 98 | }) 99 | }) 100 | 101 | program 102 | .command('install') 103 | .action(function (options) { 104 | var configPath = program.config || path.join(process.cwd(), 'pdf-bot.config.js') 105 | 106 | function startPrompt() { 107 | prompt.start({noHandleSIGINT: true}) 108 | prompt.get([ 109 | { 110 | name: 'storagePath', 111 | description: 'Enter a path for storage', 112 | default: path.join(process.cwd(), 'pdf-storage'), 113 | required: true 114 | }, 115 | { 116 | name: 'token', 117 | description: 'An access token for your API', 118 | required: false 119 | }], function (err, result) { 120 | if (err) { 121 | process.exit(0) 122 | } 123 | var options = {} 124 | 125 | if (result.token) { 126 | options.api = {token: result.token} 127 | } 128 | 129 | options.storagePath = result.storagePath 130 | 131 | var configContents = "module.exports = " + JSON.stringify(options, null, 2) 132 | 133 | fs.writeFileSync(configPath, configContents) 134 | 135 | if (!fs.existsSync(options.storagePath)) { 136 | fs.mkdirSync(options.storagePath, '0775') 137 | fs.mkdirSync(path.join(options.storagePath, 'db'), '0775') 138 | fs.mkdirSync(path.join(options.storagePath, 'pdf'), '0775') 139 | } 140 | 141 | console.log('pdf-bot was installed successfully.') 142 | console.log('Config file is placed at ' + configPath + ' and contains') 143 | console.log(configContents) 144 | console.log('You should add ALIAS pdf-bot="pdf-bot -c ' + configPath + '" to your ~/.profile') 145 | }); 146 | } 147 | 148 | var existingConfigFileFound = fs.existsSync(configPath) 149 | if (existingConfigFileFound) { 150 | prompt.start({noHandleSIGINT: true}) 151 | prompt.get([ 152 | { 153 | name: 'replaceConfig', 154 | description: 'A config file already exists, are you sure you want to override (yes/no)' 155 | } 156 | ], function (err, result) { 157 | if (err) { 158 | process.exit(0) 159 | } 160 | if (result.replaceConfig !== 'yes') { 161 | process.exit(0) 162 | } else { 163 | startPrompt() 164 | } 165 | }) 166 | } else { 167 | startPrompt() 168 | } 169 | }) 170 | 171 | program 172 | .command('db:migrate') 173 | .action(function() { 174 | openConfig() 175 | 176 | var db = configuration.db(configuration) 177 | 178 | return db.migrate() 179 | .then(function () { 180 | console.log('The database was migrated') 181 | db.close() 182 | process.exit(0) 183 | }) 184 | .catch(handleDbError) 185 | }) 186 | 187 | program 188 | .command('db:destroy') 189 | .action(function() { 190 | openConfig() 191 | 192 | var db = configuration.db(configuration) 193 | 194 | prompt.start({noHandleSIGINT: true}) 195 | prompt.get([ 196 | { 197 | name: 'destroy', 198 | description: 'This action will remove all data and tables. Are you sure you want to destroy the database? 
(yes/no)' 199 | } 200 | ], function (err, result) { 201 | if (err) { 202 | process.exit(0) 203 | } 204 | if (result.destroy !== 'yes') { 205 | process.exit(0) 206 | } else { 207 | db.destroy() 208 | .then(function() { 209 | console.log('The database has been destroyed.') 210 | db.close() 211 | process.exit(0) 212 | }) 213 | .catch(handleDbError) 214 | } 215 | }) 216 | }) 217 | 218 | program 219 | .command('generate [jobID]') 220 | .description('Generate PDF for job') 221 | .action(function (jobId, options){ 222 | openConfig() 223 | 224 | return queue.getById(jobId) 225 | .then(function (job) { 226 | if (!job) { 227 | console.error('Job not found') 228 | queue.close() 229 | process.exit(1) 230 | } 231 | 232 | return processJob(job, configuration) 233 | }) 234 | .catch(handleDbError) 235 | }) 236 | 237 | program 238 | .command('jobs') 239 | .description('List all completed jobs') 240 | .option('--completed', 'Show completed jobs') 241 | .option('--failed', 'Show failed jobs') 242 | .option('-l, --limit [limit]', 'Limit how many jobs to show') 243 | .action(function (options) { 244 | openConfig() 245 | 246 | return listJobs(queue, options.failed, options.completed, options.limit) 247 | .then(function() { 248 | queue.close() 249 | process.exit(0) 250 | }) 251 | .catch(handleDbError) 252 | }) 253 | 254 | program 255 | .command('ping [jobID]') 256 | .description('Attempt to ping webhook for job') 257 | .action(function (jobId, options) { 258 | openConfig() 259 | 260 | return queue.getById(jobId) 261 | .then(function (job) { 262 | if (!job) { 263 | queue.close() 264 | console.log('Job not found.') 265 | return; 266 | } 267 | 268 | return ping(job, configuration.webhook).then(response => { 269 | queue.close() 270 | 271 | if (response.error) { 272 | process.exit(1) 273 | } else { 274 | process.exit(0) 275 | } 276 | }) 277 | }) 278 | .catch(handleDbError) 279 | }) 280 | 281 | program 282 | .command('ping:retry-failed') 283 | .action(function() { 284 | openConfig() 285 | 286 | var maxTries = configuration.queue.webhookMaxTries 287 | var retryStrategy = configuration.queue.webhookRetryStrategy 288 | 289 | queue.getNextWithoutSuccessfulPing(retryStrategy, maxTries) 290 | .then(function (next) { 291 | if (!next) { 292 | queue.close() 293 | process.exit(0) 294 | } 295 | 296 | return ping(next, configuration.webhook).then(function (response) { 297 | queue.close() 298 | 299 | if (response.error) { 300 | process.exit(1) 301 | } else { 302 | process.exit(0) 303 | } 304 | }) 305 | }) 306 | .catch(handleDbError) 307 | }) 308 | 309 | program 310 | .command('pings [jobId]') 311 | .description('List pings for a job') 312 | .action(function (jobId, options) { 313 | openConfig() 314 | 315 | var job = queue.getById(jobId) 316 | .then(function (job) { 317 | if (!job) { 318 | queue.close() 319 | console.log('Job not found') 320 | process.exit(1) 321 | } 322 | 323 | var table = new Table({ 324 | head: ['ID', 'URL', 'Method', 'Status', 'Sent at', 'Response', 'Payload'], 325 | colWidths: [40, 40, 50, 20, 20, 20] 326 | }); 327 | 328 | for(var i in job.pings) { 329 | var ping = job.pings[i] 330 | 331 | table.push([ 332 | ping.id, 333 | ping.url, 334 | ping.method, 335 | ping.status, 336 | formatDate(ping.sent_at), 337 | JSON.stringify(ping.response), 338 | JSON.stringify(ping.payload) 339 | ]) 340 | } 341 | 342 | console.log(table.toString()) 343 | queue.close() 344 | process.exit(0) 345 | }) 346 | .catch(handleDbError) 347 | }) 348 | 349 | program 350 | .command('purge') 351 | .description('Will remove all completed 
jobs') 352 | .option('--failed', 'Remove all failed jobs') 353 | .option('--new', 'Remove all new jobs') 354 | .action(function (options) { 355 | openConfig() 356 | 357 | return queue.purge(options.failed, options.new) 358 | .then(function () { 359 | queue.close() 360 | console.log('The queue was purged.') 361 | process.exit(0) 362 | }) 363 | .catch(handleDbError) 364 | }) 365 | 366 | program 367 | .command('push [url]') 368 | .description('Push new job to the queue') 369 | .option('-m, --meta [meta]', 'JSON string with meta data. Default: \'{}\'') 370 | .action(function (url, options) { 371 | openConfig() 372 | 373 | return queue 374 | .addToQueue({ 375 | url: url, 376 | meta: JSON.parse(options.meta || '{}') 377 | }) 378 | .then(function (response) { 379 | queue.close() 380 | 381 | if (error.isError(response)) { 382 | console.error('Could not push to queue: %s', response.message) 383 | process.exit(1) 384 | } else { 385 | console.log('The job was created with ID ' + response.id) 386 | process.exit(0) 387 | } 388 | }) 389 | .catch(handleDbError) 390 | }) 391 | 392 | program 393 | .command('shift') 394 | .description('Run the next job in the queue') 395 | .action(function (url) { 396 | openConfig() 397 | 398 | var maxTries = configuration.queue.generationMaxTries 399 | var retryStrategy = configuration.queue.generationRetryStrategy 400 | 401 | return queue.getNext(retryStrategy, maxTries) 402 | .then(function (next) { 403 | if (!next) { 404 | queue.close() 405 | process.exit(0) 406 | } 407 | 408 | return processJob(next, configuration) 409 | }) 410 | .catch(handleDbError) 411 | }) 412 | 413 | program 414 | .command('shift:all') 415 | .description('Run all unfinished jobs in the queue') 416 | .action(function (url) { 417 | openConfig() 418 | 419 | return queue.isBusy() 420 | .then(function (isBusy) { 421 | if (isBusy) { 422 | queue.close() 423 | process.exit(0) 424 | } 425 | 426 | var shiftAll = function () { 427 | var maxTries = configuration.queue.generationMaxTries 428 | var retryStrategy = configuration.queue.generationRetryStrategy 429 | var parallelism = configuration.queue.parallelism 430 | 431 | return queue.getAllUnfinished(retryStrategy, maxTries) 432 | .then(function (jobs) { 433 | if (jobs.length === 0) { 434 | queue.close() 435 | process.exit(0) 436 | } 437 | 438 | var chunks = chunk(jobs, parallelism) 439 | 440 | function runNextChunk(k = 1) { 441 | if (chunks.length === 0) { 442 | queue.setIsBusy(false).then(shiftAll) 443 | } else { 444 | var chunk = chunks.shift() 445 | console.log('Running chunk %s, %s chunks left', k, chunks.length) 446 | 447 | var promises = [] 448 | for(var i in chunk) { 449 | promises.push(processJob(chunk[i], clone(configuration), false)) 450 | } 451 | 452 | Promise.all(promises) 453 | .then(function(){ 454 | return runNextChunk(k + 1) 455 | }) 456 | .catch(function(){ 457 | return queue.setIsBusy(false).then(function() { 458 | queue.close() 459 | process.exit(1) 460 | }) 461 | }) 462 | } 463 | } 464 | 465 | console.log('Found %s jobs, divided into %s chunks', jobs.length, chunks.length) 466 | 467 | queue.setIsBusy(true).then(function () { 468 | return runNextChunk() 469 | }) 470 | }) 471 | } 472 | 473 | return shiftAll() 474 | }) 475 | .catch(handleDbError) 476 | }) 477 | 478 | program.parse(process.argv) 479 | 480 | if (!process.argv.slice(2).length) { 481 | program.outputHelp(); 482 | } 483 | 484 | function processJob(job, configuration, exitProcess = true) { 485 | var generatorOptions = configuration.generator 486 | var storagePlugins = 
configuration.storage 487 | 488 | var generator = createPdfGenerator(configuration.storagePath, generatorOptions, storagePlugins) 489 | 490 | return queue.processJob(generator, job, configuration.webhook).then(function (response) { 491 | if (error.isError(response)) { 492 | console.error(response.message) 493 | if (exitProcess) { 494 | queue.close() 495 | process.exit(1) 496 | } 497 | } else { 498 | console.log('Job ID ' + job.id + ' was processed.') 499 | if (exitProcess) { 500 | queue.close() 501 | process.exit(0) 502 | } 503 | } 504 | }) 505 | } 506 | 507 | function openConfig(delayQueueCreation = false) { 508 | configuration = defaultConfig 509 | 510 | if (!program.config) { 511 | if (fs.existsSync(path.join(process.cwd(), 'pdf-bot.config.js'))) { 512 | program.config = 'pdf-bot.config.js' 513 | } else { 514 | throw new Error('You need to supply a config file') 515 | } 516 | } 517 | 518 | var configPath = path.join(process.cwd(), program.config) 519 | 520 | if (!fs.existsSync(configPath)) { 521 | throw new Error('No config file was found at ' + configPath) 522 | } 523 | 524 | debug('Creating CLI using config file %s', configPath) 525 | merge(configuration, require(configPath)) 526 | 527 | if (!fs.existsSync(configuration.storagePath)) { 528 | throw new Error('Whoops! Looks like your storage folder does not exist. You should run pdf-bot install.') 529 | } 530 | 531 | if (!fs.existsSync(path.join(configuration.storagePath, 'pdf'))) { 532 | throw new Error('There is no pdf folder in the storage folder. Create it: storage/pdf') 533 | } 534 | 535 | function initiateQueue() { 536 | var db = configuration.db(configuration) 537 | var queueOptions = configuration.queue 538 | return createQueue(db, queueOptions) 539 | } 540 | 541 | if (delayQueueCreation) { 542 | return initiateQueue 543 | } else { 544 | queue = initiateQueue() 545 | } 546 | } 547 | 548 | function listJobs(queue, failed = false, limit) { 549 | return new Promise((resolve) => { 550 | var response = queue 551 | .getList( 552 | failed, 553 | limit 554 | ).then(function (response) { 555 | var table = new Table({ 556 | head: ['ID', 'URL', 'Meta', 'PDF Gen. 
tries', 'Created at', 'Completed at'], 557 | colWidths: [40, 40, 50, 20, 20, 20] 558 | }); 559 | 560 | for(var i in response) { 561 | var job = response[i] 562 | 563 | table.push([ 564 | job.id, 565 | job.url, 566 | JSON.stringify(job.meta), 567 | job.generations.length, 568 | formatDate(job.created_at), 569 | formatDate(job.completed_at) 570 | ]) 571 | } 572 | 573 | console.log(table.toString()); 574 | 575 | resolve() 576 | }) 577 | .catch(handleDbError) 578 | }) 579 | } 580 | 581 | function ping(job, webhookConfiguration) { 582 | return queue.attemptPing(job, webhookConfiguration || {}).then(response => { 583 | if (!response.error) { 584 | console.log('Ping succeeded: ' + JSON.stringify(response)) 585 | } else { 586 | console.error('Ping failed: ' + JSON.stringify(response)) 587 | } 588 | 589 | return response 590 | }) 591 | } 592 | 593 | function formatDate(input) { 594 | if (!input) { 595 | return '' 596 | } 597 | 598 | return (new Date(input)).toLocaleString() 599 | } 600 | 601 | function handleDbError(e) { 602 | console.error(e) 603 | queue.close() 604 | process.exit(1) 605 | } 606 | -------------------------------------------------------------------------------- /examples/pdf-bot.config.js: -------------------------------------------------------------------------------- 1 | var htmlPdf = require('html-pdf-chrome') 2 | var createS3Config = require('../src/storage/s3') 3 | var pgsql = require('../src/db/pgsql') 4 | 5 | module.exports = { 6 | api: { 7 | token: 'api-token' 8 | }, 9 | db: pgsql({ 10 | user: 'pdfbot', 11 | password: 'pdfbot', 12 | database: 'pdfbot' 13 | }), 14 | // html-pdf-chrome options 15 | generator: { 16 | completionTrigger: new htmlPdf.CompletionTrigger.Timer(1000), // waits for 1 sec 17 | //port: 50 // chrome port 18 | }, 19 | queue: { 20 | 21 | }, 22 | storage: { 23 | /*'s3': createS3Config({ 24 | bucket: '', 25 | accessKeyId: '', 26 | region: '', 27 | secretAccessKey: '' 28 | })*/ 29 | }, 30 | // storagePath: '', 31 | webhook: { 32 | headerNamespace: 'X-PDF-', 33 | requestOptions: { 34 | 35 | }, 36 | secret: '1234', 37 | url: 'http://localhost:3000/webhooks/pdf' 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /examples/receiving-api.js: -------------------------------------------------------------------------------- 1 | var express = require('express') 2 | var bodyParser = require('body-parser') 3 | 4 | var api = express() 5 | api.use(bodyParser.json()) 6 | 7 | api.post('/hook', function (req, res) { 8 | var signature = req.get('X-PDF-Signature', 'sha1=') 9 | 10 | var bodyCrypted = require('crypto') 11 | .createHmac('sha1', '12345') 12 | .update(JSON.stringify(req.body)) 13 | .digest('hex') 14 | 15 | if (bodyCrypted !== signature) { 16 | res.status(401).send() 17 | return 18 | } 19 | 20 | console.log('PDF webhook received', JSON.stringify(req.body)) 21 | 22 | res.status(204).send() 23 | }) 24 | 25 | api.listen(3001, function() { 26 | console.log('Listening to port 3001') 27 | }) 28 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pdf-bot", 3 | "version": "0.6.1", 4 | "author": "Esben Petersen ", 5 | "homepage": "https://github.com/esbenp/pdf-bot", 6 | "license": "MIT", 7 | "repository": { 8 | "type": "git", 9 | "url": "https://github.com/esbenp/pdf-bot.git" 10 | }, 11 | "engines": { 12 | "node": ">= 6" 13 | }, 14 | "description": "A Node queue API for generating 
PDFs using headless Chrome. Comes with a CLI, S3 storage and webhooks for notifying subscribers about generated PDFs", 15 | "main": "./src/index.js", 16 | "bin": "./bin/pdf-bot.js", 17 | "files": [ 18 | "bin/", 19 | "src/", 20 | "storage/" 21 | ], 22 | "scripts": { 23 | "example": "DEBUG=pdf:* node ./bin/pdf-bot.js --config ./examples/pdf-bot.config.js", 24 | "example:receiving-api": "DEBUG=pdf:* node ./examples/receiving-api.js", 25 | "test": "nyc --reporter=lcov --reporter=text mocha test/*.test.js --recursive --coverage", 26 | "test:watch": "mocha -w test/*.test.js --recursive" 27 | }, 28 | "dependencies": { 29 | "body-parser": "^1.17.2", 30 | "cli-table": "^0.3.1", 31 | "commander": "^2.11.0", 32 | "debug": "^2.6.8", 33 | "express": "^4.15.3", 34 | "html-pdf-chrome": "^0.4.2", 35 | "lodash.chunk": "^4.2.0", 36 | "lodash.clonedeep": "^4.5.0", 37 | "lodash.merge": "^4.6.0", 38 | "lowdb": "^0.16.2", 39 | "node-fetch": "^2.6.1", 40 | "pg": "^7.4.0", 41 | "prompt": "^1.0.0", 42 | "s3": "^4.4.0", 43 | "uuid": "^3.1.0" 44 | }, 45 | "devDependencies": { 46 | "assert": "^1.4.1", 47 | "coveralls": "^2.13.1", 48 | "mocha": "^3.5.0", 49 | "nyc": "^11.1.0", 50 | "proxyquire": "^1.8.0", 51 | "sinon": "^3.0.0", 52 | "supertest": "^3.0.0" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /production/README.md: -------------------------------------------------------------------------------- 1 | # Running pdf-bot in production 2 | 3 | ## Run pdf-bot using pm2 4 | 5 | It is recommended to use [pm2](https://github.com/Unitech/pm2) to run a pdf-bot process. 6 | 7 | First install `pm2` 8 | 9 | ``` 10 | npm install -g pm2 11 | ``` 12 | 13 | [Create a configuration file using the one in this repo as an example](https://github.com/esbenp/pdf-bot/blob/master/production/pm2.config.js) 14 | 15 | `pdf-bot-process.config.js` 16 | ```javascript 17 | module.exports = { 18 | apps : [{ 19 | name : "pdf-bot", 20 | script : "pdf-bot", 21 | args : "api -c ./pdf-bot.config.js", 22 | // Should be from whatever folder your pdf-bot.config.js is in 23 | // cwd : "/home/[user]/", 24 | env: { 25 | "DEBUG" : "pdf:*", 26 | "NODE_ENV": "production", 27 | }, 28 | }] 29 | } 30 | ``` 31 | 32 | Run in using `pm2 start pdf-bot-process.config.js` 33 | 34 | [Read more about starting the app on server restarts](http://pm2.keymetrics.io/docs/usage/startup/) 35 | 36 | ## Use nginx to proxy requests 37 | 38 | If you run `pdf-bot` on port 3000 or similar it is recommended to run it behind an nginx proxy. 
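Once the site described below is in place, you can smoke-test the proxied API with a plain HTTP request, mirroring the push example from the main README. This is a hedged sketch: `pdf.example.com` is a placeholder server name and `api-token` must match the token in your `pdf-bot.config.js`.

```bash
curl -X POST -H 'Authorization: Bearer api-token' -H 'Content-Type: application/json' \
  http://pdf.example.com/ -d '{"url":"https://esbenp.github.io"}'
```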
39 | 40 | Create a site that listens to port 80 and uses the [config from the `production/` folder](https://github.com/esbenp/pdf-bot/blob/master/production/nginx.conf) 41 | -------------------------------------------------------------------------------- /production/nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80 default_server; 3 | listen [::]:80 default_server; 4 | 5 | server_name pdf-bot; 6 | 7 | location / { 8 | proxy_set_header X-Real-IP $remote_addr; 9 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 10 | proxy_set_header Host $http_host; 11 | proxy_set_header X-NginX-Proxy true; 12 | proxy_pass http://127.0.0.1:3000/; 13 | proxy_redirect off; 14 | proxy_http_version 1.1; 15 | proxy_set_header Upgrade $http_upgrade; 16 | proxy_set_header Connection "upgrade"; 17 | 18 | proxy_redirect off; 19 | proxy_set_header X-Forwarded-Proto $scheme; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /production/pm2.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | apps : [{ 3 | name : "pdf-bot", 4 | script : "pdf-bot", 5 | args : "api -c ./pdf-bot.config.js", 6 | // Should be from whatever folder your pdf-bot.config.js is in 7 | // cwd : "/home/[user]/", 8 | env: { 9 | "DEBUG" : "pdf:*", 10 | "NODE_ENV": "production", 11 | }, 12 | }] 13 | } 14 | -------------------------------------------------------------------------------- /src/api.js: -------------------------------------------------------------------------------- 1 | // these need to occur after dotenv 2 | var express = require('express') 3 | var bodyParser = require('body-parser') 4 | var debug = require('debug')('pdf:api') 5 | var error = require('./error') 6 | var childProcess = require('child_process') 7 | 8 | function createApi(createQueue, options = {}) { 9 | var api = express() 10 | api.use(bodyParser.json()) 11 | 12 | var token = options.token 13 | 14 | if (!token) { 15 | debug('Warning: The server should be protected using a token.') 16 | } 17 | 18 | api.post('/', function(req, res) { 19 | var queue = createQueue() 20 | var authHeader = req.get('Authorization') 21 | 22 | if (token && (!authHeader || authHeader.replace(/Bearer (.*)$/i, '$1') !== token)) { 23 | res.status(401).json(error.createErrorResponse(error.ERROR_INVALID_TOKEN)) 24 | return 25 | } 26 | 27 | queue 28 | .addToQueue({ 29 | url: req.body.url, 30 | meta: req.body.meta || {} 31 | }).then(function (response) { 32 | queue.close() 33 | 34 | if (error.isError(response)) { 35 | res.status(422).json(response) 36 | return 37 | } 38 | 39 | if (options.postPushCommand && options.postPushCommand.length > 0) { 40 | childProcess.spawn.apply(null, options.postPushCommand) 41 | } 42 | 43 | res.status(201).json(response) 44 | }) 45 | }) 46 | 47 | return api 48 | } 49 | 50 | module.exports = createApi 51 | -------------------------------------------------------------------------------- /src/db/lowdb.js: -------------------------------------------------------------------------------- 1 | var low = require('lowdb') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var utils = require('../utils') 5 | 6 | function createLowDb(options = {}) { 7 | return function (pdfBotConfiguration) { 8 | if (!options.path) { 9 | options.path = path.join(pdfBotConfiguration.storagePath, 'db', 'db.json') 10 | } 11 | 12 | var db = low(options.path, options.lowDbOptions || {}) 13 | 14 | 
db.defaults({ 15 | queue: options.initialValue || [] 16 | }) 17 | .write() 18 | 19 | var createDbMethod = function (func) { 20 | return function() { 21 | var args = Array.prototype.slice.call(arguments, 0) 22 | return new Promise((resolve) => resolve(func.apply(func, [db].concat(args)))) 23 | } 24 | } 25 | 26 | return { 27 | close: createDbMethod(close), 28 | getAllUnfinished: createDbMethod(getAllUnfinished), 29 | getById: createDbMethod(getById), 30 | getList: createDbMethod(getList), 31 | getNextWithoutSuccessfulPing: createDbMethod(getNextWithoutSuccessfulPing), 32 | logGeneration: createDbMethod(logGeneration), 33 | logPing: createDbMethod(logPing), 34 | isBusy: createDbMethod(isBusy), 35 | markAsCompleted: createDbMethod(markAsCompleted), 36 | purge: createDbMethod(purge), 37 | pushToQueue: createDbMethod(pushToQueue), 38 | setIsBusy: createDbMethod(setIsBusy), 39 | setStorage: createDbMethod(setStorage) 40 | } 41 | } 42 | } 43 | 44 | module.exports = createLowDb 45 | 46 | function pushToQueue (db, data) { 47 | db 48 | .get('queue') 49 | .push(data) 50 | .write() 51 | 52 | return data 53 | } 54 | 55 | function close() { 56 | return true 57 | } 58 | 59 | function getAllUnfinished (db, shouldWait, maxTries = 5) { 60 | return db 61 | .get('queue') 62 | .filter(function (job) { 63 | if (job.completed_at !== null) { 64 | return false 65 | } 66 | 67 | var currentTries = job.generations.length 68 | 69 | if (currentTries === 0) { 70 | return true 71 | } 72 | 73 | if (currentTries < maxTries) { 74 | var lastRun = job.generations[currentTries - 1].generated_at 75 | 76 | if (_hasWaitedLongEnough(lastRun, shouldWait(job, currentTries))) { 77 | return true 78 | } 79 | } 80 | 81 | return false 82 | }) 83 | .value() 84 | } 85 | 86 | function getById (db, id) { 87 | return db 88 | .get('queue') 89 | .find({ id: id }) 90 | .value() 91 | } 92 | 93 | function getList (db, failed = false, completed = false, limit) { 94 | var query = db.get('queue') 95 | 96 | query = query.filter(function (job) { 97 | // failed jobs 98 | if (!failed && job.completed_at === null && job.generations.length > 0) { 99 | return false 100 | } 101 | 102 | // completed jobs 103 | if (!completed && job.completed_at !== null) { 104 | return false 105 | } 106 | 107 | return true 108 | }) 109 | 110 | if (limit) { 111 | query = query.take(limit) 112 | } 113 | 114 | return query.value() 115 | } 116 | 117 | function getNextWithoutSuccessfulPing (db, shouldWait, maxTries = 5) { 118 | return db 119 | .get('queue') 120 | .filter(function (job) { 121 | var currentTries = job.pings.length 122 | 123 | if (job.completed_at === null) { 124 | return false 125 | } 126 | 127 | if (currentTries === 0) { 128 | return true 129 | } 130 | 131 | if (currentTries >= maxTries) { 132 | return false 133 | } 134 | 135 | var unsuccessfulPings = job.pings.filter(ping => ping.error) 136 | 137 | // There are some successful ping(s) 138 | if (unsuccessfulPings.length !== job.pings.length) { 139 | return false 140 | } 141 | 142 | var lastTry = unsuccessfulPings[unsuccessfulPings.length - 1].sent_at 143 | if (_hasWaitedLongEnough(lastTry, shouldWait(job, currentTries))) { 144 | return true 145 | } 146 | 147 | return false 148 | }) 149 | .take(1) 150 | .value()[0] 151 | } 152 | 153 | function isBusy (db) { 154 | return db.get('is_busy').value() || false 155 | } 156 | 157 | function purge (db, failed = false, pristine = false, maxTries = 5) { 158 | var query = db.get('queue').slice(0) 159 | 160 | query = query.filter(function (job) { 161 | // failed jobs 162 | if 
(failed && job.completed_at === null && job.generations.length >= maxTries) { 163 | return true 164 | } 165 | 166 | // new jobs 167 | if (pristine && job.completed_at === null && job.generations.length < maxTries) { 168 | return true 169 | } 170 | 171 | // completed jobs 172 | if (job.completed_at !== null) { 173 | return true 174 | } 175 | 176 | return false 177 | }) 178 | 179 | var queue = query.value() 180 | 181 | for(var i in queue) { 182 | db.get('queue').remove({ id: queue[i].id }).write() 183 | } 184 | } 185 | 186 | function setIsBusy (db, isBusy) { 187 | db.set('is_busy', isBusy).write() 188 | } 189 | 190 | function logGeneration (db, id, response) { 191 | var job = getById(db, id) 192 | 193 | var generations = job.generations.slice(0) 194 | generations.push(response) 195 | 196 | return db 197 | .get('queue') 198 | .find({ id: id }) 199 | .assign({ generations: generations }) 200 | .write() 201 | } 202 | 203 | function logPing (db, id, response) { 204 | var job = getById(db, id) 205 | 206 | var pings = job.pings.slice(0) 207 | pings.push(response) 208 | 209 | return db 210 | .get('queue') 211 | .find({ id: id }) 212 | .assign({ pings: pings }) 213 | .write() 214 | } 215 | 216 | function markAsCompleted (db, id) { 217 | var completed_at = utils.getCurrentDateTimeAsString() 218 | 219 | return db 220 | .get('queue') 221 | .find({ id: id }) 222 | .assign({ completed_at: completed_at }) 223 | .write() 224 | } 225 | 226 | function setStorage (db, id, storage) { 227 | return db 228 | .get('queue') 229 | .find({ id: id }) 230 | .assign({ storage: storage }) 231 | .write() 232 | } 233 | 234 | function _hasWaitedLongEnough (logTimestamp, timeToWait) { 235 | var diff = (new Date() - new Date(logTimestamp)) 236 | return diff > timeToWait 237 | } 238 | -------------------------------------------------------------------------------- /src/db/pgsql.js: -------------------------------------------------------------------------------- 1 | var low = require('lowdb') 2 | var fs = require('fs') 3 | var path = require('path') 4 | var utils = require('../utils') 5 | var pg = require('pg') 6 | 7 | function createPostgresDb(options = {}) { 8 | function parseConfig() { 9 | var config = {}; 10 | 11 | if (options.connectionString != undefined) { 12 | config.connectionString = options.connectionString; 13 | } else { 14 | config.user = options.user; 15 | config.host = options.host || 'localhost'; 16 | config.database = options.database; 17 | config.password = options.password; 18 | config.port = options.port || 5432; 19 | } 20 | 21 | if (options.ssl != undefined) { 22 | config.ssl = options.ssl; 23 | } 24 | 25 | if (options.types != undefined) { 26 | config.types = options.types; 27 | } 28 | 29 | if (options.statement_timeout != undefined) { 30 | config.statement_timeout = options.statement_timeout; 31 | } 32 | 33 | return config; 34 | }; 35 | 36 | 37 | return function (pdfBotConfiguration) { 38 | var db = new pg.Client(parseConfig()); 39 | db.connect() 40 | 41 | var createDbMethod = function (func) { 42 | return function() { 43 | var args = Array.prototype.slice.call(arguments, 0) 44 | return func.apply(func, [db].concat(args)) 45 | } 46 | } 47 | 48 | return { 49 | close: createDbMethod(close), 50 | destroy: createDbMethod(destroy), 51 | getAllUnfinished: createDbMethod(getAllUnfinished), 52 | getById: createDbMethod(getById), 53 | getList: createDbMethod(getList), 54 | getNextWithoutSuccessfulPing: createDbMethod(getNextWithoutSuccessfulPing), 55 | logGeneration: createDbMethod(logGeneration), 56 | logPing: 
createDbMethod(logPing), 57 | isBusy: createDbMethod(isBusy), 58 | markAsCompleted: createDbMethod(markAsCompleted), 59 | migrate: createDbMethod(migrate), 60 | purge: createDbMethod(purge), 61 | pushToQueue: createDbMethod(pushToQueue), 62 | setIsBusy: createDbMethod(setIsBusy), 63 | setStorage: createDbMethod(setStorage) 64 | } 65 | } 66 | } 67 | 68 | module.exports = createPostgresDb 69 | 70 | function close (db) { 71 | db.end() 72 | } 73 | 74 | function pushToQueue (db, data) { 75 | return db 76 | .query( 77 | `INSERT INTO jobs (id, url, meta, created_at) VALUES($1, $2, $3, $4)`, 78 | [data.id, data.url, data.meta, data.created_at] 79 | ) 80 | .then(function() { 81 | return data 82 | }) 83 | } 84 | 85 | function getAllUnfinished (db, shouldWait, maxTries = 5) { 86 | return db.query('SELECT * FROM jobs WHERE completed_at is null').then(function (res) { 87 | var jobs = res.rows 88 | 89 | return jobs.filter(job => { 90 | var currentTries = job.generations.length 91 | 92 | if (currentTries === 0) { 93 | return true 94 | } 95 | 96 | if (currentTries < maxTries) { 97 | var lastRun = job.generations[currentTries - 1].generated_at 98 | 99 | if (_hasWaitedLongEnough(lastRun, shouldWait(job, currentTries))) { 100 | return true 101 | } 102 | } 103 | 104 | return false 105 | }) 106 | }) 107 | } 108 | 109 | function getById (db, id) { 110 | return db.query('SELECT * FROM jobs WHERE id = $1', [id]) 111 | .then(function (res) { 112 | var jobs = res.rows 113 | 114 | return jobs.length > 0 ? jobs[0] : null 115 | }) 116 | } 117 | 118 | function getList (db, failed = false, completed = false, limit) { 119 | var query = 'SELECT * FROM jobs WHERE (completed_at is null AND jsonb_array_length(generations) = 0) ' 120 | 121 | if (failed) { 122 | query += ' OR (completed_at is null AND jsonb_array_length(generations) > 0)' 123 | } 124 | 125 | if (completed) { 126 | query += ' OR (completed_at is not null)' 127 | } 128 | 129 | if (limit) { 130 | query += ' LIMIT ' + limit 131 | } 132 | 133 | return db.query(query).then(function(res) { 134 | var jobs = res.rows 135 | 136 | return jobs 137 | }) 138 | } 139 | 140 | function getNextWithoutSuccessfulPing (db, shouldWait, maxTries = 5) { 141 | return db.query('SELECT * FROM jobs WHERE completed_at is not null order by created_at').then(function(res) { 142 | var jobs = res.rows.filter(function (job) { 143 | var currentTries = job.pings.length 144 | 145 | if (currentTries === 0) { 146 | return true 147 | } 148 | 149 | if (currentTries >= maxTries) { 150 | return false 151 | } 152 | 153 | var unsuccessfulPings = job.pings.filter(ping => ping.error) 154 | 155 | // There are some successful ping(s) 156 | if (unsuccessfulPings.length !== job.pings.length) { 157 | return false 158 | } 159 | 160 | var lastTry = unsuccessfulPings[unsuccessfulPings.length - 1].sent_at 161 | if (_hasWaitedLongEnough(lastTry, shouldWait(job, currentTries))) { 162 | return true 163 | } 164 | 165 | return false 166 | }) 167 | 168 | return jobs.length > 0 ? jobs[0] : null 169 | }) 170 | } 171 | 172 | function isBusy (db) { 173 | return db.query('SELECT busy FROM worker LIMIT 1').then(function (res) { 174 | var isBusy = res.rows.length > 0 ? 
res.rows[0].busy : false
175 |     return isBusy
176 |   })
177 | }
178 | 
179 | function purge (db, failed = false, pristine = false, maxTries = 5) {
180 |   var query = 'DELETE FROM jobs WHERE (completed_at is not null)'
181 |   var params = []
182 | 
183 |   if (failed) {
184 |     query += ' OR (completed_at is null and jsonb_array_length(generations) >= $' + (params.length + 1) + ')'
185 |     params.push(maxTries)
186 |   }
187 | 
188 |   if (pristine) {
189 |     query += ' OR (completed_at is null and jsonb_array_length(generations) < $' + (params.length + 1) + ')'
190 |     params.push(maxTries)
191 |   }
192 | 
193 |   return db.query(query, params)
194 | }
195 | 
196 | function setIsBusy (db, isBusy) {
197 |   return db.query('UPDATE worker SET busy = $1', [isBusy]);
198 | }
199 | 
200 | function logGeneration (db, id, response) {
201 |   return getById(db, id).then(function (job) {
202 |     var generations = job.generations.slice(0)
203 |     generations.push(response)
204 | 
205 |     return db.query('UPDATE jobs SET generations = $1 WHERE id = $2', [JSON.stringify(generations), id])
206 |   })
207 | }
208 | 
209 | function logPing (db, id, response) {
210 |   return getById(db, id).then(function (job) {
211 |     var pings = job.pings.slice(0)
212 |     pings.push(response)
213 | 
214 |     return db.query('UPDATE jobs SET pings = $1 WHERE id = $2', [JSON.stringify(pings), id])
215 |   })
216 | }
217 | 
218 | function markAsCompleted (db, id) {
219 |   var completed_at = utils.getCurrentDateTimeAsString()
220 | 
221 |   return db.query('UPDATE jobs SET completed_at = $1 WHERE id = $2', [completed_at, id])
222 | }
223 | 
224 | function setStorage (db, id, storage) {
225 |   return db.query('UPDATE jobs SET storage = $1 WHERE id = $2', [JSON.stringify(storage), id])
226 | }
227 | 
228 | function _hasWaitedLongEnough (logTimestamp, timeToWait) {
229 |   var diff = (new Date() - new Date(logTimestamp))
230 |   return diff > timeToWait
231 | }
232 | 
233 | function destroy(db) {
234 |   return db.query(`
235 |     DROP TABLE jobs
236 |   `)
237 | }
238 | 
239 | function migrate(db) {
240 |   return db.query(`
241 |     CREATE TABLE jobs (
242 |       id character varying(255),
243 |       url text,
244 |       meta jsonb default '{}'::json,
245 |       generations jsonb default '[]'::json,
246 |       pings jsonb default '[]'::json,
247 |       storage jsonb default '{}'::json,
248 |       created_at timestamp without time zone,
249 |       completed_at timestamp without time zone default null
250 |     );
251 |     CREATE TABLE worker (
252 |       busy boolean default false
253 |     );
254 |     INSERT INTO worker (busy) VALUES(false);
255 |   `)
256 | }
257 | 
--------------------------------------------------------------------------------
/src/error.js:
--------------------------------------------------------------------------------
1 | function createErrorResponse (type) {
2 |   return {
3 |     code: errorCodes[type],
4 |     error: true,
5 |     message: errorMessages[type]
6 |   }
7 | }
8 | 
9 | function isError (response) {
10 |   return response.error && response.code
11 | }
12 | 
13 | function getErrorCode(type) {
14 |   return errorCodes[type]
15 | }
16 | 
17 | var ERROR_INVALID_TOKEN = 'ERROR_INVALID_TOKEN'
18 | var ERROR_INVALID_URL = 'ERROR_INVALID_URL'
19 | var ERROR_HTML_PDF_CHROME_ERROR = 'ERROR_HTML_PDF_CHROME_ERROR'
20 | var ERROR_META_IS_NOT_OBJECT = 'ERROR_META_IS_NOT_OBJECT'
21 | var ERROR_INVALID_JSON_RESPONSE = 'ERROR_INVALID_JSON_RESPONSE'
22 | 
23 | var errorCodes = {
24 |   [ERROR_INVALID_TOKEN]: '001',
25 |   [ERROR_INVALID_URL]: '002',
26 |   [ERROR_HTML_PDF_CHROME_ERROR]: '003',
27 |   [ERROR_META_IS_NOT_OBJECT]: '004',
28 |   [ERROR_INVALID_JSON_RESPONSE]: '005'
29 | }
30 | 
31 | var errorMessages = {
32 | 
[ERROR_INVALID_TOKEN]: 'Invalid token.', 33 | [ERROR_INVALID_URL]: 'Invalid url.', 34 | [ERROR_HTML_PDF_CHROME_ERROR]: 'html-pdf-chrome error:', 35 | [ERROR_META_IS_NOT_OBJECT]: 'Meta data is not a valid object', 36 | [ERROR_INVALID_JSON_RESPONSE]: 'Invalid JSON response' 37 | } 38 | 39 | module.exports = { 40 | createErrorResponse: createErrorResponse, 41 | isError: isError, 42 | getErrorCode: getErrorCode, 43 | ERROR_INVALID_TOKEN: ERROR_INVALID_TOKEN, 44 | ERROR_INVALID_URL: ERROR_INVALID_URL, 45 | ERROR_HTML_PDF_CHROME_ERROR: ERROR_HTML_PDF_CHROME_ERROR, 46 | ERROR_META_IS_NOT_OBJECT: ERROR_META_IS_NOT_OBJECT, 47 | ERROR_INVALID_JSON_RESPONSE: ERROR_INVALID_JSON_RESPONSE 48 | } 49 | -------------------------------------------------------------------------------- /src/pdfGenerator.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | var htmlPdf = require('html-pdf-chrome') 3 | var uuid = require('uuid') 4 | var debug = require('debug')('pdf:generator') 5 | var error = require('./error') 6 | var uuid = require('uuid') 7 | var utils = require('./utils') 8 | 9 | function createPdfGenerator(storagePath, options = {}, storagePlugins = {}) { 10 | return function createPdf (url, job) { 11 | debug('Creating PDF for url %s with options %s', url, JSON.stringify(options)) 12 | 13 | var generationId = uuid() 14 | var generated_at = utils.getCurrentDateTimeAsString() 15 | var jobId = job.id 16 | 17 | function createResponseObject() { 18 | return { 19 | id: generationId, 20 | generated_at: generated_at 21 | } 22 | } 23 | 24 | return htmlPdf 25 | .create(url, options) 26 | .then((pdf) => { 27 | var pdfPath = path.join(storagePath, 'pdf', (uuid() + '.pdf')) 28 | 29 | debug('Saving PDF to %s', pdfPath) 30 | 31 | return pdf 32 | .toFile(pdfPath) 33 | .then(function(response){ 34 | var storage = { 35 | local: pdfPath 36 | } 37 | var storagePluginPromises = [] 38 | for (var i in storagePlugins) { 39 | // Because i will change before the promise is resolved 40 | // we use a self executing function to inject the variable 41 | // into a different scope 42 | var then = (function(type) { 43 | return function (response) { 44 | return Object.assign(response, { 45 | type: type 46 | }) 47 | } 48 | })(i) 49 | 50 | storagePluginPromises.push( 51 | storagePlugins[i](pdfPath, job).then(then) 52 | ) 53 | } 54 | 55 | return Promise.all(storagePluginPromises).then(responses => { 56 | for(var i in responses) { 57 | var response = responses[i] 58 | 59 | storage[response.type] = { 60 | path: response.path, 61 | meta: response.meta || {} 62 | } 63 | } 64 | 65 | return Object.assign( 66 | createResponseObject(), 67 | { 68 | storage: storage 69 | } 70 | ) 71 | }) 72 | }) 73 | }) 74 | .catch(msg => { 75 | var response = error.createErrorResponse(error.ERROR_HTML_PDF_CHROME_ERROR) 76 | 77 | response.message += ' ' + msg + ' (job ID: ' + jobId + '. 
Generation ID: ' + generationId + ')' 78 | 79 | return Object.assign(createResponseObject(), response) 80 | }) 81 | } 82 | } 83 | 84 | module.exports = createPdfGenerator 85 | -------------------------------------------------------------------------------- /src/queue.js: -------------------------------------------------------------------------------- 1 | var uuid = require('uuid') 2 | var debug = require('debug')('pdf:db') 3 | var error = require('./error') 4 | var webhook = require('./webhook') 5 | var utils = require('./utils') 6 | 7 | function createQueue (db, options = {}) { 8 | var createQueueMethod = function (func) { 9 | return function() { 10 | var args = Array.prototype.slice.call(arguments, 0) 11 | return func.apply(func, [db].concat(args)) 12 | } 13 | } 14 | 15 | return { 16 | addToQueue: createQueueMethod(addToQueue), 17 | attemptPing: createQueueMethod(attemptPing), 18 | close: createQueueMethod(close), 19 | getById: createQueueMethod(getById), 20 | getList: createQueueMethod(getList), 21 | getNext: createQueueMethod(getNext), 22 | getAllUnfinished: createQueueMethod(getAllUnfinished), 23 | getNextWithoutSuccessfulPing: createQueueMethod(getNextWithoutSuccessfulPing), 24 | isBusy: createQueueMethod(isBusy), 25 | processJob: createQueueMethod(processJob), 26 | purge: createQueueMethod(purge), 27 | setIsBusy: createQueueMethod(setIsBusy) 28 | } 29 | } 30 | 31 | function addToQueue (db, data) { 32 | var id = uuid() 33 | var createdAt = utils.getCurrentDateTimeAsString() 34 | 35 | var defaults = { 36 | meta: {} 37 | } 38 | 39 | if (!data.url || !utils.isValidUrl(data.url)) { 40 | return error.createErrorResponse(error.ERROR_INVALID_URL) 41 | } 42 | 43 | if (data.meta && typeof data.meta !== 'object') { 44 | return error.createErrorResponse(error.ERROR_META_IS_NOT_OBJECT) 45 | } 46 | 47 | data = Object.assign(defaults, data, { 48 | id: id, 49 | created_at: createdAt, 50 | completed_at: null, 51 | generations: [], 52 | pings: [], 53 | storage: {} 54 | }) 55 | 56 | debug('Pushing job to queue with data %s', JSON.stringify(data)) 57 | 58 | return db.pushToQueue(data) 59 | } 60 | 61 | function close(db) { 62 | return db.close() 63 | } 64 | 65 | // ========= 66 | // RETRIEVAL 67 | // ========= 68 | 69 | function getList (db, failed = false, completed = false, limit) { 70 | return db.getList(failed, completed, limit) 71 | } 72 | 73 | function getById (db, id) { 74 | return db.getById(id) 75 | } 76 | 77 | function getNext (db, shouldWait, maxTries = 5) { 78 | return getAllUnfinished(db, shouldWait, maxTries).then(function (jobs) { 79 | return jobs.length > 0 ? 
jobs[0] : null; 80 | }) 81 | } 82 | 83 | function getAllUnfinished (db, shouldWait, maxTries = 5) { 84 | return db.getAllUnfinished (shouldWait, maxTries) 85 | } 86 | 87 | function getNextWithoutSuccessfulPing (db, shouldWait, maxTries = 5) { 88 | return db.getNextWithoutSuccessfulPing(shouldWait, maxTries) 89 | } 90 | 91 | function isBusy (db) { 92 | return db.isBusy() 93 | } 94 | 95 | function purge (db, failed = false, pristine = false, maxTries = 5) { 96 | return db.purge(failed, pristine, maxTries) 97 | } 98 | 99 | function setIsBusy(db, isBusy) { 100 | return db.setIsBusy(isBusy) 101 | } 102 | 103 | // ========== 104 | // PROCESSING 105 | // ========== 106 | 107 | function processJob (db, generator, job, webhookOptions) { 108 | return generator(job.url, job) 109 | .then(function (response) { 110 | return _logGeneration(db, job.id, response) 111 | .then(function (logResponse) { 112 | if (!error.isError(response)) { 113 | debug('Job %s was processed, marking job as complete.', job.id) 114 | 115 | return Promise.all([ 116 | _markAsCompleted(db, job.id), 117 | _setStorage(db, job.id, response.storage) 118 | ]).then(function () { 119 | if (!webhookOptions) { 120 | return response 121 | } 122 | 123 | // Re-fetch the job as storage has been added 124 | return getById(db, job.id).then(function (job) { 125 | // Important to return promise otherwise the npm cli process will exit early 126 | return attemptPing(db, job, webhookOptions) 127 | .then(function() { 128 | return response 129 | }) 130 | }) 131 | }) 132 | } 133 | 134 | return response 135 | }) 136 | }) 137 | } 138 | 139 | // ======= 140 | // PINGING 141 | // ======= 142 | 143 | function attemptPing (db, job, webhookOptions) { 144 | if (!(typeof webhookOptions === 'object')) { 145 | throw new Error('No webhook is configured.') 146 | } 147 | 148 | return webhook.ping(job, webhookOptions) 149 | .then(response => { 150 | return _logPing(db, job.id, response) 151 | .then(function () { 152 | return response 153 | }) 154 | }) 155 | } 156 | 157 | // =============== 158 | // PRIVATE METHODS 159 | // =============== 160 | 161 | function _logGeneration (db, id, response) { 162 | debug('Logging try for job ID %s', id) 163 | 164 | return db.logGeneration(id, response) 165 | } 166 | 167 | function _logPing (db, id, response) { 168 | debug('Logging ping for job ID %s', id) 169 | 170 | return db.logPing(id, response) 171 | } 172 | 173 | function _markAsCompleted (db, id) { 174 | var completed_at = utils.getCurrentDateTimeAsString() 175 | 176 | debug('Marking job ID %s as completed at %s', id, completed_at) 177 | 178 | return db.markAsCompleted(id) 179 | } 180 | 181 | function _setStorage (db, id, storage) { 182 | return db.setStorage(id, storage) 183 | } 184 | 185 | module.exports = createQueue 186 | -------------------------------------------------------------------------------- /src/storage/s3.js: -------------------------------------------------------------------------------- 1 | var debug = require('debug')('pdf:s3') 2 | var s3 = require('s3') 3 | var path = require('path') 4 | 5 | function createS3Storage(options = {}) { 6 | if (!options.accessKeyId) { 7 | throw new Error('S3: No access key given') 8 | } 9 | 10 | if (!options.secretAccessKey) { 11 | throw new Error('S3: No secret access key given') 12 | } 13 | 14 | if (!options.region) { 15 | throw new Error('S3: No region specified') 16 | } 17 | 18 | if (!options.bucket) { 19 | throw new Error('S3: No bucket was specified') 20 | } 21 | 22 | return function uploadToS3 (localPath, job) { 23 | 
return new Promise((resolve, reject) => { 24 | var client = s3.createClient( 25 | Object.assign(options.s3ClientOptions || {}, 26 | { 27 | s3Options: { 28 | accessKeyId: options.accessKeyId, 29 | secretAccessKey: options.secretAccessKey, 30 | region: options.region, 31 | } 32 | } 33 | ) 34 | ) 35 | 36 | var remotePath = (options.path || '') 37 | if (typeof options.path === 'function') { 38 | remotePath = options.path(localPath, job) 39 | } 40 | 41 | var pathSplitted = localPath.split('/') 42 | var fileName = pathSplitted[pathSplitted.length - 1] 43 | var fullRemotePath = path.join(remotePath, fileName) 44 | 45 | var uploadOptions = { 46 | localFile: localPath, 47 | 48 | s3Params: { 49 | Bucket: options.bucket, 50 | Key: fullRemotePath, 51 | }, 52 | } 53 | 54 | debug('Pushing job ID %s to S3 path: %s/%s', job.id, options.bucket, fileName) 55 | 56 | var uploader = client.uploadFile(uploadOptions); 57 | uploader.on('error', function(err) { 58 | reject(err) 59 | }); 60 | uploader.on('end', function(data) { 61 | resolve({ 62 | path: { 63 | bucket: uploadOptions.s3Params.Bucket, 64 | region: options.region, 65 | key: uploadOptions.s3Params.Key 66 | } 67 | }) 68 | }); 69 | }) 70 | } 71 | } 72 | 73 | module.exports = createS3Storage 74 | -------------------------------------------------------------------------------- /src/utils.js: -------------------------------------------------------------------------------- 1 | var isValidUrl = function (url) { 2 | return url.match(/((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/) 3 | } 4 | 5 | function getCurrentDateTimeAsString() { 6 | return (new Date()).toUTCString() 7 | } 8 | 9 | module.exports = { 10 | isValidUrl: isValidUrl, 11 | getCurrentDateTimeAsString: getCurrentDateTimeAsString 12 | } 13 | -------------------------------------------------------------------------------- /src/webhook.js: -------------------------------------------------------------------------------- 1 | var crypto = require('crypto') 2 | var debug = require('debug')('pdf:webhook') 3 | var fetch = require('node-fetch') 4 | var uuid = require('uuid') 5 | var error = require('./error') 6 | var utils = require('./utils') 7 | 8 | function ping (job, options) { 9 | if (!options.url || !utils.isValidUrl(options.url)) { 10 | throw new Error('Webhook is not valid url.') 11 | } 12 | 13 | if (!options.secret) { 14 | throw new Error('You need to supply a secret for your webhooks') 15 | } 16 | 17 | var requestOptions = options.requestOptions || {} 18 | 19 | var headerOptions = requestOptions.headers || {} 20 | 21 | requestOptions.method = 'POST' 22 | headerOptions['Content-Type'] = 'application/json' 23 | 24 | var bodyRaw = { 25 | id: job.id, 26 | url: job.url, 27 | meta: job.meta, 28 | storage: job.storage 29 | } 30 | var body = JSON.stringify(bodyRaw) 31 | 32 | var signature = generateSignature(body, options.secret) 33 | 34 | var requestId = uuid() 35 | var namespace = options.headerNamespace || 'X-PDF-' 36 | headerOptions[namespace + 'Transaction'] = requestId 37 | headerOptions[namespace + 'Signature'] = signature 38 | 39 | var headers = new fetch.Headers() 40 | for(var i in headerOptions) { 41 | headers.set(i, headerOptions[i]) 42 | } 43 | 44 | requestOptions.headers = headers 45 | requestOptions.body = body 46 | 47 | debug( 48 | 'Pinging job ID %s at URL %s with request options %s', 49 | job.id, 50 | options.url, 51 | JSON.stringify(requestOptions) 52 | ) 
53 | 
54 |   var sent_at = utils.getCurrentDateTimeAsString()
55 | 
56 |   function createResponse (response, error) {
57 |     var status = response.status
58 | 
59 |     return getContentBody(response).then(body => {
60 |       return {
61 |         id: requestId,
62 |         status: response.status,
63 |         method: requestOptions.method,
64 |         payload: bodyRaw,
65 |         response: body,
66 |         url: options.url,
67 |         sent_at: sent_at,
68 |         error: !response.ok
69 |       }
70 |     })
71 |   }
72 | 
73 |   return fetch(options.url, requestOptions)
74 |     .then(function (response) {
75 |       return createResponse(response, !response.ok)
76 |     })
77 |     .catch(function (response) {
78 |       return createResponse(response, true)
79 |     })
80 | }
81 | 
82 | module.exports = {
83 |   generateSignature: generateSignature,
84 |   ping: ping
85 | }
86 | 
87 | function generateSignature (payload, key) {
88 |   return crypto.createHmac('sha1', key).update(payload).digest('hex')
89 | }
90 | 
91 | function getContentBody (response) {
92 |   return new Promise(function(resolve){
93 |     var emptyCodes = [204, 205]
94 |     if (emptyCodes.indexOf(response.status) !== -1) {
95 |       return resolve({})
96 |     }
97 | 
98 |     // Happens for instance on ECONNREFUSED
99 |     if (!(response instanceof fetch.Response)) {
100 |       return resolve(response)
101 |     }
102 | 
103 |     var contentType = response.headers.get('content-type');
104 |     if (contentType.indexOf('json') === -1) {
105 |       return response.text().then(resolve)
106 |     }
107 | 
108 |     return response.text().then(text => {
109 |       if (!text) {
110 |         return resolve({});
111 |       }
112 |       try {
113 |         return resolve(JSON.parse(text))
114 |       } catch (e) {
115 |         return resolve(
116 |           Object.assign(
117 |             error.createErrorResponse(error.ERROR_INVALID_JSON_RESPONSE),
118 |             {
119 |               response: text
120 |             }
121 |           )
122 |         )
123 |       }
124 |     })
125 |   })
126 | }
127 | 
--------------------------------------------------------------------------------
/storage/db/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | 
--------------------------------------------------------------------------------
/storage/pdf/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | 
--------------------------------------------------------------------------------
/test/api.test.js:
--------------------------------------------------------------------------------
1 | var sinon = require('sinon')
2 | var request = require('supertest')
3 | var createApi = require('../src/api')
4 | var error = require('../src/error')
5 | 
6 | describe('api: POST /', function () {
7 |   var api
8 |   beforeEach(function(){
9 |     api = createApi(function(){}, {
10 |       token: '1234'
11 |     })
12 |   })
13 | 
14 |   it('should return 401 if no token is given', function(done) {
15 |     request(api)
16 |       .post('/')
17 |       .expect(401, done)
18 |   })
19 | 
20 |   it('should return 401 if an invalid token is given', function (done) {
21 |     request(api)
22 |       .post('/')
23 |       .set('Authorization', 'Bearer test')
24 |       .expect(401, done)
25 |   })
26 | 
27 |   it('should return 422 on erroneous responses', function(done) {
28 |     var queue = function () {
29 |       return {
30 |         addToQueue: function() {
31 |           return new Promise(function (resolve) {
32 |             resolve({
33 |               code: '001',
34 |               error: true
35 |             })
36 |           })
37 |         },
38 |         close: function(){}
39 |       }
40 |     }
41 |     var api = createApi(queue, {
42 |       token: '1234'
43 |     })
44 | 
45 |     request(api)
46 |       .post('/')
47 |       .set('Authorization', 'Bearer 1234')
48 |       .send({})
49 |       .expect(422, done)
50 |   })
51 | 
52 |   it('should run the queue 
with the correct params', function (done) { 53 | var meta = {id: 1} 54 | 55 | var addToQueue = sinon.stub() 56 | addToQueue.onCall(0).returns(new Promise(function (resolve) { resolve({ id: '1234' }) })) 57 | 58 | var queue = function() { 59 | return { 60 | addToQueue: addToQueue, 61 | close: function(){} 62 | } 63 | } 64 | var api = createApi(queue, { 65 | token: '1234' 66 | }) 67 | 68 | request(api) 69 | .post('/') 70 | .set('Authorization', 'Bearer 1234') 71 | .send({ url: 'https://google.com', meta: meta }) 72 | .expect(201) 73 | .end(function (err, res) { 74 | if (err) return done(err) 75 | 76 | if (!addToQueue.calledWith({ url: 'https://google.com', meta: meta })) { 77 | throw new Error('Queue was not called with correct url') 78 | } 79 | 80 | done() 81 | }) 82 | }) 83 | }) 84 | -------------------------------------------------------------------------------- /test/error.test.js: -------------------------------------------------------------------------------- 1 | var errorUtils = require('../src/error') 2 | 3 | describe('Error utils', function() { 4 | it('should correctly determine if error response', function() { 5 | var notError1 = errorUtils.isError({ code: '001' }) 6 | var notError2 = errorUtils.isError({ error: true }) 7 | var error = errorUtils.isError({ code: '001', error: true }) 8 | 9 | if (notError1 === true || notError2 === true) { 10 | throw new Error('Wrongly determined error response') 11 | } 12 | 13 | if (!error) { 14 | throw new Error('Did not determine error response') 15 | } 16 | }) 17 | }) 18 | -------------------------------------------------------------------------------- /test/pdfGenerator.test.js: -------------------------------------------------------------------------------- 1 | var sinon = require('sinon') 2 | var htmlPdf = require('html-pdf-chrome') 3 | var createGenerator = require('../src/pdfGenerator') 4 | var error = require('../src/error') 5 | 6 | describe('PDF Generator', function() { 7 | var generator 8 | var pdf 9 | var createStub 10 | beforeEach(function(){ 11 | pdf = { 12 | toFile: sinon.stub().returns(new Promise(function(resolve){ 13 | resolve() 14 | })) 15 | } 16 | createStub = sinon.stub(htmlPdf, 'create'); 17 | createStub.onCall(0).returns(new Promise((resolve) => resolve(pdf))) 18 | generator = createGenerator('storage') 19 | }) 20 | 21 | afterEach(function(){ 22 | createStub.restore() 23 | }) 24 | 25 | it('should call html-pdf-chrome with the correct options', function() { 26 | var options = {options: true} 27 | generator = createGenerator('storage', options)('url', {id: 1}) 28 | 29 | if (!createStub.calledOnce || !createStub.calledWith('url', options)) { 30 | throw new Error('Correct options not passed') 31 | } 32 | }) 33 | 34 | it('should attempt to write pdf to storage', function(done) { 35 | generator('url', {id: 1}).then(() => { 36 | if (!pdf.toFile.calledOnce || !pdf.toFile.args[0][0].match(/storage\/pdf\/(.+)\.pdf$/)) { 37 | throw new Error('PDF was not attempted to saved') 38 | } 39 | 40 | done() 41 | }) 42 | }) 43 | 44 | it('should apply all passed storage configurations', function(done) { 45 | var storage = { 46 | storage_1: function() { 47 | return new Promise((resolve) => resolve({ path: 'file_1' })) 48 | }, 49 | storage_2: function() { 50 | return new Promise((resolve) => resolve({ path: 'file_2' })) 51 | } 52 | } 53 | 54 | createGenerator('storage', {}, storage)('url', {id: 1}).then(response => { 55 | var storage = response.storage 56 | 57 | if (storage.storage_1.path !== 'file_1' || storage.storage_2.path !== 'file_2') { 58 | 
throw new Error('Storage response not properly set')
59 |       }
60 | 
61 |       done()
62 |     })
63 |   })
64 | 
65 |   it('should return error response for rejected promises', function(done) {
66 |     createStub.onCall(0).returns(new Promise((resolve, reject) => reject('error')))
67 | 
68 |     createGenerator('storage', {}, {})('url', {id: 1}).then(response => {
69 |       if (!error.isError(response)) {
70 |         throw new Error('Generator rejection did not resolve in error promise')
71 |       }
72 | 
73 |       done()
74 |     })
75 |   })
76 | })
77 | 
--------------------------------------------------------------------------------
/test/queue.test.js:
--------------------------------------------------------------------------------
1 | var assert = require('assert')
2 | var fs = require('fs')
3 | var path = require('path')
4 | var sinon = require('sinon')
5 | var baseCreateQueue = require('../src/queue')
6 | var lowDb = require('../src/db/lowdb')
7 | var error = require('../src/error')
8 | var webhook = require('../src/webhook')
9 | var merge = require('lodash.merge')
10 | 
11 | var queuePath = path.join(__dirname, 'db.json')
12 | 
13 | function getQueue() {
14 |   return JSON.parse(fs.readFileSync(queuePath, 'utf8')).queue
15 | }
16 | 
17 | function deleteQueue() {
18 |   fs.unlinkSync(queuePath)
19 | }
20 | 
21 | function createQueue(options = {}, initialValue = []) {
22 |   // Sometimes we want to create the queue in the test
23 |   if (fs.existsSync(queuePath)) {
24 |     deleteQueue()
25 |   }
26 | 
27 |   var db = lowDb(merge(options, {
28 |     initialValue: initialValue,
29 |     path: path.join(__dirname, 'db.json')
30 |   }))()
31 |   return baseCreateQueue(db)
32 | }
33 | 
34 | var i = 0
35 | function createJob(completed = false, generationTries = 0, pingTries = 0) {
36 |   i++
37 | 
38 |   var generations = []
39 |   for(var k = 0; k < generationTries; k++) {
40 |     generations.push({ id: 'xxx' })
41 |   }
42 |   var pings = []
43 |   for(var k = 0; k < pingTries; k++) {
44 |     pings.push({ id: 'xxx' })
45 |   }
46 | 
47 |   return {
48 |     id: i,
49 |     completed_at: (completed ? 
'2017-01-01' : null), 50 | generations: generations, 51 | pings: pings 52 | } 53 | } 54 | 55 | describe('queue : retrieval', function() { 56 | var queue 57 | beforeEach(function() { 58 | i = 0 59 | queue = createQueue() 60 | }) 61 | 62 | afterEach(function(){ 63 | deleteQueue() 64 | }) 65 | 66 | it('should create a default structure', function() { 67 | var queue = getQueue() 68 | assert(Array.isArray(queue)) 69 | }) 70 | 71 | it('should create error when passing invalid url', function() { 72 | var response = queue.addToQueue({ 73 | url: '$#$#@%@#' 74 | }) 75 | 76 | assert(response.error) 77 | assert.equal(response.code, error.getErrorCode(error.ERROR_INVALID_URL)) 78 | }) 79 | 80 | it('should create error when passing invalid meta', function() { 81 | var response = queue.addToQueue({ 82 | meta: 'not-object', 83 | url: 'http://localhost' 84 | }) 85 | 86 | assert(response.error) 87 | assert.equal(response.code, error.getErrorCode(error.ERROR_META_IS_NOT_OBJECT)) 88 | }) 89 | 90 | it('should save jobs to the queue', function() { 91 | queue.addToQueue({ 92 | meta: { 93 | hello: true 94 | }, 95 | url: 'http://localhost' 96 | }) 97 | 98 | var job = getQueue()[0] 99 | 100 | assert.equal(job.meta.hello, true) 101 | assert.equal(job.url, 'http://localhost') 102 | }) 103 | 104 | it('should return failed jobs when failed flag is passed', function(){ 105 | queue = createQueue({}, [ 106 | createJob(false, 2), 107 | createJob(true, 1), 108 | createJob(false, 0) // has not been run yet 109 | ]) 110 | 111 | return queue.getList(true).then(function(list) { 112 | assert.equal(list.length, 2) 113 | assert.equal(list[0].id, 1) 114 | }) 115 | }) 116 | 117 | it('should return completed jobs', function() { 118 | queue = createQueue({}, [ 119 | createJob(true), 120 | createJob(true), 121 | createJob(false, 1) 122 | ]) 123 | 124 | return queue.getList(false, true).then(function (list) { 125 | assert.equal(list.length, 2) 126 | assert.equal(list[1].id, 2) 127 | }) 128 | }) 129 | 130 | it('should return new jobs', function() { 131 | queue = createQueue({}, [ 132 | createJob(true), 133 | createJob(true), 134 | createJob(false, 1), // failed 135 | createJob(false, 0) // new 136 | ]) 137 | 138 | return queue.getList(false, false).then(function (list) { 139 | assert.equal(list.length, 1) 140 | assert.equal(list[0].id, 4) 141 | }) 142 | }) 143 | 144 | it('should limit', function() { 145 | var jobs = [] 146 | for(var i = 0; i <= 20; i++) { 147 | jobs.push(createJob(false)) 148 | } 149 | 150 | queue = createQueue({}, jobs) 151 | 152 | return queue.getList(false, false, 10).then(function (list) { 153 | assert.equal(list.length, 10) 154 | }) 155 | }) 156 | 157 | it('should return the correct job by id', function() { 158 | queue = createQueue({}, [ 159 | createJob(true), 160 | Object.assign(createJob(true), { meta: { correct: true } }), 161 | createJob(true) 162 | ]) 163 | 164 | return queue.getById(2).then(function (job) { 165 | assert.equal(job.meta.correct, true) 166 | }) 167 | }) 168 | 169 | it('should return the next job if no tries were found', function() { 170 | queue = createQueue({}, [ 171 | createJob(true), 172 | createJob(false, 5), 173 | createJob(false), 174 | createJob(true) 175 | ]) 176 | 177 | return queue.getNext(function(){}, 5).then(function (job) { 178 | assert.equal(job.id, 3) 179 | }) 180 | }) 181 | 182 | it('should return the next job that is within decay schedule', function() { 183 | var dateOne = inFiveMinutes() 184 | var dateTwo = fiveMinutesAgo() 185 | 186 | var jobWithManyGenerations = [] 187 | 
for(var k = 0; k < 10; k++) { 188 | jobWithManyGenerations.push({ id: k, generated_at: dateTwo }) 189 | } 190 | 191 | queue = createQueue({}, [ 192 | createJob(true), 193 | Object.assign(createJob(false), { generations: jobWithManyGenerations }), // should skip this due to generations 194 | Object.assign(createJob(false), { generations: [{id: 1, generated_at: dateOne }] }), // should skip due to decay 195 | Object.assign(createJob(false), { generations: [{id: 1, generated_at: dateTwo }] }) 196 | ]) 197 | 198 | return queue.getNext(function(){ return 1000 * 60 * 4 }, 5).then(function (job) { 199 | assert.equal(job.id, 4) 200 | }) 201 | }) 202 | 203 | it('should get next with no pings', function() { 204 | var jobWithManyPings = [] 205 | for(var k = 0; k < 10; k++) { 206 | jobWithManyPings.push({ id: k, sent_at: fiveMinutesAgo(), error: true }) 207 | } 208 | 209 | queue = createQueue({}, [ 210 | Object.assign(createJob(true, 1, 5), {pings: jobWithManyPings}), // exceeds limit 211 | createJob(false, 1, 0), // should skip since it is not completed 212 | Object.assign(createJob(true, 1), {pings: [{id:55, sent_at: fiveMinutesAgo(), error: true }]}) 213 | ]) 214 | 215 | return queue.getNextWithoutSuccessfulPing(function(){ return 1000 * 60 * 4 }, 5).then(function (job) { 216 | assert.equal(job.id, 3) 217 | }) 218 | }) 219 | 220 | it('should get next ping that is within decay schedule', function() { 221 | var dateOne = fiveMinutesAgo() 222 | var dateTwo = inFiveMinutes() 223 | 224 | var jobWithManyPings = [] 225 | for(var k = 0; k < 10; k++) { 226 | jobWithManyPings.push({ id: k, sent_at: dateTwo, error: true }) 227 | } 228 | 229 | queue = createQueue({}, [ 230 | // not within decay 231 | Object.assign(createJob(true), { pings: [{ id: 1, error: true, sent_at: dateTwo }, { id: 2, error: true, sent_at: dateTwo }] }), 232 | // too many pings 233 | Object.assign(createJob(true), { pings: jobWithManyPings }), 234 | // next 235 | Object.assign(createJob(true), { pings: [{ id: 4, error: true, sent_at: dateOne }] }), 236 | // after previous 237 | Object.assign(createJob(true), { pings: [{ id: 5, error: true, sent_at: dateOne }] }) 238 | ]) 239 | 240 | queue.getNextWithoutSuccessfulPing(function() { return 1000 * 60 * 4 }, 5).then(function (job) { 241 | assert.equal(job.id, 3) 242 | }) 243 | }) 244 | 245 | it('should purge queue for completed', function() { 246 | queue = createQueue({}, [ 247 | createJob(true), 248 | createJob(true), 249 | createJob(false), 250 | createJob(false, 1) 251 | ]) 252 | 253 | return queue.purge(false, false, 5).then(function() { 254 | var contents = getQueue() 255 | 256 | assert.equal(contents.length, 2) 257 | assert.equal(contents[0].id, 3) 258 | assert.equal(contents[1].id, 4) 259 | }) 260 | }) 261 | 262 | it('should purge queue for failed', function() { 263 | queue = createQueue({}, [ 264 | createJob(true), 265 | createJob(true), 266 | createJob(false), 267 | createJob(false, 1), 268 | createJob(false, 6) 269 | ]) 270 | 271 | return queue.purge(true, false, 5).then(function () { 272 | var contents = getQueue() 273 | 274 | assert.equal(contents.length, 2) 275 | assert.equal(contents[0].id, 3) 276 | assert.equal(contents[1].id, 4) 277 | }) 278 | }) 279 | 280 | it('should purge queue for new', function() { 281 | queue = createQueue({}, [ 282 | createJob(true), 283 | createJob(true), 284 | createJob(false), 285 | createJob(false, 1), 286 | createJob(false, 6) 287 | ]) 288 | 289 | queue.purge(false, true, 5).then(function() { 290 | var contents = getQueue() 291 | 292 | 
assert.equal(contents.length, 1) 293 | assert.equal(contents[0].id, 5) 294 | }) 295 | }) 296 | }) 297 | 298 | describe('queue : processing', function() { 299 | beforeEach(function(){ 300 | i = 0 301 | queue = createQueue() 302 | }) 303 | 304 | afterEach(function(){ 305 | deleteQueue() 306 | }) 307 | 308 | it('should log generation', function(done) { 309 | var job = createJob(false) 310 | var errorGenerator = sinon.stub().returns(new Promise(resolve => resolve({ 311 | code: '001', 312 | error: true 313 | }))) 314 | var successGenerator = sinon.stub().returns(new Promise(resolve => resolve({ 315 | success: true 316 | }))) 317 | queue = createQueue({}, [ 318 | job 319 | ]) 320 | 321 | Promise.all([ 322 | queue.processJob(errorGenerator, job), 323 | queue.processJob(successGenerator, job) 324 | ]).then(function(responses) { 325 | assert(error.isError(responses[0])) 326 | assert(!error.isError(responses[1])) 327 | 328 | var dbJob = getQueue()[0] 329 | 330 | assert.equal(dbJob.generations.length, 2) 331 | assert.equal(dbJob.generations[0].code, '001') 332 | assert.equal(dbJob.generations[1].success, true) 333 | 334 | done() 335 | }) 336 | }) 337 | 338 | it('should mark as complete on success', function (done) { 339 | var job = createJob(false) 340 | queue = createQueue({}, [ 341 | job 342 | ]) 343 | 344 | var pingStub = sinon.stub(webhook, 'ping').returns(new Promise(resolve => resolve({ pinged: true }))) 345 | var generatorStub = sinon.stub().returns(new Promise(resolve => resolve({ 346 | completed: true, 347 | storage: { 348 | local: 'awesome' 349 | } 350 | }))) 351 | 352 | var webhookOptions = { url: 'http://localhost' } 353 | queue.processJob(generatorStub, job, webhookOptions).then(response => { 354 | assert.equal(response.completed, true) 355 | 356 | var dbJob = getQueue()[0] 357 | 358 | assert(dbJob.completed_at !== null) 359 | assert(dbJob.storage.local, 'awesome') 360 | assert(response.completed, true) 361 | 362 | var pingArgs = pingStub.args[0] 363 | assert.equal(pingArgs[0].id, job.id) 364 | assert.equal(pingArgs[1], webhookOptions) 365 | 366 | pingStub.restore() 367 | done() 368 | }) 369 | }) 370 | }) 371 | 372 | describe('queue : pinging', function() { 373 | beforeEach(function(){ 374 | i = 0 375 | queue = createQueue() 376 | }) 377 | 378 | afterEach(function(){ 379 | deleteQueue() 380 | }) 381 | 382 | it('should throw error if no webhook is configured', function() { 383 | var didThrow = false 384 | try { 385 | queue.attemptPing(createJob(true)) 386 | } catch (e) { 387 | if (e.toString() === 'Error: No webhook is configured.') { 388 | didThrow = true 389 | } 390 | } 391 | 392 | assert(didThrow) 393 | }) 394 | 395 | it('should attempt ping with correct parameters', function(done){ 396 | var job = createJob(true) 397 | queue = createQueue({}, [ 398 | job 399 | ]) 400 | 401 | pingStub = sinon.stub(webhook, 'ping').returns( 402 | new Promise((resolve) => resolve({ message: 'yay' })) 403 | ) 404 | 405 | var url = 'http://localhost'; 406 | queue.attemptPing(job, { 407 | url: url 408 | }).then(response => { 409 | assert.equal(response.message, 'yay') 410 | 411 | dbJob = getQueue()[0] 412 | 413 | assert.equal(dbJob.pings.length, 1) 414 | 415 | var ping = dbJob.pings[0] 416 | 417 | assert.equal(ping.message, 'yay') 418 | 419 | pingStub.restore() 420 | done() 421 | }) 422 | }) 423 | }) 424 | 425 | function inFiveMinutes() { 426 | var dateOne = new Date() 427 | dateOne.setTime(dateOne.getTime() + (1000 * 60 * 5)) // add 5 minutes 428 | return dateOne.toUTCString() 429 | } 430 | 431 | function 
fiveMinutesAgo() { 432 | var dateOne = new Date() 433 | dateOne.setTime(dateOne.getTime() - (1000 * 60 * 5)) // add 5 minutes 434 | return dateOne.toUTCString() 435 | } 436 | -------------------------------------------------------------------------------- /test/storage/s3.test.js: -------------------------------------------------------------------------------- 1 | var sinon = require('sinon') 2 | var s3 = require('s3') 3 | var createS3Storage = require('../../src/storage/s3') 4 | 5 | var job = { 6 | id: 1 7 | } 8 | 9 | describe('storage:s3', function() { 10 | var createClientStub, uploadFileStub, onSpy 11 | beforeEach(function(){ 12 | onSpy = sinon.stub().callsFake(function(type, func) { 13 | if (type === 'end') { 14 | func({}) 15 | } 16 | }) 17 | uploadFileStub = sinon.stub().returns({ 18 | on: onSpy 19 | }) 20 | createClientStub = sinon.stub(s3, 'createClient').returns({ 21 | uploadFile: uploadFileStub 22 | }) 23 | }) 24 | 25 | afterEach(function(){ 26 | createClientStub.restore() 27 | }) 28 | 29 | it('should throw when access key id is not passed', function() { 30 | var didThrow = false 31 | try { 32 | createS3Storage({}) 33 | } catch(e) { 34 | if (e.toString() === 'Error: S3: No access key given') { 35 | didThrow = true 36 | } 37 | } 38 | if (!didThrow) { 39 | throw new Error('Error was not thrown when no access key id was given') 40 | } 41 | }) 42 | 43 | it('should throw when secret access key is not passed', function() { 44 | var didThrow = false 45 | try { 46 | createS3Storage({ accessKeyId: '1234' }) 47 | } catch(e) { 48 | if (e.toString() === 'Error: S3: No secret access key given') { 49 | didThrow = true 50 | } 51 | } 52 | if (!didThrow) { 53 | throw new Error('Error was not thrown when no access key id was given') 54 | } 55 | }) 56 | 57 | it('should throw when region is not passed', function() { 58 | var didThrow = false 59 | try { 60 | createS3Storage({ accessKeyId: '1234', secretAccessKey: '1234' }) 61 | } catch(e) { 62 | if (e.toString() === 'Error: S3: No region specified') { 63 | didThrow = true 64 | } 65 | } 66 | if (!didThrow) { 67 | throw new Error('Error was not thrown when no access key id was given') 68 | } 69 | }) 70 | 71 | it('should throw when bucket is not passed', function() { 72 | var didThrow = false 73 | try { 74 | createS3Storage({ accessKeyId: '1234', secretAccessKey: '1234', region: 'us-west-1' }) 75 | } catch(e) { 76 | if (e.toString() === 'Error: S3: No bucket was specified') { 77 | didThrow = true 78 | } 79 | } 80 | if (!didThrow) { 81 | throw new Error('Error was not thrown when no access key id was given') 82 | } 83 | }) 84 | 85 | it('create client with correct settings', function(done) { 86 | createS3Storage({ 87 | accessKeyId: '1234', 88 | secretAccessKey: '4321', 89 | region: 'us-west-1', 90 | bucket: 'bucket', 91 | s3ClientOptions: { 92 | test: true 93 | } 94 | })('path', job).then(() => { 95 | var expectedOptions = { 96 | s3Options: { 97 | accessKeyId: '1234', 98 | secretAccessKey: '4321', 99 | region: 'us-west-1' 100 | }, 101 | test: true 102 | } 103 | 104 | if (!createClientStub.calledOnce || !createClientStub.calledWith(expectedOptions)) { 105 | throw new Error('Client was not created with correct options') 106 | } 107 | 108 | done() 109 | }) 110 | }) 111 | 112 | it('should attempt to upload file with correct params', function(done){ 113 | createS3Storage({ 114 | accessKeyId: '1234', 115 | secretAccessKey: '4321', 116 | region: 'us-west-1', 117 | bucket: 'bucket', 118 | path: 'remote-folder', 119 | s3ClientOptions: { 120 | test: true 121 | } 
122 | })('some/epic/path', job).then(() => { 123 | var expectedOptions = { 124 | localFile: 'some/epic/path', 125 | s3Params: { 126 | Bucket: 'bucket', 127 | Key: 'remote-folder/path' 128 | } 129 | } 130 | if (!uploadFileStub.calledOnce || !uploadFileStub.calledWith(expectedOptions)) { 131 | throw new Error('uploadFile was not called with correct options') 132 | } 133 | 134 | done() 135 | }) 136 | }) 137 | 138 | it('should call path if a function is passed', function(done) { 139 | var path = sinon.stub().returns('remote-path') 140 | createS3Storage({ 141 | accessKeyId: '1234', 142 | secretAccessKey: '4321', 143 | region: 'us-west-1', 144 | bucket: 'bucket', 145 | path: path, 146 | s3ClientOptions: { 147 | test: true 148 | } 149 | })('path', job).then(() => { 150 | if (!path.calledOnce || !path.calledWith('path', job)) { 151 | throw new Error('Path function was not called') 152 | } 153 | 154 | done() 155 | }) 156 | }) 157 | }) 158 | -------------------------------------------------------------------------------- /test/webhook.test.js: -------------------------------------------------------------------------------- 1 | var sinon = require('sinon') 2 | var proxyquire = require('proxyquire') 3 | var fetch = require('node-fetch') 4 | 5 | var job = { 6 | id: 1, 7 | url: 'http://localhost', 8 | meta: { 9 | id: 1 10 | }, 11 | storage: { 12 | local: 'something.pdf' 13 | } 14 | } 15 | 16 | describe('webhook', function() { 17 | var options = { 18 | secret: '1234', 19 | url: 'http://localhost/hook' 20 | } 21 | var fetchStub, promise, webhook 22 | 23 | beforeEach(function() { 24 | promise = new Promise(resolve => resolve({})) 25 | fetchStub = sinon.stub().returns(promise) 26 | webhook = proxyquire('../src/webhook', { 27 | 'node-fetch': fetchStub 28 | }) 29 | }) 30 | 31 | it('should throw error if no valid url is given', function() { 32 | var didThrow = false 33 | try { 34 | webhook.ping({}, { url: 'hello' }) 35 | } catch (e) { 36 | didThrow = true 37 | } 38 | 39 | if (!didThrow) { 40 | throw new Error('Did not throw on invalid URL') 41 | } 42 | }) 43 | 44 | it('should throw if no secret is given', function() { 45 | var didThrow = false 46 | try { 47 | webhook.ping({}, { url: 'http://localhost' }) 48 | } catch (e) { 49 | didThrow = true 50 | } 51 | 52 | if (!didThrow) { 53 | throw new Error('Did not throw on no secret') 54 | } 55 | }) 56 | 57 | it('should add passed request options to the request', function() { 58 | options.headerNamespace = 'X-Tests-' 59 | options.requestOptions = { 60 | headers: { 61 | 'X-Something': 'hello' 62 | }, 63 | method: 'GET' 64 | } 65 | 66 | webhook.ping(job, options) 67 | 68 | var fetchOptions = fetchStub.args[0][1] 69 | var headers = fetchOptions.headers.raw() 70 | 71 | if ( 72 | headers['content-type'][0] !== 'application/json' || 73 | !headers['x-tests-transaction'][0] || 74 | !headers['x-tests-signature'][0] || 75 | headers['x-something'][0] !== 'hello' 76 | ) { 77 | console.log(headers) 78 | throw new Error('Headers were not set correctly.') 79 | } 80 | 81 | if (fetchOptions.method !== 'POST') { 82 | throw new Error('Mehod was not POST.') 83 | } 84 | 85 | if (fetchOptions.body !== JSON.stringify(job)) { 86 | throw new Error('Body was not correct.') 87 | } 88 | }) 89 | 90 | it('should return empty response for 204 and 205', function(done) { 91 | var json = sinon.spy() 92 | fetchStub.returns( 93 | new Promise(function(resolve) { 94 | return resolve({ 95 | json: json, 96 | status: 204 97 | }) 98 | }) 99 | ) 100 | 101 | webhook.ping(job, options).then(function(response) { 
102 | if (!json.notCalled) { 103 | throw new Error('json was called') 104 | } 105 | 106 | done() 107 | }) 108 | }) 109 | 110 | it('should be marked as error on bad response', function(done) { 111 | fetchStub.returns( 112 | new Promise(function(resolve) { 113 | return resolve({ 114 | status: 422 115 | }) 116 | }) 117 | ) 118 | 119 | webhook.ping(job, options).then(function(response) { 120 | if (!response.error) { 121 | throw new Error('it was not marked as error') 122 | } 123 | 124 | done() 125 | }) 126 | }) 127 | 128 | it('should return proper response on success', function (done) { 129 | fetchStub.returns( 130 | new Promise(function (resolve) { 131 | resolve(new fetch.Response(JSON.stringify('response'), { 132 | status: 200, 133 | headers: { 134 | 'content-type': 'application/json' 135 | } 136 | })) 137 | }) 138 | ) 139 | 140 | webhook.ping(job, options).then(function (response) { 141 | if (response.id !== fetchStub.args[0][1].headers.raw()['x-tests-transaction'][0] || 142 | response.method !== 'POST' || 143 | response.response !== 'response' || 144 | response.status !== 200) { 145 | console.log(response) 146 | throw new Error('Invalid response') 147 | } 148 | 149 | done() 150 | }) 151 | }) 152 | 153 | it('should generate proper HMAC signature', function() { 154 | var key = '12345' 155 | var body = 'awesome pdf generator' 156 | var signature = webhook.generateSignature(body, key) 157 | 158 | if (signature !== '6ff42a71ad26f83b76ea41defa22fb520716ddfb') { 159 | throw new Error('Generated signature was not correct') 160 | } 161 | }) 162 | }) 163 | --------------------------------------------------------------------------------