├── .gitignore ├── README.md ├── data ├── f1040ezt.pdf ├── xfa_1040.pdf ├── xfa_1040a.pdf └── xfa_1040ez.pdf ├── index.js ├── lib ├── service.js ├── svccontext.js └── svcresponse.js ├── license.txt └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | 10 | pids 11 | logs 12 | results 13 | 14 | npm-debug.log 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | p2jsvc 2 | ====== 3 | 4 | A RESTful web service adaptor for [pdf2json](https://github.com/modesty/pdf2json), built with restify and nodejs. 5 | 6 | ##Installation 7 | 8 | 9 | git clone https://github.com/modesty/p2jsvc 10 | cd p2jsvc 11 | npm install 12 | 13 | ##Start the Server 14 | 15 | cd p2jsvc 16 | node index 17 | 18 | You should see '[time_stamp] - PDFFORMServer1 listening at http://0.0.0.0:8001' in the terminal window. 
19 | 20 | ##Service Status Check 21 | 22 | curl -isv http://0.0.0.0:8001/p2jsvc/status 23 | 24 | Response should include something like this: 25 | 26 | {"status":{"code":200,"message":"OK","fieldName":"PDFFORMServer1"}} 27 | 28 | 29 | ##Test with GET 30 | 31 | curl -isv http://0.0.0.0:8001/p2jsvc/data/xfa_1040ez 32 | curl -isv http://0.0.0.0:8001/p2jsvc/data/xfa_1040a 33 | curl -isv http://0.0.0.0:8001/p2jsvc/data/xfa_1040 34 | 35 | ##Test with POST 36 | 37 | curl -isv -H "Content-Type: application/json" -X POST -d '{"folderName":"data", "pdfId":"xfa_1040ez"}' http://0.0.0.0:8001/p2jsvc 38 | curl -isv -H "Content-Type: application/json" -X POST -d '{"folderName":"data", "pdfId":"xfa_1040a"}' http://0.0.0.0:8001/p2jsvc 39 | curl -isv -H "Content-Type: application/json" -X POST -d '{"folderName":"data", "pdfId":"xfa_1040"}' http://0.0.0.0:8001/p2jsvc 40 | 41 | ##Concurrency Benchmark Test 42 | 43 | ab -n 10 -c 10 http://0.0.0.0:8001/p2jsvc/data/xfa_1040ez 44 | ab -n 10 -c 10 http://0.0.0.0:8001/p2jsvc/data/xfa_1040a 45 | ab -n 10 -c 10 http://0.0.0.0:8001/p2jsvc/data/xfa_1040 46 | 47 | ##More Info 48 | 49 | [Restful Web Service for PDF2JSON](http://www.codeproject.com/Articles/573297/Restful-Web-Service-for-PDF2JSON) 50 | -------------------------------------------------------------------------------- /data/f1040ezt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modesty/p2jsvc/a73150e52a7187f31cef50efcf8466f1ad5727e9/data/f1040ezt.pdf -------------------------------------------------------------------------------- /data/xfa_1040.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modesty/p2jsvc/a73150e52a7187f31cef50efcf8466f1ad5727e9/data/xfa_1040.pdf -------------------------------------------------------------------------------- /data/xfa_1040a.pdf: 
/*
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modesty/p2jsvc/a73150e52a7187f31cef50efcf8466f1ad5727e9/data/xfa_1040a.pdf

--------------------------------------------------------------------------------
/data/xfa_1040ez.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modesty/p2jsvc/a73150e52a7187f31cef50efcf8466f1ad5727e9/data/xfa_1040ez.pdf

--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
*/
// Entry point: loads the service singleton exported by lib/service.js and
// starts it on its default port.
//
//testing with GET:
//curl -isv http://0.0.0.0:8001/p2jsvc/data/xfa_1040ez
//curl -isv http://0.0.0.0:8001/p2jsvc/data/xfa_1040a
//curl -isv http://0.0.0.0:8001/p2jsvc/data/xfa_1040

//testing with POST
//curl -isv -H "Content-Type: application/json" -X POST -d '{"folderName":"data", "pdfId":"xfa_1040ez"}' http://0.0.0.0:8001/p2jsvc
//curl -isv -H "Content-Type: application/json" -X POST -d '{"folderName":"data", "pdfId":"xfa_1040a"}' http://0.0.0.0:8001/p2jsvc
//curl -isv -H "Content-Type: application/json" -X POST -d '{"folderName":"data", "pdfId":"xfa_1040"}' http://0.0.0.0:8001/p2jsvc

'use strict';
var service = require("./lib/service");
service.start();

/*
--------------------------------------------------------------------------------
/lib/service.js:
--------------------------------------------------------------------------------
*/
'use strict';

var nodeUtil = require("util"),
    restify = require('restify'),
    _ = require('underscore'),
    SvcResponse = require('./svcresponse'),
    SvcContext = require("./svccontext"),
    PFParser = require("pdf2json");

// PDFFORMService wires a restify HTTP server to pdf2json: each request names
// a folder and a PDF id, the matching "<folder>/<id>.pdf" file is parsed and
// the parser output is returned as JSON wrapped in a SvcResponse.
var PDFFORMService = (function () {
    // private static
    var _nextId = 1;
    var _name = 'PDFFORMServer';
    // Prefix prepended to every PDF path; empty means paths are resolved
    // relative to the process working directory.
    var _pdfPathBase = "";
    // Port used when start() is called without an explicit port.
    var _defaultPort = 8001;

    // constructor
    var cls = function () {
        // private, only accessible within this constructor
        var _id = _nextId++;
        var _version = "0.0.1";

        // public (every instance will have their own copy of these methods, needs to be lightweight)
        this.get_id = function() { return _id; };
        this.get_name = function() { return _name + _id; };
        this.get_version = function() {return _version; };
    };

    // public static: name the next instance to be created would get
    cls.get_nextId = function () {
        return _name + _nextId;
    };

    //private

    // Returns true only when `segment` is a non-empty string that names a
    // single path component: no "/", "\" or NUL characters and not "." / "..".
    // Request-supplied folder and file names must pass this check before they
    // are concatenated into a file-system path, otherwise a caller could
    // escape the intended folder (e.g. folderName "..", or an absolute path).
    var _isSafePathSegment = function(segment) {
        return typeof segment === "string" &&
            segment.length > 0 &&
            segment !== "." &&
            segment !== ".." &&
            !/[\/\\\0]/.test(segment);
    };

    // Handler for the parser's "pdfParser_dataReady" event (bound to the
    // service instance). Reads pdfFilePath, data and context off the event
    // payload, answers the pending request with a 200 SvcResponse carrying the
    // parsed form as `formImage`, then releases the payload.
    // NOTE(review): the payload is presumably the PFParser instance itself
    // (it exposes context/destroy) -- confirm against the installed pdf2json.
    var _onPFBinDataReady = function(evtData) {
        nodeUtil.log(this.get_name() + " completed response.");
        var resData = new SvcResponse(200, "OK", evtData.pdfFilePath, "FormImage JSON");
        resData.formImage = evtData.data;
        evtData.context.completeResponse(resData);

        evtData.destroy();
    };

    // Handler for the parser's "pdfParser_dataError" event (bound to the
    // service instance). Logs the serialized error and answers the pending
    // request with a SvcResponse whose status code is 500. The HTTP status
    // itself is still chosen by SvcContext.completeResponse.
    var _onPFBinDataError = function(evtData){
        var errText = JSON.stringify(evtData.data);
        nodeUtil.log(this.get_name() + " 500 Error: " + errText);
        evtData.context.completeResponse(new SvcResponse(500, errText));

        evtData.destroy();
    };

    var _customizeHeaders = function(res) {
        // Restify currently has a bug which doesn't allow you to set default headers.
        // These headers comply with CORS and allow us to serve our response to any origin.
        res.header("Access-Control-Allow-Origin", "*");
        res.header("Access-Control-Allow-Headers", "X-Requested-With");
        res.header("Cache-Control", "no-cache, must-revalidate");
    };

    // public (every instance will share the same method, but has no access to private fields defined in constructor)

    // Creates the restify server, registers the routes and starts listening.
    //   port (optional): TCP port to listen on; defaults to 8001 when omitted,
    //   so existing `service.start()` callers keep their behavior.
    cls.prototype.start = function (port) {
        var self = this;
        var listenPort = (port === undefined || port === null) ? _defaultPort : port;

        //private function within this public method

        // Shared GET/POST handler: validates the requested folder/file names,
        // then hands the request context to a new PFParser and starts loading
        // the PDF. The response is completed later by the event handlers above.
        var _gfilter = function(svcContext) {
            var req = svcContext.req;
            var params = req.params || {};
            var folderName = params.folderName;
            var pdfId = params.pdfId;
            nodeUtil.log(self.get_name() + " received request:" + req.method + ":" + folderName + "/" + pdfId);

            // Set the CORS/cache headers first so that rejections carry them too.
            _customizeHeaders(svcContext.res);

            // Both values are untrusted input that ends up in a file path:
            // refuse anything that is missing or is not a plain path segment.
            if (!_isSafePathSegment(folderName) || !_isSafePathSegment(pdfId)) {
                nodeUtil.log(self.get_name() + " 400 Error: invalid folderName or pdfId");
                svcContext.completeResponse(new SvcResponse(400, "Bad Request", "folderName/pdfId", "missing or invalid path segment"));
                svcContext.destroy();
                return;
            }

            var pdfParser = new PFParser(svcContext);

            pdfParser.on("pdfParser_dataReady", _.bind(_onPFBinDataReady, self));
            pdfParser.on("pdfParser_dataError", _.bind(_onPFBinDataError, self));

            pdfParser.loadPDF(_pdfPathBase + folderName + "/" + pdfId + ".pdf");
        };

        var server = restify.createServer({
            name: self.get_name(),
            version: self.get_version()
        });

        server.use(restify.acceptParser(server.acceptable));
        server.use(restify.authorizationParser());
        server.use(restify.dateParser());
        server.use(restify.queryParser());
        server.use(restify.bodyParser());
        server.use(restify.jsonp());
        server.use(restify.gzipResponse());
        server.pre(restify.pre.userAgentConnection());

        // GET: folder and PDF id come from the URL path.
        server.get('/p2jsvc/:folderName/:pdfId', function(req, res, next) {
            _gfilter(new SvcContext(req, res, next));
        });

        // POST: folder and PDF id are read from req.params as well.
        // NOTE(review): this relies on the body parser copying JSON body fields
        // into req.params -- confirm for the restify version in use.
        server.post('/p2jsvc', function(req, res, next) {
            _gfilter(new SvcContext(req, res, next));
        });

        // Health check: reports the server name and version.
        server.get('/p2jsvc/status', function(req, res, next) {
            var jsObj = new SvcResponse(200, "OK", server.name, server.version);
            res.send(200, jsObj);
            return next();
        });

        server.listen(listenPort, function() {
            nodeUtil.log(nodeUtil.format('%s listening at %s', server.name, server.url));
        });
    };

    return cls;
})();

// The module exports a single, ready-to-start service instance.
module.exports = new PDFFORMService();

/*
--------------------------------------------------------------------------------
/lib/svccontext.js:
--------------------------------------------------------------------------------
*/
'use strict';
// SvcContext bundles the request, response and `next` callback of one
// in-flight request so they can travel together through the asynchronous
// PDF parsing step.
var SvcContext = (function () {
    // constructor
    var cls = function (req, res, next) {
        // public, this instance copies
        this.req = req;
        this.res = res;
        this.next = next;
    };

    // Sends `jsObj` as the response body and continues the handler chain.
    // The HTTP status is always 200; the service-level outcome is carried in
    // the body (jsObj.status.code as built by SvcResponse).
    // NOTE(review): presumably kept at 200 so JSONP clients can still read
    // error payloads -- confirm before changing.
    cls.prototype.completeResponse = function(jsObj) {
        this.res.send(200, jsObj);
        this.next();
    };

    // Drops the references held by this context. The context must not be
    // used for completeResponse() afterwards.
    cls.prototype.destroy = function() {
        this.req = null;
        this.res = null;
        this.next = null;
    };

    return cls;
})();

module.exports = SvcContext;

/*
--------------------------------------------------------------------------------
/lib/svcresponse.js:
--------------------------------------------------------------------------------
*/
'use strict';
// SvcResponse is the JSON envelope returned by the service: a `status` object
// with code, message and an optional fieldName/fieldValue pair. Callers may
// attach further properties (e.g. `formImage`) to the instance.
var SvcResponse = (function () {
    // private static: default messages used when no message is supplied
    var _svcStatusMsg = {200: "OK", 400: "Bad Request", 404: "Not Found", 500: "Internal Server Error"};

    // constructor
    //   code: service status code
    //   message: optional; falls back to the default message for `code`
    //   fieldName / fieldValue: optional extra detail about the response
    var cls = function (code, message, fieldName, fieldValue) {
        // public, this instance copies
        this.status = {
            code: code,
            message: message || _svcStatusMsg[code],

            fieldName: fieldName,
            fieldValue: fieldValue
        };
    };

    // Overwrites all four status fields; `message` falls back to the default
    // message for `code` exactly as in the constructor.
    cls.prototype.setStatus = function(code, message, fieldName, fieldValue) {
        this.status.code = code;
        this.status.message = message || _svcStatusMsg[code];
        this.status.fieldName = fieldName;
        this.status.fieldValue = fieldValue;
    };

    // Releases the status object; setStatus() must not be called afterwards.
    cls.prototype.destroy = function() {
        this.status = null;
    };

    return cls;
})();

module.exports = SvcResponse;

/*
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | Copyright 2012 Modesty Zhang, https://github.com/modesty/
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished
*/
to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "name": "p2jsvc", 4 | "_id": "p2jsvc@0.1.2", 5 | "version": "0.1.2", 6 | "description": "A RESTful web service adaptor for pdf2json, built with restify and nodejs.", 7 | "keywords": [ 8 | "pdf", 9 | "pdf parser", 10 | "convert pdf to json", 11 | "server side PDF parser", 12 | "PDF binary to text in web service", 13 | "RESTful service for pdf2json" 14 | ], 15 | "author": { 16 | "name": "Modesty Zhang", 17 | "email": "modestyz@hotmail.com", 18 | "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" 19 | }, 20 | "homepage": "https://github.com/modesty/p2jsvc", 21 | "repository": { 22 | "type": "git", 23 | "url": "git://github.com/modesty/p2jsvc.git" 24 | }, 25 | "main": "./index.js", 26 | "scripts": { 27 | "preinstall": "sudo npm i -g restify" 28 | }, 29 | "engines": { 30 | "node": ">=0.8" 31 | }, 32 | "dependencies": { 33 | "pdf2json" : ">=0.2.0", 34 | "underscore": ">=1.4.2" 35 | }, 36 | "devDependencies": {}, 37 | "maintainers": [ 38 | { 39 | "name": "Modesty Zhang", 40 | "email": "modestyz@hotmail.com", 41 | "url": 
"http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" 42 | } 43 | ], 44 | "contributors": [ 45 | ], 46 | "bugs": { 47 | "email": "modestyz@hotmail.com", 48 | "url": "http://github.com/modesty/p2jsvc/issues" 49 | }, 50 | "licenses": [ 51 | { 52 | "type": "LGPL", 53 | "url": "http://www.gnu.org/licenses/lgpl.html" 54 | } 55 | ], 56 | "readme": "https://github.com/modesty/p2jsvc/blob/master/readme.md", 57 | "_from": "p2jsvc" 58 | } 59 | --------------------------------------------------------------------------------