├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── _config.yml ├── bin └── ocr ├── examples ├── cdnurl.ocr.js ├── localurl.ocr.js ├── readme.md └── test01.jpg ├── index.js ├── lib └── ocr.js ├── package.json └── test ├── ocr.test.js └── test01.jpg /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # Compiled binary addons (http://nodejs.org/api/addons.html) 20 | build/Release 21 | 22 | # Dependency directory 23 | # Commenting this out is preferred by some people, see 24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 25 | node_modules 26 | 27 | # Users Environment Variables 28 | .lock-wscript 29 | # mac 30 | .Ds_Store 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - 4.0 4 | - 5.0 5 | - stable 6 | after_script: 7 | - make coveralls 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Eury 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TIMEOUT = 15000 2 | test: 3 | mocha -t $(TIMEOUT) 4 | 5 | cov test-cov: 6 | ./node_modules/.bin/istanbul cover _mocha -- -t $(TIMEOUT) 7 | 8 | coveralls: 9 | ./node_modules/.bin/istanbul cover ./node_modules/mocha/bin/_mocha --report lcovonly -- -R spec -t $(TIMEOUT) && cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./coverage 10 | 11 | .PHONY: test cov test-cov 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## [Baidu-OCR-API](https://bce.baidu.com/doc/OCR/ProductDescription.html#.E4.BB.8B.E7.BB.8D) For Nodejs 2 | 3 | [![NPM version][npm-image]][npm-url] [![Build Status][travis-image]][travis-url] [![Coveralls Status][coveralls-image]][coveralls-url] 4 | 5 | [![Downloads][downloads-image]][npm-url] 6 | 7 | ### Advantages 8 | - 支持本地图片 外部图片(速度取决图片大小) 9 | - 识别简单的验证码 10 | - 平时相机拍摄书本的文字,基本能达到 95% 11 | - 支持 shell/nodejs 全局安装可在控制台直接运行 12 | - bluebird/promise 接口操作灵活 13 | 14 | ### Install & Usage 15 | 16 | #### 1. Global 17 | 18 | ```sh 19 | npm install baidu-ocr-api -g 20 | 21 | ocr --help 22 | 23 | # 远程图片 24 | ocr http://7pun4e.com1.z0.glb.clouddn.com/test.jpg 25 | 26 | # 本地图片 27 | ocr ./test.jpg 28 | 29 | ``` 30 | ##### 效果图 31 | 32 | 33 | 34 | ![](https://raw.githubusercontent.com/netpi/baidu-ocr-api/master/examples/test01.jpg) 35 | 36 | 37 | 38 | 的早期世界观是建立在《魔兽争霸3:冰封王座》的基础上的,因此与现在暴雪公司的《魔兽世界》的背景设定有一定的联系,但由于版本更迭又略有不同。整个地图中地形名费伍德森林,费伍德森林是网络游戏《魔兽世界》中的游戏地图,位于卡利姆多境内的一片森林。这片由森林和草场构成的繁荣动荡的土地曾经由卡尔多雷掌管,并曾经处于半神塞纳留斯的保护下。燃烧军团的铁蹄践踏了这片土地,没有被毁灭的树木和生物则被恶魔的暴行永远的诅咒着 39 | 40 | 41 | #### 2. Nodejs 42 | ```sh 43 | npm install baidu-ocr-api --save 44 | 45 | ``` 46 | FYI [examples](https://github.com/netpi/baidu-ocr-api/tree/master/examples) 47 | 48 | ```js 49 | /** 50 | 51 | 登陆 百度bcs控制台中心 申请access key 52 | https://console.bce.baidu.com/iam/#/iam/accesslist 53 | 54 | **/ 55 | var ak = 'your ak'; 56 | var sk = 'your sk'; 57 | var ocr = require('baidu-ocr-api').create(ak,sk); 58 | // 外部图片 59 | ocr.scan({ 60 | url:'http://7pun4e.com1.z0.glb.clouddn.com/test.jpg', // 支持本地路径 61 | type:'text', 62 | }).then(function (result) { 63 | return console.log(result) 64 | }).catch(function (err) { 65 | console.log('err', err); 66 | }) 67 | 68 | ``` 69 | 70 | ### Test 71 | ```sh 72 | make test 73 | make cov # Coverage rate 74 | ``` 75 | ### License MIT 76 | 77 | 78 | [downloads-image]: http://img.shields.io/npm/dm/baidu-ocr-api.svg 79 | 80 | [npm-url]: https://npmjs.org/package/baidu-ocr-api 81 | [npm-image]: http://img.shields.io/npm/v/baidu-ocr-api.svg 82 | 83 | [travis-url]: https://travis-ci.org/netpi/baidu-ocr-api 84 | [travis-image]: https://travis-ci.org/netpi/baidu-ocr-api.svg?branch=master 85 | 86 | [coveralls-url]: https://coveralls.io/r/netpi/baidu-ocr-api 87 | [coveralls-image]:https://coveralls.io/repos/netpi/baidu-ocr-api/badge.svg?branch=master&service=github 88 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day -------------------------------------------------------------------------------- /bin/ocr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var ak = process.env.BAIDU_AK||'b7d11214c8fc452db3de12028cf46daa'; 4 | var sk = process.env.BAIDU_SK||'64631fe987f4423bb0a117101bf90a45'; 5 | var ocr = require('../lib/ocr').create(ak,sk); 6 | var argv = require("minimist")(process.argv.slice(2)); 7 | var colors = require('colors'); 8 | String.prototype.startWith = function(compareStr){ 9 | return this.indexOf(compareStr) == 0; 10 | } 11 | var opt = {}; 12 | opt.url = argv._[0]||argv.u; 13 | opt.type = argv.type||argv.t||'text'; 14 | opt.language = argv.language||'CHE_ENG'; 15 | opt.merge = argv.m||argv.merge; 16 | 17 | if(!opt.url||argv.h||argv.help){ 18 | console.log('\n') 19 | console.log('Example usage:\n'); 20 | console.log(" ocr", "[url] [options] "); 21 | console.log(" ocr","http://7pun4e.com1.z0.glb.clouddn.com/test.jpg", "-m -l CHE_ENG ".yellow); 22 | console.log('\n') 23 | console.log("Options:\n"); 24 | 25 | console.log(" -t, --type".yellow," text line character "); 26 | console.log(" -l, --language".yellow," CHE_ENG CHE ENG"); 27 | console.log(" -m, --marge".yellow," merge result"); 28 | process.exit(0); 29 | } 30 | ocr.scan(opt).then(function (result) { 31 | console.log(result.results.words); 32 | }).catch(function (err) { 33 | console.log('err', err); 34 | }) 35 | -------------------------------------------------------------------------------- /examples/cdnurl.ocr.js: -------------------------------------------------------------------------------- 1 | /** 2 | 3 | 登陆 百度bcs控制台中心 申请access key 4 | https://console.bce.baidu.com/iam/#/iam/accesslist 5 | 6 | **/ 7 | 8 | var ak = 'b7d11214c8fc452db3de12028cf46daa'; 9 | var sk = '64631fe987f4423bb0a117101bf90a45' 10 | var ocr = require('../').create(ak,sk); 11 | // 外部图片 12 | ocr.scan({ 13 | url:'http://7pun4e.com1.z0.glb.clouddn.com/test.jpg', 14 | type:'text', 15 | }).then(function (result) { 16 | return console.log(result) 17 | }).catch(function (err) { 18 | console.log('err', err); 19 | }) 20 | -------------------------------------------------------------------------------- /examples/localurl.ocr.js: -------------------------------------------------------------------------------- 1 | /** 2 | 3 | 登陆 百度bcs控制台中心 申请access key 4 | https://console.bce.baidu.com/iam/#/iam/accesslist 5 | 6 | **/ 7 | 8 | var ak = 'b7d11214c8fc452db3de12028cf46daa'; 9 | var sk = '64631fe987f4423bb0a117101bf90a45' 10 | var ocr = require('../').create(ak,sk); 11 | 12 | // 本地图片 13 | ocr.scan({ 14 | url:__dirname+'/test01.jpg', 15 | type:'text', 16 | },function (err,result) { 17 | if(err){ 18 | return console.log(err) 19 | } 20 | console.log(result); 21 | }) 22 | -------------------------------------------------------------------------------- /examples/readme.md: -------------------------------------------------------------------------------- 1 | ### start 2 | 3 | 4 | ```js 5 | node cdnurl.ocr.js 6 | ``` 7 | -------------------------------------------------------------------------------- /examples/test01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netpi/baidu-ocr-api/f8cfa2393b46e8e72b752a229291e53a18bc53ed/examples/test01.jpg -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./lib/ocr'); 2 | -------------------------------------------------------------------------------- /lib/ocr.js: -------------------------------------------------------------------------------- 1 | var crypto = require('crypto'); 2 | var Promise = require('bluebird'); 3 | var fs = require('fs'); 4 | var request = require('request-promise'); 5 | Promise.promisifyAll(fs); 6 | 7 | var headersToSign = []; 8 | function OCR(ak,sk) { 9 | this.ak = ak; 10 | this.sk = sk; 11 | } 12 | 13 | module.exports.create = function(ak,sk) { 14 | return new OCR(ak,sk); 15 | }; 16 | OCR.prototype.scan = function scan(opt,cb) { 17 | 18 | var pathOpt = { 19 | text:'/v1/recognize/text', // 识别某张图中的所有文字 20 | line:'/v1/recognize/line', // 将结果作为单行文字去解析 21 | character:'/v1/recognize/character' // 识别某张图中的单个文字 22 | } 23 | var merge = opt.merge; 24 | if(merge === 'false'||merge ===false) 25 | { 26 | merge = false; 27 | }else{ 28 | merge = true; 29 | } 30 | // init data 31 | var type = opt.type||'text'; 32 | var path = pathOpt[type]; 33 | var url = opt.url; 34 | var language = opt.language||'CHN_ENG'; 35 | var accessKeyId = this.ak; 36 | var secretAccessKey = this.sk; 37 | var requestDate = new Date().toISOString().slice(0, -5) + 'Z'; 38 | var expire = 3600; 39 | var httpMethod = 'post'; 40 | 41 | var params = {}; // 42 | var headers = { 43 | 'host': 'ocr.bj.baidubce.com', 44 | 'x-bce-date': requestDate 45 | }; 46 | String.prototype.startWith = function(compareStr){ 47 | return this.indexOf(compareStr) == 0; 48 | } 49 | return new Promise(function (resolve,reject) { 50 | getImgBase64(url).then(function(result) { 51 | var data = { 52 | base64:result, 53 | language:language 54 | }; 55 | // get Authorization 56 | var databuffer = new Buffer(JSON.stringify(data)); 57 | headers['Content-Type'] = 'clarapplication/json'; 58 | headers['Content-Length'] = databuffer.length; 59 | 60 | var content = 'bce-auth-v1/'+ accessKeyId +'/'+ requestDate +'/' + expire; 61 | // get SigningKey 62 | var SigningKey = crypto.createHmac('sha256', secretAccessKey).update(content).digest('hex'); 63 | var CanonicalURI = path; 64 | var CanonicalQueryString = getCanonicalQueryString(params); 65 | var CanonicalHeaders = getCanonicalHeaders(headers); 66 | var CanonicalRequest = [httpMethod.toUpperCase(), CanonicalURI, CanonicalQueryString, CanonicalHeaders].join('\n'); 67 | // get Signature 68 | var Signature = crypto.createHmac('sha256', SigningKey).update(CanonicalRequest).digest('hex'); 69 | // Mosaic Authorization 70 | headers.Authorization = [content, headersToSign.join(';'), Signature].join('/'); 71 | var url = 'http://'+headers.host+path; 72 | var options = { 73 | 74 | json:data, 75 | host: headers.host, 76 | path: path+'?'+getCanonicalQueryString(params), 77 | headers: headers, 78 | method:httpMethod, 79 | encoding:'UTF-8' 80 | }; 81 | request(url,options).then(function (result) { 82 | if(!result.results){ 83 | return reject(result) 84 | } 85 | if(merge){ 86 | var words = ''; 87 | var rectangles = []; 88 | result.results.forEach(function (result) { 89 | words+= result.word; 90 | rectangles.push(result.rectangle) 91 | }) 92 | return resolve({results:{ 93 | words:words, 94 | rectangles:rectangles 95 | }}) 96 | } 97 | return resolve(result); 98 | }).catch(function (err) { 99 | reject(err); 100 | }) 101 | }).catch(function (err) { 102 | reject(err); 103 | }) 104 | }) 105 | 106 | function getCanonicalQueryString(params) { 107 | var result = []; 108 | for(var key in params) { 109 | if(key.toLowerCase() != 'authorization') { 110 | result.push(normalize(key) + '=' + normalize(params[key])); 111 | } 112 | } 113 | result.sort(); 114 | return result.join('&'); 115 | } 116 | 117 | function getCanonicalHeaders(headers) { 118 | headersToSign = ['host', 'content-md5', 'content-length', 'content-type'].concat(headersToSign); 119 | var result = []; 120 | var tempHeaderToSign = []; 121 | for(var key in headers) { 122 | var keyLower = key.toLowerCase(); 123 | var value = headers[key].toString().trim(); 124 | if(/^x-bce-/.test(keyLower) || new RegExp(keyLower).test(headersToSign)) { 125 | var temp = normalize(keyLower) + ':' + normalize(value); 126 | tempHeaderToSign.push(normalize(keyLower)); 127 | result.push(temp); 128 | } 129 | } 130 | headersToSign = tempHeaderToSign.sort(); 131 | result.sort(); 132 | return result.join('\n'); 133 | } 134 | 135 | function normalize(input, exceptSlash) { 136 | var result = ''; 137 | if(input == null) { 138 | return result; 139 | } 140 | input = input.toString(); 141 | for (var i = 0; i < input.length; i++) { 142 | var ch = input.charAt(i); 143 | if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '_' || ch == '-' || ch == '~' || ch == '.') { 144 | result += ch; 145 | } else if (ch == '/') { 146 | result += !exceptSlash ? '%2F' : ch; 147 | } else { 148 | result += '%' + new Buffer(ch).toString('hex').toUpperCase(); 149 | } 150 | } 151 | return result; 152 | } 153 | // 获取base64 154 | function getImgBase64(url) { 155 | return new Promise(function (resolve,reject) { 156 | // 外部地址 157 | if(url.startWith('http')||url.startWith('https')){ 158 | request({ 159 | method:'GET', 160 | url:url, 161 | headers:{ 162 | 'User-Agent': 'Paw/2.1 (Macintosh; OS X/10.10.5) GCDHTTPRequest', 163 | 'Referer':'http://baidu.com' 164 | }, 165 | encoding:null 166 | }).then(function (result) { 167 | resolve(result.toString('base64')); 168 | }).catch(function (err) { 169 | reject(err); 170 | }) 171 | }else{ // 本地地址 172 | fs.readFileAsync(url) 173 | .then(function (data) { 174 | resolve(data.toString('base64')) 175 | }) 176 | .catch(function (err) { 177 | reject(err); 178 | }) 179 | } 180 | }) 181 | } 182 | }; 183 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "baidu-ocr-api", 3 | "version": "2.0.5", 4 | "description": "OCR for baidu api", 5 | "main": "index.js", 6 | "directories": { 7 | "test": "test" 8 | }, 9 | "scripts": { 10 | "test": "make test" 11 | }, 12 | "bin": { 13 | "ocr": "./bin/ocr" 14 | }, 15 | "keywords": [ 16 | "ocr", 17 | "baidu", 18 | "bos" 19 | ], 20 | "author": "netpi", 21 | "license": "MIT", 22 | "dependencies": { 23 | "bluebird": "^3.4.1", 24 | "colors": "^1.1.2", 25 | "minimist": "^1.2.0", 26 | "request": "2.73.0", 27 | "request-promise": "^4.0.2" 28 | }, 29 | "devDependencies": { 30 | "coveralls": "^2.11.11", 31 | "istanbul": "^0.4.4", 32 | "mocha": "^2.5.3", 33 | "mocha-lcov-reporter": "^1.2.0", 34 | "should": "^9.0.2" 35 | }, 36 | "repository": { 37 | "type": "git", 38 | "url": "https://github.com/netpi/baidu-ocr-api.git" 39 | }, 40 | "bugs": { 41 | "url": "https://github.com/netpi/baidu-ocr-api/issues" 42 | }, 43 | "homepage": "https://github.com/netpi/baidu-ocr-api" 44 | } 45 | -------------------------------------------------------------------------------- /test/ocr.test.js: -------------------------------------------------------------------------------- 1 | var should = require('should'); 2 | var ak = 'b7d11214c8fc452db3de12028cf46daa'; 3 | var sk = '64631fe987f4423bb0a117101bf90a45'; 4 | var wrong_sk = 'wrong_sk'; 5 | var ocr = require('../').create(ak,sk); 6 | var ocr2 = require('../').create(ak,wrong_sk); 7 | 8 | describe('test/ocr.test.js',function () { 9 | describe('scan for cdn_url ',function () { 10 | it('should have result',function (done) { 11 | ocr.scan({ 12 | url:'http://7xod3k.com1.z0.glb.clouddn.com/mjdalykzuyefpzlgmlnkjizcfcuelxnu', 13 | type:'text', 14 | }).then(function (result) { 15 | result.should.be.an.instanceOf(Object); 16 | done(); 17 | }) 18 | }) 19 | }) 20 | describe('wrong_sk ',function () { 21 | it('should be catch error',function (done) { 22 | ocr2.scan({ 23 | url:'http://7xod3k.com1.z0.glb.clouddn.com/mjdalykzuyefpzlgmlnkjizcfcuelxnu', 24 | }).then(function (result) { 25 | 26 | }).catch(function (err) { 27 | err.should.be.an.instanceOf(Error); 28 | done(); 29 | }) 30 | }) 31 | }) 32 | describe('scan for cdn_url: merge==false ',function () { 33 | it('should have result',function (done) { 34 | ocr.scan({ 35 | url:'http://7xod3k.com1.z0.glb.clouddn.com/mjdalykzuyefpzlgmlnkjizcfcuelxnu', 36 | type:'text', 37 | merge:false 38 | }).then(function (result) { 39 | result.should.be.an.instanceOf(Object); 40 | done() 41 | }) 42 | }) 43 | }) 44 | describe('scan for wrong local_url ',function () { 45 | it('should have result',function (done) { 46 | ocr.scan({ 47 | url:'http://wrong_url', 48 | type:'text' 49 | }).then(function (result) { 50 | 51 | }).catch(function (err) { 52 | err.should.be.an.instanceOf(Error); 53 | done() 54 | }) 55 | }) 56 | }) 57 | describe('scan for local url ',function () { 58 | it('should return object',function (done) { 59 | ocr.scan({ 60 | url:__dirname+'/test01.jpg', 61 | type:'text' 62 | }).then(function(result) { 63 | result.should.be.an.instanceOf(Object); 64 | done() 65 | }) 66 | }) 67 | }) 68 | 69 | describe('scan for wrong local_url ',function () { 70 | it('should have result',function (done) { 71 | ocr.scan({ 72 | url:'wrong url', 73 | type:'line' 74 | }).then(function (result) { 75 | 76 | }).catch(function (err) { 77 | err.should.be.an.instanceOf(Error); 78 | done() 79 | }) 80 | }) 81 | }) 82 | }) 83 | -------------------------------------------------------------------------------- /test/test01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netpi/baidu-ocr-api/f8cfa2393b46e8e72b752a229291e53a18bc53ed/test/test01.jpg --------------------------------------------------------------------------------