├── .gitignore ├── LICENSE ├── LICENSE.zh ├── README.md ├── crawler.js ├── doc ├── extract_rule.txt ├── hbase-init.txt ├── neocrawler-architecture.pptx └── redis-init.txt ├── instance ├── 8zi │ ├── addon │ │ └── generate-url.py │ ├── backup │ │ └── data-dump.json │ ├── logs │ │ └── blank.txt │ ├── settings.json │ └── spider_extend.js ├── example │ ├── backup │ │ └── data-dump.json │ ├── logs │ │ └── blank.txt │ ├── settings.json │ └── spider_extend.js ├── proxy │ ├── addon │ │ └── check-proxy.js │ ├── backup │ │ └── ssdb-dump.json │ ├── logs │ │ └── blank.txt │ ├── settings.json │ └── spider_extend.js ├── shuttle │ ├── addon │ │ ├── check-proxy.js │ │ ├── gbk.js │ │ ├── gfw-proxy.js │ │ ├── qqwry.dat │ │ ├── qqwry.js │ │ └── shuttle.js │ ├── backup │ │ └── ssdb-dump.json │ ├── logs │ │ └── blank.txt │ ├── settings.json │ └── spider_extend.js └── wiki │ ├── backup │ └── wiki-rule-backup.json │ ├── logs │ └── blank.txt │ ├── settings.json │ └── spider_extend.js ├── lib ├── SSDB.js ├── httpRequest.js ├── jsextend.js ├── line_reader.js ├── logging.js ├── myredis.js ├── node_hbase │ ├── LICENSE.txt │ ├── README.md │ ├── index.js │ ├── lib │ │ ├── call.js │ │ ├── client.js │ │ ├── connection.js │ │ ├── data-input-stream.js │ │ ├── delete.js │ │ ├── filter-list.js │ │ ├── get.js │ │ ├── hconstants.js │ │ ├── increment.js │ │ ├── output-buffer.js │ │ ├── put.js │ │ ├── scan.js │ │ ├── utils.js │ │ └── zk-protobuf.js │ ├── package.json │ └── proto │ │ ├── AccessControl.proto │ │ ├── Admin.proto │ │ ├── Aggregate.proto │ │ ├── Authentication.proto │ │ ├── Cell.proto │ │ ├── Client.proto │ │ ├── ClusterId.proto │ │ ├── ClusterStatus.proto │ │ ├── Comparator.proto │ │ ├── ErrorHandling.proto │ │ ├── FS.proto │ │ ├── Filter.proto │ │ ├── HBase.proto │ │ ├── HFile.proto │ │ ├── LoadBalancer.proto │ │ ├── MapReduce.proto │ │ ├── Master.proto │ │ ├── MultiRowMutation.proto │ │ ├── RPC.proto │ │ ├── RegionServerStatus.proto │ │ ├── RowProcessor.proto │ │ ├── SecureBulkLoad.proto │ 
│ ├── Tracing.proto │ │ ├── WAL.proto │ │ └── ZooKeeper.proto ├── phantomjs │ ├── jquery-1.10.2.min.js │ ├── phantomjs │ ├── phantomjs-bridge.js │ ├── phantomjs.exe │ ├── phantomjs32 │ └── webpatch.js └── ssdb_redis.js ├── package.json ├── proxyrouter └── index.js ├── run.js ├── scheduler └── index.js ├── spider ├── downloader.js ├── extractor.js ├── index.js ├── pipeline.js └── spider.js ├── test ├── extractorTest.js ├── hbaseTest.js ├── proxy.txt └── test.js ├── tools ├── http-echo-server.js ├── queue-helper.js ├── rule-dump.js └── rule-restore.js └── webconfig ├── controllers ├── monitor.js ├── proxy.js └── rule.js ├── index.js ├── models ├── drillingRule.js └── proxyManagement.js ├── public ├── images │ ├── asc.gif │ ├── bg.gif │ └── desc.gif ├── javascripts │ ├── Chart.min.js │ ├── jquery-1.9.1.js │ ├── jquery-ui.js │ ├── jquery.dataTables.min.js │ ├── jquery.jsoneditor.js │ ├── jquery.min.js │ ├── jquery.tablesorter.min.js │ ├── json2.js │ ├── jsoneditor │ │ ├── img │ │ │ └── jsoneditor-icons.png │ │ ├── jsoneditor.css │ │ ├── jsoneditor.js │ │ └── lib │ │ │ ├── ace │ │ │ ├── ace.js │ │ │ ├── ext-searchbox.js │ │ │ ├── mode-json.js │ │ │ ├── theme-jsoneditor.js │ │ │ ├── theme-textmate.js │ │ │ └── worker-json.js │ │ │ └── jsonlint │ │ │ ├── README.md │ │ │ └── jsonlint.js │ └── my-json-editor.js ├── proxy │ ├── new.html │ └── proxy.html ├── rule │ └── new.html └── stylesheets │ ├── FlexiJsonEditor │ └── jsoneditor.css │ ├── demo_page.css │ ├── demo_table.css │ ├── demo_table_jui.css │ ├── jquery-ui.css │ ├── jquery.dataTables.css │ ├── jquery.dataTables_themeroller.css │ ├── main.css │ └── tablesorter.css ├── routecontroller.js ├── routes └── index.js ├── views ├── index.ejs ├── monitor │ ├── chart.ejs │ ├── daily.ejs │ └── linkdb.ejs ├── proxy │ ├── edit.ejs │ ├── index.ejs │ └── index.ejs.org ├── rule │ ├── edit.ejs │ ├── index.ejs │ └── show.ejs └── stylesheets │ └── main.styl └── webconfig.js /.gitignore: 
-------------------------------------------------------------------------------- 1 | .settings 2 | .project 3 | .idea 4 | *.log 5 | *.log-* 6 | debug-* 7 | node_modules/* 8 | /doc/statistic/target/ 9 | .svn 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Cherokee(successage@gmail.com) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of neocrawler nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /LICENSE.zh: -------------------------------------------------------------------------------- 1 | * Copyright (c) 2014 著作权由Cherokee(successage@gmail.com)所有。著作权人保留一切权利。 2 | * 3 | * 这份授权条款,在使用者符合以下三条件的情形下,授予使用者使用及再散播本 4 | * 软件包装原始码及二进位可执行形式的权利,无论此包装是否经改作皆然: 5 | * 6 | * * 对于本软件源代码的再散播,必须保留上述的版权宣告、此三条件表列,以 7 | * 及下述的免责声明。 8 | * * 对于本套件二进位可执行形式的再散播,必须连带以文件以及/或者其他附 9 | * 于散播包装中的媒介方式,重制上述之版权宣告、此三条件表列,以及下述 10 | * 的免责声明。 11 | * * 未获事前取得书面许可,不得使用neocrawler或本软件贡献者之名称, 12 | * 来为本软件之衍生物做任何表示支持、认可或推广、促销之行为。 13 | * 14 | * 免责声明:本软件是由Cherokee及本软件之贡献者以现状("as is")提供, 15 | * 本软件包装不负任何明示或默示之担保责任,包括但不限于就适售性以及特定目 16 | * 的的适用性为默示性担保。Cherokee及本软件之贡献者,无论任何条件、 17 | * 无论成因或任何责任主义、无论此责任为因合约关系、无过失责任主义或因非违 18 | * 约之侵权(包括过失或其他原因等)而起,对于任何因使用本软件包装所产生的 19 | * 任何直接性、间接性、偶发性、特殊性、惩罚性或任何结果的损害(包括但不限 20 | * 于替代商品或劳务之购用、使用损失、资料损失、利益损失、业务中断等等), 21 | * 不负任何责任,即在该种使用已获事前告知可能会造成此类损害的情形下亦然。 22 | -------------------------------------------------------------------------------- /crawler.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by cherokee on 14-3-25. 
3 | */ 4 | 5 | var retryTimes = 0; 6 | 7 | var spawnIt = function(tryTimes){ 8 | var spawn = require('child_process').spawn; 9 | var runner = spawn('node',['run.js'].concat(process.argv)); 10 | 11 | runner.stdout.on('data', function (data) { 12 | console.log(data.toString('utf8')); 13 | }); 14 | 15 | runner.stderr.on('data', function (data) { 16 | console.log(data.toString('utf8')); 17 | }); 18 | 19 | runner.on('exit', function (code, signal) { 20 | console.log('Child process exit :' + code+', '+signal); 21 | if(code!==0&&tryTimes<500){ 22 | console.log('Restart, times: '+(tryTimes++)); 23 | process.nextTick(function(){spawnIt(tryTimes)}); 24 | spawn = null; 25 | } 26 | }); 27 | } 28 | 29 | spawnIt(0); 30 | -------------------------------------------------------------------------------- /doc/extract_rule.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/doc/extract_rule.txt -------------------------------------------------------------------------------- /doc/hbase-init.txt: -------------------------------------------------------------------------------- 1 | create 'crawled',{NAME => 'basic', VERSIONS => 1},{NAME=>"data",VERSIONS=>"1"},{NAME => 'extra', VERSIONS => 1} 2 | create 'crawled_bin',{NAME => 'basic', VERSIONS => 1},{NAME=>"binary",VERSIONS=>"1"} -------------------------------------------------------------------------------- /doc/neocrawler-architecture.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/doc/neocrawler-architecture.pptx -------------------------------------------------------------------------------- /doc/redis-init.txt: -------------------------------------------------------------------------------- 1 | redis-cli -n 0 set "updated:driller:rule" 0 2 | 3 | redis-cli -n 3 set 
"updated:proxy:lib" 0 -------------------------------------------------------------------------------- /instance/8zi/addon/generate-url.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | import datetime 3 | 4 | d = datetime.datetime(2015, 3, 6) 5 | ar = [] 6 | for x in range(1,1800): 7 | d1 = d + datetime.timedelta(hours=x) 8 | ar.append(u'''http://www.meimingteng.com/m/apps/bazi.aspx?__VIEWSTATE=%%2FwEPDwUKMTA2MzE0NzA3OGRkSlWb4XuVUApmVAoOx6aU85PUNUM%%3D&from=&ctl00%%24ContentPlaceHolder1%%24tbBirth=2015-%02d-%02d+%02d%%3A00&ctl00%%24ContentPlaceHolder1%%24btSubmit=%%E5%%BC%%80%%E5%%A7%%8B%%E5%%88%%86%%E6%%9E%%90'''%(d1.month,d1.day,d1.hour)) 9 | 10 | fileHandle = open('urls.txt', 'w') 11 | fileHandle.write ('%s'%ar) 12 | fileHandle.close() 13 | -------------------------------------------------------------------------------- /instance/8zi/logs/blank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/instance/8zi/logs/blank.txt -------------------------------------------------------------------------------- /instance/8zi/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "driller_info_redis_db":["localhost",6379,4], 3 | "url_info_redis_db":["localhost",6379,5], 4 | "url_report_redis_db":["localhost",6379,6], 5 | "proxy_info_redis_db":["localhost",6379,7], 6 | "use_proxy":false, 7 | "proxy_router":"127.0.0.1:2013", 8 | "download_timeout":60, 9 | "save_content_to_hbase":true, 10 | "crawled_hbase_conf":["land",8080], 11 | "crawled_hbase_table":"8zi", 12 | "crawled_hbase_bin_table":"8zi_bin", 13 | "statistic_mysql_db":["127.0.0.1",3306,"crawling","crawler","123"], 14 | "check_driller_rules_interval":120, 15 | "spider_concurrency":5, 16 | "spider_request_delay":0, 17 | "schedule_interval":10, 18 | "schedule_quantity_limitation":200, 
19 | "download_retry":2, 20 | "log_level":"DEBUG", 21 | "use_ssdb":false, 22 | "to_much_fail_exit":false, 23 | "keep_link_relation":false 24 | } 25 | -------------------------------------------------------------------------------- /instance/example/backup/data-dump.json: -------------------------------------------------------------------------------- 1 | driller:sina.com.cn:detail 2 | {"id_parameter":"[]","domain":"sina.com.cn","navigate_rule":"[]","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"title\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"#artibodyTitle\",\"pick\":\"text\",\"index\":1},\"content\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"#artibody\",\"pick\":\"text\",\"index\":1}}}","inject_jquery":"false","validation_keywords":"[]","schedule_interval":"8640000","weight":"10","first_schedule":"1401100591948","load_img":"false","url_pattern":"^http://news.sina.com.cn/c/[\\d\\-]+/\\d+.shtml$","schedule_rule":"FIFO","jshandle":"false","format":"html","seed":"[]","stoppage":"-1","save_page":"true","cookie":"[]","priority":"1","script":"[]","type":"node","alias":"detail","encoding":"gb2312","active":"true","drill_rules":"[\"ul li a\",\".pagebox a\"]","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"title\",\"pick\":\"text\",\"index\":1}"} 3 | driller:news.sina.com.cn:list 4 | 
{"id_parameter":"[]","domain":"news.sina.com.cn","navigate_rule":"[]","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"title\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"title\",\"pick\":\"text\",\"index\":1}}}","inject_jquery":"false","validation_keywords":"[]","schedule_interval":"86400","weight":"10","first_schedule":"1401100591948","load_img":"false","url_pattern":"^http://roll.news.sina.com.cn/news/gnxw/gdxw1/index_\\d+.shtml$","schedule_rule":"FIFO","jshandle":"false","format":"html","seed":"[\"http://roll.news.sina.com.cn/news/gnxw/gdxw1/index_1.shtml\"]","stoppage":"-1","save_page":"false","cookie":"[]","priority":"1","script":"[]","type":"branch","alias":"list","encoding":"gb2312","active":"true","drill_rules":"[\"ul li a\",\".pagebox a\"]","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"title\",\"pick\":\"text\",\"index\":1}"} 5 | -------------------------------------------------------------------------------- /instance/example/logs/blank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/instance/example/logs/blank.txt -------------------------------------------------------------------------------- /instance/example/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "driller_info_redis_db":["localhost",6379,0], 3 | "url_info_redis_db":["localhost",6379,1], 4 | "url_report_redis_db":["localhost",6379,2], 5 | "proxy_info_redis_db":["localhost",6379,3], 6 | "use_proxy":false, 7 | "proxy_router":"127.0.0.1:2013", 8 | "download_timeout":60, 9 | "save_content_to_hbase":false, 10 | "crawled_hbase_conf":["localhost",8080], 11 | "crawled_hbase_table":"crawled", 12 | "crawled_hbase_bin_table":"crawled_bin", 13 | "statistic_mysql_db":["127.0.0.1",3306,"crawling","crawler","123"], 14 | "check_driller_rules_interval":120, 15 | 
"spider_concurrency":5, 16 | "spider_request_delay":0, 17 | "schedule_interval":60, 18 | "schedule_quantity_limitation":200, 19 | "download_retry":2, 20 | "log_level":"DEBUG", 21 | "use_ssdb":false, 22 | "to_much_fail_exit":false, 23 | "keep_link_relation":false 24 | } -------------------------------------------------------------------------------- /instance/proxy/backup/ssdb-dump.json: -------------------------------------------------------------------------------- 1 | driller:com.ru:listpage 2 | {"active":"true","alias":"listpage","cookie":"[]","domain":"com.ru","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"h1\",\"pick\":\"text\",\"index\":1}","drill_rules":"[\"td[valign='top']>table:nth-child(2) a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"IP\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td[valign='top']>font>table tr:nth-child(n+2):nth-child(-n+51)\",\"pick\":\"html\",\"index\":1,\"subset\":{\"category\":\"ip\",\"rule\":{\"host\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td:nth-child(2)\",\"pick\":\"text\",\"index\":1},\"port\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td:nth-child(3)\",\"pick\":\"text\",\"index\":1}}}}}}","first_schedule":"1401873892439","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"1","save_page":"false","schedule_interval":"1200","schedule_rule":"FIFO","script":"[]","seed":"[\"http://proxy.com.ru/list_1.html\"]","stoppage":"-1","type":"branch","url_pattern":"^http://proxy.com.ru/list_\\d+.html$","validation_keywords":"[\"Proxy.Com.Ru\"]","weight":"10"} 3 | driller:itmop.com:detailpage 4 | 
{"active":"true","alias":"detailpage","cookie":"[]","domain":"itmop.com","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"title\",\"pick\":\"text\",\"index\":1}","drill_rules":"[]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"IP\":{\"base\":\"content\",\"mode\":\"regex\",\"expression\":\"(\\\\d+\\\\.\\\\d+\\\\.\\\\d+\\\\.\\\\d+:\\\\d+)@HTTP\",\"pick\":\"text\",\"index\":-1}}}","first_schedule":"1401873892439","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"1","save_page":"false","schedule_interval":"3600","schedule_rule":"FIFO","script":"[]","seed":"[]","stoppage":"-1","type":"branch","url_pattern":"^http://www.itmop.com/proxy/post/\\d+.html$","use_proxy":"false","validation_keywords":"[\"发表时间\"]","weight":"10"} 5 | driller:itmop.com:entrance 6 | {"active":"true","alias":"entrance","cookie":"[]","domain":"itmop.com","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"title\",\"pick\":\"text\",\"index\":1}","drill_rules":"[\"#NEW_INFO_LIST>dl:first-child>dt>a\"]","encoding":"UTF-8","extract_rule":"{}","first_schedule":"1401873892439","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"2","save_page":"false","schedule_interval":"3600","schedule_rule":"FIFO","script":"[]","seed":"[\"http://www.itmop.com/proxy/\"]","stoppage":"-1","type":"branch","url_pattern":"^http://www.itmop.com/proxy/$","use_proxy":"false","validation_keywords":"[\"最近\"]","weight":"10"} 7 | driller:xici.net.co:china 8 | {"active":"true","alias":"china","cookie":"[]","domain":"xici.net.co","drill_relation":"{}","drill_rules":"[\".pagination a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"IP\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"#ip_list 
tr:nth-child(n+2)\",\"pick\":\"html\",\"index\":1,\"subset\":{\"category\":\"ip\",\"rule\":{\"host\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td:nth-child(2)\",\"pick\":\"text\",\"index\":1},\"port\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td:nth-child(3)\",\"pick\":\"text\",\"index\":1}}}}}}","first_schedule":"1401873892439","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"1","save_page":"false","schedule_interval":"3600","schedule_rule":"FIFO","script":"[]","seed":"[\"http://www.xici.net.co/nt/##1#36#1\",\"http://www.xici.net.co/nn/##1#157#1\"]","stoppage":"-1","type":"branch","url_pattern":"^http://www.xici.net.co/nt|n/\\d+$","use_proxy":"false","validation_keywords":"[\"西刺免费代理\"]","weight":"10"} 9 | driller:youdaili.cn:detailpage 10 | {"active":"true","alias":"detailpage","cookie":"[]","domain":"youdaili.cn","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"h1\",\"pick\":\"text\",\"index\":1}","drill_rules":"[\".pagelist>li:last-child>a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"IP\":{\"base\":\"content\",\"mode\":\"regex\",\"expression\":\"(\\\\d+\\\\.\\\\d+\\\\.\\\\d+\\\\.\\\\d+:\\\\d+)@HTTP\",\"pick\":\"text\",\"index\":-1}}}","first_schedule":"1401873892439","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"1","save_page":"false","schedule_interval":"3600","schedule_rule":"FIFO","script":"[]","seed":"[]","stoppage":"-1","type":"node","url_pattern":"^http://www.youdaili.cn/Daili/guonei/[\\d_]+.html$","use_proxy":"false","validation_keywords":"[\"发布时间\"]","weight":"10"} 11 | driller:youdaili.cn:entrance 12 | 
{"active":"true","alias":"entrance","cookie":"[]","domain":"youdaili.cn","drill_relation":"{\"base\":\"content\",\"mode\":\"value\",\"expression\":\"国内代理-入口\",\"pick\":\"text\",\"index\":1}","drill_rules":"[\".newslist_line>li:nth-child(n+1):nth-child(-n+2)>a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{}}","first_schedule":"1401873892439","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"2","save_page":"false","schedule_interval":"3600","schedule_rule":"FIFO","script":"[]","seed":"[\"http://www.youdaili.cn/Daili/guonei/\"]","stoppage":"-1","type":"branch","url_pattern":"^http://www.youdaili.cn/Daili/guonei/$","use_proxy":"false","validation_keywords":"[\"末页\"]","weight":"10"} 13 | -------------------------------------------------------------------------------- /instance/proxy/logs/blank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/instance/proxy/logs/blank.txt -------------------------------------------------------------------------------- /instance/proxy/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "driller_info_redis_db":["backee",1789,0], 3 | "url_info_redis_db":["backee",1789,1], 4 | "url_report_redis_db":["backee",1789,2], 5 | "proxy_info_redis_db":["backee",1789,3], 6 | "use_proxy":false, 7 | "proxy_router":"127.0.0.1:2013", 8 | "download_timeout":60, 9 | "save_content_to_hbase":false, 10 | "crawled_hbase_conf":{"zookeeperHosts": ["localhost:2222"],"zookeeperRoot": "/hbase"}, 11 | "crawled_hbase_table":"crawled", 12 | "crawled_hbase_bin_table":"crawled_bin", 13 | "statistic_mysql_db":["192.168.1.246",3306,"crawling","crawler","140109"], 14 | "check_driller_rules_interval":120, 15 | "spider_concurrency":5, 16 | "spider_request_delay":0, 17 | 
"schedule_interval":60, 18 | "schedule_quantity_limitation":200, 19 | "download_retry":2, 20 | "log_level":"DEBUG", 21 | "use_ssdb":true 22 | } 23 | -------------------------------------------------------------------------------- /instance/shuttle/addon/qqwry.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/instance/shuttle/addon/qqwry.dat -------------------------------------------------------------------------------- /instance/shuttle/addon/shuttle.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by cherokee on 7/8/14. 3 | */ 4 | var settings = require('../settings.json'); 5 | var log4js = require('log4js'); 6 | 7 | log4js.configure({ 8 | "appenders": [ 9 | { 10 | "type": "dateFile", 11 | "filename": "logs/shuttle.log", 12 | "pattern": "-yyyy-MM-dd", 13 | "alwaysIncludePattern": false 14 | }, 15 | { 16 | "type": "console" 17 | } 18 | ] 19 | }, 20 | {cwd: '../'} 21 | ); 22 | var logger = log4js.getLogger('shuttle'); 23 | logger.setLevel('DEBUG'); 24 | settings['logger'] = logger; 25 | settings['port'] = 1985; 26 | var gfw_proxy = new (require('./gfw-proxy.js'))(settings); 27 | 28 | gfw_proxy.start(); -------------------------------------------------------------------------------- /instance/shuttle/backup/ssdb-dump.json: -------------------------------------------------------------------------------- 1 | driller:xici.net.co:china 2 | {"active":"true","alias":"china","cookie":"[]","domain":"xici.net.co","drill_relation":"{}","drill_rules":"[\".pagination a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"IP\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"#ip_list 
tr:nth-child(n+2)\",\"pick\":\"html\",\"index\":1,\"subset\":{\"category\":\"ip\",\"rule\":{\"host\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td:nth-child(2)\",\"pick\":\"text\",\"index\":1},\"port\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"td:nth-child(3)\",\"pick\":\"text\",\"index\":1}}}}}}","first_schedule":"0","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"1","save_page":"false","schedule_interval":"7200","schedule_rule":"FIFO","script":"[]","seed":"[\"http://www.xici.net.co/wt/##1#172#1\",\"http://www.xici.net.co/wn/##1#118#1\"]","stoppage":"-1","type":"branch","url_pattern":"^http://www.xici.net.co/wt|n/\\d+$","use_proxy":"false","validation_keywords":"[\"西刺免费代理\"]","weight":"10"} 3 | driller:youdaili.cn:detailpage 4 | {"active":"true","alias":"detailpage","cookie":"[]","domain":"youdaili.cn","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"h1\",\"pick\":\"text\",\"index\":1}","drill_rules":"[\".pagelist>li:last-child>a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"IP\":{\"base\":\"content\",\"mode\":\"regex\",\"expression\":\"(\\\\d+\\\\.\\\\d+\\\\.\\\\d+\\\\.\\\\d+:\\\\d+)@HTTP\",\"pick\":\"text\",\"index\":-1}}}","first_schedule":"0","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"1","save_page":"false","schedule_interval":"8640000","schedule_rule":"FIFO","script":"[]","seed":"[]","stoppage":"-1","type":"node","url_pattern":"^http://www.youdaili.cn/Daili/guowai/[\\d_]+.html$","use_proxy":"false","validation_keywords":"[\"发布时间\"]","weight":"10"} 5 | driller:youdaili.cn:entrance 6 | 
{"active":"true","alias":"entrance","cookie":"[]","domain":"youdaili.cn","drill_relation":"{\"base\":\"content\",\"mode\":\"value\",\"expression\":\"国内代理-入口\",\"pick\":\"text\",\"index\":1}","drill_rules":"[\"a\"]","encoding":"UTF-8","extract_rule":"{\"category\":\"crawled\",\"rule\":{}}","first_schedule":"0","format":"html","id_parameter":"[\"#\"]","inject_jquery":"false","jshandle":"false","load_img":"false","navigate_rule":"[]","priority":"2","save_page":"false","schedule_interval":"86400","schedule_rule":"FIFO","script":"[]","seed":"[\"http://www.youdaili.cn/Daili/guowai/list_#.html#1#30#1\"]","stoppage":"-1","type":"branch","url_pattern":"^http://www.youdaili.cn/Daili/guowai/list_\\d+.html$","use_proxy":"false","validation_keywords":"[\"末页\"]","weight":"10"} 7 | -------------------------------------------------------------------------------- /instance/shuttle/logs/blank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/instance/shuttle/logs/blank.txt -------------------------------------------------------------------------------- /instance/shuttle/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "driller_info_redis_db":["backee",1786,0], 3 | "url_info_redis_db":["backee",1786,1], 4 | "url_report_redis_db":["backee",1786,2], 5 | "proxy_info_redis_db":["backee",1786,3], 6 | "use_proxy":false, 7 | "proxy_router":"127.0.0.1:2013", 8 | "download_timeout":30, 9 | "save_content_to_hbase":false, 10 | "crawled_hbase_conf":{"zookeeperHosts": ["localhost:2222"],"zookeeperRoot": "/hbase"}, 11 | "crawled_hbase_table":"crawled", 12 | "crawled_hbase_bin_table":"crawled_bin", 13 | "statistic_mysql_db":["hostname",3306,"db","user","pwd"], 14 | "check_driller_rules_interval":120, 15 | "spider_concurrency":5, 16 | "spider_request_delay":0, 17 | "schedule_interval":60, 18 | 
"schedule_quantity_limitation":200, 19 | "download_retry":2, 20 | "log_level":"DEBUG", 21 | "use_ssdb":true 22 | } 23 | -------------------------------------------------------------------------------- /instance/shuttle/spider_extend.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by james on 13-12-17. 3 | * spider extend: diy spider 4 | */ 5 | require('../../lib/jsextend.js'); 6 | var util = require('util'); 7 | var myredis = require('../../lib/myredis.js'); 8 | var request = require('request'); 9 | var async = require('async'); 10 | var httpRequest = require('../../lib/httpRequest.js'); 11 | 12 | var spider_extend = function(spiderCore){ 13 | var self = this; 14 | this.spiderCore = spiderCore; 15 | logger = spiderCore.settings.logger; 16 | 17 | var dbtype = 'redis'; 18 | if(this.spiderCore.settings['use_ssdb'])dbtype = 'ssdb'; 19 | 20 | myredis.createClient( 21 | this.spiderCore.settings['proxy_info_redis_db'][0], 22 | this.spiderCore.settings['proxy_info_redis_db'][1], 23 | this.spiderCore.settings['proxy_info_redis_db'][2], 24 | dbtype, 25 | function(err,cli){ 26 | self.redis_cli = cli; 27 | logger.debug('temporarily proxy redis db ready'); 28 | }); 29 | } 30 | 31 | /** 32 | * fetch proxy 33 | * @private 34 | */ 35 | spider_extend.prototype.__fetchProxy = function(crawled_info){ 36 | var self = this; 37 | if(crawled_info['extracted_data']){ 38 | var ips = crawled_info['extracted_data']['IP']; 39 | //async queue///////////////////////////////////////////////////////// 40 | var q = async.queue(function(task, callback) { 41 | logger.debug('proxy checker: worker '+task.name+' is processing task: '); 42 | task.run(callback); 43 | }, 20); 44 | q.saturated = function() { 45 | //logger.debug('proxy checker: all workers to be used'); 46 | } 47 | q.empty = function() { 48 | //logger.debug('proxy checker: no more tasks wating'); 49 | } 50 | q.drain = function() { 51 | logger.debug('proxy checker: all tasks have 
been processed'); 52 | } 53 | ///////////////////////////////////////////////////////////////////// 54 | //------------------------------------------------------------------- 55 | for(var i=0;i=bm){ 81 | var available_proxy = true; 82 | if(available_proxy){ 83 | var endTime = (new Date()).getTime(); 84 | if(endTime - startTime <= 60000){ 85 | redis_cli.lpush('proxy:public:available:3s',ip,function(err,value){ 86 | if(!err)logger.debug('proxy checker: Append a proxy: '+ip); 87 | cb(); 88 | }); 89 | }else{ 90 | logger.warn('proxy checker: '+ip + ' took a long time: '+(endTime - startTime)+'ms, drop it'); 91 | cb(); 92 | } 93 | }else { 94 | logger.warn('proxy checker: ' +ip + ' is invalidate proxy!'); 95 | cb(); 96 | } 97 | }else cb(); 98 | }else {logger.error('proxy checker: request error: '+ip);cb();} 99 | }); 100 | })(ip,redis_cli); 101 | }}, function(err) { 102 | //logger.debug('proxy checker: t'+i+' executed'); 103 | }); 104 | })(i,this.redis_cli); 105 | } 106 | //------------------------------------------------------------------------------ 107 | } 108 | } 109 | /** 110 | * report a url crawling finish 111 | * @param crawled_info 112 | */ 113 | spider_extend.prototype.crawl_finish_alert = function(crawled_info){ 114 | var self = this; 115 | self.__fetchProxy(crawled_info); 116 | } 117 | module.exports = spider_extend; -------------------------------------------------------------------------------- /instance/wiki/backup/wiki-rule-backup.json: -------------------------------------------------------------------------------- 1 | driller:mbalib.com:portal 2 | {"format":"html","seed":"[\"http://wiki.mbalib.com/wiki/Portal:%E9%87%91%E8%9E%8D?app=1\",\"http://wiki.mbalib.com/wiki/Portal:%E7%BB%8F%E6%B5%8E?app=1\"]","cookie":"[]","stoppage":"-1","encoding":"auto","drill_rules":"[\"#bodyContent>div:nth-child(3)>div:nth-child(6) 
a\"]","alias":"portal","extract_rule":"{\"category\":\"crawled\",\"rule\":{}}","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\".firstHeading\",\"pick\":\"text\",\"index\":1}","first_schedule":"1454634426641","save_page":"false","use_proxy":"true","script":"[]","type":"branch","priority":"1","id_parameter":"[]","url_pattern":"^http://wiki.mbalib.com/wiki/Portal:.*$","jshandle":"false","validation_keywords":"[\"条目由MBA智库网友创作并享有版权\"]","active":"true","domain":"mbalib.com","inject_jquery":"false","load_img":"false","weight":"10","schedule_interval":"86400","schedule_rule":"FIFO","navigate_rule":"[]"} 3 | driller:mbalib.com:detail 4 | {"format":"html","cookie":"[]","seed":"[]","stoppage":"-1","encoding":"auto","drill_rules":"[]","alias":"detail","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\".firstHeading\",\"pick\":\"text\",\"index\":1}","extract_rule":"{\"category\":\"crawled\",\"rule\":{\"title\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\".firstHeading\",\"pick\":\"text\",\"index\":1},\"body\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"#bodyContent\",\"pick\":\"html\",\"index\":1},\"content\":{\"base\":\"content\",\"mode\":\"css\",\"expression\":\"#bodyContent\",\"pick\":\"text\",\"index\":1}},\"require\":[\"title\"]}","script":"[]","first_schedule":"1454055069278","save_page":"true","use_proxy":"true","type":"node","priority":"1","id_parameter":"[]","url_pattern":"^http://wiki.mbalib.com/wiki/.*$","jshandle":"false","validation_keywords":"[\"页面分类\"]","active":"true","domain":"mbalib.com","weight":"10","load_img":"false","inject_jquery":"false","schedule_interval":"8640000","schedule_rule":"FIFO","navigate_rule":"[]"} 5 | driller:mbalib.com:category 6 | 
{"format":"html","cookie":"[]","seed":"[\"http://wiki.mbalib.com/wiki/Category:%E9%87%91%E8%9E%8D%E7%99%BE%E7%A7%91?app=1\",\"http://wiki.mbalib.com/wiki/Category:%E7%BB%8F%E6%B5%8E%E7%99%BE%E7%A7%91?app=1\"]","stoppage":"-1","encoding":"auto","drill_rules":"[\".page_ul li>a\",\".CategoryTreeItem>a\"]","alias":"category","drill_relation":"{\"base\":\"content\",\"mode\":\"css\",\"expression\":\".firstHeading\",\"pick\":\"text\",\"index\":1}","extract_rule":"{\"category\":\"crawled\",\"rule\":{}}","script":"[]","first_schedule":"1454634426641","save_page":"false","use_proxy":"true","type":"branch","priority":"1","id_parameter":"[]","url_pattern":"^http://wiki.mbalib.com/wiki/Category:.*$","jshandle":"false","validation_keywords":"[\"在该分类中有\",\"条目在\"]","active":"true","domain":"mbalib.com","load_img":"false","weight":"10","inject_jquery":"false","schedule_interval":"86400","schedule_rule":"FIFO","navigate_rule":"[]"} 7 | -------------------------------------------------------------------------------- /instance/wiki/logs/blank.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/instance/wiki/logs/blank.txt -------------------------------------------------------------------------------- /instance/wiki/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "driller_info_redis_db":["localhost",6379,0], 3 | "url_info_redis_db":["localhost",6379,1], 4 | "url_report_redis_db":["localhost",6379,2], 5 | "proxy_info_redis_db":["localhost",6379,3], 6 | "use_proxy":false, 7 | "proxy_router":"localhost:8118", 8 | "download_timeout":60, 9 | "save_content_to_hbase":false, 10 | "crawled_hbase_conf":["localhost",8080], 11 | "crawled_hbase_table":"crawled", 12 | "crawled_hbase_bin_table":"crawled_bin", 13 | "statistic_mysql_db":["127.0.0.1",3306,"crawling","crawler","123"], 14 | 
/**
 * Created by james on 13-12-5.
 * js extend: small helper functions published on the global object, plus
 * String.prototype polyfills for runtimes that lack the native methods.
 *
 * The array/object helpers are plain global functions; they do not extend
 * Array.prototype or Object.prototype.
 * They are attached to `global` explicitly so that loading this file also
 * works under strict mode, where assigning to an undeclared name throws.
 */

/**
 * Remove duplicated items from an array.
 * Items are compared with strict equality (Array.prototype.indexOf); the first
 * occurrence of every value is kept and the input array is not modified.
 * @param {Array} arr
 * @returns {Array} new array with the distinct items in first-seen order
 */
global.arrayUnique = function(arr) {
    return arr.reduce(function(seen, item) {
        if (seen.indexOf(item) < 0) seen.push(item);
        return seen;
    }, []);
};

/**
 * Shuffle an array in place (Fisher-Yates).
 * @param {Array} arr array to shuffle; it is mutated
 * @returns {Array} the same array instance, shuffled
 */
global.arrayShuffle = function(arr) {
    var i = arr.length, j, held;
    while (i) {
        // Math.floor, not parseInt: parseInt() stringifies its argument, so a
        // value below 1e-6 (printed as e.g. "3e-7") would be parsed as 3 and
        // could address a slot outside the not-yet-shuffled range.
        j = Math.floor(Math.random() * i);
        i -= 1;
        held = arr[i];
        arr[i] = arr[j];
        arr[j] = held;
    }
    return arr;
};

/**
 * Detect whether a string ends with the given suffix.
 * Installed only when the runtime has no native String.prototype.endsWith.
 * @param suffix
 * @returns {boolean}
 */
if (!String.prototype.endsWith) {
    String.prototype.endsWith = function(suffix) {
        return this.indexOf(suffix, this.length - suffix.length) !== -1;
    };
}

/**
 * Detect whether a string starts with the given prefix.
 * Installed only when the runtime has no native String.prototype.startsWith.
 * @param prefix
 * @returns {boolean}
 */
if (!String.prototype.startsWith) {
    String.prototype.startsWith = function(prefix) {
        return this.indexOf(prefix, 0) === 0;
    };
}

/**
 * Remove the blanks around a string.
 * Installed only when the runtime has no native String.prototype.trim.
 * @returns {string}
 */
if (!String.prototype.trim) {
    String.prototype.trim = function() {
        return this.replace(/(^\s*)|(\s*$)/g, "");
    };
}

/**
 * Detect whether an object has no own enumerable property.
 * null and undefined are reported as empty.
 * @param obj
 * @returns {boolean}
 */
global.isEmpty = function(obj) {
    for (var prop in obj) {
        // Borrow hasOwnProperty from Object.prototype: the object itself may
        // carry an own "hasOwnProperty" key or have no prototype at all.
        if (Object.prototype.hasOwnProperty.call(obj, prop)) return false;
    }
    return true;
};

/**
 * Deep-copy a value.
 * Primitives, null and undefined are returned as is; Date, Array and Object
 * instances are copied recursively over their own enumerable properties.
 * @param obj value to copy
 * @returns {*} the copy
 * @throws {Error} when obj is an object that is not an instance of Object
 */
global.clone = function clone(obj) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var dateCopy, arrayCopy, objectCopy, i, len, attr;

    // Handle the 3 simple types, and null or undefined
    if (null == obj || "object" != typeof obj) return obj;

    // Handle Date
    if (obj instanceof Date) {
        dateCopy = new Date();
        dateCopy.setTime(obj.getTime());
        return dateCopy;
    }

    // Handle Array
    if (obj instanceof Array) {
        arrayCopy = [];
        for (i = 0, len = obj.length; i < len; ++i) {
            arrayCopy[i] = clone(obj[i]);
        }
        return arrayCopy;
    }

    // Handle Object
    if (obj instanceof Object) {
        objectCopy = {};
        for (attr in obj) {
            if (hasOwn.call(obj, attr)) objectCopy[attr] = clone(obj[attr]);
        }
        return objectCopy;
    }

    throw new Error("Unable to copy obj! Its type isn't supported.");
};
/**
 * Asynchronous line-by-line file reader.
 *
 * Exports:
 *   open(filename, cb, separator, encoding, bufferSize)
 *       opens the file and hands a LineReader to cb(reader).
 *   eachLine(filename, cb, separator, encoding, bufferSize)
 *       calls cb for every line and returns {then: fn} to register a
 *       completion callback.
 *
 * There is no error channel in this API: I/O errors are thrown from inside the
 * fs callbacks.
 */
(function() {
    "use strict";

    var fs = require('fs'),
        StringDecoder = require('string_decoder').StringDecoder;

    /**
     * Allocate the read buffer; falls back to the legacy constructor on
     * runtimes that do not provide Buffer.alloc.
     * @param {number} size
     * @returns {Buffer}
     */
    function allocateBuffer(size) {
        return typeof Buffer.alloc === 'function' ? Buffer.alloc(size) : new Buffer(size);
    }

    /**
     * Reader over an already opened file descriptor.
     * @param fd          file descriptor to read from
     * @param cb          called without arguments once the first chunk(s) are buffered
     * @param separator   line separator, defaults to '\n'; may be longer than one character
     * @param encoding    text encoding, defaults to 'utf8'
     * @param bufferSize  bytes requested per fs.read call, defaults to 1024
     */
    function LineReader(fd, cb, separator, encoding, bufferSize) {
        var filePosition = 0,
            buffer,
            bufferStr = '',
            decoder,
            closed = false,
            eof = false,
            separatorIndex = -1;

        encoding = encoding || 'utf8';
        separator = separator || '\n';
        bufferSize = bufferSize || 1024;
        buffer = allocateBuffer(bufferSize);
        decoder = new StringDecoder(encoding);

        /** Close the file descriptor; later calls are no-ops. */
        function close() {
            if (!closed) {
                fs.close(fd, function(err) {
                    if (err) {
                        throw err;
                    }
                });
                closed = true;
            }
        }

        /**
         * Read chunks until the buffered text holds a separator that is
         * followed by more text, or until end of file; then call done().
         */
        function readToSeparator(done) {
            function readChunk() {
                fs.read(fd, buffer, 0, bufferSize, filePosition, function(err, bytesRead) {
                    var separatorAtEnd;

                    if (err) {
                        throw err;
                    }

                    // A short read marks the end of the file.
                    if (bytesRead < bufferSize) {
                        eof = true;
                        close();
                    }

                    filePosition += bytesRead;

                    bufferStr += decoder.write(buffer.slice(0, bytesRead));

                    if (separatorIndex < 0) {
                        separatorIndex = bufferStr.indexOf(separator);
                    }

                    // When the separator is the very last thing buffered, read
                    // ahead once more so that eof is known before the line is
                    // handed out (hasNextLine depends on it).
                    separatorAtEnd = separatorIndex >= 0 &&
                        separatorIndex + separator.length === bufferStr.length;
                    if (bytesRead && (separatorIndex === -1 || separatorAtEnd) && !eof) {
                        readChunk();
                    } else {
                        done();
                    }
                });
            }

            readChunk();
        }

        /**
         * @returns {boolean} true while buffered text remains or the end of the
         *                    file has not been reached yet
         */
        function hasNextLine() {
            return bufferStr.length > 0 || !eof;
        }

        /**
         * Deliver the next line (without its separator) to lineCb(line).
         * @throws {Error} when called after the last line was delivered
         */
        function nextLine(lineCb) {
            function getLine() {
                var line;

                if (separatorIndex < 0) {
                    // End of file was reached without another separator:
                    // everything still buffered is the final line.
                    line = bufferStr;
                    bufferStr = '';
                } else {
                    line = bufferStr.substring(0, separatorIndex);
                    // Skip the whole separator, which may be longer than one
                    // character (e.g. "\r\n").
                    bufferStr = bufferStr.substring(separatorIndex + separator.length);
                }
                separatorIndex = -1;
                lineCb(line);
            }

            if (separatorIndex < 0) {
                separatorIndex = bufferStr.indexOf(separator);
            }

            if (separatorIndex >= 0) {
                getLine();
            } else if (!eof) {
                readToSeparator(getLine);
            } else if (hasNextLine()) {
                getLine();
            } else {
                throw new Error('No more lines to read.');
            }
        }

        this.hasNextLine = hasNextLine;
        this.nextLine = nextLine;
        this.close = close;

        readToSeparator(cb);
    }

    /**
     * Open filename for reading and pass a ready LineReader to cb(reader).
     */
    function open(filename, cb, separator, encoding, bufferSize) {
        fs.open(filename, 'r', parseInt('666', 8), function(err, fd) {
            var reader;
            if (err) {
                throw err;
            }

            reader = new LineReader(fd, function() {
                cb(reader);
            }, separator, encoding, bufferSize);
        });
    }

    /**
     * Iterate over every line of filename.
     * cb is called as cb(line, last) and stops the iteration by returning
     * false; when cb declares three parameters it is called as
     * cb(line, last, continueCb) and must call continueCb() to go on or
     * continueCb(false) to stop.
     * @returns {{then: Function}} then(fn) registers fn to run when the
     *          iteration has finished or was stopped
     */
    function eachLine(filename, cb, separator, encoding, bufferSize) {
        var finalFn,
            asyncCb = cb.length === 3;

        function finish() {
            if (finalFn && typeof finalFn === 'function') {
                finalFn();
            }
        }

        open(filename, function(reader) {
            function newRead() {
                if (reader.hasNextLine()) {
                    // Defer the next read instead of recursing directly.
                    setImmediate(readNext);
                } else {
                    finish();
                }
            }

            function continueCb(continueReading) {
                if (continueReading !== false) {
                    newRead();
                } else {
                    finish();
                    reader.close();
                }
            }

            function readNext() {
                reader.nextLine(function(line) {
                    var last = !reader.hasNextLine();

                    if (asyncCb) {
                        cb(line, last, continueCb);
                    } else {
                        continueCb(cb(line, last));
                    }
                });
            }

            newRead();
        }, separator, encoding, bufferSize);

        return {
            then: function(cb) {
                finalFn = cb;
            }
        };
    }

    module.exports.open = open;
    module.exports.eachLine = eachLine;
}());
based 3 | */ 4 | /** 5 | * logger constructor 6 | * @param name 7 | * @param instance 8 | * @param level 9 | * @returns {Logger} 10 | */ 11 | exports.getLogger = function(name,instance,level){ 12 | var log4js = require('log4js'); 13 | log4js.configure({ 14 | "appenders": [ 15 | { 16 | "type": "dateFile", 17 | "filename": "logs/"+name+"-"+process.pid+".log", 18 | "pattern": "-yyyy-MM-dd", 19 | "alwaysIncludePattern": false 20 | }, 21 | { 22 | "type": "console" 23 | } 24 | ] 25 | }, 26 | {cwd: 'instance/'+instance } 27 | ); 28 | var logger = log4js.getLogger(name); 29 | logger.setLevel(level); 30 | return logger; 31 | } -------------------------------------------------------------------------------- /lib/myredis.js: -------------------------------------------------------------------------------- 1 | /** 2 | * redis instance 3 | * Created by cherokee on 14-5-22. 4 | */ 5 | 6 | var redis = require("redis"); 7 | var ssdb = require("./ssdb_redis.js"); 8 | 9 | exports.createClient = function(host,port,db,type,callback){ 10 | if(type=='ssdb'){ 11 | var ssdb_cli = ssdb.client(host,port); 12 | callback(null,ssdb_cli); 13 | }else{ 14 | var redis_cli = redis.createClient(port,host); 15 | redis_cli.hlist = function(name,callback){ 16 | redis_cli.keys(name,callback); 17 | }; 18 | redis_cli.hclear = function(name,callback){ 19 | redis_cli.del(name,callback); 20 | }; 21 | redis_cli.zlen = function(name,callback){ 22 | redis_cli.zcount(name,0,(new Date()).getTime(),callback); 23 | }; 24 | redis_cli.zlist = function(name,callback){ 25 | redis_cli.keys(name,callback); 26 | }; 27 | redis_cli.qlist = function(name,callback){ 28 | redis_cli.keys(name,callback); 29 | }; 30 | redis_cli.close = function(){ 31 | redis_cli.quit(); 32 | }; 33 | redis_cli.select(db, function(err,value) { 34 | callback(err,redis_cli); 35 | }); 36 | } 37 | } 38 | 39 | /* 40 | exports.createClient('10.1.1.122',1788,0,'ssdb',function(err,c){ 41 | c.hlist('driller',function(err,values){ 42 | console.log(err); 43 | 
console.log(values); 44 | //c.close(); 45 | }); 46 | }); 47 | */ -------------------------------------------------------------------------------- /lib/node_hbase/index.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | module.exports = function(options) { 4 | var Client; 5 | Client = require('./lib/client'); 6 | return new Client(options); 7 | }; 8 | 9 | module.exports.Put = require('./lib/put'); 10 | 11 | module.exports.Get = require('./lib/get'); 12 | 13 | module.exports.Delete = require('./lib/delete'); 14 | 15 | module.exports.Increment = require('./lib/increment'); 16 | 17 | module.exports.Scan = require('./lib/scan').Scan; 18 | 19 | module.exports.FilterList = require('./lib/filter-list'); 20 | 21 | module.exports.utils = require('./lib/utils'); 22 | 23 | }).call(this); 24 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/call.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var Call, debug, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; 5 | 6 | debug = (require('debug'))('hbase-call'); 7 | 8 | module.exports = Call = (function() { 9 | function Call(responseClass, header, timeout, cb) { 10 | this.responseClass = responseClass; 11 | this.header = header; 12 | this.timeout = timeout; 13 | this.cb = cb; 14 | this.complete = __bind(this.complete, this); 15 | this.called = false; 16 | this.startTime = new Date; 17 | this.timer = setTimeout((function(_this) { 18 | return function() { 19 | debug("operation " + _this.header.callId + " (" + _this.header.methodName + ") timedout after " + _this.timeout + "ms"); 20 | _this.called = true; 21 | return _this.cb('timedout'); 22 | }; 23 | })(this), this.timeout); 24 | } 25 | 26 | Call.prototype.complete = function(err, data) { 27 | if 
(this.called) { 28 | return; 29 | } 30 | debug("operation " + this.header.callId + " (" + this.header.methodName + ") completed. Took: " + (new Date - this.startTime) + "ms"); 31 | clearTimeout(this.timer); 32 | return this.cb(err, data); 33 | }; 34 | 35 | return Call; 36 | 37 | })(); 38 | 39 | }).call(this); 40 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/data-input-stream.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var ByteBuffer, DataInputStream, EventEmitter, Readable, debug, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, 5 | __hasProp = {}.hasOwnProperty, 6 | __extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; }; 7 | 8 | debug = require('debug')('hbase:data_input_stream'); 9 | 10 | Readable = require('readable-stream').Readable; 11 | 12 | ByteBuffer = require('protobufjs/node_modules/bytebuffer'); 13 | 14 | EventEmitter = require('events').EventEmitter; 15 | 16 | module.exports = DataInputStream = (function(_super) { 17 | __extends(DataInputStream, _super); 18 | 19 | function DataInputStream(io) { 20 | this.processMessage = __bind(this.processMessage, this); 21 | this.processData = __bind(this.processData, this); 22 | this["in"] = io; 23 | if (typeof io.read !== 'function') { 24 | this["in"] = new Readable(); 25 | this["in"].wrap(io); 26 | } 27 | this.bytearr = new Buffer(80); 28 | this.buffer = new Buffer(0); 29 | this.awaitBytes = 0; 30 | this["in"].on('data', this.processData); 31 | } 32 | 33 | DataInputStream.prototype.processData = function(data) { 34 | if (!data) { 35 | data = new Buffer(0); 36 | } 37 | this.buffer = 
Buffer.concat([this.buffer, data]); 38 | if (this.awaitBytes === 0 && this.buffer.length < 4) { 39 | return; 40 | } 41 | if (!this.awaitBytes) { 42 | this.awaitBytes = this.buffer.readUInt32BE(0); 43 | this.buffer = this.buffer.slice(4); 44 | } 45 | if (this.awaitBytes && this.awaitBytes > this.buffer.length) { 46 | return; 47 | } 48 | this.processMessage(this.buffer.slice(0, this.awaitBytes)); 49 | this.buffer = this.buffer.slice(this.awaitBytes); 50 | this.awaitBytes = 0; 51 | if (this.buffer.length > 0) { 52 | return this.processData(); 53 | } 54 | }; 55 | 56 | DataInputStream.prototype.processMessage = function(message) { 57 | var messages, payload, readDelimited; 58 | payload = ByteBuffer.wrap(message); 59 | readDelimited = function() { 60 | var header, headerLen; 61 | headerLen = payload.readVarint32(); 62 | header = payload.slice(payload.offset, payload.offset + headerLen); 63 | payload.offset += headerLen; 64 | return header.toBuffer(); 65 | }; 66 | messages = []; 67 | while (payload.remaining()) { 68 | messages.push(readDelimited()); 69 | } 70 | return this.emit('messages', messages); 71 | }; 72 | 73 | return DataInputStream; 74 | 75 | })(EventEmitter); 76 | 77 | }).call(this); 78 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/delete.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var ByteBuffer, Delete, ProtoBuf, builder, proto, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; 5 | 6 | ProtoBuf = require("protobufjs"); 7 | 8 | ByteBuffer = require('protobufjs/node_modules/bytebuffer'); 9 | 10 | ProtoBuf.convertFieldsToCamelCase = true; 11 | 12 | builder = ProtoBuf.loadProtoFile("" + __dirname + "/../proto/Client.proto"); 13 | 14 | proto = builder.build(); 15 | 16 | module.exports = Delete = (function() { 17 | function Delete(row, ts) { 18 | this.row = row; 19 | 
this.ts = ts; 20 | this.getRow = __bind(this.getRow, this); 21 | this.getFields = __bind(this.getFields, this); 22 | this._add = __bind(this._add, this); 23 | this.deleteFamily = __bind(this.deleteFamily, this); 24 | this.deleteFamilyVersion = __bind(this.deleteFamilyVersion, this); 25 | this.deleteColumns = __bind(this.deleteColumns, this); 26 | this.deleteColumn = __bind(this.deleteColumn, this); 27 | this.familyMap = {}; 28 | } 29 | 30 | Delete.prototype.deleteColumn = function(cf, qualifier, timestamp) { 31 | return this._add(cf, qualifier, timestamp, 'DELETE_ONE_VERSION'); 32 | }; 33 | 34 | Delete.prototype.deleteColumns = function(cf, qualifier, timestamp) { 35 | return this._add(cf, qualifier, timestamp, 'DELETE_MULTIPLE_VERSIONS'); 36 | }; 37 | 38 | Delete.prototype.deleteFamilyVersion = function(cf, timestamp) { 39 | return this._add(cf, null, timestamp, 'DELETE_FAMILY_VERSION'); 40 | }; 41 | 42 | Delete.prototype.deleteFamily = function(cf, timestamp) { 43 | return this._add(cf, null, timestamp, 'DELETE_FAMILY'); 44 | }; 45 | 46 | Delete.prototype._add = function(cf, qualifier, timestamp, deleteType) { 47 | var _base; 48 | if (timestamp == null) { 49 | timestamp = ByteBuffer.Long.MAX_VALUE; 50 | } 51 | if ((_base = this.familyMap)[cf] == null) { 52 | _base[cf] = []; 53 | } 54 | return this.familyMap[cf].push({ 55 | qualifier: qualifier, 56 | timestamp: timestamp, 57 | deleteType: deleteType 58 | }); 59 | }; 60 | 61 | Delete.prototype.getFields = function() { 62 | var cf, column, o, qualifiers, _ref; 63 | o = { 64 | row: this.row, 65 | mutateType: "DELETE", 66 | columnValue: [] 67 | }; 68 | _ref = this.familyMap; 69 | for (cf in _ref) { 70 | qualifiers = _ref[cf]; 71 | column = { 72 | family: cf, 73 | qualifierValue: qualifiers.map(function(qualifier) { 74 | return qualifier; 75 | }) 76 | }; 77 | o.columnValue.push(column); 78 | } 79 | return o; 80 | }; 81 | 82 | Delete.prototype.getRow = function() { 83 | return this.row; 84 | }; 85 | 86 | return Delete; 
87 | 88 | })(); 89 | 90 | }).call(this); 91 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/filter-list.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var FilterList, getFilter, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; 5 | 6 | getFilter = require('./scan').getFilter; 7 | 8 | module.exports = FilterList = (function() { 9 | function FilterList(operator, filter) { 10 | var _ref; 11 | this.operator = operator; 12 | this.get = __bind(this.get, this); 13 | this.addFilter = __bind(this.addFilter, this); 14 | if ((_ref = this.operator) !== 'MUST_PASS_ALL' && _ref !== 'MUST_PASS_ONE') { 15 | this.operator = 'MUST_PASS_ALL'; 16 | } 17 | this.filters = []; 18 | if (filter) { 19 | filter = getFilter(filter); 20 | if (!filter) { 21 | return false; 22 | } 23 | this.filters.push(filter); 24 | } 25 | } 26 | 27 | FilterList.prototype.addFilter = function(filter) { 28 | filter = getFilter(filter); 29 | if (!filter) { 30 | return false; 31 | } 32 | return this.filters.push(filter); 33 | }; 34 | 35 | FilterList.prototype.get = function() { 36 | return { 37 | operator: this.operator, 38 | filters: this.filters 39 | }; 40 | }; 41 | 42 | return FilterList; 43 | 44 | })(); 45 | 46 | }).call(this); 47 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/get.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var Get, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; 5 | 6 | module.exports = Get = (function() { 7 | function Get(row) { 8 | this.row = row; 9 | this.getRow = __bind(this.getRow, this); 10 | this.getFields = __bind(this.getFields, this); 11 | this.setTimeRange = __bind(this.setTimeRange, this); 12 | 
this.setMaxVersions = __bind(this.setMaxVersions, this); 13 | this.addColumn = __bind(this.addColumn, this); 14 | this.tr = { 15 | from: null, 16 | to: null 17 | }; 18 | this.familyMap = {}; 19 | this.maxVersions = 1; 20 | } 21 | 22 | Get.prototype.addColumn = function(cf, qualifier) { 23 | var _base; 24 | if ((_base = this.familyMap)[cf] == null) { 25 | _base[cf] = []; 26 | } 27 | this.familyMap[cf].push(qualifier); 28 | return this; 29 | }; 30 | 31 | Get.prototype.setMaxVersions = function(maxVersions) { 32 | if (maxVersions <= 0) { 33 | maxVersions = 1; 34 | } 35 | this.maxVersions = maxVersions; 36 | return this; 37 | }; 38 | 39 | Get.prototype.setTimeRange = function(minStamp, maxStamp) { 40 | this.tr = { 41 | from: minStamp, 42 | to: maxStamp 43 | }; 44 | return this; 45 | }; 46 | 47 | Get.prototype.getFields = function() { 48 | var cf, o, qualifiers, _ref; 49 | o = { 50 | row: this.row, 51 | timeRange: this.tr, 52 | column: [], 53 | maxVersions: this.maxVersions 54 | }; 55 | _ref = this.familyMap; 56 | for (cf in _ref) { 57 | qualifiers = _ref[cf]; 58 | o.column = { 59 | family: cf, 60 | qualifier: qualifiers.map(function(qualifier) { 61 | return qualifier; 62 | }) 63 | }; 64 | } 65 | return o; 66 | }; 67 | 68 | Get.prototype.getRow = function() { 69 | return this.row; 70 | }; 71 | 72 | return Get; 73 | 74 | })(); 75 | 76 | }).call(this); 77 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/hconstants.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | module.exports = { 4 | MAGIC: 255, 5 | MAGIC_SIZE: 1, 6 | ID_LENGTH_SIZE: 4, 7 | MD5_HEX_LENGTH: 32, 8 | SERVERNAME_SEPARATOR: ",", 9 | META_TABLE_NAME: new Buffer('hbase:meta'), 10 | META_REGION_NAME: new Buffer('hbase:meta,,1'), 11 | NINES: "99999999999999", 12 | ZEROS: "00000000000000", 13 | HEADER: new Buffer("HBas"), 14 | RPC_TIMEOUT: 30000, 15 | PING_TIMEOUT: 
30000, 16 | CALL_TIMEOUT: 5000, 17 | SOCKET_RETRY_WAIT_MS: 200 18 | }; 19 | 20 | }).call(this); 21 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/increment.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var ByteBuffer, Increment, ProtoBuf, builder, proto, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; 5 | 6 | ProtoBuf = require("protobufjs"); 7 | 8 | ByteBuffer = require('protobufjs/node_modules/bytebuffer'); 9 | 10 | ProtoBuf.convertFieldsToCamelCase = true; 11 | 12 | builder = ProtoBuf.loadProtoFile("" + __dirname + "/../proto/Client.proto"); 13 | 14 | proto = builder.build(); 15 | 16 | module.exports = Increment = (function() { 17 | function Increment(row, ts) { 18 | this.row = row; 19 | this.ts = ts; 20 | this.getRow = __bind(this.getRow, this); 21 | this.getFields = __bind(this.getFields, this); 22 | this.add = __bind(this.add, this); 23 | this.familyMap = {}; 24 | } 25 | 26 | Increment.prototype.add = function(cf, qualifier, value, timestamp) { 27 | var b, _base; 28 | if (timestamp == null) { 29 | timestamp = ByteBuffer.Long.MAX_VALUE; 30 | } 31 | b = new ByteBuffer(8); 32 | b.writeLong(ByteBuffer.Long.fromString("" + value)); 33 | if ((_base = this.familyMap)[cf] == null) { 34 | _base[cf] = []; 35 | } 36 | return this.familyMap[cf].push({ 37 | qualifier: qualifier, 38 | value: b.toBuffer(), 39 | timestamp: timestamp 40 | }); 41 | }; 42 | 43 | Increment.prototype.getFields = function() { 44 | var cf, o, qualifierValue, _ref; 45 | o = { 46 | row: this.row, 47 | mutateType: 'INCREMENT', 48 | columnValue: [] 49 | }; 50 | _ref = this.familyMap; 51 | for (cf in _ref) { 52 | qualifierValue = _ref[cf]; 53 | o.columnValue.push({ 54 | family: cf, 55 | qualifierValue: qualifierValue 56 | }); 57 | } 58 | return o; 59 | }; 60 | 61 | Increment.prototype.getRow = function() { 62 | 
return this.row; 63 | }; 64 | 65 | return Increment; 66 | 67 | })(); 68 | 69 | }).call(this); 70 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/output-buffer.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var ByteBuffer, DataOutputBuffer, DataOutputStream, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }, 5 | __slice = [].slice, 6 | __hasProp = {}.hasOwnProperty, 7 | __extends = function(child, parent) { for (var key in parent) { if (__hasProp.call(parent, key)) child[key] = parent[key]; } function ctor() { this.constructor = child; } ctor.prototype = parent.prototype; child.prototype = new ctor(); child.__super__ = parent.prototype; return child; }; 8 | 9 | ByteBuffer = require('protobufjs/node_modules/bytebuffer'); 10 | 11 | DataOutputStream = (function() { 12 | function DataOutputStream(out) { 13 | this.out = out; 14 | this.writeDelimitedBuffers = __bind(this.writeDelimitedBuffers, this); 15 | this.writeInt = __bind(this.writeInt, this); 16 | this.writeByte = __bind(this.writeByte, this); 17 | this.write = __bind(this.write, this); 18 | this.written = 0; 19 | } 20 | 21 | DataOutputStream.prototype.write = function(b, offset, length) { 22 | if (!Buffer.isBuffer(b)) { 23 | b = new Buffer(b); 24 | } 25 | if (length == null) { 26 | length = b.length; 27 | } 28 | if (offset) { 29 | b = b.slice(offset, offset + length); 30 | } 31 | this.out.write(b); 32 | return this.written += length; 33 | }; 34 | 35 | DataOutputStream.prototype.writeByte = function(b) { 36 | if (!Buffer.isBuffer(b)) { 37 | if (isNaN(b)) { 38 | b = new Buffer(b[0]); 39 | } else { 40 | b = new Buffer([b]); 41 | } 42 | } 43 | return this.write(b); 44 | }; 45 | 46 | DataOutputStream.prototype.writeInt = function(i) { 47 | var b; 48 | b = new Buffer(4); 49 | b.writeInt32BE(i, 0); 50 | return this.write(b); 51 | }; 
52 | 53 | DataOutputStream.prototype.writeDelimitedBuffers = function() { 54 | var bb, buffer, buffers, i, length, varInt, _results; 55 | buffers = 1 <= arguments.length ? __slice.call(arguments, 0) : []; 56 | length = 0; 57 | varInt = []; 58 | for (i in buffers) { 59 | buffer = buffers[i]; 60 | varInt[i] = ByteBuffer.calculateVarint32(buffer.length); 61 | length += varInt[i] + buffer.length; 62 | } 63 | this.writeInt(length); 64 | _results = []; 65 | for (i in buffers) { 66 | buffer = buffers[i]; 67 | bb = new ByteBuffer(varInt[i]); 68 | bb.writeVarint32(buffer.length); 69 | this.write(bb.toBuffer()); 70 | _results.push(this.write(buffer)); 71 | } 72 | return _results; 73 | }; 74 | 75 | return DataOutputStream; 76 | 77 | })(); 78 | 79 | DataOutputBuffer = (function(_super) { 80 | __extends(DataOutputBuffer, _super); 81 | 82 | function DataOutputBuffer() { 83 | DataOutputBuffer.__super__.constructor.call(this); 84 | } 85 | 86 | return DataOutputBuffer; 87 | 88 | })(DataOutputStream); 89 | 90 | module.exports.DataOutputBuffer = DataOutputBuffer; 91 | 92 | module.exports.DataOutputStream = DataOutputStream; 93 | 94 | }).call(this); 95 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/put.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var ByteBuffer, ProtoBuf, Put, builder, proto, 4 | __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; 5 | 6 | ProtoBuf = require("protobufjs"); 7 | 8 | ByteBuffer = require('protobufjs/node_modules/bytebuffer'); 9 | 10 | ProtoBuf.convertFieldsToCamelCase = true; 11 | 12 | builder = ProtoBuf.loadProtoFile("" + __dirname + "/../proto/Client.proto"); 13 | 14 | proto = builder.build(); 15 | 16 | module.exports = Put = (function() { 17 | function Put(row, ts) { 18 | this.row = row; 19 | this.ts = ts; 20 | this.getRow = __bind(this.getRow, this); 21 | 
this.getFields = __bind(this.getFields, this); 22 | this.add = __bind(this.add, this); 23 | this.familyMap = {}; 24 | } 25 | 26 | Put.prototype.add = function(cf, qualifier, value, timestamp) { 27 | var _base; 28 | if (timestamp == null) { 29 | timestamp = this.ts; 30 | } 31 | if (timestamp == null) { 32 | timestamp = ByteBuffer.Long.MAX_VALUE; 33 | } 34 | if ((_base = this.familyMap)[cf] == null) { 35 | _base[cf] = []; 36 | } 37 | return this.familyMap[cf].push({ 38 | qualifier: qualifier, 39 | value: value, 40 | timestamp: timestamp 41 | }); 42 | }; 43 | 44 | Put.prototype.getFields = function() { 45 | var cf, o, qualifierValue, _ref; 46 | o = { 47 | row: this.row, 48 | mutateType: "PUT", 49 | columnValue: [] 50 | }; 51 | _ref = this.familyMap; 52 | for (cf in _ref) { 53 | qualifierValue = _ref[cf]; 54 | o.columnValue.push({ 55 | family: cf, 56 | qualifierValue: qualifierValue 57 | }); 58 | } 59 | return o; 60 | }; 61 | 62 | Put.prototype.getRow = function() { 63 | return this.row; 64 | }; 65 | 66 | return Put; 67 | 68 | })(); 69 | 70 | }).call(this); 71 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/utils.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | module.exports = { 4 | bufferIncrement: (function(_this) { 5 | return function(buffer, i) { 6 | var b, newBuffer, tmp; 7 | b = new Buffer(buffer); 8 | if (i == null) { 9 | i = b.length - 1; 10 | } 11 | if (i < 0) { 12 | return Buffer.concat([new Buffer([1]), b]); 13 | } 14 | tmp = new Buffer([parseInt(b[i]++)]); 15 | if (tmp[0] > b[i]) { 16 | newBuffer = _this.bufferIncrement(b, i - 1); 17 | } 18 | if (newBuffer && b.length < newBuffer.length) { 19 | return newBuffer; 20 | } 21 | return b; 22 | }; 23 | })(this), 24 | bufferCompare: function(a, b) { 25 | var i, len1, len2, _a, _b, _i, _ref; 26 | len1 = a.length; 27 | len2 = b.length; 28 | if (a === b && len1 === 
len2) { 29 | return 0; 30 | } 31 | for (i = _i = 0, _ref = len1 - 1; 0 <= _ref ? _i <= _ref : _i >= _ref; i = 0 <= _ref ? ++_i : --_i) { 32 | if (a[i] !== b[i]) { 33 | _a = a[i] ? a[i] : 0; 34 | _b = b[i] ? b[i] : 0; 35 | return _a - _b; 36 | } 37 | } 38 | return len1 - len2; 39 | } 40 | }; 41 | 42 | }).call(this); 43 | -------------------------------------------------------------------------------- /lib/node_hbase/lib/zk-protobuf.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.8.0 2 | (function() { 3 | var ByteBuffer, ProtoBuf, builder, hconstants, proto; 4 | 5 | ProtoBuf = require("protobufjs"); 6 | 7 | ByteBuffer = require('protobufjs/node_modules/bytebuffer'); 8 | 9 | hconstants = require('./hconstants'); 10 | 11 | ProtoBuf.convertFieldsToCamelCase = true; 12 | 13 | builder = ProtoBuf.loadProtoFile("" + __dirname + "/../proto/ZooKeeper.proto"); 14 | 15 | proto = builder.build(); 16 | 17 | exports.decodeMeta = function(data) { 18 | var dataLength, dataOffset, len; 19 | if (data[0] !== hconstants.MAGIC) { 20 | return; 21 | } 22 | len = ByteBuffer.wrap(data).readInt32(1); 23 | dataLength = data.length - hconstants.MAGIC_SIZE - hconstants.ID_LENGTH_SIZE - len; 24 | dataOffset = hconstants.MAGIC_SIZE + hconstants.ID_LENGTH_SIZE + len; 25 | data = data.slice(dataOffset + 4, dataOffset + dataLength); 26 | return proto.MetaRegionServer.decode(data); 27 | }; 28 | 29 | }).call(this); 30 | -------------------------------------------------------------------------------- /lib/node_hbase/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-hbase", 3 | "version": "0.0.1", 4 | "description": "CoffeeScript HBase client implementation with protobuf support", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "DEBUG=-mocha*, ./node_modules/.bin/mocha --compilers coffee:coffee-script/register --require coffee-script test --reporter spec", 8 | 
"lint": "coffeelint --f coffeelint.json *.coffee lib/* test/*", 9 | "cov": "./node_modules/.bin/mocha --compilers coffee:coffee-script/register --require coffee-script -r blanket -R html-cov > coverage.html" 10 | }, 11 | "config": { 12 | "blanket": { 13 | "pattern": "/lib/", 14 | "data-cover-never": "node_modules", 15 | "loader": "./node-loaders/coffee-script" 16 | } 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "git://github.com/falsecz/node-hbase.git" 21 | }, 22 | "author": "", 23 | "license": "Apache", 24 | "bugs": { 25 | "url": "https://github.com/falsecz/node-hbase/issues" 26 | }, 27 | "homepage": "https://github.com/falsecz/node-hbase", 28 | "dependencies": { 29 | "protobufjs": "^2.2.1", 30 | "zookeeper-watcher": "^1.0.0", 31 | "debug": "^2.0.0", 32 | "readable-stream": "^1.0.26-4", 33 | "async": "^0.9.0" 34 | }, 35 | "devDependencies": { 36 | "chai": "~1.9.1", 37 | "mocha": "~1.19.0", 38 | "coffee-script": "~1.7.1", 39 | "coffeelint": "~1.4.1", 40 | "require-dir": "~0.1.0", 41 | "blanket": "~1.1.6" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/AccessControl.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 20 | option java_outer_classname = "AccessControlProtos"; 21 | option java_generic_services = true; 22 | option java_generate_equals_and_hash = true; 23 | option optimize_for = SPEED; 24 | 25 | import "HBase.proto"; 26 | 27 | message Permission { 28 | enum Action { 29 | READ = 0; 30 | WRITE = 1; 31 | EXEC = 2; 32 | CREATE = 3; 33 | ADMIN = 4; 34 | } 35 | enum Type { 36 | Global = 1; 37 | Namespace = 2; 38 | Table = 3; 39 | } 40 | required Type type = 1; 41 | optional GlobalPermission global_permission = 2; 42 | optional NamespacePermission namespace_permission = 3; 43 | optional TablePermission table_permission = 4; 44 | } 45 | 46 | message TablePermission { 47 | optional TableName table_name = 1; 48 | optional bytes family = 2; 49 | optional bytes qualifier = 3; 50 | repeated Permission.Action action = 4; 51 | } 52 | 53 | message NamespacePermission { 54 | optional bytes namespace_name = 1; 55 | repeated Permission.Action action = 2; 56 | } 57 | 58 | message GlobalPermission { 59 | repeated Permission.Action action = 1; 60 | } 61 | 62 | message UserPermission { 63 | required bytes user = 1; 64 | required Permission permission = 3; 65 | } 66 | 67 | /** 68 | * Content of the /hbase/acl/ znode. 
69 | */ 70 | message UsersAndPermissions { 71 | message UserPermissions { 72 | required bytes user = 1; 73 | repeated Permission permissions = 2; 74 | } 75 | 76 | repeated UserPermissions user_permissions = 1; 77 | } 78 | 79 | message GrantRequest { 80 | required UserPermission user_permission = 1; 81 | } 82 | 83 | message GrantResponse { 84 | } 85 | 86 | message RevokeRequest { 87 | required UserPermission user_permission = 1; 88 | } 89 | 90 | message RevokeResponse { 91 | } 92 | 93 | message GetUserPermissionsRequest { 94 | optional Permission.Type type = 1; 95 | optional TableName table_name = 2; 96 | optional bytes namespace_name = 3; 97 | } 98 | 99 | message GetUserPermissionsResponse { 100 | repeated UserPermission user_permission = 1; 101 | } 102 | 103 | message CheckPermissionsRequest { 104 | repeated Permission permission = 1; 105 | } 106 | 107 | message CheckPermissionsResponse { 108 | } 109 | 110 | service AccessControlService { 111 | rpc Grant(GrantRequest) 112 | returns (GrantResponse); 113 | 114 | rpc Revoke(RevokeRequest) 115 | returns (RevokeResponse); 116 | 117 | rpc GetUserPermissions(GetUserPermissionsRequest) 118 | returns (GetUserPermissionsResponse); 119 | 120 | rpc CheckPermissions(CheckPermissionsRequest) 121 | returns (CheckPermissionsResponse); 122 | } 123 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/Aggregate.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 20 | option java_outer_classname = "AggregateProtos"; 21 | option java_generic_services = true; 22 | option java_generate_equals_and_hash = true; 23 | option optimize_for = SPEED; 24 | 25 | import "Client.proto"; 26 | 27 | message AggregateRequest { 28 | /** The request passed to the AggregateService consists of three parts 29 | * (1) the (canonical) classname of the ColumnInterpreter implementation 30 | * (2) the Scan query 31 | * (3) any bytes required to construct the ColumnInterpreter object 32 | * properly 33 | */ 34 | required string interpreter_class_name = 1; 35 | required Scan scan = 2; 36 | optional bytes interpreter_specific_bytes = 3; 37 | } 38 | 39 | message AggregateResponse { 40 | /** 41 | * The AggregateService methods all have a response that either is a Pair 42 | * or a simple object. When it is a Pair both first_part and second_part 43 | * have defined values (and the second_part is not present in the response 44 | * when the response is not a pair). Refer to the AggregateImplementation 45 | * class for an overview of the AggregateResponse object constructions. 46 | */ 47 | repeated bytes first_part = 1; 48 | optional bytes second_part = 2; 49 | } 50 | 51 | /** Refer to the AggregateImplementation class for an overview of the 52 | * AggregateService method implementations and their functionality. 
53 | */ 54 | service AggregateService { 55 | rpc GetMax (AggregateRequest) returns (AggregateResponse); 56 | rpc GetMin (AggregateRequest) returns (AggregateResponse); 57 | rpc GetSum (AggregateRequest) returns (AggregateResponse); 58 | rpc GetRowNum (AggregateRequest) returns (AggregateResponse); 59 | rpc GetAvg (AggregateRequest) returns (AggregateResponse); 60 | rpc GetStd (AggregateRequest) returns (AggregateResponse); 61 | rpc GetMedian (AggregateRequest) returns (AggregateResponse); 62 | } 63 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/Authentication.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 20 | option java_outer_classname = "AuthenticationProtos"; 21 | option java_generic_services = true; 22 | option java_generate_equals_and_hash = true; 23 | option optimize_for = SPEED; 24 | 25 | message AuthenticationKey { 26 | required int32 id = 1; 27 | required int64 expiration_date = 2; 28 | required bytes key = 3; 29 | } 30 | 31 | 32 | message TokenIdentifier { 33 | enum Kind { 34 | HBASE_AUTH_TOKEN = 0; 35 | } 36 | required Kind kind = 1; 37 | required bytes username = 2; 38 | required int32 key_id = 3; 39 | optional int64 issue_date = 4; 40 | optional int64 expiration_date = 5; 41 | optional int64 sequence_number = 6; 42 | } 43 | 44 | 45 | // Serialization of the org.apache.hadoop.security.token.Token class 46 | // Note that this is a Hadoop class, so fields may change! 47 | message Token { 48 | // the TokenIdentifier in serialized form 49 | // Note: we can't use the protobuf directly because the Hadoop Token class 50 | // only stores the serialized bytes 51 | optional bytes identifier = 1; 52 | optional bytes password = 2; 53 | optional bytes service = 3; 54 | } 55 | 56 | 57 | // RPC request & response messages 58 | message GetAuthenticationTokenRequest { 59 | } 60 | 61 | message GetAuthenticationTokenResponse { 62 | optional Token token = 1; 63 | } 64 | 65 | message WhoAmIRequest { 66 | } 67 | 68 | message WhoAmIResponse { 69 | optional string username = 1; 70 | optional string auth_method = 2; 71 | } 72 | 73 | 74 | // RPC service 75 | service AuthenticationService { 76 | rpc GetAuthenticationToken(GetAuthenticationTokenRequest) 77 | returns (GetAuthenticationTokenResponse); 78 | 79 | rpc WhoAmI(WhoAmIRequest) 80 | returns (WhoAmIResponse); 81 | } 82 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/Cell.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to 
the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // Cell and KeyValue protos 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "CellProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | /** 27 | * The type of the key in a Cell 28 | */ 29 | enum CellType { 30 | MINIMUM = 0; 31 | PUT = 4; 32 | 33 | DELETE = 8; 34 | DELETE_COLUMN = 12; 35 | DELETE_FAMILY = 14; 36 | 37 | // MAXIMUM is used when searching; you look from maximum on down. 38 | MAXIMUM = 255; 39 | } 40 | 41 | /** 42 | * Protocol buffer version of Cell. 43 | */ 44 | message Cell { 45 | optional bytes row = 1; 46 | optional bytes family = 2; 47 | optional bytes qualifier = 3; 48 | optional uint64 timestamp = 4; 49 | optional CellType cell_type = 5; 50 | optional bytes value = 6; 51 | } 52 | 53 | /** 54 | * Protocol buffer version of KeyValue. 
55 | * It doesn't have those transient parameters 56 | */ 57 | message KeyValue { 58 | required bytes row = 1; 59 | required bytes family = 2; 60 | required bytes qualifier = 3; 61 | optional uint64 timestamp = 4; 62 | optional CellType key_type = 5; 63 | optional bytes value = 6; 64 | } 65 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/ClusterId.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are shared throughout HBase 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "ClusterIdProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | /** 27 | * Content of the '/hbase/hbaseid', cluster id, znode. 28 | * Also the content of the ${HBASE_ROOTDIR}/hbase.id file.
29 | */ 30 | message ClusterId { 31 | // This is the cluster id, a uuid as a String 32 | required string cluster_id = 1; 33 | } 34 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/Comparator.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are used for filters 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "ComparatorProtos"; 23 | option java_generic_services = true; 24 | option java_generate_equals_and_hash = true; 25 | option optimize_for = SPEED; 26 | 27 | // This file contains protocol buffers that are used for comparators (e.g. 
in filters) 28 | 29 | message Comparator { 30 | required string name = 1; 31 | optional bytes serialized_comparator = 2; 32 | } 33 | 34 | message ByteArrayComparable { 35 | optional bytes value = 1; 36 | } 37 | 38 | message BinaryComparator { 39 | required ByteArrayComparable comparable = 1; 40 | } 41 | 42 | message BinaryPrefixComparator { 43 | required ByteArrayComparable comparable = 1; 44 | } 45 | 46 | message BitComparator { 47 | required ByteArrayComparable comparable = 1; 48 | required BitwiseOp bitwise_op = 2; 49 | 50 | enum BitwiseOp { 51 | AND = 1; 52 | OR = 2; 53 | XOR = 3; 54 | } 55 | } 56 | 57 | message NullComparator { 58 | } 59 | 60 | message RegexStringComparator { 61 | required string pattern = 1; 62 | required int32 pattern_flags = 2; 63 | required string charset = 3; 64 | } 65 | 66 | message SubstringComparator { 67 | required string substr = 1; 68 | } 69 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/ErrorHandling.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | // This file contains protocol buffers that are used for error handling 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "ErrorHandlingProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | /** 27 | * Protobuf version of a java.lang.StackTraceElement 28 | * so we can serialize exceptions. 29 | */ 30 | message StackTraceElementMessage { 31 | optional string declaring_class = 1; 32 | optional string method_name = 2; 33 | optional string file_name = 3; 34 | optional int32 line_number = 4; 35 | } 36 | 37 | /** 38 | * Cause of a remote failure for a generic exception. Contains 39 | * all the information for a generic exception as well as 40 | * optional info about the error for generic info passing 41 | * (which should be another protobuffed class). 42 | */ 43 | message GenericExceptionMessage { 44 | optional string class_name = 1; 45 | optional string message = 2; 46 | optional bytes error_info = 3; 47 | repeated StackTraceElementMessage trace = 4; 48 | } 49 | 50 | /** 51 | * Exception sent across the wire when a remote task needs 52 | * to notify other tasks that it failed and why 53 | */ 54 | message ForeignExceptionMessage { 55 | optional string source = 1; 56 | optional GenericExceptionMessage generic_exception = 2; 57 | } 58 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/FS.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are written into the filesystem 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "FSProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | /** 27 | * The ${HBASE_ROOTDIR}/hbase.version file content 28 | */ 29 | message HBaseVersionFileContent { 30 | required string version = 1; 31 | } 32 | 33 | /** 34 | * Reference file content used when we split an hfile under a region. 35 | */ 36 | message Reference { 37 | required bytes splitkey = 1; 38 | enum Range { 39 | TOP = 0; 40 | BOTTOM = 1; 41 | } 42 | required Range range = 2; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/Filter.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are used for filters 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "FilterProtos"; 23 | option java_generic_services = true; 24 | option java_generate_equals_and_hash = true; 25 | option optimize_for = SPEED; 26 | 27 | import "HBase.proto"; 28 | import "Comparator.proto"; 29 | 30 | message Filter { 31 | required string name = 1; 32 | optional bytes serialized_filter = 2; 33 | } 34 | 35 | message ColumnCountGetFilter { 36 | required int32 limit = 1; 37 | } 38 | 39 | message ColumnPaginationFilter { 40 | required int32 limit = 1; 41 | optional int32 offset = 2; 42 | optional bytes column_offset = 3; 43 | } 44 | 45 | message ColumnPrefixFilter { 46 | required bytes prefix = 1; 47 | } 48 | 49 | message ColumnRangeFilter { 50 | optional bytes min_column = 1; 51 | optional bool min_column_inclusive = 2; 52 | optional bytes max_column = 3; 53 | optional bool max_column_inclusive = 4; 54 | } 55 | 56 | message CompareFilter { 57 | required CompareType compare_op = 1; 58 | optional Comparator comparator = 2; 59 | } 60 | 61 | message DependentColumnFilter { 62 | required CompareFilter compare_filter = 1; 63 | optional bytes column_family = 2; 64 | optional bytes column_qualifier = 3; 65 | optional bool drop_dependent_column = 4; 66 | } 67 | 68 | message FamilyFilter { 69 | required CompareFilter compare_filter = 1; 70 | } 71 | 72 | message FilterList { 73 | required Operator operator = 1; 74 | repeated Filter 
filters = 2; 75 | 76 | enum Operator { 77 | MUST_PASS_ALL = 1; 78 | MUST_PASS_ONE = 2; 79 | } 80 | } 81 | 82 | message FilterWrapper { 83 | required Filter filter = 1; 84 | } 85 | 86 | message FirstKeyOnlyFilter { 87 | } 88 | 89 | message FirstKeyValueMatchingQualifiersFilter { 90 | repeated bytes qualifiers = 1; 91 | } 92 | 93 | message FuzzyRowFilter { 94 | repeated BytesBytesPair fuzzy_keys_data = 1; 95 | } 96 | 97 | message InclusiveStopFilter { 98 | optional bytes stop_row_key = 1; 99 | } 100 | 101 | message KeyOnlyFilter { 102 | required bool len_as_val = 1; 103 | } 104 | 105 | message MultipleColumnPrefixFilter { 106 | repeated bytes sorted_prefixes = 1; 107 | } 108 | 109 | message PageFilter { 110 | required int64 page_size = 1; 111 | } 112 | 113 | message PrefixFilter { 114 | optional bytes prefix = 1; 115 | } 116 | 117 | message QualifierFilter { 118 | required CompareFilter compare_filter = 1; 119 | } 120 | 121 | message RandomRowFilter { 122 | required float chance = 1; 123 | } 124 | 125 | message RowFilter { 126 | required CompareFilter compare_filter = 1; 127 | } 128 | 129 | message SingleColumnValueExcludeFilter { 130 | required SingleColumnValueFilter single_column_value_filter = 1; 131 | } 132 | 133 | message SingleColumnValueFilter { 134 | optional bytes column_family = 1; 135 | optional bytes column_qualifier = 2; 136 | required CompareType compare_op = 3; 137 | required Comparator comparator = 4; 138 | optional bool filter_if_missing = 5; 139 | optional bool latest_version_only = 6; 140 | } 141 | 142 | message SkipFilter { 143 | required Filter filter = 1; 144 | } 145 | 146 | message TimestampsFilter { 147 | repeated int64 timestamps = 1 [packed=true]; 148 | } 149 | 150 | message ValueFilter { 151 | required CompareFilter compare_filter = 1; 152 | } 153 | 154 | message WhileMatchFilter { 155 | required Filter filter = 1; 156 | } 157 | -------------------------------------------------------------------------------- 
/lib/node_hbase/proto/HBase.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are shared throughout HBase 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "HBaseProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | import "Cell.proto"; 27 | 28 | /** 29 | * Table Name 30 | */ 31 | message TableName { 32 | required bytes namespace = 1; 33 | required bytes qualifier = 2; 34 | } 35 | 36 | /** 37 | * Table Schema 38 | * Inspired by the rest TableSchema 39 | */ 40 | message TableSchema { 41 | optional TableName table_name = 1; 42 | repeated BytesBytesPair attributes = 2; 43 | repeated ColumnFamilySchema column_families = 3; 44 | repeated NameStringPair configuration = 4; 45 | } 46 | 47 | /** 48 | * Column Family Schema 49 | * Inspired by the rest ColumnSchemaMessage 50 | */ 51 | message ColumnFamilySchema { 52 | required bytes name = 1; 53 | repeated BytesBytesPair attributes = 2; 54 | repeated NameStringPair configuration = 3;
55 | } 56 | 57 | /** 58 | * Protocol buffer version of HRegionInfo. 59 | */ 60 | message RegionInfo { 61 | required uint64 region_id = 1; 62 | required TableName table_name = 2; 63 | optional bytes start_key = 3; 64 | optional bytes end_key = 4; 65 | optional bool offline = 5; 66 | optional bool split = 6; 67 | } 68 | 69 | /** 70 | * Protocol buffer for favored nodes 71 | */ 72 | message FavoredNodes { 73 | repeated ServerName favored_node = 1; 74 | } 75 | 76 | /** 77 | * Container protocol buffer to specify a region. 78 | * You can specify region by region name, or the hash 79 | * of the region name, which is known as encoded 80 | * region name. 81 | */ 82 | message RegionSpecifier { 83 | required RegionSpecifierType type = 1; 84 | required bytes value = 2; 85 | 86 | enum RegionSpecifierType { 87 | // <tablename>,<startkey>,<regionId>.<encodedName> 88 | REGION_NAME = 1; 89 | 90 | // hash of <tablename>,<startkey>,<regionId> 91 | ENCODED_REGION_NAME = 2; 92 | } 93 | } 94 | 95 | /** 96 | * A range of time. Both from and to are Java time 97 | * stamp in milliseconds. If you don't specify a time 98 | * range, it means all time.
By default, if not 99 | * specified, from = 0, and to = Long.MAX_VALUE 100 | */ 101 | message TimeRange { 102 | optional uint64 from = 1; 103 | optional uint64 to = 2; 104 | } 105 | 106 | /* Comparison operators */ 107 | enum CompareType { 108 | LESS = 0; 109 | LESS_OR_EQUAL = 1; 110 | EQUAL = 2; 111 | NOT_EQUAL = 3; 112 | GREATER_OR_EQUAL = 4; 113 | GREATER = 5; 114 | NO_OP = 6; 115 | } 116 | 117 | /** 118 | * Protocol buffer version of ServerName 119 | */ 120 | message ServerName { 121 | required string host_name = 1; 122 | optional uint32 port = 2; 123 | optional uint64 start_code = 3; 124 | } 125 | 126 | // Common data structures 127 | 128 | message Coprocessor { 129 | required string name = 1; 130 | } 131 | 132 | message NameStringPair { 133 | required string name = 1; 134 | required string value = 2; 135 | } 136 | 137 | message NameBytesPair { 138 | required string name = 1; 139 | optional bytes value = 2; 140 | } 141 | 142 | message BytesBytesPair { 143 | required bytes first = 1; 144 | required bytes second = 2; 145 | } 146 | 147 | message NameInt64Pair { 148 | optional string name = 1; 149 | optional int64 value = 2; 150 | } 151 | 152 | /** 153 | * Description of the snapshot to take 154 | */ 155 | message SnapshotDescription { 156 | required string name = 1; 157 | optional string table = 2; // not needed for delete, but checked for in taking snapshot 158 | optional int64 creation_time = 3 [default = 0]; 159 | enum Type { 160 | DISABLED = 0; 161 | FLUSH = 1; 162 | } 163 | optional Type type = 4 [default = FLUSH]; 164 | optional int32 version = 5; 165 | } 166 | 167 | message EmptyMsg { 168 | } 169 | 170 | message LongMsg { 171 | required int64 long_msg = 1; 172 | 173 | } 174 | 175 | message BigDecimalMsg { 176 | required bytes bigdecimal_msg = 1; 177 | } 178 | 179 | message UUID { 180 | required uint64 least_sig_bits = 1; 181 | required uint64 most_sig_bits = 2; 182 | } 183 | 184 | message NamespaceDescriptor { 185 | required bytes name = 1; 186 | repeated
NameStringPair configuration = 2; 187 | } 188 | 189 | /** 190 | * Description of the region server info 191 | */ 192 | message RegionServerInfo { 193 | optional int32 infoPort = 1; 194 | } 195 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/HFile.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 19 | option java_outer_classname = "HFileProtos"; 20 | option java_generic_services = true; 21 | option java_generate_equals_and_hash = true; 22 | option optimize_for = SPEED; 23 | 24 | import "HBase.proto"; 25 | 26 | // Map of name/values 27 | message FileInfoProto { 28 | repeated BytesBytesPair map_entry = 1; 29 | } 30 | 31 | // HFile file trailer 32 | message FileTrailerProto { 33 | optional uint64 file_info_offset = 1; 34 | optional uint64 load_on_open_data_offset = 2; 35 | optional uint64 uncompressed_data_index_size = 3; 36 | optional uint64 total_uncompressed_bytes = 4; 37 | optional uint32 data_index_count = 5; 38 | optional uint32 meta_index_count = 6; 39 | optional uint64 entry_count = 7; 40 | optional uint32 num_data_index_levels = 8; 41 | optional uint64 first_data_block_offset = 9; 42 | optional uint64 last_data_block_offset = 10; 43 | optional string comparator_class_name = 11; 44 | optional uint32 compression_codec = 12; 45 | } 46 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/LoadBalancer.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers to represent the state of the load balancer. 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "LoadBalancerProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | message LoadBalancerState { 27 | optional bool balancer_on = 1; 28 | } 29 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/MapReduce.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | //This file includes protocol buffers used in MapReduce only. 
20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "MapReduceProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | import "HBase.proto"; 27 | 28 | message ScanMetrics { 29 | repeated NameInt64Pair metrics = 1; 30 | } 31 | 32 | message TableSnapshotRegionSplit { 33 | optional RegionSpecifier region = 1; 34 | repeated string locations = 2; 35 | } 36 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/MultiRowMutation.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | import "Client.proto"; 19 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 20 | option java_outer_classname = "MultiRowMutationProtos"; 21 | option java_generate_equals_and_hash = true; 22 | option java_generic_services = true; 23 | option optimize_for = SPEED; 24 | 25 | message MultiRowMutationProcessorRequest{ 26 | } 27 | 28 | message MultiRowMutationProcessorResponse{ 29 | } 30 | 31 | message MutateRowsRequest { 32 | repeated MutationProto mutation_request = 1; 33 | } 34 | 35 | message MutateRowsResponse { 36 | } 37 | 38 | service MultiRowMutationService { 39 | rpc MutateRows(MutateRowsRequest) 40 | returns(MutateRowsResponse); 41 | } -------------------------------------------------------------------------------- /lib/node_hbase/proto/RPC.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | import "Tracing.proto"; 19 | import "HBase.proto"; 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "RPCProtos"; 23 | option java_generate_equals_and_hash = true; 24 | option optimize_for = SPEED; 25 | 26 | // See https://issues.apache.org/jira/browse/HBASE-7898 for high-level 27 | // description of RPC specification. 28 | // 29 | // On connection setup, the client sends six bytes of preamble -- a four 30 | // byte magic, a byte of version, and a byte of authentication type. 31 | // 32 | // We then send a "ConnectionHeader" protobuf of user information and the 33 | // 'protocol' or 'service' that is to be run over this connection as well as 34 | // info such as codecs and compression to use when we send cell blocks (see below). 35 | // This connection header protobuf is prefaced by an int that holds the length 36 | // of this connection header (this is NOT a varint). The pb connection header 37 | // is sent with Message#writeTo. The server throws an exception if it doesn't 38 | // like what it was sent noting what it is objecting to. Otherwise, the server 39 | // says nothing and is open for business. 40 | // 41 | // Hereafter the client makes requests and the server returns responses. 42 | // 43 | // Requests look like this: 44 | // 45 | // <An int with the total length of the request> 46 | // <RequestHeader Message> 47 | // <Optionally a Request Parameter Message> 48 | // <Optionally a Cell block> 49 | // 50 | // ...where the Request Parameter Message is whatever the method name stipulated 51 | // in the RequestHeader expects; e.g. if the method is a scan, then the pb 52 | // Request Message is a GetRequest, or a ScanRequest. A block of Cells 53 | // optionally follows. The presence of a Request param Message and/or a 54 | // block of Cells will be noted in the RequestHeader. 
55 | // 56 | // Response is the mirror of the request: 57 | // 58 | // <An int with the total length of the response> 59 | // <ResponseHeader Message> 60 | // <Optionally a Response Message> 61 | // <Optionally a Cell block> 62 | // 63 | // ...where the Response Message is the response type that goes with the 64 | // method specified when making the request and the follow on Cell blocks may 65 | // or may not be there -- read the response header to find out if one is following. 66 | // If an exception, it will be included inside the Response Header. 67 | // 68 | // Any time we write a pb, we do it with Message#writeDelimitedTo EXCEPT when 69 | // the connection header is sent; this is prefaced by an int with its length 70 | // and the pb connection header is then written with Message#writeTo. 71 | // 72 | 73 | // User Information proto. Included in ConnectionHeader on connection setup 74 | message UserInformation { 75 | required string effective_user = 1; 76 | optional string real_user = 2; 77 | } 78 | 79 | // This is sent on connection setup after the connection preamble is sent. 80 | message ConnectionHeader { 81 | optional UserInformation user_info = 1; 82 | optional string service_name = 2; 83 | // Cell block codec we will use sending over optional cell blocks. Server throws exception 84 | // if cannot deal. Null means no codec'ing going on so we are pb all the time (SLOW!!!) 85 | optional string cell_block_codec_class = 3; 86 | // Compressor we will use if cell block is compressed. Server will throw exception if not supported. 87 | // Class must implement hadoop's CompressionCodec Interface. Can't compress if no codec. 88 | optional string cell_block_compressor_class = 4; 89 | } 90 | 91 | // Optional Cell block Message. Included in client RequestHeader 92 | message CellBlockMeta { 93 | // Length of the following cell block. Could calculate it but convenient having it to hand. 94 | optional uint32 length = 1; 95 | } 96 | 97 | // At the RPC layer, this message is used to carry 98 | // the server side exception to the RPC client. 
99 | message ExceptionResponse { 100 | // Class name of the exception thrown from the server 101 | optional string exception_class_name = 1; 102 | // Exception stack trace from the server side 103 | optional string stack_trace = 2; 104 | // Optional hostname. Filled in for some exceptions such as region moved 105 | // where exception gives clue on where the region may have moved. 106 | optional string hostname = 3; 107 | optional int32 port = 4; 108 | // Set if we are NOT to retry on receipt of this exception 109 | optional bool do_not_retry = 5; 110 | } 111 | 112 | // Header sent making a request. 113 | message RequestHeader { 114 | // Monotonically increasing call_id to keep track of RPC requests and their response 115 | optional uint32 call_id = 1; 116 | optional RPCTInfo trace_info = 2; 117 | optional string method_name = 3; 118 | // If true, then a pb Message param follows. 119 | optional bool request_param = 4; 120 | // If present, then an encoded data block follows. 121 | optional CellBlockMeta cell_block_meta = 5; 122 | // 0 is NORMAL priority. 100 is HIGH. If no priority, treat it as NORMAL. 123 | // See HConstants. 124 | optional uint32 priority = 6; 125 | } 126 | 127 | message ResponseHeader { 128 | optional uint32 call_id = 1; 129 | // If present, then request threw an exception and no response message (else we presume one) 130 | optional ExceptionResponse exception = 2; 131 | // If present, then an encoded data block follows. 132 | optional CellBlockMeta cell_block_meta = 3; 133 | } 134 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/RegionServerStatus.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // This file contains protocol buffers that are used for RegionServerStatusProtocol. 20 | 21 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 22 | option java_outer_classname = "RegionServerStatusProtos"; 23 | option java_generic_services = true; 24 | option java_generate_equals_and_hash = true; 25 | option optimize_for = SPEED; 26 | 27 | import "HBase.proto"; 28 | import "ClusterStatus.proto"; 29 | 30 | message RegionServerStartupRequest { 31 | /** Port number this regionserver is up on */ 32 | required uint32 port = 1; 33 | 34 | /** This server's startcode */ 35 | required uint64 server_start_code = 2; 36 | 37 | /** Current time of the region server in ms */ 38 | required uint64 server_current_time = 3; 39 | } 40 | 41 | message RegionServerStartupResponse { 42 | /** 43 | * Configuration for the regionserver to use: e.g. 
filesystem, 44 | * hbase rootdir, the hostname to use creating the RegionServer ServerName, 45 | * etc 46 | */ 47 | repeated NameStringPair map_entries = 1; 48 | } 49 | 50 | message RegionServerReportRequest { 51 | required ServerName server = 1; 52 | 53 | /** load the server is under */ 54 | optional ServerLoad load = 2; 55 | } 56 | 57 | message RegionServerReportResponse { 58 | } 59 | 60 | message ReportRSFatalErrorRequest { 61 | /** name of the server experiencing the error */ 62 | required ServerName server = 1; 63 | 64 | /** informative text to expose in the master logs and UI */ 65 | required string error_message = 2; 66 | } 67 | 68 | message ReportRSFatalErrorResponse { 69 | } 70 | 71 | message GetLastFlushedSequenceIdRequest { 72 | /** region name */ 73 | required bytes region_name = 1; 74 | } 75 | 76 | message GetLastFlushedSequenceIdResponse { 77 | /** the last HLog sequence id flushed from MemStore to HFile for the region */ 78 | required uint64 last_flushed_sequence_id = 1; 79 | } 80 | 81 | service RegionServerStatusService { 82 | /** Called when a region server first starts. */ 83 | rpc RegionServerStartup(RegionServerStartupRequest) 84 | returns(RegionServerStartupResponse); 85 | 86 | /** Called to report the load the RegionServer is under. */ 87 | rpc RegionServerReport(RegionServerReportRequest) 88 | returns(RegionServerReportResponse); 89 | 90 | /** 91 | * Called by a region server to report a fatal error that is causing it to 92 | * abort. 93 | */ 94 | rpc ReportRSFatalError(ReportRSFatalErrorRequest) 95 | returns(ReportRSFatalErrorResponse); 96 | 97 | /** Called to get the sequence id of the last MemStore entry flushed to an 98 | * HFile for a specified region. Used by the region server to speed up 99 | * log splitting. 
*/ 100 | rpc GetLastFlushedSequenceId(GetLastFlushedSequenceIdRequest) 101 | returns(GetLastFlushedSequenceIdResponse); 102 | } 103 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/RowProcessor.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | /** 19 | * Defines a protocol to perform multi row transactions. 20 | * See BaseRowProcessorEndpoint for the implementation. 21 | * See HRegion#processRowsWithLocks() for details. 
22 | */ 23 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 24 | option java_outer_classname = "RowProcessorProtos"; 25 | option java_generic_services = true; 26 | option java_generate_equals_and_hash = true; 27 | option optimize_for = SPEED; 28 | 29 | message ProcessRequest { 30 | required string row_processor_class_name = 1; 31 | optional string row_processor_initializer_message_name = 2; 32 | optional bytes row_processor_initializer_message = 3; 33 | } 34 | 35 | message ProcessResponse { 36 | required bytes row_processor_result = 1; 37 | } 38 | 39 | service RowProcessorService { 40 | rpc Process(ProcessRequest) returns (ProcessResponse); 41 | } 42 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/SecureBulkLoad.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 20 | option java_outer_classname = "SecureBulkLoadProtos"; 21 | option java_generic_services = true; 22 | option java_generate_equals_and_hash = true; 23 | option optimize_for = SPEED; 24 | 25 | import 'HBase.proto'; 26 | import 'Client.proto'; 27 | 28 | message SecureBulkLoadHFilesRequest { 29 | repeated BulkLoadHFileRequest.FamilyPath family_path = 1; 30 | optional bool assign_seq_num = 2; 31 | required DelegationToken fs_token = 3; 32 | required string bulk_token = 4; 33 | } 34 | 35 | message SecureBulkLoadHFilesResponse { 36 | required bool loaded = 1; 37 | } 38 | 39 | message DelegationToken { 40 | optional bytes identifier = 1; 41 | optional bytes password = 2; 42 | optional string kind = 3; 43 | optional string service = 4; 44 | } 45 | 46 | message PrepareBulkLoadRequest { 47 | required TableName table_name = 1; 48 | } 49 | 50 | message PrepareBulkLoadResponse { 51 | required string bulk_token = 1; 52 | } 53 | 54 | message CleanupBulkLoadRequest { 55 | required string bulk_token = 1; 56 | 57 | } 58 | 59 | message CleanupBulkLoadResponse { 60 | } 61 | 62 | service SecureBulkLoadService { 63 | rpc PrepareBulkLoad(PrepareBulkLoadRequest) 64 | returns (PrepareBulkLoadResponse); 65 | 66 | rpc SecureBulkLoadHFiles(SecureBulkLoadHFilesRequest) 67 | returns (SecureBulkLoadHFilesResponse); 68 | 69 | rpc CleanupBulkLoad(CleanupBulkLoadRequest) 70 | returns (CleanupBulkLoadResponse); 71 | } 72 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/Tracing.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 19 | option java_outer_classname = "TracingProtos"; 20 | option java_generate_equals_and_hash = true; 21 | option optimize_for = SPEED; 22 | 23 | //Used to pass through the information necessary to continue 24 | //a trace after an RPC is made. All we need is the traceid 25 | //(so we know the overarching trace this message is a part of), and 26 | //the id of the current span when this message was sent, so we know 27 | //what span caused the new span we will create when this message is received. 28 | message RPCTInfo { 29 | optional int64 trace_id = 1; 30 | optional int64 parent_id = 2; 31 | } 32 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/WAL.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 19 | option java_outer_classname = "WALProtos"; 20 | option java_generic_services = false; 21 | option java_generate_equals_and_hash = true; 22 | option optimize_for = SPEED; 23 | 24 | import "HBase.proto"; 25 | 26 | message WALHeader { 27 | optional bool has_compression = 1; 28 | } 29 | 30 | // Protocol buffer version of HLogKey; see HLogKey comment, not really a key but WALEdit header for some KVs 31 | message WALKey { 32 | required bytes encoded_region_name = 1; 33 | required bytes table_name = 2; 34 | required uint64 log_sequence_number = 3; 35 | required uint64 write_time = 4; 36 | /* 37 | This parameter is deprecated in favor of clusters which 38 | contains the list of clusters that have consumed the change. 39 | It is retained so that the log created by earlier releases (0.94) 40 | can be read by the newer releases. 
41 | */ 42 | optional UUID cluster_id = 5 [deprecated=true]; 43 | 44 | repeated FamilyScope scopes = 6; 45 | optional uint32 following_kv_count = 7; 46 | /* 47 | This field contains the list of clusters that have 48 | consumed the change 49 | */ 50 | repeated UUID cluster_ids = 8; 51 | /* 52 | optional CustomEntryType custom_entry_type = 9; 53 | 54 | enum CustomEntryType { 55 | COMPACTION = 0; 56 | } 57 | */ 58 | } 59 | 60 | enum ScopeType { 61 | REPLICATION_SCOPE_LOCAL = 0; 62 | REPLICATION_SCOPE_GLOBAL = 1; 63 | } 64 | 65 | message FamilyScope { 66 | required bytes family = 1; 67 | required ScopeType scope_type = 2; 68 | } 69 | 70 | /** 71 | * Custom WAL entries 72 | */ 73 | 74 | /** 75 | * Special WAL entry to hold all information related to a compaction. 76 | * Written to WAL before completing compaction. There is 77 | * sufficient info in the below message to complete later 78 | * the compaction should we fail the WAL write. 79 | */ 80 | message CompactionDescriptor { 81 | required bytes table_name = 1; // TODO: WALKey already stores these, might remove 82 | required bytes encoded_region_name = 2; 83 | required bytes family_name = 3; 84 | repeated string compaction_input = 4; 85 | repeated string compaction_output = 5; 86 | required string store_home_dir = 6; 87 | } 88 | 89 | /** 90 | * A trailer that is appended to the end of a properly closed HLog WAL file. 91 | * If missing, this is either a legacy or a corrupted WAL file. 92 | */ 93 | message WALTrailer { 94 | } 95 | -------------------------------------------------------------------------------- /lib/node_hbase/proto/ZooKeeper.proto: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | // ZNode data in hbase are serialized protobufs with a four byte 20 | // 'magic' 'PBUF' prefix. 21 | 22 | option java_package = "org.apache.hadoop.hbase.protobuf.generated"; 23 | option java_outer_classname = "ZooKeeperProtos"; 24 | option java_generic_services = true; 25 | option java_generate_equals_and_hash = true; 26 | option optimize_for = SPEED; 27 | 28 | import "HBase.proto"; 29 | 30 | /** 31 | * Content of the meta-region-server znode. 32 | */ 33 | message MetaRegionServer { 34 | // The ServerName hosting the meta region currently. 35 | required ServerName server = 1; 36 | // The major version of the rpc the server speaks. This is used so that 37 | // clients connecting to the cluster can have prior knowledge of what version 38 | // to send to a RegionServer. AsyncHBase will use this to detect versions. 39 | optional uint32 rpc_version = 2; 40 | } 41 | 42 | /** 43 | * Content of the master znode. 44 | */ 45 | message Master { 46 | // The ServerName of the current Master 47 | required ServerName master = 1; 48 | // Major RPC version so that clients can know what version the master can accept. 49 | optional uint32 rpc_version = 2; 50 | } 51 | 52 | /** 53 | * Content of the '/hbase/running', cluster state, znode. 54 | */ 55 | message ClusterUp { 56 | // If this znode is present, cluster is up. 
Currently 57 | // the data is cluster start_date. 58 | required string start_date = 1; 59 | } 60 | 61 | /** 62 | * What we write under unassigned up in zookeeper as a region moves through 63 | * open/close, etc., regions. Details a region in transition. 64 | */ 65 | message RegionTransition { 66 | // Code for EventType gotten by doing o.a.h.h.EventHandler.EventType.getCode() 67 | required uint32 event_type_code = 1; 68 | // Full regionname in bytes 69 | required bytes region_name = 2; 70 | required uint64 create_time = 3; 71 | // The region server where the transition will happen or is happening 72 | required ServerName server_name = 4; 73 | optional bytes payload = 5; 74 | } 75 | 76 | /** 77 | * WAL SplitLog directory znodes have this for content. Used doing distributed 78 | * WAL splitting. Holds current state and name of server that originated split. 79 | */ 80 | message SplitLogTask { 81 | enum State { 82 | UNASSIGNED = 0; 83 | OWNED = 1; 84 | RESIGNED = 2; 85 | DONE = 3; 86 | ERR = 4; 87 | } 88 | required State state = 1; 89 | required ServerName server_name = 2; 90 | } 91 | 92 | /** 93 | * The znode that holds state of table. 94 | */ 95 | message Table { 96 | // Table's current state 97 | enum State { 98 | ENABLED = 0; 99 | DISABLED = 1; 100 | DISABLING = 2; 101 | ENABLING = 3; 102 | } 103 | // This is the table's state. If no znode for a table, 104 | // its state is presumed enabled. See o.a.h.h.zookeeper.ZKTable class 105 | // for more. 106 | required State state = 1 [default = ENABLED]; 107 | } 108 | 109 | /** 110 | * Used by replication. Holds a replication peer key. 111 | */ 112 | message ReplicationPeer { 113 | // clusterkey is the concatenation of the slave cluster's 114 | // hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent 115 | required string clusterkey = 1; 116 | } 117 | 118 | /** 119 | * Used by replication. 
Holds whether enabled or disabled 120 | */ 121 | message ReplicationState { 122 | enum State { 123 | ENABLED = 0; 124 | DISABLED = 1; 125 | } 126 | required State state = 1; 127 | } 128 | 129 | /** 130 | * Used by replication. Holds the current position in an HLog file. 131 | */ 132 | message ReplicationHLogPosition { 133 | required int64 position = 1; 134 | } 135 | 136 | /** 137 | * Used by replication. Used to lock a region server during failover. 138 | */ 139 | message ReplicationLock { 140 | required string lock_owner = 1; 141 | } 142 | 143 | /** 144 | * Metadata associated with a table lock in zookeeper 145 | */ 146 | message TableLock { 147 | optional TableName table_name = 1; 148 | optional ServerName lock_owner = 2; 149 | optional int64 thread_id = 3; 150 | optional bool is_shared = 4; 151 | optional string purpose = 5; 152 | optional int64 create_time = 6; 153 | } 154 | 155 | /** 156 | * sequence Id of a store 157 | */ 158 | message StoreSequenceId { 159 | required bytes family_name = 1; 160 | required uint64 sequence_id = 2; 161 | } 162 | 163 | /** 164 | * contains a sequence id of a region which should be the minimum of its store sequence ids and 165 | * list sequence ids of the region's stores 166 | */ 167 | message RegionStoreSequenceIds { 168 | required uint64 last_flushed_sequence_id = 1; 169 | repeated StoreSequenceId store_sequence_id = 2; 170 | } 171 | -------------------------------------------------------------------------------- /lib/phantomjs/phantomjs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/lib/phantomjs/phantomjs -------------------------------------------------------------------------------- /lib/phantomjs/phantomjs.exe: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/lib/phantomjs/phantomjs.exe -------------------------------------------------------------------------------- /lib/phantomjs/phantomjs32: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahkimkoo/neocrawler/337259b53873090161490c5b1ad4d41b52ee73a7/lib/phantomjs/phantomjs32 -------------------------------------------------------------------------------- /lib/phantomjs/webpatch.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by james on 13-11-27. 3 | * web patch 4 | */ 5 | if (window._phantom) { 6 | // Patch since PhantomJS does not implement click() on HTMLElement. In some 7 | // cases we need to execute the native click on an element. However, jQuery's 8 | // $.fn.click() does not dispatch to the native function on elements, so we 9 | // can't use it in our implementations: $el[0].click() to correctly dispatch. 
10 | if (!HTMLElement.prototype.click) { 11 | HTMLElement.prototype.click = function() { 12 | var ev = document.createEvent('MouseEvent'); 13 | ev.initMouseEvent( 14 | 'click', 15 | /*bubble*/true, /*cancelable*/true, 16 | window, null, 17 | 0, 0, 0, 0, /*coordinates*/ 18 | false, false, false, false, /*modifier keys*/ 19 | 0/*button=left*/, null 20 | ); 21 | this.dispatchEvent(ev); 22 | }; 23 | } 24 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neocrawler", 3 | "description": "Nodejs Distribute Crawler", 4 | "keywords": [ 5 | "nodejs", 6 | "crawler", 7 | "phantomjs" 8 | ], 9 | "author": { 10 | "name": "James", 11 | "email": "successage@gmail.com", 12 | "url": "http://my.oschina.net/waterbear/blog" 13 | }, 14 | "version": "2.0.0", 15 | "main": "./run.js", 16 | "bin": { 17 | "run": "./run.js" 18 | }, 19 | "repository": { 20 | "type": "git", 21 | "url": "http://git.oschina.net/dreamidea/neocrawler.git" 22 | }, 23 | "dependencies": { 24 | "async": ">=0.2.10", 25 | "bufferhelper": "0.2.0", 26 | "bytebuffer": "^5.0.1", 27 | "cheerio": "0.12.4", 28 | "debug": "^1.0.0", 29 | "ejs": "0.8.5", 30 | "express": "3.3.4", 31 | "generic-pool": ">=2.1.1", 32 | "hbase": "0.1.6", 33 | "iconv-lite": "0.2.11", 34 | "log4js": "0.6.9", 35 | "mongodb": "^2.1.4", 36 | "mysql": "2.0.0", 37 | "optimist": "0.6.0", 38 | "protobufjs": "^2.2.1", 39 | "readable-stream": "^1.0.26-4", 40 | "redis": ">=0.12.1", 41 | "request": "2.33.0", 42 | "stylus": "0.40.3", 43 | "thrift": "0.9.1", 44 | "zookeeper-watcher": "^0.2.0" 45 | }, 46 | "scripts": { 47 | "start": "node run.js", 48 | "test": "node test/test.js" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /run.js: -------------------------------------------------------------------------------- 1 | /** 2 | * ux crawler entrance 3 | */ 4 | ////log 
/**
 * ux crawler entrance
 *
 * Parses the command line, loads ./instance/<instance>/settings.json and
 * dispatches to one of the actions: crawl, proxy, config, schedule or test.
 * Any other action prints the usage text.
 */
////log setting////////////////////////////////////////////////////////////////////
var logging = require('./lib/logging.js');
////arguments parse///////////////////////////////////////////////////////////////
var userArgv = require('optimist')
    .usage('Usage: $0 -i [instance name] -a [crawl|test|config|proxy|schedule] -p [num] -l[url] -h')
    .options('i', {
        'alias' : 'instance',
        'default' : 'pengtouba',
        'describe' : 'Specify a instance',
        'demand' : true
    })
    .options('a', {
        'alias' : 'action',
        'default' : 'crawl',
        'describe' : 'Specify a action[crawl|test|config|proxy|schedule]',
        'demand' : true
    })
    .options('p', {
        'alias' : 'port',
        'default' : 2013,
        'describe' : 'Specify a service port, for config service and proxy router'
    })
    .options('l', {
        'alias' : 'link',
        'default' : '',
        'describe' : 'Specify a url to test crawling'
    })
    .options('h', {
        'alias' : 'help',
        'describe' : 'Help infomation'
    });

var options = userArgv.argv;
if(options['h']){userArgv.showHelp();process.exit();}

////settings//////////////////////////////////////////////////////////////////
// Fail with a readable message instead of a raw require() stack trace when
// the instance directory or its settings.json is missing or malformed.
var settings;
try{
    settings = require('./instance/'+options['i']+'/'+'settings.json');
}catch(e){
    console.error('Can not load settings of instance "'+options['i']+'": '+e.message);
    process.exit(1);
}
settings['instance'] = options['i'];
////log level/////////////////////////////////////////////////////////////////
var log_level = settings['log_level'] || 'DEBUG';

/**
 * Creates a logger for the current instance and stores it in the shared
 * settings object, which is handed to every service constructor below.
 * @param {string} name  logger category
 * @param {string} level log level passed to logging.getLogger
 */
var attachLogger = function(name, level){
    settings['logger'] = logging.getLogger(name,options['i'],level);
};

/**
 * Reads the -p option as a base-10 integer and stores it in settings['port'].
 * Exits with an error when the value is not a number.
 */
var attachPort = function(){
    var port = parseInt(options['p'], 10);
    if(isNaN(port)){
        console.error('Invalid port: '+options['p']);
        process.exit(1);
    }
    settings['port'] = port;
};

////crawling action///////////////////////////////////////////////////////////
var crawling = function(){
    attachLogger('crawling', log_level);
    var spider = new (require('./spider'))(settings);

    spider.start();
};
////proxy Service////////////////////////////////////////////////////////////
var proxyService = function(){
    attachLogger('proxy-service', log_level);
    attachPort();
    var proxyRouter = new (require('./proxyrouter'))(settings);

    proxyRouter.start();
};
////config service////////////////////////////////////////////////////////////
var configService = function(){
    attachLogger('config-service', log_level);
    attachPort();
    var webConfig = new(require('./webconfig'))(settings);

    webConfig.start();
};
////scheduler///////////////////////////////////////////////////////////////
var schedule = function(){
    attachLogger('schedule', log_level);
    var scheduler = new (require('./scheduler'))(settings);

    scheduler.start();
};

////test url/////////////////////////////////////////////////////////////////
var testUrl = function(){
    var link = options['l'];
    // "-l" without a value is parsed as boolean true; treat that, and the
    // empty default, as a missing url instead of silently doing nothing.
    if(typeof link !== 'string' || link === ''){
        console.error('The test action requires a url, use -l [url]');
        userArgv.showHelp();
        process.exit(1);
    }
    // Test runs always log at DEBUG and never go through a proxy.
    attachLogger('crawling-testing', 'DEBUG');
    settings['test'] = true;
    settings['use_proxy'] = false;
    var spider = new (require('./spider'))(settings);

    spider.test(link);
};

////route/////////////////////////////////////////////////////////////////////
switch(options['a']){
    case 'crawl':
        crawling();
        break;
    case 'proxy':
        proxyService();
        break;
    case 'config':
        configService();
        break;
    case 'schedule':
        schedule();
        break;
    case 'test':
        testUrl();
        break;
    default:
        userArgv.showHelp();
}
/**
 * Created by james on 13-12-31.
 *
 * Manual test snippets for the extractor, the request library and the two
 * HBase clients. Nothing runs by default; uncomment the calls at the bottom
 * of the file to execute a test.
 */
var http = require('http');
var iconv = require('iconv-lite');
var BufferHelper = require('bufferhelper');
var assert = require('assert');
var util = require('util');
require('../lib/jsextend.js');

var logging = require('../lib/logging.js');
var logger = logging.getLogger('testing','spaceux','DEBUG');
// NOTE(review): this expects an instance directory named "spaceux" to exist - confirm.
var settings = require('../instance/spaceux/settings.json');
settings['instance'] = 'spaceux';
settings['logger'] = logger;
var spider = new (require('../spider'))(settings);
//spider.start();

var AMAZON_LIST_URL = 'http://www.amazon.cn/s/ref=sr_ex_n_1?rh=n%3A2127215051&bbn=2127215051&ie=UTF8&qid=1387944813';

/**
 * Downloads a page and decodes its body as utf-8.
 * @param {string} url page to fetch
 * @param {function(Error, string=)} callback receives the request error, or
 *        null and the decoded html once the response has ended
 */
var fetchPage = function(url, callback){
    http.get(url,function(res){
        var bufferHelper = new BufferHelper();
        res.on('data', function (chunk) {
            bufferHelper.concat(chunk);
        });
        res.on('end',function(){
            callback(null, iconv.decode(bufferHelper.toBuffer(),'utf-8'));
        });
    }).on('error', function(err){
        // Without this handler a connection failure is an unhandled 'error' event.
        callback(err);
    });
};

/** Extracts the breadcrumb text of a listing page with a css selector. */
var dirllRelationTest1 = function(){
    fetchPage(AMAZON_LIST_URL, function(err, html){
        assert.ifError(err);
        var cheerio = require('cheerio');
        var $ = cheerio.load(html);
        var result = spider.extractor.cssSelector($,'#breadCrumb','innerText',1);
        assert.equal(result.trim(),'食品','dill relation extract failure.');
    });
};

/**
 * Extracts the qid parameter from the url with a regex selector.
 * Only the url string is inspected, so no page download is needed.
 */
var dirllRelationTest2 = function(){
    var url = AMAZON_LIST_URL;
    var result = spider.extractor.regexSelector(url,'.*?qid=(\\d+).*',1);
    var expected = '1387944813';
    assert(result,'extract none');
    // Both values need their own %s placeholder to show up in the message as intended.
    assert.equal(result.trim(),expected,util.format('dill relation extract failure.expect: %s, actual: %s',expected,result.trim()));
};

/** Checks that a product link is sorted into the amazon.cn detail page queue. */
var arrangeLinkText = function() {
    spider.assembly();
    spider.spider.refreshDrillerRules();
    // Wait two seconds before asserting - presumably so the driller rules are
    // loaded by then; TODO confirm.
    setTimeout(function(){
        var links = [
            'http://www.amazon.cn/Toblerone%E7%91%9E%E5%A3%AB%E4%B8%89%E8%A7%92%E9%BB%91%E5%B7%A7%E5%85%8B%E5%8A%9B%E5%90%AB%E8%9C%82%E8%9C%9C%E5%8F%8A%E5%B7%B4%E6%97%A6%E6%9C%A8%E7%B3%9650g-5%E5%85%83%E8%B6%85%E5%80%BC%E6%8D%A2%E8%B4%AD%E4%B8%AD-%E4%BC%98%E6%83%A0%E7%A0%815HUANGOU/dp/B003NNUIA8/ref=sr_1_1?s=grocery&ie=UTF8&qid=1388994448&sr=1-1'
        ];
        var arranged_links = spider.extractor.arrange_link(links);
        assert.equal(arranged_links['urllib:driller:amazon.cn:detailpage'][0],'http://www.amazon.cn/Toblerone%E7%91%9E%E5%A3%AB%E4%B8%89%E8%A7%92%E9%BB%91%E5%B7%A7%E5%85%8B%E5%8A%9B%E5%90%AB%E8%9C%82%E8%9C%9C%E5%8F%8A%E5%B7%B4%E6%97%A6%E6%9C%A8%E7%B3%9650g-5%E5%85%83%E8%B6%85%E5%80%BC%E6%8D%A2%E8%B4%AD%E4%B8%AD-%E4%BC%98%E6%83%A0%E7%A0%815HUANGOU/dp/B003NNUIA8/ref=sr_1_1');
    },2000);
};

/** Requests an ip echo service through a fixed proxy and prints the body. */
var requestTest = function(){
    var request = require('request');
    request({
        'url': 'http://ip.jsontest.com/',
        'headers': {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36"
        },
        'timeout':60*1000,
        'proxy':'http://60.223.241.92:9999'
    }, function(error, response, body){
        if (error) {
            console.log('request error:', error);
        } else if (response.statusCode == 200) {
            console.log(body);
        } else {
            console.log('unexpected status code:', response.statusCode);
        }
    });
};


/** Lists the HBase tables and inserts one row through the thrift gateway. */
var hbaseThriftTest = function(){
    // This section includes a fix from here:
    // https://stackoverflow.com/questions/17415528/nodejs-hbase-thrift-weirdness/21141862#21141862
    var thrift = require('thrift'),
        HBase = require('../lib/node_hbase/gen-nodejs/HBase.js'),
        HBaseTypes = require('../lib/node_hbase/gen-nodejs/HBase_types.js'),
        connection = thrift.createConnection('192.168.1.4', 9090, {
            transport: thrift.TFramedTransport,
            protocol: thrift.TBinaryProtocol
        });

    connection.on('connect', function() {
        var client = thrift.createClient(HBase,connection);
        client.getTableNames(function(err,data) {
            if (err) {
                console.log('gettablenames error:', err);
            } else {
                console.log('hbase tables:', data);
            }
            // connection.end();
        });
        // Mutation is taken from the required types module; a bare `Mutation`
        // identifier is not declared anywhere in this file.
        // NOTE(review): assumes HBase_types.js exports Mutation - confirm.
        client.mutateRow('test',''+(new Date()).getTime(),
            [
                new HBaseTypes.Mutation({column:'f:name',value:'James'}),
                new HBaseTypes.Mutation({column:'f:mail',value:'successage@gmail.com'})
            ],
            null,
            function(err, success){
                if (err) {
                    console.log(err);
                }else console.log('insert successful');
            }
        );
    });

    connection.on('error', function(err){
        console.log('error', err);
    });
};

/** Writes two rows with the zookeeper based hbase-client library. */
var hbaseClientTest = function(){
    var HBase = require('hbase-client');
    var client = HBase.create({
        zookeeperHosts: [
            '192.168.1.4:2222'
        ],
        zookeeperRoot: '/hbase'
    });

    var put = new HBase.Put('row0');
    put.add('f', 'name', 'foo');
    put.add('f', 'age', '18');
    client.put('test', put, function (err) {
        // Only report real failures instead of printing "null" on success.
        if(err)console.log(err);
        else console.log('put successful.');
    });

    client.putRow('test','row1', {'f:name':'foo name','f:age':'18'},function(err) {
        if(err)console.log(err);
        else console.log('put successful.');
    });
};

//dirllRelationTest1();
//dirllRelationTest2();
//arrangeLinkText();
//requestTest();
//hbaseThriftTest();
//hbaseClientTest();
/**
 * Created by cherokee on 14-6-16.
 *
 * Minimal HTTP echo server: every request is answered with a JSON document
 * holding the remote address of the client ("IP") and the request headers
 * with upper-cased names ("HEADERS").
 *
 * Usage: node http-echo-server.js [port] [host]
 * Defaults to port 80 on 0.0.0.0.
 */
var http = require('http');

var DEFAULT_PORT = 80;
var DEFAULT_HOST = '0.0.0.0';

/**
 * Returns a copy of a header map whose own property names are upper-cased.
 * @param {Object} headers header name -> value
 * @returns {Object} new object, values untouched
 */
var upperCaseHeaderNames = function(headers){
    var new_headers = {};
    for(var x in headers){
        // Called through Object.prototype so it also works when the map
        // itself has no hasOwnProperty method.
        if(Object.prototype.hasOwnProperty.call(headers, x)){
            new_headers[x.toUpperCase()] = headers[x];
        }
    }
    return new_headers;
};

/**
 * Builds the JSON response body for a request.
 * @param {Object} req request; req.socket.remoteAddress and req.headers are read
 * @returns {string} serialized {IP, HEADERS} document
 */
var buildEchoBody = function(req){
    return JSON.stringify({
        'IP':req.socket.remoteAddress,
        'HEADERS':upperCaseHeaderNames(req.headers)
    });
};

/**
 * Creates the echo server and starts listening.
 * @param {number} port port to bind
 * @param {string} host address to bind
 * @returns {Object} the http server instance
 */
var startEchoServer = function(port, host){
    var server = http.createServer(function (req, res) {
        res.writeHead(200, {'Content-Type': 'application/json'});
        res.end(buildEchoBody(req));
    });
    // Report bind failures (port in use, missing privileges for port 80)
    // instead of dying on an unhandled 'error' event.
    server.on('error', function(err){
        console.error('Http Echo Server failed: ' + err.message);
        process.exit(1);
    });
    // Announce the address only once the socket is really bound.
    server.listen(port, host, function(){
        console.log('Http Echo Server running at http://' + host + ':' + port + '/');
    });
    return server;
};

// Start only when executed directly (node http-echo-server.js ...).
if(typeof module !== 'undefined' && require.main === module){
    startEchoServer(parseInt(process.argv[2], 10) || DEFAULT_PORT, process.argv[3] || DEFAULT_HOST);
}
fs = require("fs"); 50 | var async = require('async'); 51 | 52 | var dumpdb = function(db){ 53 | redis.createClient(options['h'],options['p'],db,options['t'],function(err,redis_cli) 54 | { 55 | redis_cli.hlist(options['q'],function(err,keyList){ 56 | for(var i=0;i [options] 26 | 27 | file file to parse; otherwise uses stdin 28 | 29 | options: 30 | -v, --version print version and exit 31 | -s, --sort-keys sort object keys 32 | -i, --in-place overwrite the file 33 | -t CHAR, --indent CHAR character(s) to use for indentation 34 | -c, --compact compact error display 35 | -V, --validate a JSON schema to use for validation 36 | -e, --environment which specification of JSON Schema the validation file uses 37 | 38 | 39 | ## Module interface 40 | 41 | I'm not sure why you wouldn't use the built in `JSON.parse` but you can use jsonlint from a CommonJS module: 42 | 43 | var jsonlint = require("jsonlint"); 44 | 45 | jsonlint.parse('{"creative?": false}'); 46 | 47 | It returns the parsed object or throws an `Error`. 48 | 49 | ## Vim Plugins 50 | 51 | * [Syntastic](http://www.vim.org/scripts/script.php?script_id=2736) 52 | * [sourcebeautify](http://www.vim.org/scripts/script.php?script_id=4079) 53 | 54 | ## MIT License 55 | 56 | Copyright (C) 2012 Zachary Carter 57 | 58 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 59 | 60 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
/**
 * Created by James on 14-2-13.
 * jquery, jsoneditor required
 */

/**
 * Binds a json editor widget to a text input that holds the JSON source.
 * Editing in the widget writes the serialized object back into the input,
 * and changing the input re-parses it and re-renders the widget.
 *
 * An empty input is treated as an empty object. Invalid JSON is reported
 * with alert(); the previously parsed object (an empty object on first
 * load) stays in the editor.
 *
 * @param jsonElement  jQuery element that hosts the editor (needs .jsonEditor)
 * @param inputElement jQuery input element containing the JSON text
 */
var jsonEditorConstructor = function(jsonElement,inputElement){
    var objectValue = {};

    // Parses the current input text; returns `fallback` when it is invalid.
    var parseInput = function(fallback){
        var val = inputElement.val();
        if (!val) {
            return {};
        }
        try { return JSON.parse(val); }
        catch (e) { alert('Error in parsing json. ' + e); }
        return fallback;
    };

    // (Re)builds the editor for the current object and keeps the input in sync.
    var render = function(){
        jsonElement.jsonEditor(objectValue, { change: function(){
            inputElement.val(JSON.stringify(objectValue));
        }});
    };

    objectValue = parseInput(objectValue);

    if(!jsonElement.hasClass("json-editor")){
        jsonElement.addClass("json-editor");
    }
    if(!inputElement.hasClass("json-input")){
        inputElement.addClass("json-input");
    }

    inputElement.change(function(){
        objectValue = parseInput(objectValue);
        render();
    });
    render();
};

Edit Proxy Manually

12 | 13 |
Return 14 | 15 |
16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 42 | 43 | 44 |
address
authorize
group 29 | 41 |
45 |
46 | 47 | 48 | 49 |
50 |
51 |
52 | -------------------------------------------------------------------------------- /webconfig/public/stylesheets/FlexiJsonEditor/jsoneditor.css: -------------------------------------------------------------------------------- 1 | .json-editor { 2 | margin-left: 10px; 3 | padding: 20px; 4 | } 5 | 6 | .json-editor.expanded .item { 7 | display: block !important; 8 | } 9 | 10 | .json-editor .property, 11 | .json-editor .value { 12 | border: none; 13 | -webkit-border-radius: 5px; 14 | -moz-border-radius: 5px; 15 | border-radius: 5px; 16 | padding: 3px; 17 | margin-right: 2px; 18 | } 19 | 20 | .json-editor .value { 21 | background-color: #ECF3C3; 22 | width: 600px; 23 | } 24 | .json-editor .property { 25 | width: 154px; 26 | background-color: #B1C639; 27 | color: white; 28 | font-weight: bold; 29 | text-shadow: 1px 1px 1px black; 30 | } 31 | .json-editor button.property { 32 | width: 160px; 33 | } 34 | 35 | .json-editor .editing { 36 | border: 1px solid gray; 37 | outline: none; 38 | } 39 | 40 | .json-editor .item { 41 | line-height: 20px; 42 | margin-top: 2px; 43 | } 44 | .json-editor .item:last-child { 45 | border-bottom: none; 46 | } 47 | 48 | .json-editor .item > .item { 49 | margin-left: 30px; 50 | display: none; 51 | } 52 | 53 | .json-editor .item.expanded > .item { 54 | display: block; 55 | } 56 | 57 | .json-editor .expander { 58 | background: 
url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A/wD/oL2nkwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9sJCxMmIIjsHfkAAAHTSURBVDjLzZPLaxNRFMZ/ZzKTNs10xHUyYFsfsZC2IOii2ftHCKKx/0G1brQlFhShCzfuXXUjuHErKGiVpr6qYK0bGyMoyCRVq5Mwkzku8ja6c+GFA5f7ne87zwv/zXn2bgcAVRVVNf5i0usLIO3Lq/cfze2trdfra48yHbCFqnYDnZjNbafcdPbtxpMgn893BXYqPxZO5o5fNwwDAUSkL0NVRYEoarB65+6tY5mJswBGN5okfP9nhyxA2nVJp9N9b/Vana/VaidLszeKIIgIhghhGDI/fx7H2ceZ06ewrDhRt2YdEBBpg0rKdRm1bZxRm+TIMNnsFN/39iiXyyDd3vSd8q6/dGB8XI8cndR7Dx5qpKqNqGmqqsUXm3o4k9GxiYN6f/3p44EMFAUFyzRZLiyxMjzEpcuL2LbNxYUL+LU6cStOEAQ9BfQKNPkoUPE8wjDEr9UxrSFKpRKWFW/ytH+s5sCoVImAWCzG8pVC08myiFpYk6vyhwz0m+04aKPRchE8z/ttD5SRZJJEIlEZ2MSV26sc2p/afL5RnOrZjQ653afp6Zmg+ulDbu7cXLFPYO3lG2ZnJrlWWByLxUyr09jWfrRnvfvlc/nqjZv+P/uEvwD/VNrBXhv7BAAAAABJRU5ErkJggg==") no-repeat center center; 59 | width: 16px; 60 | height: 11px; 61 | display: inline-block; 62 | margin-left: -15px; 63 | cursor: pointer; 64 | } 65 | .json-editor .item.expanded > .expander { 66 | background: url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A/wD/oL2nkwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9sKBBAdNcQGwvUAAAGjSURBVDjLzZNNS1tREIafOfdGjcaA65hFKQUpWIWCLgwI3fQnFMGN0Z/QiqsWKVgKLrpx78pNoZv+BUU0WNRC/dgoTaFCMWnUNoZ7b6aLm5ycS+quix44cJiPd953Zg78N2fv9BwAVRVVNXdccWMBpP04PPvmnxwdfd7Z2hyxzpZXtVNocqpwkssPjx6XtoNisdgBOK/8WnxamHhrjEEAEUkwVFUUaDYjNj58XH88cn8OwHSqSbpe/22T3XTX1rhtUKtWLUtDIlAQEYwIURRZexgGGBEX2IryXb3S8uXyeQYzGYc/XN/cUC6XQTq9SQIQI4RhxPMXSzyZLtjmGYHS/iGzM89iQX8DUBQUUr7P6+VXrPb1MpjNAnBVq1G/bdCT6iEIAkeACxDno0Dl8pIwDDGeZwONSJynybH6XaNSpQl4nmcLiUCz5YttKl1TUNWrTDabAEmwU0VR+gcGSKfTla5NXH2/wYOh3MGn0u4jZzcss3afxsbGg+r3r4WF+YXdBMDW/hemxh/yZvnlPc/zU7ax7Qm1tPz8cVFeebdW/2ef8A+6rb2Fr7Cr4wAAAABJRU5ErkJggg==") no-repeat center center; 67 | } 68 | 69 | .json-editor .item.appender > .property { 70 | background-color: #999999; 71 | cursor: pointer; 72 | width: 100px; 73 | } 74 | .json-editor .item.string > 
.property { background-color: #009408; } 75 | .json-editor .item.array > .property { background-color: #2D5B89; } 76 | .json-editor .item.object > .property { background-color: #E17000; } 77 | .json-editor .item.number > .property { background-color: #497B8D; } 78 | /*.json-editor .item.boolean { background-color: }*/ 79 | /*.json-editor .item.null { background-color: }*/ 80 | -------------------------------------------------------------------------------- /webconfig/public/stylesheets/demo_page.css: -------------------------------------------------------------------------------- 1 | 2 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 3 | * General page setup 4 | */ 5 | #dt_example { 6 | font: 80%/1.45em "Lucida Grande", Verdana, Arial, Helvetica, sans-serif; 7 | margin: 0; 8 | padding: 0; 9 | color: #333; 10 | background-color: #fff; 11 | } 12 | 13 | 14 | #dt_example #container { 15 | width: 800px; 16 | margin: 30px auto; 17 | padding: 0; 18 | } 19 | 20 | 21 | #dt_example #footer { 22 | margin: 50px auto 0 auto; 23 | padding: 0; 24 | } 25 | 26 | #dt_example #demo { 27 | margin: 30px auto 0 auto; 28 | } 29 | 30 | #dt_example .demo_jui { 31 | margin: 30px auto 0 auto; 32 | } 33 | 34 | #dt_example .big { 35 | font-size: 1.3em; 36 | font-weight: bold; 37 | line-height: 1.6em; 38 | color: #4E6CA3; 39 | } 40 | 41 | #dt_example .spacer { 42 | height: 20px; 43 | clear: both; 44 | } 45 | 46 | #dt_example .clear { 47 | clear: both; 48 | } 49 | 50 | #dt_example pre { 51 | padding: 15px; 52 | background-color: #F5F5F5; 53 | border: 1px solid #CCCCCC; 54 | } 55 | 56 | #dt_example h1 { 57 | margin-top: 2em; 58 | font-size: 1.3em; 59 | font-weight: normal; 60 | line-height: 1.6em; 61 | color: #4E6CA3; 62 | border-bottom: 1px solid #B0BED9; 63 | clear: both; 64 | } 65 | 66 | #dt_example h2 { 67 | font-size: 1.2em; 68 | font-weight: normal; 69 | line-height: 1.6em; 70 | color: #4E6CA3; 71 | clear: both; 72 | } 73 | 74 | #dt_example a { 75 | color: 
#0063DC; 76 | text-decoration: none; 77 | } 78 | 79 | #dt_example a:hover { 80 | text-decoration: underline; 81 | } 82 | 83 | #dt_example ul { 84 | color: #4E6CA3; 85 | } 86 | 87 | .css_right { 88 | float: right; 89 | } 90 | 91 | .css_left { 92 | float: left; 93 | } 94 | 95 | .demo_links { 96 | float: left; 97 | width: 50%; 98 | margin-bottom: 1em; 99 | } 100 | 101 | #demo_info { 102 | padding: 5px; 103 | border: 1px solid #B0BED9; 104 | height: 100px; 105 | width: 100%; 106 | overflow: auto; 107 | } 108 | 109 | #dt_example code { 110 | font-family: Menlo, Monaco, Consolas, "Courier New", monospace; 111 | padding: 2px 4px !important; 112 | white-space: nowrap; 113 | font-size: 0.9em; 114 | 115 | color: #D14; 116 | background-color: #F7F7F9; 117 | 118 | border: 1px solid #E1E1E8; 119 | -webkit-border-radius: 3px; 120 | -moz-border-radius: 3px; 121 | border-radius: 3px; 122 | } 123 | -------------------------------------------------------------------------------- /webconfig/public/stylesheets/jquery.dataTables.css: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Table 4 | */ 5 | table.dataTable { 6 | margin: 0 auto; 7 | clear: both; 8 | width: 100%; 9 | } 10 | 11 | table.dataTable thead th { 12 | padding: 3px 18px 3px 10px; 13 | border-bottom: 1px solid black; 14 | font-weight: bold; 15 | cursor: pointer; 16 | *cursor: hand; 17 | } 18 | 19 | table.dataTable tfoot th { 20 | padding: 3px 18px 3px 10px; 21 | border-top: 1px solid black; 22 | font-weight: bold; 23 | } 24 | 25 | table.dataTable td { 26 | padding: 3px 10px; 27 | } 28 | 29 | table.dataTable td.center, 30 | table.dataTable td.dataTables_empty { 31 | text-align: center; 32 | } 33 | 34 | table.dataTable tr.odd { background-color: #E2E4FF; } 35 | table.dataTable tr.even { background-color: white; } 36 | 37 | table.dataTable tr.odd td.sorting_1 { background-color: #D3D6FF; } 38 | table.dataTable tr.odd td.sorting_2 { background-color: #DADCFF; } 39 | 
table.dataTable tr.odd td.sorting_3 { background-color: #E0E2FF; } 40 | table.dataTable tr.even td.sorting_1 { background-color: #EAEBFF; } 41 | table.dataTable tr.even td.sorting_2 { background-color: #F2F3FF; } 42 | table.dataTable tr.even td.sorting_3 { background-color: #F9F9FF; } 43 | 44 | 45 | /* 46 | * Table wrapper 47 | */ 48 | .dataTables_wrapper { 49 | position: relative; 50 | clear: both; 51 | *zoom: 1; 52 | } 53 | 54 | 55 | /* 56 | * Page length menu 57 | */ 58 | .dataTables_length { 59 | float: left; 60 | } 61 | 62 | 63 | /* 64 | * Filter 65 | */ 66 | .dataTables_filter { 67 | float: right; 68 | text-align: right; 69 | } 70 | 71 | 72 | /* 73 | * Table information 74 | */ 75 | .dataTables_info { 76 | clear: both; 77 | float: left; 78 | } 79 | 80 | 81 | /* 82 | * Pagination 83 | */ 84 | .dataTables_paginate { 85 | float: right; 86 | text-align: right; 87 | } 88 | 89 | /* Two button pagination - previous / next */ 90 | .paginate_disabled_previous, 91 | .paginate_enabled_previous, 92 | .paginate_disabled_next, 93 | .paginate_enabled_next { 94 | height: 19px; 95 | float: left; 96 | cursor: pointer; 97 | *cursor: hand; 98 | color: #111 !important; 99 | } 100 | .paginate_disabled_previous:hover, 101 | .paginate_enabled_previous:hover, 102 | .paginate_disabled_next:hover, 103 | .paginate_enabled_next:hover { 104 | text-decoration: none !important; 105 | } 106 | .paginate_disabled_previous:active, 107 | .paginate_enabled_previous:active, 108 | .paginate_disabled_next:active, 109 | .paginate_enabled_next:active { 110 | outline: none; 111 | } 112 | 113 | .paginate_disabled_previous, 114 | .paginate_disabled_next { 115 | color: #666 !important; 116 | } 117 | .paginate_disabled_previous, 118 | .paginate_enabled_previous { 119 | padding-left: 23px; 120 | } 121 | .paginate_disabled_next, 122 | .paginate_enabled_next { 123 | padding-right: 23px; 124 | margin-left: 10px; 125 | } 126 | 127 | .paginate_enabled_previous { background: url('../images/back_enabled.png') 
no-repeat top left; } 128 | .paginate_enabled_previous:hover { background: url('../images/back_enabled_hover.png') no-repeat top left; } 129 | .paginate_disabled_previous { background: url('../images/back_disabled.png') no-repeat top left; } 130 | 131 | .paginate_enabled_next { background: url('../images/forward_enabled.png') no-repeat top right; } 132 | .paginate_enabled_next:hover { background: url('../images/forward_enabled_hover.png') no-repeat top right; } 133 | .paginate_disabled_next { background: url('../images/forward_disabled.png') no-repeat top right; } 134 | 135 | /* Full number pagination */ 136 | .paging_full_numbers { 137 | height: 22px; 138 | line-height: 22px; 139 | } 140 | .paging_full_numbers a:active { 141 | outline: none 142 | } 143 | .paging_full_numbers a:hover { 144 | text-decoration: none; 145 | } 146 | 147 | .paging_full_numbers a.paginate_button, 148 | .paging_full_numbers a.paginate_active { 149 | border: 1px solid #aaa; 150 | -webkit-border-radius: 5px; 151 | -moz-border-radius: 5px; 152 | border-radius: 5px; 153 | padding: 2px 5px; 154 | margin: 0 3px; 155 | cursor: pointer; 156 | *cursor: hand; 157 | color: #333 !important; 158 | } 159 | 160 | .paging_full_numbers a.paginate_button { 161 | background-color: #ddd; 162 | } 163 | 164 | .paging_full_numbers a.paginate_button:hover { 165 | background-color: #ccc; 166 | text-decoration: none !important; 167 | } 168 | 169 | .paging_full_numbers a.paginate_active { 170 | background-color: #99B3FF; 171 | } 172 | 173 | 174 | /* 175 | * Processing indicator 176 | */ 177 | .dataTables_processing { 178 | position: absolute; 179 | top: 50%; 180 | left: 50%; 181 | width: 250px; 182 | height: 30px; 183 | margin-left: -125px; 184 | margin-top: -15px; 185 | padding: 14px 0 2px 0; 186 | border: 1px solid #ddd; 187 | text-align: center; 188 | color: #999; 189 | font-size: 14px; 190 | background-color: white; 191 | } 192 | 193 | 194 | /* 195 | * Sorting 196 | */ 197 | .sorting { background: 
url('../images/sort_both.png') no-repeat center right; } 198 | .sorting_asc { background: url('../images/sort_asc.png') no-repeat center right; } 199 | .sorting_desc { background: url('../images/sort_desc.png') no-repeat center right; } 200 | 201 | .sorting_asc_disabled { background: url('../images/sort_asc_disabled.png') no-repeat center right; } 202 | .sorting_desc_disabled { background: url('../images/sort_desc_disabled.png') no-repeat center right; } 203 | 204 | table.dataTable thead th:active, 205 | table.dataTable thead td:active { 206 | outline: none; 207 | } 208 | 209 | 210 | /* 211 | * Scrolling 212 | */ 213 | .dataTables_scroll { 214 | clear: both; 215 | } 216 | 217 | .dataTables_scrollBody { 218 | *margin-top: -1px; 219 | -webkit-overflow-scrolling: touch; 220 | } 221 | 222 | -------------------------------------------------------------------------------- /webconfig/public/stylesheets/jquery.dataTables_themeroller.css: -------------------------------------------------------------------------------- 1 | 2 | 3 | /* 4 | * Table 5 | */ 6 | table.dataTable { 7 | margin: 0 auto; 8 | clear: both; 9 | width: 100%; 10 | border-collapse: collapse; 11 | } 12 | 13 | table.dataTable thead th { 14 | padding: 3px 0px 3px 10px; 15 | cursor: pointer; 16 | *cursor: hand; 17 | } 18 | 19 | table.dataTable tfoot th { 20 | padding: 3px 10px; 21 | } 22 | 23 | table.dataTable td { 24 | padding: 3px 10px; 25 | } 26 | 27 | table.dataTable td.center, 28 | table.dataTable td.dataTables_empty { 29 | text-align: center; 30 | } 31 | 32 | table.dataTable tr.odd { background-color: #E2E4FF; } 33 | table.dataTable tr.even { background-color: white; } 34 | 35 | table.dataTable tr.odd td.sorting_1 { background-color: #D3D6FF; } 36 | table.dataTable tr.odd td.sorting_2 { background-color: #DADCFF; } 37 | table.dataTable tr.odd td.sorting_3 { background-color: #E0E2FF; } 38 | table.dataTable tr.even td.sorting_1 { background-color: #EAEBFF; } 39 | table.dataTable tr.even td.sorting_2 { 
background-color: #F2F3FF; } 40 | table.dataTable tr.even td.sorting_3 { background-color: #F9F9FF; } 41 | 42 | 43 | /* 44 | * Table wrapper 45 | */ 46 | .dataTables_wrapper { 47 | position: relative; 48 | clear: both; 49 | *zoom: 1; 50 | } 51 | .dataTables_wrapper .ui-widget-header { 52 | font-weight: normal; 53 | } 54 | .dataTables_wrapper .ui-toolbar { 55 | padding: 5px; 56 | } 57 | 58 | 59 | /* 60 | * Page length menu 61 | */ 62 | .dataTables_length { 63 | float: left; 64 | } 65 | 66 | 67 | /* 68 | * Filter 69 | */ 70 | .dataTables_filter { 71 | float: right; 72 | text-align: right; 73 | } 74 | 75 | 76 | /* 77 | * Table information 78 | */ 79 | .dataTables_info { 80 | padding-top: 3px; 81 | clear: both; 82 | float: left; 83 | } 84 | 85 | 86 | /* 87 | * Pagination 88 | */ 89 | .dataTables_paginate { 90 | float: right; 91 | text-align: right; 92 | } 93 | 94 | .dataTables_paginate .ui-button { 95 | margin-right: -0.1em !important; 96 | } 97 | 98 | .paging_two_button .ui-button { 99 | float: left; 100 | cursor: pointer; 101 | * cursor: hand; 102 | } 103 | 104 | .paging_full_numbers .ui-button { 105 | padding: 2px 6px; 106 | margin: 0; 107 | cursor: pointer; 108 | * cursor: hand; 109 | color: #333 !important; 110 | } 111 | 112 | /* Two button pagination - previous / next */ 113 | .paginate_disabled_previous, 114 | .paginate_enabled_previous, 115 | .paginate_disabled_next, 116 | .paginate_enabled_next { 117 | height: 19px; 118 | float: left; 119 | cursor: pointer; 120 | *cursor: hand; 121 | color: #111 !important; 122 | } 123 | .paginate_disabled_previous:hover, 124 | .paginate_enabled_previous:hover, 125 | .paginate_disabled_next:hover, 126 | .paginate_enabled_next:hover { 127 | text-decoration: none !important; 128 | } 129 | .paginate_disabled_previous:active, 130 | .paginate_enabled_previous:active, 131 | .paginate_disabled_next:active, 132 | .paginate_enabled_next:active { 133 | outline: none; 134 | } 135 | 136 | .paginate_disabled_previous, 137 | 
.paginate_disabled_next { 138 | color: #666 !important; 139 | } 140 | .paginate_disabled_previous, 141 | .paginate_enabled_previous { 142 | padding-left: 23px; 143 | } 144 | .paginate_disabled_next, 145 | .paginate_enabled_next { 146 | padding-right: 23px; 147 | margin-left: 10px; 148 | } 149 | 150 | .paginate_enabled_previous { background: url('../images/back_enabled.png') no-repeat top left; } 151 | .paginate_enabled_previous:hover { background: url('../images/back_enabled_hover.png') no-repeat top left; } 152 | .paginate_disabled_previous { background: url('../images/back_disabled.png') no-repeat top left; } 153 | 154 | .paginate_enabled_next { background: url('../images/forward_enabled.png') no-repeat top right; } 155 | .paginate_enabled_next:hover { background: url('../images/forward_enabled_hover.png') no-repeat top right; } 156 | .paginate_disabled_next { background: url('../images/forward_disabled.png') no-repeat top right; } 157 | 158 | /* Full number pagination */ 159 | .paging_full_numbers a:active { 160 | outline: none 161 | } 162 | .paging_full_numbers a:hover { 163 | text-decoration: none; 164 | } 165 | 166 | .paging_full_numbers a.paginate_button, 167 | .paging_full_numbers a.paginate_active { 168 | border: 1px solid #aaa; 169 | -webkit-border-radius: 5px; 170 | -moz-border-radius: 5px; 171 | border-radius: 5px; 172 | padding: 2px 5px; 173 | margin: 0 3px; 174 | cursor: pointer; 175 | *cursor: hand; 176 | color: #333 !important; 177 | } 178 | 179 | .paging_full_numbers a.paginate_button { 180 | background-color: #ddd; 181 | } 182 | 183 | .paging_full_numbers a.paginate_button:hover { 184 | background-color: #ccc; 185 | text-decoration: none !important; 186 | } 187 | 188 | .paging_full_numbers a.paginate_active { 189 | background-color: #99B3FF; 190 | } 191 | 192 | 193 | /* 194 | * Processing indicator 195 | */ 196 | .dataTables_processing { 197 | position: absolute; 198 | top: 50%; 199 | left: 50%; 200 | width: 250px; 201 | height: 30px; 202 | 
margin-left: -125px; 203 | margin-top: -15px; 204 | padding: 14px 0 2px 0; 205 | border: 1px solid #ddd; 206 | text-align: center; 207 | color: #999; 208 | font-size: 14px; 209 | background-color: white; 210 | } 211 | 212 | 213 | /* 214 | * Sorting 215 | */ 216 | table.dataTable thead th div.DataTables_sort_wrapper { 217 | position: relative; 218 | padding-right: 20px; 219 | } 220 | 221 | table.dataTable thead th div.DataTables_sort_wrapper span { 222 | position: absolute; 223 | top: 50%; 224 | margin-top: -8px; 225 | right: 0; 226 | } 227 | 228 | table.dataTable th:active { 229 | outline: none; 230 | } 231 | 232 | 233 | /* 234 | * Scrolling 235 | */ 236 | .dataTables_scroll { 237 | clear: both; 238 | } 239 | 240 | .dataTables_scrollBody { 241 | *margin-top: -1px; 242 | -webkit-overflow-scrolling: touch; 243 | } 244 | 245 | -------------------------------------------------------------------------------- /webconfig/public/stylesheets/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 50px; 3 | } 4 | table { 5 | width: 100%; 6 | border-collapse: collapse; 7 | border: 2; 8 | } 9 | table, 10 | td, 11 | th, 12 | caption { 13 | border: 2px solid #000; 14 | align: left; 15 | } 16 | tbody { 17 | color: #008000; 18 | } 19 | td { 20 | padding: 10px; 21 | align: left; 22 | } 23 | caption { 24 | font-size: larger; 25 | background-color: #ff0; 26 | padding: 10px; 27 | } 28 | h1 { 29 | font: 1.5em Georgia, serif; 30 | } 31 | ul { 32 | list-style-type: none; 33 | } 34 | form { 35 | margin: 20px; 36 | padding: 20px; 37 | } 38 | hr { 39 | width: 100%; 40 | height: 2px; 41 | } 42 | th { 43 | vertical-align: center; 44 | background-color: #666; 45 | color: #fff; 46 | } 47 | div.container { 48 | margin: 15px; 49 | height: 400px; 50 | width: 1800px; 51 | background-color: #fff; 52 | } 53 | div.proxy { 54 | float: left; 55 | padding: 10px; 56 | height: 400px; 57 | overflow-y: scroll; 58 | background-color: #fff; 59 | } 60 | 
-------------------------------------------------------------------------------- /webconfig/public/stylesheets/tablesorter.css: -------------------------------------------------------------------------------- 1 | /* tables */ 2 | table.tablesorter { 3 | font-family:arial; 4 | background-color: #CDCDCD; 5 | margin:10px 0pt 15px; 6 | font-size: 8pt; 7 | width: 100%; 8 | text-align: left; 9 | } 10 | table.tablesorter thead tr th, table.tablesorter tfoot tr th { 11 | background-color: #e6EEEE; 12 | border: 1px solid #FFF; 13 | font-size: 8pt; 14 | padding: 4px; 15 | } 16 | table.tablesorter thead tr .header { 17 | background-image: url(../images/bg.gif); 18 | background-repeat: no-repeat; 19 | background-position: center right; 20 | cursor: pointer; 21 | } 22 | table.tablesorter tbody td { 23 | color: #3D3D3D; 24 | padding: 4px; 25 | /*background-color: #FFF;*/ 26 | vertical-align: top; 27 | } 28 | table.tablesorter tbody tr.odd td { 29 | /*background-color:#F0F0F6;*/ 30 | } 31 | table.tablesorter thead tr .headerSortUp { 32 | background-image: url(../images/asc.gif); 33 | } 34 | table.tablesorter thead tr .headerSortDown { 35 | background-image: url(../images/desc.gif); 36 | } 37 | table.tablesorter thead tr .headerSortDown, table.tablesorter thead tr .headerSortUp { 38 | background-color: #8dbdd8; 39 | } 40 | -------------------------------------------------------------------------------- /webconfig/routecontroller.js: -------------------------------------------------------------------------------- 1 | // map request with request handler 2 | exports.mapRoute = function(app) { 3 | //prefix = '/' + prefix; 4 | 5 | prefix_rule = '/rule'; 6 | prefix_proxy = '/proxy'; 7 | prefix_monitor = '/monitor'; 8 | 9 | var prefixRuleObj = require('./controllers' + prefix_rule); 10 | var prefixProxyObj = require('./controllers' + prefix_proxy); 11 | var prefixMonitorObj = require('./controllers' + prefix_monitor); 12 | 13 | // index 14 | app.get(prefix_rule, prefixRuleObj.index); 15 
| 16 | // search 17 | app.post(prefix_rule + '/search', prefixRuleObj.search); 18 | // add 19 | app.get(prefix_rule + '/new', prefixRuleObj.new); 20 | // show 21 | app.get(prefix_rule + '/:id', prefixRuleObj.show); 22 | // create 23 | app.post(prefix_rule + '/create', prefixRuleObj.create); 24 | // edit 25 | app.get(prefix_rule + '/:id/edit', prefixRuleObj.edit); 26 | // update 27 | app.post(prefix_rule + '/upsert', prefixRuleObj.update); 28 | // destroy 29 | app.del(prefix_rule + '/:id', prefixRuleObj.destroy); 30 | 31 | ///////////////////////////////////////////////////////////////////// 32 | // Proxy index 33 | app.get(prefix_proxy, prefixProxyObj.index); 34 | // Proxy index 35 | app.get(prefix_proxy + '/index', prefixProxyObj.index); 36 | // Proxy edit 37 | app.get(prefix_proxy + "/new", prefixProxyObj.new); 38 | // Proxy create 39 | app.post(prefix_proxy + "/create", prefixProxyObj.create); 40 | // Proxy destroy 41 | app.get(prefix_proxy + "/:host/:key", prefixProxyObj.destroy); 42 | 43 | ///////////////////////////////////////////////////////////////////////// 44 | 45 | app.get(prefix_monitor + "/daily", prefixMonitorObj.daily); 46 | app.get(prefix_monitor + "/linkdb", prefixMonitorObj.linkdb); 47 | app.get(prefix_monitor + "/chart", prefixMonitorObj.chart); 48 | }; -------------------------------------------------------------------------------- /webconfig/routes/index.js: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * GET home page. 4 | */ 5 | 6 | exports.index = function(req, res){ 7 | res.render('index', { title: 'Config Home' }, function(err, stuff) { 8 | if (!err) { 9 | res.write(stuff); 10 | res.end(); 11 | } 12 | }); 13 | }; 14 | -------------------------------------------------------------------------------- /webconfig/views/index.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | <%= title %> 4 | 5 | 6 |

Neocrawler Config Home

7 |

This page is designed to manage crawling rules and proxy servers. Wiki

8 |
9 |

Crawling Rules Configure

10 |

Proxy Configure

11 |

Crawling Daily Report

12 |

Link DB State

13 | 14 | -------------------------------------------------------------------------------- /webconfig/views/monitor/chart.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Domain Chart 6 | 7 | 44 | 45 | 46 |

<%=domain%> <%=days%> days line chart

47 | 55 |
56 | 57 |
58 | 170 | 171 | -------------------------------------------------------------------------------- /webconfig/views/monitor/daily.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Statistic Monitor 5 | 6 | 7 | 8 | 9 | 10 | 14 | 15 | 42 | 43 | 44 |

Crawling Daily Statistics
Home

45 |
46 | Date: 47 | 48 |
49 |
50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | <% for(var head in result){ 77 | var ir = false; 78 | var ccu = ''; 79 | if(result[head]['retry']/result[head]['crawl']*1.0>0.31) {ir = true; ccu+='retry/crawl='+(result[head]['retry']/result[head]['crawl']*1.0)+'
';} 80 | if(result[head]['fail']/result[head]['crawl']*1.0>0.11) {ir = true;ccu+='fail/crawl='+(result[head]['fail']/result[head]['crawl']*1.0)+'
';} 81 | if(result[head]['lack']/result[head]['crawl']*1.0>0.051) {ir = true;ccu+='lack/crawl='+(result[head]['lack']/result[head]['crawl']*1.0)+'
';} 82 | if(result[head]['finish']/(result[head]['crawl']-result[head]['retry'])*1.0<0.5) {ir = true;ccu+='finish/(crawl-retry)='+(result[head]['finish']/(result[head]['crawl']-result[head]['retry'])*1.0)+'
';} 83 | if(result[head]['lack']/result[head]['save']*1.0>0.51) {ir = true;ccu+='lack/save='+(result[head]['lack']/result[head]['save']*1.0)+'
';} 84 | %> 85 | bgcolor="#ffc0cb" <%}else{%> bgcolor="#ffffff" <%}%>> 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | <% } %> 96 | 97 |
domaincrawlretryfaillacksavefinishCONCLUSION
Daily Total<%=total['crawl']||'0'%><%=total['retry']||'0'%><%=total['fail']||'0'%><%=total['lack']||'0'%><%=total['save']||'0'%><%=total['finish']||'0'%>-
<%=head%><%=result[head]['crawl']||'0'%><%=result[head]['retry']||'0'%><%=result[head]['fail']||'0'%><%=result[head]['lack']||'0'%><%=result[head]['save']||'0'%><%=result[head]['finish']||'0'%><%- ccu%>
98 | 105 | 106 | -------------------------------------------------------------------------------- /webconfig/views/monitor/linkdb.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Linkdb state 5 | 9 | 10 | 11 |

Linkdb State (realtime)
Home

12 |
13 |

Link info db size: <%=linkinfodbsize%>

14 |

Rules last modify: <%=new Date(parseInt(rulelastchanged))%>

15 |

Links in queue: <%=total_linkcount%>

16 |

Links in candidate queue: <%=scheduledcount%>

17 |

18 | 19 | 20 | 21 | 22 | <%for(var k in linkcount){%> 23 | 24 | 25 | 26 | 27 | <%}%> 28 |
namecount
<%=k%><%=linkcount[k]%>

29 |
30 | 31 | -------------------------------------------------------------------------------- /webconfig/views/proxy/edit.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Edit Proxy Manually 6 | 7 | 8 | 9 | 10 | 11 |

Edit Proxy Manually

12 | 13 |
Return 14 | 15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
address
authorize
group
32 | 33 |
34 | 35 | 36 |
37 |
38 |
39 |
40 | -------------------------------------------------------------------------------- /webconfig/views/proxy/index.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Available Proxy List 5 | 6 | 7 |

Available Proxy List

8 | Config Home  Add 9 |
10 |
11 |
12 |

public proxy available within 1s.

13 | <%if (proxyList[0] != null && proxyList[0].list.length > 0) {%> 14 |
    15 | <% proxyList[0].list.forEach(function(p){%> 16 |
  • <%= p%>   Delete
  • 17 | <%})%> 18 |
19 | <%}%> 20 |
21 |
22 |

public proxy available within 3s.

23 | <%if (proxyList[1] != null && proxyList[1].list.length > 0) {%> 24 |
    25 | <% proxyList[1].list.forEach(function(p){%> 26 |
  • <%= p%>   Delete
  • 27 | <%})%> 28 |
29 | <%}%> 30 |
31 |
32 |

public proxy available within 5s.

33 | <%if (proxyList[2] != null && proxyList[2].list.length > 0) {%> 34 |
    35 | <% proxyList[2].list.forEach(function(p){%> 36 |
  • <%= p%>   Delete
  • 37 | <%})%> 38 |
39 | <%}%> 40 |
41 |
42 |

public proxy available within 8s.

43 | <%if (proxyList[3] != null && proxyList[3].list.length > 0) {%> 44 |
    45 | <% proxyList[3].list.forEach(function(p){%> 46 |
  • <%= p%>   Delete
  • 47 | <%})%> 48 |
49 | <%}%> 50 |
51 |
52 |

public proxy available within 12s.

53 | <%if (proxyList[4] != null && proxyList[4].list.length > 0) {%> 54 |
    55 | <% proxyList[4].list.forEach(function(p){%> 56 |
  • <%= p%>   Delete
  • 57 | <%})%> 58 |
59 | <%}%> 60 |
61 | 62 |
63 |

public proxy available within 20s.

64 | <%if (proxyList[5] != null && proxyList[5].list.length > 0) {%> 65 |
    66 | <% proxyList[5].list.forEach(function(p){%> 67 |
  • <%= p%>   Delete
  • 68 | <%})%> 69 |
70 | <%}%> 71 |
72 |
73 | 74 |
75 |
76 |

VIP proxy available within 1s.

77 | <%if ( proxyList[6] != null && proxyList[6].list.length > 0) {%> 78 |
    79 | <% proxyList[6].list.forEach(function(p){%> 80 |
  • <%= p%>   Delete
  • 81 | <%})%> 82 |
83 | <%}%> 84 |
85 | 86 |
87 |

VIP proxy available within 3s.

88 | <%if (proxyList[7] != null && proxyList[7].list.length > 0) {%> 89 |
    90 | <% proxyList[7].list.forEach(function(p){%> 91 |
  • <%= p%>   Delete
  • 92 | <%})%> 93 |
94 | <%}%> 95 |
96 |
97 |

VIP proxy available within 8s.

98 | <%if (proxyList[8] != null && proxyList[8].list.length > 0) {%> 99 |
    100 | <% proxyList[8].list.forEach(function(p){%> 101 |
  • <%= p%>   Delete
  • 102 | <%})%> 103 |
104 | <%}%> 105 |
106 |
107 |

VIP proxy available within 15s.

108 | <%if (proxyList[9] != null && proxyList[9].list.length > 0) {%> 109 |
    110 | <% proxyList[9].list.forEach(function(p){%> 111 |
  • <%= p%>   Delete
  • 112 | <%})%> 113 |
114 | <%}%> 115 |
116 |
117 |
118 | -------------------------------------------------------------------------------- /webconfig/views/proxy/index.ejs.org: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Available Proxy List 5 | 6 | 7 |

Available Proxy List

8 |
9 | 10 |

Public proxy response within 1s.

11 | 12 |
13 | 14 | 15 | -------------------------------------------------------------------------------- /webconfig/views/rule/edit.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Edit Crawling Rules 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 34 | 35 | 36 | 37 | 38 | 39 |

Edit <%= rule.domain %>

40 | 41 |
Return 42 |

43 |
44 | 45 |
46 |

47 | 48 |

49 |
50 | 74 | -------------------------------------------------------------------------------- /webconfig/views/rule/index.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Drilling Rules 5 | 6 | 11 | 12 | 13 |

Drilling Rules

14 |
15 | Config Home 16 |
17 | Domain: 18 |   Add
19 |
20 |

21 | 22 | 23 | <% if(session.list.length) {%> 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | <% session.list.forEach(function(rule){ %> 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 53 | 54 | <% }) %> 55 | 56 |
IDactivesave contentuse proxypage logichandle jspriorityweightschedule intervallast scheduleaction
<%= rule.id %><%= rule.active %><%= rule.save_page %><%= rule.use_proxy %><%= rule.type %><%= rule.jshandle %><%= rule.priority %><%= rule.weight %><%= rule.schedule_interval %><%=: new Date(parseInt(rule.first_schedule)) | localDate:'yyyy-MM-dd hh:mm:ss.S' %>Edit  52 | Delete
57 | <% } %> 58 |
59 | -------------------------------------------------------------------------------- /webconfig/views/rule/show.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | <%= title %> 6 | 7 | 8 |

<%= rule.domain %>

9 | 14 |
16 | 17 | 18 | 19 | 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /webconfig/views/stylesheets/main.styl: -------------------------------------------------------------------------------- 1 | body 2 | margin 50px 3 | table 4 | width 100% 5 | border-collapse collapse 6 | border 2 7 | table, td, th, caption 8 | border 2px solid black 9 | align left 10 | tbody 11 | color green 12 | td 13 | padding 10px 14 | align left 15 | caption 16 | font-size larger 17 | background-color yellow 18 | padding 10px 19 | h1 20 | font 1.5em Georgia, serif 21 | ul 22 | list-style-type none 23 | form 24 | margin 20px 25 | padding 20px 26 | hr 27 | width 100% 28 | height 2px 29 | th 30 | vertical-align center 31 | background-color rgb(102, 102, 102) 32 | color white 33 | div.container 34 | margin 15px 35 | height 400px 36 | width 1800px 37 | background-color white 38 | div.proxy 39 | float left 40 | padding 10px 41 | height 400px 42 | overflow-y scroll 43 | background-color white 44 | -------------------------------------------------------------------------------- /webconfig/webconfig.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Module dependencies. 
// (end of the "Module dependencies" header comment) */

var express = require('express');
var controller = require('./routecontroller.js');
var routes = require('./routes');
var http = require('http');
var path = require('path');
var ejs = require('ejs');
var events = require('events');
var stylus = require('stylus');

/**
 * Web configuration console: an Express application used to manage
 * crawling rules and proxies and to view monitoring pages.
 *
 * @param {Object} webconfigCore - owning core object; only
 *        `webconfigCore.settings.logger` is read here.
 */
var webconfig = function(webconfigCore){
    // NOTE(review): EventEmitter's constructor is invoked, but this file never
    // links webconfig.prototype to EventEmitter.prototype - confirm whether
    // on()/emit() are expected to be available on instances.
    events.EventEmitter.call(this);
    this.webconfigCore = webconfigCore;
    // NOTE(review): no var/let/const - this assigns a global `logger`.
    // Presumably other modules read that global; left as is - confirm.
    logger = webconfigCore.settings.logger;
}

//////////////////////////////////
/**
 * Format this date according to a pattern string.
 *
 * Tokens: y+ (year, right-aligned digits), M+ (month), d+ (day), h+ (hours),
 * m+ (minutes), s+ (seconds), q+ (quarter), S (milliseconds).
 * A single-letter token is emitted unpadded; a repeated token is zero-padded
 * to two digits. Only the first occurrence of each token is replaced.
 * Milliseconds (`S`) are always emitted unpadded.
 *
 * @param {string} fmt - pattern, e.g. 'yyyy-MM-dd hh:mm:ss.S'
 * @returns {string} the formatted date
 */
Date.prototype.Format = function (fmt) {
    var o = {
        "M+": this.getMonth() + 1,                    // getMonth() is 0-based
        "d+": this.getDate(),
        "h+": this.getHours(),
        "m+": this.getMinutes(),
        "s+": this.getSeconds(),
        "q+": Math.floor((this.getMonth() + 3) / 3),  // quarter 1..4
        "S": this.getMilliseconds()
    };
    // RegExp.$1 holds the text captured by the most recent successful test().
    if (/(y+)/.test(fmt)) fmt = fmt.replace(RegExp.$1, (this.getFullYear() + "").substr(4 - RegExp.$1.length));
    for (var k in o)
        if (new RegExp("(" + k + ")").test(fmt)) fmt = fmt.replace(RegExp.$1, (RegExp.$1.length == 1) ? (o[k]) : (("00" + o[k]).substr(("" + o[k]).length)));
    return fmt;
}
//////////////////////////////////

/**
 * EJS filter: format a Date with Date.prototype.Format.
 *
 * Returns an empty string for anything that is not a valid Date (for example
 * `new Date(parseInt(undefined))`), instead of rendering "NaN-aN-aN ..." or
 * throwing while the view is being rendered.
 *
 * @param {Date} date - value to format
 * @param {string} fmt - pattern understood by Date.prototype.Format
 * @returns {string} formatted date, or '' when `date` is not a valid Date
 */
ejs.filters.localDate = function(date,fmt) {
    if (!(date instanceof Date) || isNaN(date.getTime())) return '';
    return date.Format(fmt);
};

/**
 * Build the Express application and start the HTTP server.
 *
 * @param {Object} settings - configuration; keys read here:
 *        'port'            - TCP port to listen on;
 *        'config_web_auth' - optional [username, password] pair enabling
 *                            HTTP basic auth on all routes;
 *        'session_secret'  - optional session signing secret (falls back to
 *                            the historical built-in value when absent).
 */
webconfig.prototype.launch = function(settings){

    var app = express();
    // all environments
    app.set('port', settings['port']);

    // Prefer a deployment-specific secret; the built-in value is kept only so
    // that existing settings files keep working.
    var sessionSecret = settings['session_secret'] || '1234567890QWERTY';

    app.configure(function(){
        app.set('views', __dirname + '/views');
        app.set('view engine', 'ejs');
        app.use(express.favicon());
        // app.use(express.logger('dev'));
        //
        app.use(express.cookieParser());
        app.use(express.session({secret: sessionSecret}));
        //
        app.use(express.urlencoded());
        app.use(express.staticCache({maxObjects: 100, maxLength: 512}));
        // Compile .styl sources found under /views into /public on demand.
        app.use(stylus.middleware({
            src: __dirname + '/views'
          , dest: __dirname + '/public'
        }));
        // NOTE(review): static files are served before the router, and basic
        // auth (below) is installed as a route, so files under /public appear
        // to be reachable without credentials - confirm this is intended.
        app.use(express.static(__dirname + '/public'));
        app.use(express.bodyParser());
        app.use(express.methodOverride());
        app.use(app.router);
        app.use(express.directory(__dirname + '/public'));
        // Nothing above produced a response: report a real 404 (the previous
        // handler's error ended up being sent with status 200).
        app.use(function(req, res, next){
            var err = new Error(req.url + ' not found');
            err.status = 404;
            next(err);
        });
        // Final error handler: log, then answer with the error's own status
        // (500 by default). The body is sent as plain text because the message
        // can contain the request URL, which is client-controlled.
        app.use(function(err, req, res, next) {
            console.error(err);
            var message = (err && err.message) || String(err);
            res.status((err && err.status) || 500);
            res.type('text/plain');
            res.send(message);
        });
    });
    // development only
    // NOTE(review): this is registered after the catch-all error handler
    // above, which always responds, so it looks unreachable - confirm.
    if ('development' == app.get('env')) {
        app.use(express.errorHandler());
    }

    // Authenticator
    //app.use(express.basicAuth('config','config123'));//for 2.x
    if(settings['config_web_auth']){
        // Loose equality is kept on purpose: credentials loaded from settings
        // may not be strings.
        var basicAuth = express.basicAuth(function(username, password) {
            return (username == settings['config_web_auth'][0] && password == settings['config_web_auth'][1]);
        },'Restrict area, please identify');

        // Registered before every other route so that it guards all of them.
        app.all("*", basicAuth);
    }

    app.get('/', routes.index);
    controller.mapRoute(app);

    console.log('create server.');
    http.createServer(app).listen(app.get('port'), function(){
        console.log('webconfig server listening on port ' + app.get('port'));
    });
}
////////////////////////////////////////
module.exports = webconfig;