├── web ├── __init__.py ├── static │ ├── js │ │ ├── base.js │ │ ├── angular-cookies.min.js │ │ ├── angular-loader.min.js │ │ ├── script.min.js │ │ ├── angular-masonry.min.js │ │ ├── loading-bar.min.js │ │ ├── angular-aria.min.js │ │ ├── angular-resource.min.js │ │ ├── angular-route.min.js │ │ ├── codemirror-component.min.js │ │ ├── imagesloaded.pkgd.min.js │ │ ├── ui-bootstrap-custom-tpls-0.13.0.min.js │ │ ├── angular-animate.min.js │ │ └── app.js │ ├── fonts │ │ ├── FontAwesome.otf │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.ttf │ │ ├── fontawesome-webfont.woff │ │ └── fontawesome-webfont.woff2 │ └── css │ │ ├── base.css │ │ ├── loading-bar.min.css │ │ ├── codemirror.min.css │ │ └── font-awesome.min.css ├── templates │ ├── component │ │ ├── index.html │ │ ├── task.html │ │ ├── 403-list.html │ │ ├── error-list.html │ │ ├── project.html │ │ ├── exec-test.html │ │ ├── result-image.html │ │ ├── result.html │ │ ├── project-edit.html │ │ └── slave.html │ └── main.html └── web_ui.py ├── .gitignore ├── setup.cfg ├── requirements.txt ├── .idea └── scopes │ └── scope_settings.xml ├── setup.py ├── mongo_single.py ├── test.py ├── slave_ctrl.py ├── jieba_cut.py ├── README.md ├── spider_for_test.py ├── spider.py ├── functions.py ├── pagination.py ├── client.py ├── ser_handle.py ├── service.py ├── LICENSE └── helper.py /web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /web/static/js/base.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [easy_install] 2 | index_url = http://pypi.douban.com/simple -------------------------------------------------------------------------------- /web/static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenset/pyFetch/HEAD/web/static/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click 2 | requests 3 | pymongo 4 | flask 5 | flask-compress 6 | gevent 7 | tld 8 | pybloomfiltermmap 9 | -------------------------------------------------------------------------------- /web/static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenset/pyFetch/HEAD/web/static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /web/static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenset/pyFetch/HEAD/web/static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /web/static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenset/pyFetch/HEAD/web/static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /web/static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenset/pyFetch/HEAD/web/static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /.idea/scopes/scope_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="pyFetch", 5 | version="0.1", 6 | packages=find_packages(), 7 | install_requires=[ 8 | 'Flask', 9 | 'Flask-compress', 10 | 'requests', 11 | 'pymongo', 12 | 'gevent', 13 | 'Tld', 14 | ], 15 | ) -------------------------------------------------------------------------------- /mongo_single.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | 3 | 4 | class Mongo: 5 | instance = None 6 | conn = None 7 | cursor = None 8 | 9 | def __init__(self): 10 | self.conn = pymongo.MongoClient("127.0.0.1", 27017).pyfetch 11 | 12 | @classmethod 13 | def get(cls): 14 | if cls.instance is None: 15 | cls.instance = cls() 16 | 17 | return cls.instance.conn 18 | 19 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import click 2 | from functions import get_wan_ip 3 | import socket 4 | print socket.gethostbyname(socket.gethostname()) 5 | 6 | print get_wan_ip() 7 | 8 | # 9 | # @click.command() 10 | # @click.option('--name', prompt='Your name', 11 | # help='The person to greet.') 12 | # def hello(name): 13 | # """Simple program that greets NAME for a total of COUNT times.""" 14 | # click.echo('Hello %s!' % name) 15 | # 16 | # 17 | # if __name__ == '__main__': 18 | # hello() 19 | # print 123123 -------------------------------------------------------------------------------- /slave_ctrl.py: -------------------------------------------------------------------------------- 1 | from mongo_single import Mongo 2 | 3 | 4 | class SlaveCtrl(): 5 | def __init__(self): 6 | pass 7 | 8 | def switch_ctrl(self): 9 | pass 10 | 11 | def code_ctrl(self): 12 | result = [] 13 | for project in Mongo.get().projects.find({}, {'_id': 0, 'code': 1, 'name': 1, 'init_url': 1}): 14 | result.append(project) 15 | return result 16 | 17 | 18 | if __name__ == '__main__': 19 | import json 20 | 21 | ctrl = SlaveCtrl() 22 | print ctrl.code_ctrl() 23 | print json.dumps(ctrl.code_ctrl()) -------------------------------------------------------------------------------- /jieba_cut.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import time 3 | 4 | import jieba 5 | 6 | from jieba import analyse 7 | 8 | start_time = time.time() 9 | 10 | seg_list = jieba.cut("我来到北京清华大学", cut_all=True) 11 | print("Full Mode: " + "/ ".join(seg_list)) # 全模式 12 | 13 | seg_list = jieba.cut("我来到北京清华大学", cut_all=False) 14 | print("Default Mode: " + "/ ".join(seg_list)) # 精确模式 15 | 16 | seg_list = jieba.cut("他来到了网易杭研大厦") # 默认是精确模式 17 | print(", ".join(seg_list)) 18 | 19 | seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式 20 | print(", ".join(seg_list)) 21 | 22 | for i in jieba.analyse.extract_tags("小明硕士毕业于中国科学院计算所,后在日本京都大学深造", topK=20, withWeight=True): 23 | print i[0] 24 | 25 | print round((time.time() - start_time) * 1000, 2), 'ms' -------------------------------------------------------------------------------- /web/static/js/angular-cookies.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | AngularJS v1.3.15 3 | (c) 2010-2014 Google, Inc. http://angularjs.org 4 | License: MIT 5 | */ 6 | (function(p,f,n){'use strict';f.module("ngCookies",["ng"]).factory("$cookies",["$rootScope","$browser",function(e,b){var c={},g={},h,k=!1,l=f.copy,m=f.isUndefined;b.addPollFn(function(){var a=b.cookies();h!=a&&(h=a,l(a,g),l(a,c),k&&e.$apply())})();k=!0;e.$watch(function(){var a,d,e;for(a in g)m(c[a])&&b.cookies(a,n);for(a in c)d=c[a],f.isString(d)||(d=""+d,c[a]=d),d!==g[a]&&(b.cookies(a,d),e=!0);if(e)for(a in d=b.cookies(),c)c[a]!==d[a]&&(m(d[a])?delete c[a]:c[a]=d[a])});return c}]).factory("$cookieStore", 7 | ["$cookies",function(e){return{get:function(b){return(b=e[b])?f.fromJson(b):b},put:function(b,c){e[b]=f.toJson(c)},remove:function(b){delete e[b]}}}])})(window,window.angular); 8 | //# sourceMappingURL=angular-cookies.min.js.map 9 | -------------------------------------------------------------------------------- /web/templates/component/index.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 26 | 27 | 28 |
项目状态进度耗时
{{$index+1}}{{item.name}}345 22 | 23 | 任务 24 | 结果 25 |
29 |
30 | -------------------------------------------------------------------------------- /web/templates/component/task.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 20 | 21 | 22 |
时间IP状态URL
{{ item.add_time+'000'|date:'MM-dd HH:mm':'UTC+8'}}{{ item.slave_ip }}200{{ item.url|limitTo:100}} 19 |
23 |
24 |
25 |
26 | -------------------------------------------------------------------------------- /web/templates/component/403-list.html: -------------------------------------------------------------------------------- 1 | 6 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyFetch 2 | 3 | 基于python的分布式爬虫 4 | 5 | [DEMO: https://fetch.flysay.com](https://fetch.flysay.com) 6 | 7 | ### 安装mongoDB 8 | 9 | https://www.mongodb.org/downloads 10 | 默认端口运行mongoDB 11 | 12 | ### 安装依赖 13 | 14 | linux 安装 15 | 16 | #ubuntu 17 | apt-get install build-essential 18 | apt-get install python-dev 19 | #centos 20 | yum groupinstall "Development Tools" 21 | yum install python-devel 22 | 23 | 24 | windows 下的 gevent 可能需要安装 Microsoft Visual C++ Compiler for Python 2.7 http://www.microsoft.com/en-us/download/confirmation.aspx?id=44266 25 | 26 | pip install requests 27 | pip install pymongo 28 | pip install flask 29 | pip install flask-compress 30 | pip install gevent 31 | pip install tld 32 | pip install click 33 | pip install pybloomfiltermmap 34 | 35 | ### 执行 36 | 37 | 服务器 38 | 39 | python service.py 40 | 41 | 客服端 42 | 43 | python client.py 44 | 45 | ### 访问 46 | 47 | http://127.0.0.1 48 | 49 | 50 | ## Todo list 51 | 52 | - 参数可配置化, 还有mongo的连接配置 53 | - slave 执行环境安全 54 | - setup.py 55 | - 列表的时间排序有问题 56 | - 每个项目都可以添加多个url抓取入口 57 | - 项目与爬虫的抓取频率显示 58 | - 结果页面图片浏览模式 59 | - 新建项目且修改代码时,会有缓存且爬虫会使用旧代码进行抓取 60 | - 当有域名403时, mongod CPU占用较高 61 | -------------------------------------------------------------------------------- /web/templates/component/error-list.html: -------------------------------------------------------------------------------- 1 | 6 | 30 | -------------------------------------------------------------------------------- /web/static/js/angular-loader.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | AngularJS v1.3.15 3 | (c) 2010-2014 Google, Inc. http://angularjs.org 4 | License: MIT 5 | */ 6 | (function(){'use strict';function d(b){return function(){var c=arguments[0],e;e="["+(b?b+":":"")+c+"] http://errors.angularjs.org/1.3.15/"+(b?b+"/":"")+c;for(c=1;c 2 | 新建 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 32 | 33 | 34 |
项目状态抓取数队列数结果数
{{$index+1}} {{item.name}}{{item.static}}{{item.parsed_len}}{{item.queue_len}}{{item.result_len}} 27 | 28 | 29 | 历史 30 | 结果 31 |
35 | 36 | -------------------------------------------------------------------------------- /web/templates/component/exec-test.html: -------------------------------------------------------------------------------- 1 | 9 | 35 | -------------------------------------------------------------------------------- /web/templates/component/result-image.html: -------------------------------------------------------------------------------- 1 |
2 | 5 | 8 |
9 |
10 |
11 | 34 |
35 |
36 |
37 |
38 |
39 |
40 | 无法加载 41 |
42 |
43 |
44 |
-------------------------------------------------------------------------------- /web/static/js/angular-masonry.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * angular-masonry 0.11.1 3 | * Pascal Hartig, weluse GmbH, http://weluse.de/ 4 | * License: MIT 5 | */ 6 | !function(){"use strict";angular.module("wu.masonry",[]).controller("MasonryCtrl",["$scope","$element","$timeout",function(a,b,c){function d(a){a.addClass("loaded")}var e={},f=[],g=!1,h=this,i=null;this.preserveOrder=!1,this.loadImages=!0,this.scheduleMasonryOnce=function(){var a=arguments,b=f.filter(function(b){return b[0]===a[0]}).length>0;b||this.scheduleMasonry.apply(null,arguments)},this.scheduleMasonry=function(){i&&c.cancel(i),f.push([].slice.call(arguments)),i=c(function(){g||(f.forEach(function(a){b.masonry.apply(b,a)}),f=[])},30)},this.appendBrick=function(a,c){function f(){0===Object.keys(e).length&&b.masonry("resize"),void 0===e[c]&&(e[c]=!0,d(a),b.masonry("appended",a,!0))}function i(){h.scheduleMasonryOnce("layout")}g||(h.loadImages?h.preserveOrder?(f(),a.imagesLoaded(i)):a.imagesLoaded(function(){f(),i()}):(f(),i()))},this.removeBrick=function(a,c){g||(delete e[a],b.masonry("remove",c),this.scheduleMasonryOnce("layout"))},this.destroy=function(){g=!0,b.data("masonry")&&b.masonry("destroy"),a.$emit("masonry.destroyed"),e={}},this.reload=function(){b.masonry(),a.$emit("masonry.reloaded")}}]).directive("masonry",function(){return{restrict:"AE",controller:"MasonryCtrl",link:{pre:function(a,b,c,d){var e=a.$eval(c.masonry||c.masonryOptions),f=angular.extend({itemSelector:c.itemSelector||".masonry-brick",columnWidth:parseInt(c.columnWidth,10)||c.columnWidth},e||{});b.masonry(f),a.masonryContainer=b[0];var g=a.$eval(c.loadImages);d.loadImages=g!==!1;var h=a.$eval(c.preserveOrder);d.preserveOrder=h!==!1&&void 0!==c.preserveOrder;var i=a.$eval(c.reloadOnShow);i!==!1&&void 0!==c.reloadOnShow&&a.$watch(function(){return b.prop("offsetParent")},function(a,b){a&&!b&&d.reload()});var j=a.$eval(c.reloadOnResize);j!==!1&&void 0!==c.reloadOnResize&&a.$watch("masonryContainer.offsetWidth",function(a,b){a!=b&&d.reload()}),a.$emit("masonry.created",b),a.$on("$destroy",d.destroy)}}}}).directive("masonryBrick",function(){return{restrict:"AC",require:"^masonry",scope:!0,link:{pre:function(a,b,c,d){var e,f=a.$id;d.appendBrick(b,f),b.on("$destroy",function(){d.removeBrick(f,b)}),a.$on("masonry.reload",function(){d.scheduleMasonryOnce("reloadItems"),d.scheduleMasonryOnce("layout")}),a.$watch("$index",function(){void 0!==e&&e!==a.$index&&(d.scheduleMasonryOnce("reloadItems"),d.scheduleMasonryOnce("layout")),e=a.$index})}}}})}(); -------------------------------------------------------------------------------- /web/static/css/loading-bar.min.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * angular-loading-bar v0.7.1 3 | * https://chieffancypants.github.io/angular-loading-bar 4 | * Copyright (c) 2015 Wes Cruver 5 | * License: MIT 6 | */ 7 | 8 | #loading-bar,#loading-bar-spinner{pointer-events:none;-webkit-pointer-events:none;-webkit-transition:350ms linear all;-moz-transition:350ms linear all;-o-transition:350ms linear all;transition:350ms linear all}#loading-bar.ng-enter,#loading-bar.ng-leave.ng-leave-active,#loading-bar-spinner.ng-enter,#loading-bar-spinner.ng-leave.ng-leave-active{opacity:0}#loading-bar.ng-enter.ng-enter-active,#loading-bar.ng-leave,#loading-bar-spinner.ng-enter.ng-enter-active,#loading-bar-spinner.ng-leave{opacity:1}#loading-bar .bar{-webkit-transition:width 350ms;-moz-transition:width 350ms;-o-transition:width 350ms;transition:width 350ms;background:#29d;position:fixed;z-index:10002;top:0;left:0;width:100%;height:2px;border-bottom-right-radius:1px;border-top-right-radius:1px}#loading-bar .peg{position:absolute;width:70px;right:0;top:0;height:2px;opacity:.45;-moz-box-shadow:#29d 1px 0 6px 1px;-ms-box-shadow:#29d 1px 0 6px 1px;-webkit-box-shadow:#29d 1px 0 6px 1px;box-shadow:#29d 1px 0 6px 1px;-moz-border-radius:100%;-webkit-border-radius:100%;border-radius:100%}#loading-bar-spinner{display:block;position:fixed;z-index:10002;top:10px;left:10px}#loading-bar-spinner .spinner-icon{width:14px;height:14px;border:solid 2px transparent;border-top-color:#29d;border-left-color:#29d;border-radius:10px;-webkit-animation:loading-bar-spinner 400ms linear infinite;-moz-animation:loading-bar-spinner 400ms linear infinite;-ms-animation:loading-bar-spinner 400ms linear infinite;-o-animation:loading-bar-spinner 400ms linear infinite;animation:loading-bar-spinner 400ms linear infinite}@-webkit-keyframes loading-bar-spinner{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(360deg);transform:rotate(360deg)}}@-moz-keyframes loading-bar-spinner{0%{-moz-transform:rotate(0deg);transform:rotate(0deg)}100%{-moz-transform:rotate(360deg);transform:rotate(360deg)}}@-o-keyframes loading-bar-spinner{0%{-o-transform:rotate(0deg);transform:rotate(0deg)}100%{-o-transform:rotate(360deg);transform:rotate(360deg)}}@-ms-keyframes loading-bar-spinner{0%{-ms-transform:rotate(0deg);transform:rotate(0deg)}100%{-ms-transform:rotate(360deg);transform:rotate(360deg)}}@keyframes loading-bar-spinner{0%{transform:rotate(0deg);transform:rotate(0deg)}100%{transform:rotate(360deg);transform:rotate(360deg)}} -------------------------------------------------------------------------------- /web/templates/component/result.html: -------------------------------------------------------------------------------- 1 |
2 | 6 | 9 |
10 |
11 |
12 |
13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 29 | 30 | 31 |
{{ title }}
24 | {{ item[title]|limitTo: 150}} 25 | {{ item[title]|limitTo: 150}} 28 |
32 |
33 | 34 | 57 |
58 |
-------------------------------------------------------------------------------- /spider_for_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import helper 3 | import traceback 4 | from functions import get_urls_form_html, stdoutIO 5 | 6 | 7 | class SpiderForTest(): 8 | http_helper = None 9 | current_url = '' 10 | handle_method = None 11 | rest_result = {} 12 | 13 | def __init__(self): 14 | self.rest_result = {'urls': [], 'current_url': '', 'http_code': 0, 'result': {}} 15 | self.http_helper = helper.HttpHelper() 16 | 17 | def run(self, func, project_name, init_url): 18 | self.handle_method = func 19 | 20 | crawl_result = self.http_helper.get(self.current_url) 21 | if not str(crawl_result[1]).startswith('20') \ 22 | and not str(crawl_result[1]).startswith('30'): # 如果不是200系列和300系列的状态码输出错误 23 | return { 24 | 'error': 'URL: ' + self.current_url + ' 获取失败 HTTP code: ' + str(crawl_result[1]) + ' Runtime: ' + str( 25 | crawl_result[2]) + 'ms'} 26 | 27 | urls = get_urls_form_html(self.current_url, crawl_result[0]) 28 | self.rest_result['current_url'] = self.current_url 29 | self.rest_result['http_code'] = crawl_result[1] 30 | 31 | current_url = self.current_url # 缓存一下,self.current_url会被下面代码改写 32 | # 如果抓取自定义函数存在dict返回值则将dict推送至服务器 33 | parse_result = self.handle_method( 34 | helper.S(self, crawl_result[0], urls, project_name, init_url)) 35 | 36 | if not isinstance(parse_result, dict): 37 | return self.rest_result 38 | 39 | if 'url' not in parse_result: 40 | parse_result['url'] = current_url 41 | if 'runtime' not in parse_result: 42 | parse_result['runtime'] = crawl_result[2] 43 | 44 | self.rest_result['result'] = parse_result 45 | 46 | return self.rest_result 47 | 48 | def crawl(self, url='', add_urls_flag=True): 49 | self.current_url = url 50 | if add_urls_flag: 51 | url not in self.rest_result['urls'] and self.rest_result['urls'].append(url) 52 | 53 | 54 | def test_run(form_data): 55 | context = {} 56 | 57 | def start(callback): 58 | context['callback'] = callback 59 | 60 | result = { 61 | 'urls': [], 'current_url': '', 'http_code': 0, 'result': {}, 'stdout': '' 62 | } 63 | with stdoutIO() as s: 64 | try: 65 | code = compile(form_data['code'], 'test_mode_file', 'exec') 66 | exec code in {'start': start} 67 | spider = SpiderForTest() 68 | spider.crawl(form_data['init_url'], False) 69 | result = spider.run(context['callback'], form_data['project_name'], form_data['real_init_url']) 70 | except Exception, e: 71 | print traceback.format_exc() 72 | 73 | result['stdout'] = s.getvalue().strip() 74 | return result 75 | -------------------------------------------------------------------------------- /web/templates/main.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | PyFetch 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 39 |
40 | 41 | 42 |
43 |
44 | 45 | 57 | 58 |
61 | 62 |
63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /web/static/js/loading-bar.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * angular-loading-bar v0.7.1 3 | * https://chieffancypants.github.io/angular-loading-bar 4 | * Copyright (c) 2015 Wes Cruver 5 | * License: MIT 6 | */ 7 | !function(){"use strict";angular.module("angular-loading-bar",["cfp.loadingBarInterceptor"]),angular.module("chieffancypants.loadingBar",["cfp.loadingBarInterceptor"]),angular.module("cfp.loadingBarInterceptor",["cfp.loadingBar"]).config(["$httpProvider",function(a){var b=["$q","$cacheFactory","$timeout","$rootScope","$log","cfpLoadingBar",function(b,c,d,e,f,g){function h(){d.cancel(j),g.complete(),l=0,k=0}function i(b){var d,e=c.get("$http"),f=a.defaults;!b.cache&&!f.cache||b.cache===!1||"GET"!==b.method&&"JSONP"!==b.method||(d=angular.isObject(b.cache)?b.cache:angular.isObject(f.cache)?f.cache:e);var g=void 0!==d?void 0!==d.get(b.url):!1;return void 0!==b.cached&&g!==b.cached?b.cached:(b.cached=g,g)}var j,k=0,l=0,m=g.latencyThreshold;return{request:function(a){return a.ignoreLoadingBar||i(a)||(e.$broadcast("cfpLoadingBar:loading",{url:a.url}),0===k&&(j=d(function(){g.start()},m)),k++,g.set(l/k)),a},response:function(a){return a&&a.config?(a.config.ignoreLoadingBar||i(a.config)||(l++,e.$broadcast("cfpLoadingBar:loaded",{url:a.config.url,result:a}),l>=k?h():g.set(l/k)),a):(f.error("Broken interceptor detected: Config object not supplied in response:\n https://github.com/chieffancypants/angular-loading-bar/pull/50"),a)},responseError:function(a){return a&&a.config?(a.config.ignoreLoadingBar||i(a.config)||(l++,e.$broadcast("cfpLoadingBar:loaded",{url:a.config.url,result:a}),l>=k?h():g.set(l/k)),b.reject(a)):(f.error("Broken interceptor detected: Config object not supplied in rejection:\n https://github.com/chieffancypants/angular-loading-bar/pull/50"),b.reject(a))}}}];a.interceptors.push(b)}]),angular.module("cfp.loadingBar",[]).provider("cfpLoadingBar",function(){this.includeSpinner=!0,this.includeBar=!0,this.latencyThreshold=100,this.startSize=.02,this.parentSelector="body",this.spinnerTemplate='
',this.loadingBarTemplate='
',this.$get=["$injector","$document","$timeout","$rootScope",function(a,b,c,d){function e(){k||(k=a.get("$animate"));var e=b.find(n).eq(0);c.cancel(m),r||(d.$broadcast("cfpLoadingBar:started"),r=!0,u&&k.enter(o,e,angular.element(e[0].lastChild)),t&&k.enter(q,e,angular.element(e[0].lastChild)),f(v))}function f(a){if(r){var b=100*a+"%";p.css("width",b),s=a,c.cancel(l),l=c(function(){g()},250)}}function g(){if(!(h()>=1)){var a=0,b=h();a=b>=0&&.25>b?(3*Math.random()+3)/100:b>=.25&&.65>b?3*Math.random()/100:b>=.65&&.9>b?2*Math.random()/100:b>=.9&&.99>b?.005:0;var c=h()+a;f(c)}}function h(){return s}function i(){s=0,r=!1}function j(){k||(k=a.get("$animate")),d.$broadcast("cfpLoadingBar:completed"),f(1),c.cancel(m),m=c(function(){var a=k.leave(o,i);a&&a.then&&a.then(i),k.leave(q)},500)}var k,l,m,n=this.parentSelector,o=angular.element(this.loadingBarTemplate),p=o.find("div").eq(0),q=angular.element(this.spinnerTemplate),r=!1,s=0,t=this.includeSpinner,u=this.includeBar,v=this.startSize;return{start:e,set:f,status:h,inc:g,complete:j,includeSpinner:this.includeSpinner,latencyThreshold:this.latencyThreshold,parentSelector:this.parentSelector,startSize:this.startSize}}]})}(); -------------------------------------------------------------------------------- /spider.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import helper 3 | import time 4 | from functions import echo_err, get_urls_form_html, format_and_filter_urls, get_domain 5 | from helper import S 6 | from helper import Slave 7 | import sys 8 | import traceback 9 | 10 | reload(sys) 11 | sys.setdefaultencoding('utf-8') 12 | 13 | 14 | class Spider(Slave): 15 | """ 16 | slave抓取逻辑 17 | """ 18 | 19 | def __init__(self, project_name): 20 | self.handle_method = None 21 | self.pre_url_queue = [] 22 | self.http_helper = None 23 | self.current_url = '' # 当前url 24 | self.pre_url_queue = [] 25 | self.http_helper = helper.HttpHelper() 26 | Slave.__init__(self, project_name) 27 | 28 | def run(self, func, current_url, project_name, init_url, gevent_id): 29 | """ 30 | :param func: 31 | :return: 32 | """ 33 | self.handle_method = func 34 | 35 | # while True: 36 | # todo 需要些速度控制方法. gevent.sleep 37 | self.current_url = current_url 38 | 39 | print 'gevent_id: ' + str(gevent_id) + ' -- ' + self.project_name + ' -- ' + self.current_url 40 | if not self.current_url: 41 | # continue 42 | return 43 | self.put_data(urls_parsed=[self.current_url, ]) 44 | crawl_result = self.http_helper.get(self.current_url) 45 | if not str(crawl_result[1]).startswith('20') \ 46 | and not str(crawl_result[1]).startswith('30'): # 如果不是200系列和300系列的状态码输出错误 47 | echo_err('gevent_id: ' + str(gevent_id) + ' -- ' + self.project_name + 48 | ' -- URL: ' + self.current_url + ' 获取失败 HTTP code: ' + str(crawl_result[1]) + ' Runtime: ' + str( 49 | crawl_result[2]) + 'ms') 50 | # continue 51 | self.put_data( 52 | urls_fail=( 53 | get_domain(self.current_url), 54 | int(crawl_result[1] if str(crawl_result[1]).isdigit() else 0), 55 | int(time.time()), 56 | ), 57 | ) 58 | return 59 | 60 | # 如果抓取自定义函数存在dict返回值则将dict推送至服务器 61 | try: 62 | parse_result = self.handle_method( 63 | S(self, crawl_result[0], get_urls_form_html(self.current_url, crawl_result[0]), project_name, init_url)) 64 | except: 65 | print traceback.format_exc() 66 | return 67 | 68 | if not isinstance(parse_result, dict): 69 | # continue 70 | return 71 | 72 | if 'url' not in parse_result: 73 | parse_result['url'] = self.current_url 74 | # if 'runtime' not in parse_result: 75 | # parse_result['runtime'] = crawl_result[2] 76 | 77 | self.put_data(save=parse_result) 78 | 79 | def crawl(self, url=''): 80 | """ 81 | 仅加入远程待抓取队列 82 | self.run()会循环的从本地队列中获取url进行实际抓取 83 | :param string url: 84 | :return: 85 | """ 86 | url and self.put_data(urls_add=(format_and_filter_urls(self.current_url, url),)) 87 | 88 | # from gevent import monkey 89 | # 90 | # monkey.patch_all() 91 | # import gevent 92 | # 93 | # 94 | # def start(project_name, callback): 95 | # gevent.joinall([gevent.spawn(Spider(project_name).run, callback) for i in xrange(2)]) 96 | -------------------------------------------------------------------------------- /web/static/js/angular-aria.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | AngularJS v1.3.15 3 | (c) 2010-2014 Google, Inc. http://angularjs.org 4 | License: MIT 5 | */ 6 | (function(r,n,s){'use strict';n.module("ngAria",["ng"]).provider("$aria",function(){function a(a,f,g){return function(b,c,d){var k=d.$normalize(f);e[k]&&!d[k]&&b.$watch(d[a],function(b){g&&(b=!b);c.attr(f,b)})}}var e={ariaHidden:!0,ariaChecked:!0,ariaDisabled:!0,ariaRequired:!0,ariaInvalid:!0,ariaMultiline:!0,ariaValue:!0,tabindex:!0,bindKeypress:!0};this.config=function(a){e=n.extend(e,a)};this.$get=function(){return{config:function(a){return e[a]},$$watchExpr:a}}}).directive("ngShow",["$aria",function(a){return a.$$watchExpr("ngShow", 7 | "aria-hidden",!0)}]).directive("ngHide",["$aria",function(a){return a.$$watchExpr("ngHide","aria-hidden",!1)}]).directive("ngModel",["$aria",function(a){function e(e,b,c){return a.config(b)&&!c.attr(e)}function h(a,b){return!b.attr("role")&&b.attr("type")===a&&"INPUT"!==b[0].nodeName}function f(a,b){var c=a.type,d=a.role;return"checkbox"===(c||d)||"menuitemcheckbox"===d?"checkbox":"radio"===(c||d)||"menuitemradio"===d?"radio":"range"===c||"progressbar"===d||"slider"===d?"range":"textbox"===(c||d)|| 8 | "TEXTAREA"===b[0].nodeName?"multiline":""}return{restrict:"A",require:"?ngModel",priority:200,link:function(g,b,c,d){function k(){return d.$modelValue}function p(){return m?(m=!1,function(a){a=c.value==d.$viewValue;b.attr("aria-checked",a);b.attr("tabindex",0-!a)}):function(a){b.attr("aria-checked",c.value==d.$viewValue)}}function q(a){b.attr("aria-checked",!d.$isEmpty(d.$viewValue))}var l=f(c,b),m=e("tabindex","tabindex",b);switch(l){case "radio":case "checkbox":h(l,b)&&b.attr("role",l);e("aria-checked", 9 | "ariaChecked",b)&&g.$watch(k,"radio"===l?p():q);break;case "range":h(l,b)&&b.attr("role","slider");a.config("ariaValue")&&(c.min&&!b.attr("aria-valuemin")&&b.attr("aria-valuemin",c.min),c.max&&!b.attr("aria-valuemax")&&b.attr("aria-valuemax",c.max),b.attr("aria-valuenow")||g.$watch(k,function(a){b.attr("aria-valuenow",a)}));break;case "multiline":e("aria-multiline","ariaMultiline",b)&&b.attr("aria-multiline",!0)}m&&b.attr("tabindex",0);d.$validators.required&&e("aria-required","ariaRequired",b)&& 10 | g.$watch(function(){return d.$error.required},function(a){b.attr("aria-required",!!a)});e("aria-invalid","ariaInvalid",b)&&g.$watch(function(){return d.$invalid},function(a){b.attr("aria-invalid",!!a)})}}}]).directive("ngDisabled",["$aria",function(a){return a.$$watchExpr("ngDisabled","aria-disabled")}]).directive("ngMessages",function(){return{restrict:"A",require:"?ngMessages",link:function(a,e,h,f){e.attr("aria-live")||e.attr("aria-live","assertive")}}}).directive("ngClick",["$aria","$parse",function(a, 11 | e){return{restrict:"A",compile:function(h,f){var g=e(f.ngClick,null,!0);return function(b,c,d){function e(b,a){if(-1!==a.indexOf(b[0].nodeName))return!0}var f=["BUTTON","A","INPUT","TEXTAREA"];c.attr("role")||e(c,f)||c.attr("role","button");a.config("tabindex")&&!c.attr("tabindex")&&c.attr("tabindex",0);if(a.config("bindKeypress")&&!d.ngKeypress&&!e(c,f))c.on("keypress",function(a){function c(){g(b,{$event:a})}32!==a.keyCode&&13!==a.keyCode||b.$apply(c)})}}}}]).directive("ngDblclick",["$aria",function(a){return function(e, 12 | h,f){a.config("tabindex")&&!h.attr("tabindex")&&h.attr("tabindex",0)}}])})(window,window.angular); 13 | //# sourceMappingURL=angular-aria.min.js.map 14 | -------------------------------------------------------------------------------- /web/templates/component/project-edit.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 基本设置 7 | 8 | 9 |
10 | 11 |
12 | 执行或保存后不可修改! 13 |
14 | 格式: ^[0-9a-zA-Z_-]+$ 15 | 18 |
19 |
20 | 21 |
22 | 执行或保存后不可修改! 23 |
24 | 抓取入口, 在新建的项目中会默认将其加入抓取队列, 成为第一个抓取的链接. 25 |
26 | 格式: http://www.example.com/example.html 27 | 35 |
36 |
37 | 38 | 45 |
46 |
47 |
48 | 80 |
81 | 82 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /web/templates/component/slave.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 37 | 38 | 42 | 43 | 44 | 56 | 57 | 58 |
IP状态通信次数抓取页面403域名失败统计在线时长最近通信
{{$index+1}}{{ item.ip }}{{ item.static }}{{ item.connected_count }}{{ item.parsed_count }}0 34 | {{ item.deny_domains.length }}个 36 | 0 39 | {{ item.error_domain_count }}组 41 | {{ item.work_time_count }}秒{{ item.last_connected_time+'000'|date:'MM-dd HH:mm:ss':'UTC+8'}} 45 | 49 | 54 | 历史 55 |
59 |

60 | 61 | 1. 点击 暂停重启 会先发送指令, 真正的暂停或重启会在该爬虫本地剩余队列全部处理完毕后. 62 | 63 |

64 |

65 | 66 | 2. 使用命令 python client.py -h 服务器IP -p 服务器端口 添加爬虫 67 | 68 |

69 |
70 |
71 |
72 | -------------------------------------------------------------------------------- /web/static/js/angular-resource.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | AngularJS v1.3.15 3 | (c) 2010-2014 Google, Inc. http://angularjs.org 4 | License: MIT 5 | */ 6 | (function(I,d,B){'use strict';function D(f,q){q=q||{};d.forEach(q,function(d,h){delete q[h]});for(var h in f)!f.hasOwnProperty(h)||"$"===h.charAt(0)&&"$"===h.charAt(1)||(q[h]=f[h]);return q}var w=d.$$minErr("$resource"),C=/^(\.[a-zA-Z_$][0-9a-zA-Z_$]*)+$/;d.module("ngResource",["ng"]).provider("$resource",function(){var f=this;this.defaults={stripTrailingSlashes:!0,actions:{get:{method:"GET"},save:{method:"POST"},query:{method:"GET",isArray:!0},remove:{method:"DELETE"},"delete":{method:"DELETE"}}}; 7 | this.$get=["$http","$q",function(q,h){function t(d,g){this.template=d;this.defaults=s({},f.defaults,g);this.urlParams={}}function v(x,g,l,m){function c(b,k){var c={};k=s({},g,k);r(k,function(a,k){u(a)&&(a=a());var d;if(a&&a.charAt&&"@"==a.charAt(0)){d=b;var e=a.substr(1);if(null==e||""===e||"hasOwnProperty"===e||!C.test("."+e))throw w("badmember",e);for(var e=e.split("."),n=0,g=e.length;n]+href="([(\.|h|/)][^"]+)"[^>]*>[^<]+' 72 | r = re.compile(patt) 73 | match = r.findall(html) 74 | urls = [] 75 | for url in match: 76 | url and urls.append(format_and_filter_urls(base_url, url)) 77 | 78 | return urls 79 | 80 | 81 | def smarty_encode(text): 82 | chars = text[0: 1000].lower() 83 | if chars.find('charset=utf-8') != -1 and chars.find('chiphell') != -1: # 一些编码不标准的站点的特殊处理 84 | return text 85 | 86 | for k in ['utf-8', 'gb18030', 'ISO-8859-2', 'ISO-8859-1', 'gb2312', 'gbk']: 87 | try: 88 | return unicode(text, k) 89 | except: 90 | continue 91 | 92 | raise Exception('Had no way to encode') 93 | 94 | 95 | def get_domain(url): 96 | try: 97 | return get_tld(url) 98 | except: 99 | base_url = "".join(url) # 删除所有\s+ 100 | protocol, rest = urllib2.splittype(base_url) 101 | host, rest = urllib2.splithost(rest) 102 | return host 103 | 104 | 105 | def fetch_ip(content): 106 | result = re.search( 107 | '((?:(?:25[0-5]|2[0-4]\d|((1\d{2})|([1-9]?\d)))\.){3}(?:25[0-5]|2[0-4]\d|((1\d{2})|([1-9]?\d))))', 108 | content) 109 | if not result: 110 | return None 111 | return result.group(0) 112 | 113 | 114 | def get_wan_ip(): 115 | server_list = ( 116 | # 获取wan ip的网站地址, 可以自己添加更多 117 | 'http://wanip.sinaapp.com/', 118 | 'http://1111.ip138.com/ic.asp', 119 | 'http://city.ip138.com/ip2city.asp', 120 | 'http://www.ip38.com/', 121 | ) 122 | 123 | for url in server_list: 124 | try: 125 | html = requests.get(url) 126 | ip = fetch_ip(html.content) 127 | 128 | except: 129 | continue 130 | else: 131 | return ip 132 | 133 | -------------------------------------------------------------------------------- /pagination.py: -------------------------------------------------------------------------------- 1 | from math import ceil, floor 2 | 3 | 4 | def paginate(mongo_cursor, page, per_page=20, error_out=True): 5 | """Returns ``per_page`` items from page ``page`` By default, it will 6 | abort with 404 if no items were found and the page was larger than 1. 7 | This behaviour can be disabled by setting ``error_out`` to ``False``. 8 | Returns a :class:`Pagination` object.""" 9 | if page < 1 and error_out: 10 | return None 11 | 12 | items = list(mongo_cursor.skip((page - 1) * per_page).limit(per_page)) 13 | if not items and page != 1 and error_out: 14 | return None 15 | 16 | return Pagination(mongo_cursor, page, per_page, items) 17 | 18 | 19 | class Pagination(object): 20 | """Internal helper class returned by :meth:`~BaseQuery.paginate`.""" 21 | 22 | def __init__(self, query, page, per_page, items): 23 | #: query object used to create this 24 | #: pagination object. 25 | self._query = query 26 | #: current page number 27 | self._page = page 28 | #: number of items to be displayed per page 29 | self.per_page = per_page 30 | #: total number of items matching the query 31 | self._total = None 32 | #: list of items for the current page 33 | self._items = items 34 | 35 | def render_json(self, page_limit, url_patten=''): 36 | page_list = [] 37 | start_page = int(self.current_page() - floor(page_limit / 2)) 38 | end_page = int(self.current_page() + ceil(page_limit / 2)) 39 | 40 | if start_page < 1: 41 | end_page += 1 - start_page 42 | 43 | if end_page > self.count(): 44 | start_page -= end_page - self.count() 45 | end_page = self.count()+1 46 | 47 | if start_page < 1: 48 | start_page = 1 49 | 50 | for i in xrange(start_page, end_page): 51 | page_list.append(url_patten + str(i)) 52 | 53 | return { 54 | 'total': self.total(), 55 | 'count': self.count(), 56 | 'page_list': page_list, 57 | 'current_page': url_patten + str(self._page), 58 | 'prev_page': url_patten + str(self.prev_page()) if self.prev_page() else '', 59 | 'next_page': url_patten + str(self.next_page()) if self.next_page() else '', 60 | 'first_page': url_patten + '1', 61 | 'last_page': url_patten + str(self.count()), 62 | } 63 | 64 | def render_view(self, url_patten='page='): 65 | pass 66 | 67 | def total(self): 68 | """The total number of documents""" 69 | if self._total is None: 70 | self._total = self._query.count() 71 | 72 | return self._total 73 | 74 | def count(self): 75 | """The count number of pages""" 76 | return int(ceil(self.total() / float(self.per_page))) 77 | 78 | def current_page(self): 79 | return self._page 80 | 81 | def result(self): 82 | return self._items 83 | 84 | def next_page(self): 85 | """The next page number.""" 86 | return self._page + 1 if self.has_next() else None 87 | 88 | def has_next(self): 89 | """Returns ``True`` if a next page exists.""" 90 | return self._page < self.count() 91 | 92 | def next(self, error_out=False): 93 | """Return a :class:`Pagination` object for the next page.""" 94 | if not self.has_next(): 95 | return None 96 | 97 | return paginate(self._query, self._page + 1, self.per_page, error_out) 98 | 99 | def prev_page(self): 100 | """The previous page number.""" 101 | return self._page - 1 if self.has_prev() else None 102 | 103 | def has_prev(self): 104 | """Returns ``True`` if a previous page exists.""" 105 | return self._page > 1 106 | 107 | def prev(self, error_out=False): 108 | """Return a :class:`Pagination` object for the previous page.""" 109 | if not self.has_prev(): 110 | return None 111 | 112 | return self._query.paginate(self._query, self._page - 1, self.per_page, error_out) -------------------------------------------------------------------------------- /web/static/js/angular-route.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | AngularJS v1.3.15 3 | (c) 2010-2014 Google, Inc. http://angularjs.org 4 | License: MIT 5 | */ 6 | (function(q,d,C){'use strict';function v(r,k,h){return{restrict:"ECA",terminal:!0,priority:400,transclude:"element",link:function(a,f,b,c,y){function z(){l&&(h.cancel(l),l=null);m&&(m.$destroy(),m=null);n&&(l=h.leave(n),l.then(function(){l=null}),n=null)}function x(){var b=r.current&&r.current.locals;if(d.isDefined(b&&b.$template)){var b=a.$new(),c=r.current;n=y(b,function(b){h.enter(b,null,n||f).then(function(){!d.isDefined(t)||t&&!a.$eval(t)||k()});z()});m=c.scope=b;m.$emit("$viewContentLoaded"); 7 | m.$eval(w)}else z()}var m,n,l,t=b.autoscroll,w=b.onload||"";a.$on("$routeChangeSuccess",x);x()}}}function A(d,k,h){return{restrict:"ECA",priority:-400,link:function(a,f){var b=h.current,c=b.locals;f.html(c.$template);var y=d(f.contents());b.controller&&(c.$scope=a,c=k(b.controller,c),b.controllerAs&&(a[b.controllerAs]=c),f.data("$ngControllerController",c),f.children().data("$ngControllerController",c));y(a)}}}q=d.module("ngRoute",["ng"]).provider("$route",function(){function r(a,f){return d.extend(Object.create(a), 8 | f)}function k(a,d){var b=d.caseInsensitiveMatch,c={originalPath:a,regexp:a},h=c.keys=[];a=a.replace(/([().])/g,"\\$1").replace(/(\/)?:(\w+)([\?\*])?/g,function(a,d,b,c){a="?"===c?c:null;c="*"===c?c:null;h.push({name:b,optional:!!a});d=d||"";return""+(a?"":d)+"(?:"+(a?d:"")+(c&&"(.+?)"||"([^/]+)")+(a||"")+")"+(a||"")}).replace(/([\/$\*])/g,"\\$1");c.regexp=new RegExp("^"+a+"$",b?"i":"");return c}var h={};this.when=function(a,f){var b=d.copy(f);d.isUndefined(b.reloadOnSearch)&&(b.reloadOnSearch=!0); 9 | d.isUndefined(b.caseInsensitiveMatch)&&(b.caseInsensitiveMatch=this.caseInsensitiveMatch);h[a]=d.extend(b,a&&k(a,b));if(a){var c="/"==a[a.length-1]?a.substr(0,a.length-1):a+"/";h[c]=d.extend({redirectTo:a},k(c,b))}return this};this.caseInsensitiveMatch=!1;this.otherwise=function(a){"string"===typeof a&&(a={redirectTo:a});this.when(null,a);return this};this.$get=["$rootScope","$location","$routeParams","$q","$injector","$templateRequest","$sce",function(a,f,b,c,k,q,x){function m(b){var e=s.current; 10 | (v=(p=l())&&e&&p.$$route===e.$$route&&d.equals(p.pathParams,e.pathParams)&&!p.reloadOnSearch&&!w)||!e&&!p||a.$broadcast("$routeChangeStart",p,e).defaultPrevented&&b&&b.preventDefault()}function n(){var u=s.current,e=p;if(v)u.params=e.params,d.copy(u.params,b),a.$broadcast("$routeUpdate",u);else if(e||u)w=!1,(s.current=e)&&e.redirectTo&&(d.isString(e.redirectTo)?f.path(t(e.redirectTo,e.params)).search(e.params).replace():f.url(e.redirectTo(e.pathParams,f.path(),f.search())).replace()),c.when(e).then(function(){if(e){var a= 11 | d.extend({},e.resolve),b,g;d.forEach(a,function(b,e){a[e]=d.isString(b)?k.get(b):k.invoke(b,null,null,e)});d.isDefined(b=e.template)?d.isFunction(b)&&(b=b(e.params)):d.isDefined(g=e.templateUrl)&&(d.isFunction(g)&&(g=g(e.params)),g=x.getTrustedResourceUrl(g),d.isDefined(g)&&(e.loadedTemplateUrl=g,b=q(g)));d.isDefined(b)&&(a.$template=b);return c.all(a)}}).then(function(c){e==s.current&&(e&&(e.locals=c,d.copy(e.params,b)),a.$broadcast("$routeChangeSuccess",e,u))},function(b){e==s.current&&a.$broadcast("$routeChangeError", 12 | e,u,b)})}function l(){var a,b;d.forEach(h,function(c,h){var g;if(g=!b){var k=f.path();g=c.keys;var m={};if(c.regexp)if(k=c.regexp.exec(k)){for(var l=1,n=k.length;lc?A(a,b,"py"):c>d&&B(a,b)&&(b.errorToken=!0),null}var e=y(a,b);return c>0&&B(a,b)&&(e+=" "+k),e}return y(a,b)}function y(a,b){if(a.eatSpace())return null;var d=a.peek();if("#"==d)return a.skipToEnd(),"comment";if(a.match(/^[0-9\.]/,!1)){var e=!1;if(a.match(/^\d*\.\d+(e[\+\-]?\d+)?/i)&&(e=!0),a.match(/^\d+\.\d*/)&&(e=!0),a.match(/^\.\d+/)&&(e=!0),e)return a.eat(/J/i),"number";var f=!1;if(a.match(/^0x[0-9a-f]+/i)&&(f=!0),a.match(/^0b[01]+/i)&&(f=!0),a.match(/^0o[0-7]+/i)&&(f=!0),a.match(/^[1-9]\d*(e[\+\-]?\d+)?/)&&(a.eat(/J/i),f=!0),a.match(/^0(?![\dx])/i)&&(f=!0),f)return a.eat(/L/i),"number"}return a.match(u)?(b.tokenize=z(a.current()),b.tokenize(a,b)):a.match(o)||a.match(n)?null:a.match(m)||a.match(p)?"operator":a.match(l)?null:a.match(v)||a.match(c)?"keyword":a.match(w)?"builtin":a.match(/^(self|cls)\b/)?"variable-2":a.match(q)?"def"==b.lastToken||"class"==b.lastToken?"def":"variable":(a.next(),k)}function z(a){function d(d,e){for(;!d.eol();)if(d.eatWhile(/[^'"\\]/),d.eat("\\")){if(d.next(),b&&d.eol())return c}else{if(d.match(a))return e.tokenize=x,c;d.eat(/['"]/)}if(b){if(j.singleLineStringErrors)return k;e.tokenize=x}return c}for(;"rub".indexOf(a.charAt(0).toLowerCase())>=0;)a=a.substr(1);var b=1==a.length,c="string";return d.isString=!0,d}function A(a,b,c){var d=0,e=null;if("py"==c)for(;"py"!=h(b).type;)b.scopes.pop();d=h(b).offset+("py"==c?i.indentUnit:r),"py"==c||a.match(/^(\s|#.*)*$/,!1)||(e=a.column()+1),b.scopes.push({offset:d,type:c,align:e})}function B(a,b){for(var c=a.indentation();h(b).offset>c;){if("py"!=h(b).type)return!0;b.scopes.pop()}return h(b).offset!=c}function C(a,b){var c=b.tokenize(a,b),d=a.current();if("."==d)return c=a.match(q,!1)?null:k,null==c&&"meta"==b.lastStyle&&(c="meta"),c;if("@"==d)return j.version&&3==parseInt(j.version,10)?a.match(q,!1)?"meta":"operator":a.match(q,!1)?"meta":k;"variable"!=c&&"builtin"!=c||"meta"!=b.lastStyle||(c="meta"),("pass"==d||"return"==d)&&(b.dedent+=1),"lambda"==d&&(b.lambda=!0),":"!=d||b.lambda||"py"!=h(b).type||A(a,b,"py");var e=1==d.length?"[({".indexOf(d):-1;if(-1!=e&&A(a,b,"])}".slice(e,e+1)),e="])}".indexOf(d),-1!=e){if(h(b).type!=d)return k;b.scopes.pop()}return b.dedent>0&&a.eol()&&"py"==h(b).type&&(b.scopes.length>1&&b.scopes.pop(),b.dedent-=1),c}var k="error",l=j.singleDelimiters||new RegExp("^[\\(\\)\\[\\]\\{\\}@,:`=;\\.]"),m=j.doubleOperators||new RegExp("^((==)|(!=)|(<=)|(>=)|(<>)|(<<)|(>>)|(//)|(\\*\\*))"),n=j.doubleDelimiters||new RegExp("^((\\+=)|(\\-=)|(\\*=)|(%=)|(/=)|(&=)|(\\|=)|(\\^=))"),o=j.tripleDelimiters||new RegExp("^((//=)|(>>=)|(<<=)|(\\*\\*=))");if(j.version&&3==parseInt(j.version,10))var p=j.singleOperators||new RegExp("^[\\+\\-\\*/%&|\\^~<>!@]"),q=j.identifiers||new RegExp("^[_A-Za-z\xa1-\uffff][_A-Za-z0-9\xa1-\uffff]*");else var p=j.singleOperators||new RegExp("^[\\+\\-\\*/%&|\\^~<>!]"),q=j.identifiers||new RegExp("^[_A-Za-z][_A-Za-z0-9]*");var r=j.hangingIndent||i.indentUnit,s=d,t=e;if(void 0!=j.extra_keywords&&(s=s.concat(j.extra_keywords)),void 0!=j.extra_builtins&&(t=t.concat(j.extra_builtins)),j.version&&3==parseInt(j.version,10)){s=s.concat(g.keywords),t=t.concat(g.builtins);var u=new RegExp("^(([rb]|(br))?('{3}|\"{3}|['\"]))","i")}else{s=s.concat(f.keywords),t=t.concat(f.builtins);var u=new RegExp("^(([rub]|(ur)|(br))?('{3}|\"{3}|['\"]))","i")}var v=b(s),w=b(t),D={startState:function(a){return{tokenize:x,scopes:[{offset:a||0,type:"py",align:null}],lastStyle:null,lastToken:null,lambda:!1,dedent:0}},token:function(a,b){var c=b.errorToken;c&&(b.errorToken=!1);var d=C(a,b);b.lastStyle=d;var e=a.current();return e&&d&&(b.lastToken=e),a.eol()&&b.lambda&&(b.lambda=!1),c?d+" "+k:d},indent:function(b,c){if(b.tokenize!=x)return b.tokenize.isString?a.Pass:0;var d=h(b),e=c&&c.charAt(0)==d.type;return null!=d.align?d.align-(e?1:0):e&&b.scopes.length>1?b.scopes[b.scopes.length-2].offset:d.offset},closeBrackets:{triples:"'\""},lineComment:"#",fold:"indent"};return D}),a.defineMIME("text/x-python","python");var i=function(a){return a.split(" ")};a.defineMIME("text/x-cython",{name:"python",extra_keywords:i("by cdef cimport cpdef ctypedef enum exceptextern gil include nogil property publicreadonly struct union DEF IF ELIF ELSE")})}); 2 | 3 | !function(a){"object"==typeof exports&&"object"==typeof module?a(require("../../lib/codemirror")):"function"==typeof define&&define.amd?define(["../../lib/codemirror"],a):a(CodeMirror)}(function(a){"use strict";function d(a){for(var d=0;dt;t+=1)n.push(e[t].listener);return n},i.getListenersAsObject=function(e){var t,n=this.getListeners(e);return n instanceof Array&&(t={},t[e]=n),t||n},i.addListener=function(e,n){var i,r=this.getListenersAsObject(e),o="object"==typeof n;for(i in r)r.hasOwnProperty(i)&&-1===t(r[i],n)&&r[i].push(o?n:{listener:n,once:!1});return this},i.on=n("addListener"),i.addOnceListener=function(e,t){return this.addListener(e,{listener:t,once:!0})},i.once=n("addOnceListener"),i.defineEvent=function(e){return this.getListeners(e),this},i.defineEvents=function(e){for(var t=0;e.length>t;t+=1)this.defineEvent(e[t]);return this},i.removeListener=function(e,n){var i,r,o=this.getListenersAsObject(e);for(r in o)o.hasOwnProperty(r)&&(i=t(o[r],n),-1!==i&&o[r].splice(i,1));return this},i.off=n("removeListener"),i.addListeners=function(e,t){return this.manipulateListeners(!1,e,t)},i.removeListeners=function(e,t){return this.manipulateListeners(!0,e,t)},i.manipulateListeners=function(e,t,n){var i,r,o=e?this.removeListener:this.addListener,s=e?this.removeListeners:this.addListeners;if("object"!=typeof t||t instanceof RegExp)for(i=n.length;i--;)o.call(this,t,n[i]);else for(i in t)t.hasOwnProperty(i)&&(r=t[i])&&("function"==typeof r?o.call(this,i,r):s.call(this,i,r));return this},i.removeEvent=function(e){var t,n=typeof e,i=this._getEvents();if("string"===n)delete i[e];else if("object"===n)for(t in i)i.hasOwnProperty(t)&&e.test(t)&&delete i[t];else delete this._events;return this},i.removeAllListeners=n("removeEvent"),i.emitEvent=function(e,t){var n,i,r,o,s=this.getListenersAsObject(e);for(r in s)if(s.hasOwnProperty(r))for(i=s[r].length;i--;)n=s[r][i],n.once===!0&&this.removeListener(e,n.listener),o=n.listener.apply(this,t||[]),o===this._getOnceReturnValue()&&this.removeListener(e,n.listener);return this},i.trigger=n("emitEvent"),i.emit=function(e){var t=Array.prototype.slice.call(arguments,1);return this.emitEvent(e,t)},i.setOnceReturnValue=function(e){return this._onceReturnValue=e,this},i._getOnceReturnValue=function(){return this.hasOwnProperty("_onceReturnValue")?this._onceReturnValue:!0},i._getEvents=function(){return this._events||(this._events={})},e.noConflict=function(){return r.EventEmitter=o,e},"function"==typeof define&&define.amd?define("eventEmitter/EventEmitter",[],function(){return e}):"object"==typeof module&&module.exports?module.exports=e:this.EventEmitter=e}).call(this),function(e){function t(t){var n=e.event;return n.target=n.target||n.srcElement||t,n}var n=document.documentElement,i=function(){};n.addEventListener?i=function(e,t,n){e.addEventListener(t,n,!1)}:n.attachEvent&&(i=function(e,n,i){e[n+i]=i.handleEvent?function(){var n=t(e);i.handleEvent.call(i,n)}:function(){var n=t(e);i.call(e,n)},e.attachEvent("on"+n,e[n+i])});var r=function(){};n.removeEventListener?r=function(e,t,n){e.removeEventListener(t,n,!1)}:n.detachEvent&&(r=function(e,t,n){e.detachEvent("on"+t,e[t+n]);try{delete e[t+n]}catch(i){e[t+n]=void 0}});var o={bind:i,unbind:r};"function"==typeof define&&define.amd?define("eventie/eventie",o):e.eventie=o}(this),function(e,t){"function"==typeof define&&define.amd?define(["eventEmitter/EventEmitter","eventie/eventie"],function(n,i){return t(e,n,i)}):"object"==typeof exports?module.exports=t(e,require("wolfy87-eventemitter"),require("eventie")):e.imagesLoaded=t(e,e.EventEmitter,e.eventie)}(window,function(e,t,n){function i(e,t){for(var n in t)e[n]=t[n];return e}function r(e){return"[object Array]"===d.call(e)}function o(e){var t=[];if(r(e))t=e;else if("number"==typeof e.length)for(var n=0,i=e.length;i>n;n++)t.push(e[n]);else t.push(e);return t}function s(e,t,n){if(!(this instanceof s))return new s(e,t);"string"==typeof e&&(e=document.querySelectorAll(e)),this.elements=o(e),this.options=i({},this.options),"function"==typeof t?n=t:i(this.options,t),n&&this.on("always",n),this.getImages(),a&&(this.jqDeferred=new a.Deferred);var r=this;setTimeout(function(){r.check()})}function f(e){this.img=e}function c(e){this.src=e,v[e]=this}var a=e.jQuery,u=e.console,h=u!==void 0,d=Object.prototype.toString;s.prototype=new t,s.prototype.options={},s.prototype.getImages=function(){this.images=[];for(var e=0,t=this.elements.length;t>e;e++){var n=this.elements[e];"IMG"===n.nodeName&&this.addImage(n);var i=n.nodeType;if(i&&(1===i||9===i||11===i))for(var r=n.querySelectorAll("img"),o=0,s=r.length;s>o;o++){var f=r[o];this.addImage(f)}}},s.prototype.addImage=function(e){var t=new f(e);this.images.push(t)},s.prototype.check=function(){function e(e,r){return t.options.debug&&h&&u.log("confirm",e,r),t.progress(e),n++,n===i&&t.complete(),!0}var t=this,n=0,i=this.images.length;if(this.hasAnyBroken=!1,!i)return this.complete(),void 0;for(var r=0;i>r;r++){var o=this.images[r];o.on("confirm",e),o.check()}},s.prototype.progress=function(e){this.hasAnyBroken=this.hasAnyBroken||!e.isLoaded;var t=this;setTimeout(function(){t.emit("progress",t,e),t.jqDeferred&&t.jqDeferred.notify&&t.jqDeferred.notify(t,e)})},s.prototype.complete=function(){var e=this.hasAnyBroken?"fail":"done";this.isComplete=!0;var t=this;setTimeout(function(){if(t.emit(e,t),t.emit("always",t),t.jqDeferred){var n=t.hasAnyBroken?"reject":"resolve";t.jqDeferred[n](t)}})},a&&(a.fn.imagesLoaded=function(e,t){var n=new s(this,e,t);return n.jqDeferred.promise(a(this))}),f.prototype=new t,f.prototype.check=function(){var e=v[this.img.src]||new c(this.img.src);if(e.isConfirmed)return this.confirm(e.isLoaded,"cached was confirmed"),void 0;if(this.img.complete&&void 0!==this.img.naturalWidth)return this.confirm(0!==this.img.naturalWidth,"naturalWidth"),void 0;var t=this;e.on("confirm",function(e,n){return t.confirm(e.isLoaded,n),!0}),e.check()},f.prototype.confirm=function(e,t){this.isLoaded=e,this.emit("confirm",this,t)};var v={};return c.prototype=new t,c.prototype.check=function(){if(!this.isChecked){var e=new Image;n.bind(e,"load",this),n.bind(e,"error",this),e.src=this.src,this.isChecked=!0}},c.prototype.handleEvent=function(e){var t="on"+e.type;this[t]&&this[t](e)},c.prototype.onload=function(e){this.confirm(!0,"onload"),this.unbindProxyEvents(e)},c.prototype.onerror=function(e){this.confirm(!1,"onerror"),this.unbindProxyEvents(e)},c.prototype.confirm=function(e,t){this.isConfirmed=!0,this.isLoaded=e,this.emit("confirm",this,t)},c.prototype.unbindProxyEvents=function(e){n.unbind(e.target,"load",this),n.unbind(e.target,"error",this)},s}); -------------------------------------------------------------------------------- /web/static/css/codemirror.min.css: -------------------------------------------------------------------------------- 1 | /* BASICS */ 2 | 3 | .CodeMirror { 4 | /* Set height, width, borders, and global font properties here */ 5 | font-family: monospace; 6 | height: 300px; 7 | color: black; 8 | } 9 | 10 | /* PADDING */ 11 | 12 | .CodeMirror-lines { 13 | padding: 4px 0; /* Vertical padding around content */ 14 | } 15 | .CodeMirror pre { 16 | padding: 0 4px; /* Horizontal padding of content */ 17 | } 18 | 19 | .CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler { 20 | background-color: white; /* The little square between H and V scrollbars */ 21 | } 22 | 23 | /* GUTTER */ 24 | 25 | .CodeMirror-gutters { 26 | border-right: 1px solid #ddd; 27 | background-color: #f7f7f7; 28 | white-space: nowrap; 29 | } 30 | .CodeMirror-linenumbers {} 31 | .CodeMirror-linenumber { 32 | padding: 0 3px 0 5px; 33 | min-width: 20px; 34 | text-align: right; 35 | color: #999; 36 | white-space: nowrap; 37 | } 38 | 39 | .CodeMirror-guttermarker { color: black; } 40 | .CodeMirror-guttermarker-subtle { color: #999; } 41 | 42 | /* CURSOR */ 43 | 44 | .CodeMirror div.CodeMirror-cursor { 45 | border-left: 1px solid black; 46 | } 47 | /* Shown when moving in bi-directional text */ 48 | .CodeMirror div.CodeMirror-secondarycursor { 49 | border-left: 1px solid silver; 50 | } 51 | .CodeMirror.cm-fat-cursor div.CodeMirror-cursor { 52 | width: auto; 53 | border: 0; 54 | background: #7e7; 55 | } 56 | .CodeMirror.cm-fat-cursor div.CodeMirror-cursors { 57 | z-index: 1; 58 | } 59 | 60 | .cm-animate-fat-cursor { 61 | width: auto; 62 | border: 0; 63 | -webkit-animation: blink 1.06s steps(1) infinite; 64 | -moz-animation: blink 1.06s steps(1) infinite; 65 | animation: blink 1.06s steps(1) infinite; 66 | } 67 | @-moz-keyframes blink { 68 | 0% { background: #7e7; } 69 | 50% { background: none; } 70 | 100% { background: #7e7; } 71 | } 72 | @-webkit-keyframes blink { 73 | 0% { background: #7e7; } 74 | 50% { background: none; } 75 | 100% { background: #7e7; } 76 | } 77 | @keyframes blink { 78 | 0% { background: #7e7; } 79 | 50% { background: none; } 80 | 100% { background: #7e7; } 81 | } 82 | 83 | /* Can style cursor different in overwrite (non-insert) mode */ 84 | div.CodeMirror-overwrite div.CodeMirror-cursor {} 85 | 86 | .cm-tab { display: inline-block; text-decoration: inherit; } 87 | 88 | .CodeMirror-ruler { 89 | border-left: 1px solid #ccc; 90 | position: absolute; 91 | } 92 | 93 | /* DEFAULT THEME */ 94 | 95 | .cm-s-default .cm-keyword {color: #708;} 96 | .cm-s-default .cm-atom {color: #219;} 97 | .cm-s-default .cm-number {color: #164;} 98 | .cm-s-default .cm-def {color: #00f;} 99 | .cm-s-default .cm-variable, 100 | .cm-s-default .cm-punctuation, 101 | .cm-s-default .cm-property, 102 | .cm-s-default .cm-operator {} 103 | .cm-s-default .cm-variable-2 {color: #05a;} 104 | .cm-s-default .cm-variable-3 {color: #085;} 105 | .cm-s-default .cm-comment {color: #a50;} 106 | .cm-s-default .cm-string {color: #a11;} 107 | .cm-s-default .cm-string-2 {color: #f50;} 108 | .cm-s-default .cm-meta {color: #555;} 109 | .cm-s-default .cm-qualifier {color: #555;} 110 | .cm-s-default .cm-builtin {color: #30a;} 111 | .cm-s-default .cm-bracket {color: #997;} 112 | .cm-s-default .cm-tag {color: #170;} 113 | .cm-s-default .cm-attribute {color: #00c;} 114 | .cm-s-default .cm-header {color: blue;} 115 | .cm-s-default .cm-quote {color: #090;} 116 | .cm-s-default .cm-hr {color: #999;} 117 | .cm-s-default .cm-link {color: #00c;} 118 | 119 | .cm-negative {color: #d44;} 120 | .cm-positive {color: #292;} 121 | .cm-header, .cm-strong {font-weight: bold;} 122 | .cm-em {font-style: italic;} 123 | .cm-link {text-decoration: underline;} 124 | .cm-strikethrough {text-decoration: line-through;} 125 | 126 | .cm-s-default .cm-error {color: #f00;} 127 | .cm-invalidchar {color: #f00;} 128 | 129 | .CodeMirror-composing { border-bottom: 2px solid; } 130 | 131 | /* Default styles for common addons */ 132 | 133 | div.CodeMirror span.CodeMirror-matchingbracket {color: #0f0;} 134 | div.CodeMirror span.CodeMirror-nonmatchingbracket {color: #f22;} 135 | .CodeMirror-matchingtag { background: rgba(255, 150, 0, .3); } 136 | .CodeMirror-activeline-background {background: #e8f2ff;} 137 | 138 | /* STOP */ 139 | 140 | /* The rest of this file contains styles related to the mechanics of 141 | the editor. You probably shouldn't touch them. */ 142 | 143 | .CodeMirror { 144 | position: relative; 145 | overflow: hidden; 146 | background: white; 147 | } 148 | 149 | .CodeMirror-scroll { 150 | overflow: scroll !important; /* Things will break if this is overridden */ 151 | /* 30px is the magic margin used to hide the element's real scrollbars */ 152 | /* See overflow: hidden in .CodeMirror */ 153 | margin-bottom: -30px; margin-right: -30px; 154 | padding-bottom: 30px; 155 | height: 100%; 156 | outline: none; /* Prevent dragging from highlighting the element */ 157 | position: relative; 158 | } 159 | .CodeMirror-sizer { 160 | position: relative; 161 | border-right: 30px solid transparent; 162 | } 163 | 164 | /* The fake, visible scrollbars. Used to force redraw during scrolling 165 | before actuall scrolling happens, thus preventing shaking and 166 | flickering artifacts. */ 167 | .CodeMirror-vscrollbar, .CodeMirror-hscrollbar, .CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler { 168 | position: absolute; 169 | z-index: 6; 170 | display: none; 171 | } 172 | .CodeMirror-vscrollbar { 173 | right: 0; top: 0; 174 | overflow-x: hidden; 175 | overflow-y: scroll; 176 | } 177 | .CodeMirror-hscrollbar { 178 | bottom: 0; left: 0; 179 | overflow-y: hidden; 180 | overflow-x: scroll; 181 | } 182 | .CodeMirror-scrollbar-filler { 183 | right: 0; bottom: 0; 184 | } 185 | .CodeMirror-gutter-filler { 186 | left: 0; bottom: 0; 187 | } 188 | 189 | .CodeMirror-gutters { 190 | position: absolute; left: 0; top: 0; 191 | z-index: 3; 192 | } 193 | .CodeMirror-gutter { 194 | white-space: normal; 195 | height: 100%; 196 | display: inline-block; 197 | margin-bottom: -30px; 198 | /* Hack to make IE7 behave */ 199 | *zoom:1; 200 | *display:inline; 201 | } 202 | .CodeMirror-gutter-wrapper { 203 | position: absolute; 204 | z-index: 4; 205 | height: 100%; 206 | } 207 | .CodeMirror-gutter-elt { 208 | position: absolute; 209 | cursor: default; 210 | z-index: 4; 211 | } 212 | .CodeMirror-gutter-wrapper { 213 | -webkit-user-select: none; 214 | -moz-user-select: none; 215 | user-select: none; 216 | } 217 | 218 | .CodeMirror-lines { 219 | cursor: text; 220 | min-height: 1px; /* prevents collapsing before first draw */ 221 | } 222 | .CodeMirror pre { 223 | /* Reset some styles that the rest of the page might have set */ 224 | -moz-border-radius: 0; -webkit-border-radius: 0; border-radius: 0; 225 | border-width: 0; 226 | background: transparent; 227 | font-family: inherit; 228 | font-size: inherit; 229 | margin: 0; 230 | white-space: pre; 231 | word-wrap: normal; 232 | line-height: inherit; 233 | color: inherit; 234 | z-index: 2; 235 | position: relative; 236 | overflow: visible; 237 | -webkit-tap-highlight-color: transparent; 238 | } 239 | .CodeMirror-wrap pre { 240 | word-wrap: break-word; 241 | white-space: pre-wrap; 242 | word-break: normal; 243 | } 244 | 245 | .CodeMirror-linebackground { 246 | position: absolute; 247 | left: 0; right: 0; top: 0; bottom: 0; 248 | z-index: 0; 249 | } 250 | 251 | .CodeMirror-linewidget { 252 | position: relative; 253 | z-index: 2; 254 | overflow: auto; 255 | } 256 | 257 | .CodeMirror-widget {} 258 | 259 | .CodeMirror-code { 260 | outline: none; 261 | } 262 | 263 | /* Force content-box sizing for the elements where we expect it */ 264 | .CodeMirror-scroll, 265 | .CodeMirror-sizer, 266 | .CodeMirror-gutter, 267 | .CodeMirror-gutters, 268 | .CodeMirror-linenumber { 269 | -moz-box-sizing: content-box; 270 | box-sizing: content-box; 271 | } 272 | 273 | .CodeMirror-measure { 274 | position: absolute; 275 | width: 100%; 276 | height: 0; 277 | overflow: hidden; 278 | visibility: hidden; 279 | } 280 | .CodeMirror-measure pre { position: static; } 281 | 282 | .CodeMirror div.CodeMirror-cursor { 283 | position: absolute; 284 | border-right: none; 285 | width: 0; 286 | } 287 | 288 | div.CodeMirror-cursors { 289 | visibility: hidden; 290 | position: relative; 291 | z-index: 3; 292 | } 293 | .CodeMirror-focused div.CodeMirror-cursors { 294 | visibility: visible; 295 | } 296 | 297 | .CodeMirror-selected { background: #d9d9d9; } 298 | .CodeMirror-focused .CodeMirror-selected { background: #d7d4f0; } 299 | .CodeMirror-crosshair { cursor: crosshair; } 300 | .CodeMirror ::selection { background: #d7d4f0; } 301 | .CodeMirror ::-moz-selection { background: #d7d4f0; } 302 | 303 | .cm-searching { 304 | background: #ffa; 305 | background: rgba(255, 255, 0, .4); 306 | } 307 | 308 | /* IE7 hack to prevent it from returning funny offsetTops on the spans */ 309 | .CodeMirror span { *vertical-align: text-bottom; } 310 | 311 | /* Used to force a border model for a node */ 312 | .cm-force-border { padding-right: .1px; } 313 | 314 | @media print { 315 | /* Hide the cursor when printing */ 316 | .CodeMirror div.CodeMirror-cursors { 317 | visibility: hidden; 318 | } 319 | } 320 | 321 | /* See issue #2901 */ 322 | .cm-tab-wrap-hack:after { content: ''; } 323 | 324 | /* Help users use markselection to safely style text background */ 325 | span.CodeMirror-selectedtext { background: none; } 326 | -------------------------------------------------------------------------------- /web/static/js/ui-bootstrap-custom-tpls-0.13.0.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | * angular-ui-bootstrap 3 | * http://angular-ui.github.io/bootstrap/ 4 | 5 | * Version: 0.13.0 - 2015-05-02 6 | * License: MIT 7 | */ 8 | angular.module("ui.bootstrap",["ui.bootstrap.tpls","ui.bootstrap.accordion","ui.bootstrap.collapse","ui.bootstrap.alert","ui.bootstrap.modal"]),angular.module("ui.bootstrap.tpls",["template/accordion/accordion-group.html","template/accordion/accordion.html","template/alert/alert.html","template/modal/backdrop.html","template/modal/window.html"]),angular.module("ui.bootstrap.accordion",["ui.bootstrap.collapse"]).constant("accordionConfig",{closeOthers:!0}).controller("AccordionController",["$scope","$attrs","accordionConfig",function(e,n,t){this.groups=[],this.closeOthers=function(o){var a=angular.isDefined(n.closeOthers)?e.$eval(n.closeOthers):t.closeOthers;a&&angular.forEach(this.groups,function(e){e!==o&&(e.isOpen=!1)})},this.addGroup=function(e){var n=this;this.groups.push(e),e.$on("$destroy",function(){n.removeGroup(e)})},this.removeGroup=function(e){var n=this.groups.indexOf(e);-1!==n&&this.groups.splice(n,1)}}]).directive("accordion",function(){return{restrict:"EA",controller:"AccordionController",transclude:!0,replace:!1,templateUrl:"template/accordion/accordion.html"}}).directive("accordionGroup",function(){return{require:"^accordion",restrict:"EA",transclude:!0,replace:!0,templateUrl:"template/accordion/accordion-group.html",scope:{heading:"@",isOpen:"=?",isDisabled:"=?"},controller:function(){this.setHeading=function(e){this.heading=e}},link:function(e,n,t,o){o.addGroup(e),e.$watch("isOpen",function(n){n&&o.closeOthers(e)}),e.toggleOpen=function(){e.isDisabled||(e.isOpen=!e.isOpen)}}}}).directive("accordionHeading",function(){return{restrict:"EA",transclude:!0,template:"",replace:!0,require:"^accordionGroup",link:function(e,n,t,o,a){o.setHeading(a(e,angular.noop))}}}).directive("accordionTransclude",function(){return{require:"^accordionGroup",link:function(e,n,t,o){e.$watch(function(){return o[t.accordionTransclude]},function(e){e&&(n.html(""),n.append(e))})}}}),angular.module("ui.bootstrap.collapse",[]).directive("collapse",["$animate",function(e){return{link:function(n,t,o){function a(){t.removeClass("collapse").addClass("collapsing"),e.addClass(t,"in",{to:{height:t[0].scrollHeight+"px"}}).then(r)}function r(){t.removeClass("collapsing"),t.css({height:"auto"})}function l(){t.css({height:t[0].scrollHeight+"px"}).removeClass("collapse").addClass("collapsing"),e.removeClass(t,"in",{to:{height:"0"}}).then(i)}function i(){t.css({height:"0"}),t.removeClass("collapsing"),t.addClass("collapse")}n.$watch(o.collapse,function(e){e?l():a()})}}}]),angular.module("ui.bootstrap.alert",[]).controller("AlertController",["$scope","$attrs",function(e,n){e.closeable="close"in n,this.close=e.close}]).directive("alert",function(){return{restrict:"EA",controller:"AlertController",templateUrl:"template/alert/alert.html",transclude:!0,replace:!0,scope:{type:"@",close:"&"}}}).directive("dismissOnTimeout",["$timeout",function(e){return{require:"alert",link:function(n,t,o,a){e(function(){a.close()},parseInt(o.dismissOnTimeout,10))}}}]),angular.module("ui.bootstrap.modal",[]).factory("$$stackedMap",function(){return{createNew:function(){var e=[];return{add:function(n,t){e.push({key:n,value:t})},get:function(n){for(var t=0;t0),c()})}function c(){if(u&&-1==l()){var e=p;s(u,p,function(){e=null}),u=void 0,p=void 0}}function s(t,o,r){function l(){l.done||(l.done=!0,t.remove(),o.$destroy(),r&&r())}o.animate=!1,t.attr("modal-animation")&&e.enabled()?t.one("$animate:close",function(){a.$evalAsync(l)}):n(l)}function d(e,n,t){return!e.value.modalScope.$broadcast("modal.closing",n,t).defaultPrevented}var u,p,m="modal-open",f=r.createNew(),v={};return a.$watch(l,function(e){p&&(p.index=e)}),t.bind("keydown",function(e){var n;27===e.which&&(n=f.top(),n&&n.value.keyboard&&(e.preventDefault(),a.$apply(function(){v.dismiss(n.key,"escape key press")})))}),v.open=function(e,n){var r=t[0].activeElement;f.add(e,{deferred:n.deferred,renderDeferred:n.renderDeferred,modalScope:n.scope,backdrop:n.backdrop,keyboard:n.keyboard});var i=t.find("body").eq(0),c=l();if(c>=0&&!u){p=a.$new(!0),p.index=c;var s=angular.element('
');s.attr("backdrop-class",n.backdropClass),n.animation&&s.attr("modal-animation","true"),u=o(s)(p),i.append(u)}var d=angular.element('
');d.attr({"template-url":n.windowTemplateUrl,"window-class":n.windowClass,size:n.size,index:f.length()-1,animate:"animate"}).html(n.content),n.animation&&d.attr("modal-animation","true");var v=o(d)(n.scope);f.top().value.modalDomEl=v,f.top().value.modalOpener=r,i.append(v),i.addClass(m)},v.close=function(e,n){var t=f.get(e);return t&&d(t,n,!0)?(t.value.deferred.resolve(n),i(e),t.value.modalOpener.focus(),!0):!t},v.dismiss=function(e,n){var t=f.get(e);return t&&d(t,n,!1)?(t.value.deferred.reject(n),i(e),t.value.modalOpener.focus(),!0):!t},v.dismissAll=function(e){for(var n=this.getTop();n&&this.dismiss(n.key,e);)n=this.getTop()},v.getTop=function(){return f.top()},v.modalRendered=function(e){var n=f.get(e);n&&n.value.renderDeferred.resolve()},v}]).provider("$modal",function(){var e={options:{animation:!0,backdrop:!0,keyboard:!0},$get:["$injector","$rootScope","$q","$templateRequest","$controller","$modalStack",function(n,t,o,a,r,l){function i(e){return e.template?o.when(e.template):a(angular.isFunction(e.templateUrl)?e.templateUrl():e.templateUrl)}function c(e){var t=[];return angular.forEach(e,function(e){(angular.isFunction(e)||angular.isArray(e))&&t.push(o.when(n.invoke(e)))}),t}var s={};return s.open=function(n){var a=o.defer(),s=o.defer(),d=o.defer(),u={result:a.promise,opened:s.promise,rendered:d.promise,close:function(e){return l.close(u,e)},dismiss:function(e){return l.dismiss(u,e)}};if(n=angular.extend({},e.options,n),n.resolve=n.resolve||{},!n.template&&!n.templateUrl)throw new Error("One of template or templateUrl options is required.");var p=o.all([i(n)].concat(c(n.resolve)));return p.then(function(e){var o=(n.scope||t).$new();o.$close=u.close,o.$dismiss=u.dismiss;var i,c={},s=1;n.controller&&(c.$scope=o,c.$modalInstance=u,angular.forEach(n.resolve,function(n,t){c[t]=e[s++]}),i=r(n.controller,c),n.controllerAs&&(o[n.controllerAs]=i)),l.open(u,{scope:o,deferred:a,renderDeferred:d,content:e[0],animation:n.animation,backdrop:n.backdrop,keyboard:n.keyboard,backdropClass:n.backdropClass,windowClass:n.windowClass,windowTemplateUrl:n.windowTemplateUrl,size:n.size})},function(e){a.reject(e)}),p.then(function(){s.resolve(!0)},function(e){s.reject(e)}),u},s}]};return e}),angular.module("template/accordion/accordion-group.html",[]).run(["$templateCache",function(e){e.put("template/accordion/accordion-group.html",'
\n
\n

\n {{heading}}\n

\n
\n
\n
\n
\n
\n')}]),angular.module("template/accordion/accordion.html",[]).run(["$templateCache",function(e){e.put("template/accordion/accordion.html",'
')}]),angular.module("template/alert/alert.html",[]).run(["$templateCache",function(e){e.put("template/alert/alert.html",'\n')}]),angular.module("template/modal/backdrop.html",[]).run(["$templateCache",function(e){e.put("template/modal/backdrop.html",'\n')}]),angular.module("template/modal/window.html",[]).run(["$templateCache",function(e){e.put("template/modal/window.html",'\n')}]); -------------------------------------------------------------------------------- /web/web_ui.py: -------------------------------------------------------------------------------- 1 | #! coding=utf8 2 | from flask import Flask, request, session, g, redirect, url_for, abort, \ 3 | render_template, flash, jsonify, json, make_response 4 | from mongo_single import Mongo 5 | import time 6 | import os 7 | import re 8 | import traceback 9 | from functions import get_project_list, md5, mix_ip 10 | from helper import GlobalHelper 11 | from spider_for_test import test_run 12 | from bson import ObjectId 13 | from pagination import paginate 14 | 15 | # from gevent import monkey 16 | # monkey.patch_socket() # fixme patch_all 会影响跨进程通讯或者异步抓取 1/2 17 | 18 | from flask.ext.compress import Compress 19 | 20 | app = Flask(__name__) 21 | Compress(app) 22 | 23 | app.config.update(dict( 24 | SECRET_KEY='development key', 25 | )) 26 | 27 | 28 | def get_template(template_path): 29 | with open(os.path.dirname(os.path.abspath(__file__)) + '/templates/' + template_path) as f: # todo 后期上缓存 30 | return f.read() 31 | 32 | 33 | @app.errorhandler(404) 34 | def page_not_found(error): 35 | return redirect('/project') 36 | 37 | 38 | @app.route('/') 39 | def index(): 40 | return redirect('/main.html') 41 | 42 | 43 | @app.route('/') 44 | @app.route('/project/') 45 | @app.route('/project/task/') 46 | @app.route('/project/edit/') 47 | @app.route('/project/add/') 48 | @app.route('/slave/task/') 49 | @app.route('/slave/result/') 50 | @app.route('/project/result/') 51 | @app.route('/project/result//image') 52 | def default(path): 53 | """ 54 | 首次全新请求(即不经过angular的请求)url地址的route规则 55 | """ 56 | return get_template('main.html') 57 | 58 | 59 | @app.route('/component/') 60 | @app.route('/component/task/') 61 | def get_page(page): 62 | return get_template('component/' + page + '.html') 63 | 64 | 65 | from pymongo import MongoClient 66 | 67 | 68 | @app.route('/api/test') 69 | def api_test(): 70 | try: 71 | client = MongoClient('localhost', 27017) 72 | res = client['pyfetch']['result_cnbeta'].find().limit(10) 73 | 74 | # print GlobalHelper.get('salve_record') 75 | # time.sleep(10) 76 | pagination = paginate(Mongo.get()['result_cnbeta'].find(), 1, 30) 77 | if pagination: 78 | for row in pagination.current_page(): 79 | print row 80 | 81 | # print pagination.next() 82 | # print pagination.prev() 83 | 84 | except: 85 | print traceback.format_exc() 86 | return jsonify({'fd': 1}) 87 | 88 | 89 | @app.route('/api/slave') 90 | def api_slave(): 91 | try: 92 | new_records = {} 93 | salve_records = GlobalHelper.get('salve_record') 94 | 95 | if not salve_records: 96 | return jsonify(new_records) 97 | 98 | for (key, value) in salve_records.items(): 99 | item = dict(value) 100 | item['ip'] = mix_ip(key) 101 | new_records[value['_id']] = item 102 | 103 | return jsonify(new_records) 104 | except: 105 | print traceback.format_exc() 106 | 107 | 108 | @app.route('/api/slave/') 109 | def get_slave_tasks(slave_id): 110 | res = [] 111 | try: 112 | slave_record = Mongo.get()['slave_record'].find_one({'_id': ObjectId(slave_id)}) 113 | if not slave_record: 114 | raise Exception('不存在的记录!') 115 | except: 116 | return json.dumps(res) 117 | 118 | for project in get_project_list(): 119 | for doc in Mongo.get()['parsed_' + project['name']].find({'slave_ip': slave_record['ip']}).sort('_id', 120 | -1).limit(20): 121 | del doc['_id'] 122 | doc['slave_ip'] = mix_ip(doc['slave_ip']) 123 | res.append(doc) 124 | 125 | res.sort(key=lambda x: x['add_time'], reverse=True) 126 | return json.dumps(res) 127 | 128 | 129 | @app.route('/api/slave//restart') 130 | def restart_slave(slave_id): 131 | try: 132 | slave_record = Mongo.get()['slave_record'].find_one({'_id': ObjectId(slave_id)}) 133 | if not slave_record: 134 | raise Exception('不存在的记录!') 135 | except: 136 | return jsonify({'success': False, 'msg': '不存在的记录!'}) 137 | 138 | restart_slave_list = GlobalHelper.get('restart_slave_list') or [] 139 | restart_slave_list.append(slave_record['ip']) 140 | 141 | GlobalHelper.set('restart_slave_list', list(set(restart_slave_list))) 142 | return jsonify({'success': True, 'msg': '重启中!'}) 143 | 144 | 145 | @app.route('/api/slave//toggle') 146 | def toggle_slave(slave_id): 147 | try: 148 | slave_record = Mongo.get()['slave_record'].find_one({'_id': ObjectId(slave_id)}) 149 | if not slave_record: 150 | raise Exception('不存在的记录!') 151 | except: 152 | return jsonify({'success': False, 'msg': '不存在的记录!'}) 153 | 154 | slave_record['data']['static'] = '抓取中' if slave_record['data']['static'] == '暂停中' else '暂停中' 155 | try: 156 | Mongo.get()['slave_record'].update({'_id': ObjectId(slave_id)}, 157 | {'$set': {'data.static': slave_record['data']['static']}}) 158 | global_salve_record = GlobalHelper.get('salve_record') 159 | global_salve_record[slave_record['ip']]['static'] = slave_record['data']['static'] 160 | GlobalHelper.set('salve_record', global_salve_record) 161 | except: 162 | print traceback.format_exc() 163 | return jsonify({'success': True, 'msg': '切换成功!'}) 164 | 165 | 166 | @app.route('/api/project//toggle') 167 | def toggle_project(project_id): 168 | try: 169 | project = Mongo.get()['projects'].find_one({'_id': ObjectId(project_id)}) 170 | if not project: 171 | raise Exception('不存在的记录!') 172 | except: 173 | return jsonify({'success': False, 'msg': '不存在的记录!'}) 174 | 175 | project['static'] = '抓取中' if project['static'] == '暂停中' else '暂停中' 176 | 177 | Mongo.get()['projects'].update({'_id': ObjectId(project_id)}, 178 | {'$set': {'static': project['static']}}) 179 | return jsonify({'success': True, 'msg': '切换成功!'}) 180 | 181 | 182 | @app.route('/api/project') 183 | def get_projects(): 184 | project_dict = {} 185 | for project in get_project_list(): 186 | project_dict[project['name']] = { 187 | '_id': str(project['_id']), 188 | 'name': project['name'], 189 | 'static': project['static'], 190 | 'queue_len': Mongo.get()['queue_' + project['name']].count(), 191 | 'parsed_len': Mongo.get()['parsed_' + project['name']].count(), 192 | 'result_len': Mongo.get()['result_' + project['name']].count(), 193 | } 194 | 195 | return jsonify(project_dict) 196 | 197 | 198 | @app.route('/api/project/') 199 | def get_project_by_name(name): 200 | res = list(Mongo.get().projects.find({'name': name}, {'_id': 0})) 201 | if not res: 202 | return jsonify({}) 203 | return jsonify(res[0]) 204 | 205 | 206 | @app.route('/api/project/save', methods=['POST']) 207 | def save_project(): 208 | return jsonify({'success': False, 'msg': '禁止修改!'}) 209 | form_data = json.loads(request.data) # todo 需要验证表单数据 210 | 211 | name_r = re.compile(r'^[0-9a-zA-Z_-]+$') 212 | if not name_r.search(form_data['name']): 213 | return jsonify({'success': False, 'msg': '计划名称必须满足正则规则: ^[0-9a-zA-Z_-]+$ '}) 214 | 215 | exists_project = list(Mongo.get()['projects'].find({'name': form_data['name']}, {'_id': 1, 'add_time': 1}).limit(1)) 216 | 217 | if 'edit' not in form_data and exists_project: 218 | return jsonify({'success': False, 'msg': '计划名称已经存在!'}) 219 | 220 | # 新增计划或更新计划 221 | data = { 222 | 'name': form_data['name'], 223 | 'init_url': form_data['init_url'], 224 | 'desc': form_data['desc'] if 'desc' in form_data else '', 225 | 'code': form_data['code'], 226 | 'static': '暂停中', 227 | 'update_time': int(time.time()), 228 | 'add_time': exists_project[0]['add_time'] if exists_project else int(time.time()), 229 | } 230 | Mongo.get()['projects'].update({'name': form_data['name']}, data, True) 231 | 232 | # 当是新计划时的初始化 233 | if 'edit' not in form_data: 234 | Mongo.get()['queue_' + form_data['name']].insert( 235 | { 236 | 'url': form_data['init_url'], 237 | 'url_md5': md5(form_data['init_url']), 238 | 'flag_time': 0, 239 | 'add_time': int(time.time()), 240 | 'slave_ip': '0.0.0.0' 241 | }) 242 | 243 | # 在没创建集合前设置索引mongodb会自动创建该集合并赋索引 244 | Mongo.get()['parsed_' + form_data['name']].ensure_index('url_md5', unique=True) 245 | Mongo.get()['queue_' + form_data['name']].ensure_index('url_md5', unique=True) 246 | 247 | # 有新计划加入, 重启全部slave 248 | restart_slave_list = GlobalHelper.get('restart_slave_list') or [] 249 | for slave_record in Mongo.get()['slave_record'].find(): 250 | restart_slave_list.append(slave_record['ip']) 251 | GlobalHelper.set('restart_slave_list', list(set(restart_slave_list))) 252 | 253 | return jsonify({'success': True, 'msg': '保存成功!'}) 254 | 255 | 256 | @app.route('/api/task/') 257 | def get_project_tasks(project_name): 258 | res = [] 259 | for doc in Mongo.get()['parsed_' + project_name].find().sort('_id', -1).limit(100): 260 | del doc['_id'] 261 | doc['slave_ip'] = mix_ip(doc['slave_ip']) 262 | res.append(doc) 263 | return json.dumps(res) 264 | 265 | 266 | @app.route('/api/result//') 267 | def get_results(project_name, page=1): 268 | res = [] 269 | pagination = paginate(Mongo.get()['result_' + project_name].find().sort('_id', -1), page, 20) 270 | if pagination: 271 | for doc in pagination.result(): 272 | del doc['_id'] 273 | res.append(doc) 274 | 275 | return json.dumps({'result': res, 'render_json': pagination.render_json(10)}) 276 | 277 | 278 | @app.route('/download/.json') 279 | def get_json_results(project_name): 280 | res = make_response(json.dumps(list(Mongo.get()['result_' + project_name].find({}, {'_id': 0}))), 200) 281 | # header("Content-Disposition:attachment;filename={$fileName}"); 282 | res.headers['Content-Disposition'] = 'attachment' 283 | res.headers['filename'] = project_name + '/json' 284 | return res 285 | 286 | 287 | @app.route('/api/project/exec_test', methods=['POST']) 288 | def exec_test(): 289 | return jsonify({'success': False, 'msg': '禁止修改!'}) 290 | form_data = json.loads(request.data) # todo 需要验证表单数据 291 | 292 | result = test_run(form_data) 293 | if 'error' in result: 294 | return json.dumps({'success': False, 'msg': result['error'], 'result': {'stdout': result['stdout']}}) 295 | 296 | result['urls'] = result['urls'][::-1] 297 | 298 | return json.dumps({'success': True, 'msg': '成功', 'result': result}) 299 | 300 | 301 | def web_start(dd, host, web_port): 302 | """ 303 | 当service.py为入口时会调用这里 304 | """ 305 | GlobalHelper.init(dd) 306 | # wsgi.WSGIServer((host, web_port), app, backlog=1).serve_forever(3) 307 | app.run(host=host, port=web_port, threaded=True) 308 | # http_server.serve_forever() 309 | # http_server. 310 | 311 | 312 | if __name__ == '__main__': 313 | """ 314 | 当前文件为入口时会调用这里 315 | """ 316 | # app.run('0.0.0.0', 80, debug=True, threaded=True) 317 | from gevent import wsgi 318 | 319 | wsgi.WSGIServer(('0.0.0.0', 81), app).serve_forever() 320 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /web/static/js/angular-animate.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | AngularJS v1.3.15 3 | (c) 2010-2014 Google, Inc. http://angularjs.org 4 | License: MIT 5 | */ 6 | (function(N,f,W){'use strict';f.module("ngAnimate",["ng"]).directive("ngAnimateChildren",function(){return function(X,C,g){g=g.ngAnimateChildren;f.isString(g)&&0===g.length?C.data("$$ngAnimateChildren",!0):X.$watch(g,function(f){C.data("$$ngAnimateChildren",!!f)})}}).factory("$$animateReflow",["$$rAF","$document",function(f,C){return function(g){return f(function(){g()})}}]).config(["$provide","$animateProvider",function(X,C){function g(f){for(var n=0;n=C&&b>=x&&c()}var m=g(e);a=e.data("$$ngAnimateCSS3Data");if(-1!=m.getAttribute("class").indexOf(b)&&a){var k="",t="";n(b.split(" "),function(a, 26 | b){var e=(0 start_time: 113 | time_count += 1 114 | if time_count < 10: # 未达到次数下限 115 | continue 116 | 117 | # 加入禁止名单和清空临时数据 118 | self.slave_record[ip]['deny_domains'].append({'domain': domain, 'add_time': int(time.time())}) 119 | del self.deny_urls_temp[ip] 120 | continue 121 | 122 | # 其他非403的处理 123 | if http_code != 403: 124 | domain_md5 = md5(domain) # mongoDB不支持带.的key 125 | self.slave_record[ip]['error_domains'].setdefault(domain_md5, {}) 126 | self.slave_record[ip]['error_domains'][domain_md5].setdefault('domain', domain) 127 | self.slave_record[ip]['error_domains'][domain_md5].setdefault('add_time', int(time.time())) 128 | self.slave_record[ip]['error_domains'][domain_md5]['update_time'] = int(time.time()) 129 | self.slave_record[ip]['error_domains'][domain_md5].setdefault('http_code', {}) 130 | self.slave_record[ip]['error_domains'][domain_md5]['http_code'].setdefault(str(http_code), 0) 131 | self.slave_record[ip]['error_domains'][domain_md5]['http_code'][str(http_code)] += 1 132 | continue 133 | 134 | 135 | def add_request_record(self, ip): 136 | self.__init_key(ip) 137 | self.slave_record[ip]['connected_count'] += 1 138 | self.__set_connect_record(ip) 139 | self.refresh_connect_status() 140 | 141 | # todo 不能每次实时插入 或者 web端获取时不通过 process.Manager 的方式获取,改用mongoDB获取 142 | self.__storage_record() 143 | 144 | def refresh_connect_status(self): 145 | now = int(time.time()) 146 | 147 | global_slave_record = GlobalHelper.get('salve_record') 148 | if not global_slave_record: 149 | global_slave_record = self.slave_record 150 | 151 | for k, item in global_slave_record.items(): 152 | leave_second = now - item['last_connected_time'] 153 | if leave_second > 60 * 60: # 失联1小时以上 154 | self.slave_record[k]['static'] = u'已丢失' 155 | elif leave_second > 60 * 10: # 失联10分钟以上 156 | self.slave_record[k]['static'] = u'断开中' 157 | elif global_slave_record[k]['static'] == '暂停中': 158 | self.slave_record[k]['static'] = u'暂停中' 159 | else: 160 | self.slave_record[k]['static'] = u'抓取中' 161 | 162 | def __set_connect_record(self, ip): 163 | now = int(time.time()) 164 | last_connected_time = self.slave_record[ip]['last_connected_time'] 165 | self.slave_record[ip]['last_connected_time'] = now 166 | 167 | if now - last_connected_time < 60 * 10: # fixme 0时会不计算 168 | self.slave_record[ip]['work_time_count'] += now - last_connected_time 169 | 170 | def __storage_record(self): 171 | for ip, data in self.slave_record.items(): 172 | res = Mongo.get().slave_record.update( 173 | {'ip': ip}, 174 | {'ip': ip, 'data': data}, True) # 有着更新, 无则插入 175 | 176 | if not res['updatedExisting'] and 'upserted' in res: # 插入时 177 | self.slave_record[ip]['_id'] = str(res['upserted']) 178 | 179 | 180 | def __del__(self): 181 | self.__storage_record() 182 | 183 | 184 | class GlobalHelper: 185 | """ 186 | 跨进程间的变量共享工具 187 | 依赖 multiprocessing.Manger 188 | """ 189 | __source_data = {} 190 | 191 | @classmethod 192 | def init(cls, d): 193 | """ 194 | 需要特殊的初始化过程, 详见调用处 195 | 设置跨进程的引用变量 196 | """ 197 | cls.__source_data = d 198 | 199 | @classmethod 200 | def get(cls, key): 201 | if key not in cls.__source_data: 202 | return 203 | 204 | return cls.__source_data[key] 205 | 206 | @classmethod 207 | def set(cls, key, value): 208 | """ 209 | 每次跨进程都要重新set, init只需要一次,set是每次 210 | """ 211 | cls.__source_data[key] = value 212 | 213 | 214 | class HttpHelper(): 215 | """ 216 | 基于requests再次封装的http请求对象 217 | """ 218 | 219 | def __init__(self): 220 | self.url = '' 221 | self.domain = '' 222 | self.domain_crawl_history = {} 223 | self.requester = None 224 | self.user_agent = '' 225 | 226 | def __init_domain_crawl_history_keys(self): 227 | self.domain_crawl_history.setdefault(self.domain, { 228 | 'user_agent_counter': 0, 229 | 'requester_counter': 0, 230 | 'last_url': '', 231 | }) 232 | 233 | def get_requester(self): 234 | """ 235 | 获取固定会话信息的requester 236 | 在执行若干次后会更新cookie, 一定几率降低被封可能 237 | """ 238 | self.__init_domain_crawl_history_keys() 239 | 240 | if self.domain_crawl_history[self.domain]['requester_counter'] >= 20 or not self.requester: 241 | self.requester = requests.Session() 242 | self.domain_crawl_history[self.domain]['requester_counter'] = 0 243 | 244 | self.domain_crawl_history[self.domain]['requester_counter'] += 1 245 | return self.requester 246 | 247 | def get_refer_url(self): 248 | self.__init_domain_crawl_history_keys() 249 | last_url = self.domain_crawl_history[self.domain]['last_url'] 250 | self.domain_crawl_history[self.domain]['last_url'] = self.url # 获取后会更新 251 | return last_url 252 | 253 | def get_headers(self): 254 | """ 255 | 以该域名的上次一次访问url做为Referer 256 | 随机获取user_agent 257 | """ 258 | user_agent = [ 259 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.' + str(random.randint(10, 99)) + 260 | ' (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240 ' + str(random.randint(10, 99)), 261 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.' + str(random.randint(10, 99)) + 262 | ' (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36 ' + str(random.randint(10, 99)), 263 | 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) ' + 264 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.' + 265 | str(random.randint(10, 99)) + ' Safari/537.' + str(random.randint(10, 99)), 266 | 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.' + str(random.randint(10, 99)) + 267 | ' (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.' + str(random.randint(10, 99)), 268 | 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko ' + str(random.randint(10, 99)) 269 | ] 270 | 271 | accept = [ 272 | 'text/html, application/xhtml+xml, image/jxr, */*', 273 | 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' 274 | ] 275 | 276 | base_url = "".join(self.url.split()) 277 | protocol, rest = urllib2.splittype(base_url) 278 | host, rest = urllib2.splithost(rest) 279 | 280 | if self.domain_crawl_history[self.domain]['user_agent_counter'] >= 15 or not self.user_agent: 281 | self.user_agent = random.choice(user_agent) 282 | self.domain_crawl_history[self.domain]['user_agent_counter'] = 0 283 | 284 | self.domain_crawl_history[self.domain]['user_agent_counter'] += 1 285 | 286 | headers = { 287 | 'Host': host, 288 | 'Accept': random.choice(accept), 289 | # 'Pragma': 'no-cache', 290 | # 'Cache-Control': 'no-cache', 291 | 'Accept-Encoding': 'gzip, deflate', 292 | 'Accept-Language': 'zh-CN,zh;q=0.8', 293 | 'Connection': 'keep-alive', 294 | 'User-Agent': self.user_agent, 295 | } 296 | 297 | # headers_ms_edge = { 298 | # 'Accept': 'text/html, application/xhtml+xml, image/jxr, */*', 299 | # 'Accept-Language': 'zh-Hans-CN,zh-Hans;q=0.5', 300 | # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'+ 301 | # ' (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10240 1235', 302 | # 'Accept-Encoding': 'gzip, deflate', 303 | # 'Host': host, 304 | # 'Connection': 'Keep-Alive', 305 | # } 306 | # 307 | # headers = headers_ms_edge 308 | 309 | # print headers 310 | 311 | # 以该域名的上次一次访问url做为refer 312 | refer_url = self.get_refer_url() 313 | if refer_url: 314 | headers['Referer'] = refer_url 315 | 316 | return headers 317 | 318 | # def post(self, url, params=()): 319 | # return self.__request('post', url, params) 320 | 321 | def get(self, url, params=()): 322 | self.domain = get_domain(url) 323 | self.url = url 324 | return self.__request('get', params) 325 | 326 | def __request(self, method, params=()): 327 | start_time = time.time() 328 | try: 329 | content = '' 330 | 331 | if method == 'post': 332 | req = self.get_requester().post(self.url, headers=self.get_headers(), timeout=10, params=params) 333 | if not req.encoding == 'utf-8': 334 | req.encoding = 'utf-8' 335 | 336 | else: 337 | with closing(self.get_requester().get(self.url, headers=self.get_headers(), timeout=10, params=params, 338 | allow_redirects=True, stream=True)) as req: 339 | # if not req.encoding == 'utf-8': 340 | # req.encoding = 'utf-8' 341 | 342 | # print req.headers 343 | size_limit = 1024000 # 最大接收content-length 344 | if 'content-length' in req.headers: 345 | if int(req.headers['content-length']) > size_limit: 346 | raise Exception( 347 | 'content-length too many. content-length: ' + str(req.headers['content-length'])) 348 | 349 | content = req.content 350 | 351 | else: 352 | size_temp = 0 353 | for line in req.iter_lines(): 354 | if line: 355 | size_temp += len(line) 356 | if size_temp > size_limit: 357 | raise Exception('content-length too many.') 358 | 359 | content += line 360 | 361 | content = smarty_encode(content) 362 | 363 | except requests.ConnectionError, e: 364 | return None, str(e.message), round((time.time() - start_time) * 1000, 2) 365 | except requests.HTTPError, e: 366 | return None, str(e.message), round((time.time() - start_time) * 1000, 2) 367 | except requests.Timeout, e: 368 | return None, str(e.message), round((time.time() - start_time) * 1000, 2) 369 | except Exception, e: 370 | return None, str(e.message), round((time.time() - start_time) * 1000, 2) 371 | else: 372 | return str(content), req.status_code, round((time.time() - start_time) * 1000, 2) 373 | 374 | 375 | class S: 376 | """ 377 | 用户自定义抓取回调时的参数对象 378 | """ 379 | 380 | def __init__(self, spider, html, urls, project_name, init_url): 381 | self.__spider = None 382 | self.html = '' 383 | self.urls = [] 384 | self.__spider = spider 385 | self.html = html 386 | self.project_name = project_name 387 | self.init_url = init_url 388 | self.urls = urls 389 | 390 | def crawl(self, url): 391 | self.__spider.crawl(url) 392 | 393 | 394 | class QueueCtrl(): 395 | """ 396 | 采用多种方式控制整个slave的抓取顺序与速度 397 | 具体功能由之类实现 398 | """ 399 | host_freq_pool = {} 400 | 401 | def __init__(self): 402 | pass 403 | 404 | @classmethod 405 | def add_parsed(cls, url): 406 | # 获取主域名并更新该域名的访问频率 407 | cls.__update_host_freq(get_domain(url)) 408 | 409 | @classmethod 410 | def __update_host_freq(cls, host): 411 | """ 412 | 更新域名的访问频率 413 | """ 414 | cls.host_freq_pool.setdefault(host, []) 415 | cls.host_freq_pool[host].append(int(time.time())) 416 | 417 | @classmethod 418 | def clear_host_freq_pool(cls, expire=300): 419 | """ 420 | 整理host_freq_pool 421 | 422 | 过滤掉根域名对应的访问时间戳列表中访问时间超出给定值的时间戳 423 | or 424 | 删除pool长度为0的host 425 | or 426 | 删除pool长度大于1000的部分 427 | """ 428 | now = int(time.time()) 429 | for host, pool in cls.host_freq_pool.items(): 430 | # 过多时删除部分 431 | pool_len = len(cls.host_freq_pool[host]) 432 | if pool_len > 1000: 433 | del cls.host_freq_pool[host][0:500] 434 | elif pool_len == 0: 435 | del cls.host_freq_pool[host] 436 | 437 | for timestamp in list(pool): 438 | if now - timestamp > expire: 439 | cls.host_freq_pool[host].remove(timestamp) 440 | else: 441 | break 442 | 443 | 444 | class QueueSleepCtrl(QueueCtrl): 445 | """ 446 | 根据url请求频率增加同host的url不定的sleep来控制请求速度 447 | """ 448 | 449 | def __init__(self): 450 | QueueCtrl.__init__(self) 451 | 452 | @classmethod 453 | def get_sleep_times(cls, url): 454 | domain = get_domain(url) 455 | parsed_list = cls.host_freq_pool.get(domain, []) 456 | if not parsed_list: 457 | return 1 458 | 459 | list_403 = ['jandan.net', 'meizu.com', 'meizu.cn'] # 一些防爬虫机制比较严格的站点 460 | parsed_list_len = len(parsed_list) 461 | 462 | if parsed_list_len < 5: 463 | return 1 464 | 465 | if parsed_list_len < 10: 466 | return 1 467 | 468 | if parsed_list_len < 20: 469 | if domain in list_403: 470 | return random.randint(5, 20) 471 | return 2 472 | 473 | if parsed_list_len < 30: 474 | if domain in list_403: 475 | return random.randint(5, 35) 476 | return 4 477 | 478 | if parsed_list_len < 40: 479 | if domain in list_403: 480 | return random.randint(5, 40) 481 | return 6 482 | 483 | if parsed_list_len < 50: 484 | if domain in list_403: 485 | return random.randint(5, 56) 486 | return 8 487 | 488 | if parsed_list_len < 60: 489 | if domain in list_403: 490 | return random.randint(5, 70) 491 | return 10 492 | 493 | if parsed_list_len < 70: 494 | if domain in list_403: 495 | return random.randint(5, 90) 496 | return 12 497 | 498 | if parsed_list_len < 80: 499 | if domain in list_403: 500 | return random.randint(5, 100) 501 | return 14 502 | 503 | if parsed_list_len < 90: 504 | if domain in list_403: 505 | return random.randint(5, 110) 506 | return 16 507 | 508 | if parsed_list_len < 100: 509 | if domain in list_403: 510 | return random.randint(5, 120) 511 | return 18 512 | 513 | if parsed_list_len < 110: 514 | if domain in list_403: 515 | return random.randint(5, 130) 516 | return 20 517 | 518 | if parsed_list_len < 120: 519 | if domain in list_403: 520 | return random.randint(5, 140) 521 | return 22 522 | 523 | if parsed_list_len < 130: 524 | if domain in list_403: 525 | return random.randint(5, 150) 526 | return 24 527 | 528 | return 200 529 | 530 | 531 | class UrlsSortCtrl(QueueCtrl): 532 | """ 533 | 将请求次数少的url排序在列表最前 534 | """ 535 | 536 | def __init__(self): 537 | QueueCtrl.__init__(self) 538 | 539 | @classmethod 540 | def sort_urls_by_freq(cls, urls): 541 | """ 542 | 根据host抓取次数排序urls 543 | """ 544 | sorted_urls = {} 545 | for url in urls: 546 | sorted_urls[url] = len(cls.host_freq_pool.get(get_domain(url), [])) 547 | 548 | return cls.__sort_dict_by_value_return_keys(sorted_urls) 549 | 550 | @classmethod 551 | def __sort_dict_by_value_return_keys(cls, d): 552 | """ 553 | 根据value顺序返回keys 554 | 用于根据host抓取次数排序urls 555 | """ 556 | l = [] 557 | temp = d.items() 558 | for value in sorted(d.values()): 559 | index = 0 560 | for k, v in temp: 561 | if v == value: 562 | l.append(k) 563 | del temp[index] 564 | index += 1 565 | break 566 | index += 1 567 | return l 568 | 569 | 570 | class Slave(): 571 | def __init_data(self): 572 | self.data = { 573 | 'project_name': self.project_name, 574 | 'get_urls': 1, 575 | 'urls_parsed': [], 576 | 'urls_add': [], 577 | 'save': [], 578 | 'urls_fail': [], 579 | } 580 | 581 | def __init__(self, project_name): 582 | self.data = {} 583 | self.project_name = '' 584 | self.project_name = project_name 585 | self.original_receive_json = {} 586 | self.last_change_time = 0 587 | self.__init_data() 588 | 589 | def get_data(self): 590 | """ 591 | 获取master中的新url队列, 并把之前缓存的所有数据推送至master 592 | :return: 593 | """ 594 | self.data['urls_add'] = list(set(self.data['urls_add'])) # queue 去重 595 | 596 | start_time = time.time() 597 | 598 | QueueCtrl.clear_host_freq_pool() 599 | response = self.__request_server(self.data) 600 | if not response: 601 | return None 602 | 603 | self.original_receive_json = response 604 | urls = list(set(response.get('urls', []))) 605 | urls = UrlsSortCtrl.sort_urls_by_freq(urls) 606 | 607 | response['urls'] = urls 608 | 609 | print round((time.time() - start_time) * 1000, 2), 'ms' 610 | if response: 611 | self.__init_data() 612 | 613 | return response 614 | 615 | def put_data(self, urls_parsed=(), urls_add=(), save=(), urls_fail=()): 616 | """ 617 | 不会真正推送数据, 只先加入缓存属性中, 当执行self.get_data时再一并推送 618 | :param urls_parsed: 619 | :param urls_add: 620 | :param save: 621 | :return: 622 | """ 623 | 624 | for url in urls_parsed: 625 | QueueCtrl.add_parsed(url) 626 | self.data['urls_parsed'].append(url) 627 | 628 | for url in urls_add: 629 | self.data['urls_add'].append(url) 630 | 631 | urls_fail and self.data['urls_fail'].append(urls_fail) 632 | 633 | save and self.data['save'].append(save) 634 | 635 | def has_project_change(self): 636 | current_change_time = int(self.original_receive_json.get('change_time', self.last_change_time)) 637 | 638 | if self.last_change_time == 0: 639 | self.last_change_time = current_change_time 640 | 641 | if current_change_time == self.last_change_time: 642 | return False 643 | 644 | self.last_change_time = current_change_time 645 | return True 646 | 647 | def get_origin_receive_json(self): 648 | return self.original_receive_json 649 | 650 | @classmethod 651 | def __request_server(cls, data): 652 | response = None 653 | try: 654 | json_string = Socket_client.run(json.dumps(data)) 655 | response = json.loads(json_string) 656 | finally: 657 | return response 658 | 659 | 660 | class Socket_client: 661 | host = '' 662 | port = 0 663 | 664 | @classmethod 665 | def set_host(cls, host): 666 | cls.host = host 667 | 668 | @classmethod 669 | def set_port(cls, port): 670 | cls.port = port 671 | 672 | @classmethod 673 | def run(cls, content): 674 | """ 675 | Slave与Master的socket通讯client端 676 | 使用特定格式传输 677 | 传输时会压缩数据 678 | """ 679 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 680 | sock.connect((cls.host, cls.port)) 681 | 682 | send_date = base64.b64encode(zlib.compress(content)) # 压缩编码 683 | 684 | # content前10个字符串用于标识内容长度. 685 | response_len = (str(len(send_date) + 10) + ' ' * 10)[0:10] 686 | sock.sendall(response_len + send_date) 687 | buff_size = 1024 688 | data = sock.recv(buff_size) 689 | 690 | # content前10个字符串用于标识内容长度. 691 | data_len = int(data[0:10]) 692 | while len(data) < data_len: 693 | s = sock.recv(buff_size) 694 | data += s 695 | 696 | data = zlib.decompress(base64.b64decode(data[10:])) # 解码解压 697 | 698 | sock.close() 699 | 700 | return data -------------------------------------------------------------------------------- /web/static/css/font-awesome.min.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Font Awesome 4.4.0 by @davegandy - http://fontawesome.io - @fontawesome 3 | * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) 4 | */@font-face{font-family:'FontAwesome';src:url('../fonts/fontawesome-webfont.eot?v=4.4.0');src:url('../fonts/fontawesome-webfont.eot?#iefix&v=4.4.0') format('embedded-opentype'),url('../fonts/fontawesome-webfont.woff2?v=4.4.0') format('woff2'),url('../fonts/fontawesome-webfont.woff?v=4.4.0') format('woff'),url('../fonts/fontawesome-webfont.ttf?v=4.4.0') format('truetype'),url('../fonts/fontawesome-webfont.svg?v=4.4.0#fontawesomeregular') format('svg');font-weight:normal;font-style:normal}.fa{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571429em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14285714em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14285714em;width:2.14285714em;top:.14285714em;text-align:center}.fa-li.fa-lg{left:-1.85714286em}.fa-border{padding:.2em .25em .15em;border:solid .08em #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left{margin-right:.3em}.fa.fa-pull-right{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left{margin-right:.3em}.fa.pull-right{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s infinite linear;animation:fa-spin 2s infinite linear}.fa-pulse{-webkit-animation:fa-spin 1s infinite steps(8);animation:fa-spin 1s infinite steps(8)}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=1);-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=2);-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=3);-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1);-webkit-transform:scale(-1, 1);-ms-transform:scale(-1, 1);transform:scale(-1, 1)}.fa-flip-vertical{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1);-webkit-transform:scale(1, -1);-ms-transform:scale(1, -1);transform:scale(1, -1)}:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270,:root .fa-flip-horizontal,:root .fa-flip-vertical{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:"\f000"}.fa-music:before{content:"\f001"}.fa-search:before{content:"\f002"}.fa-envelope-o:before{content:"\f003"}.fa-heart:before{content:"\f004"}.fa-star:before{content:"\f005"}.fa-star-o:before{content:"\f006"}.fa-user:before{content:"\f007"}.fa-film:before{content:"\f008"}.fa-th-large:before{content:"\f009"}.fa-th:before{content:"\f00a"}.fa-th-list:before{content:"\f00b"}.fa-check:before{content:"\f00c"}.fa-remove:before,.fa-close:before,.fa-times:before{content:"\f00d"}.fa-search-plus:before{content:"\f00e"}.fa-search-minus:before{content:"\f010"}.fa-power-off:before{content:"\f011"}.fa-signal:before{content:"\f012"}.fa-gear:before,.fa-cog:before{content:"\f013"}.fa-trash-o:before{content:"\f014"}.fa-home:before{content:"\f015"}.fa-file-o:before{content:"\f016"}.fa-clock-o:before{content:"\f017"}.fa-road:before{content:"\f018"}.fa-download:before{content:"\f019"}.fa-arrow-circle-o-down:before{content:"\f01a"}.fa-arrow-circle-o-up:before{content:"\f01b"}.fa-inbox:before{content:"\f01c"}.fa-play-circle-o:before{content:"\f01d"}.fa-rotate-right:before,.fa-repeat:before{content:"\f01e"}.fa-refresh:before{content:"\f021"}.fa-list-alt:before{content:"\f022"}.fa-lock:before{content:"\f023"}.fa-flag:before{content:"\f024"}.fa-headphones:before{content:"\f025"}.fa-volume-off:before{content:"\f026"}.fa-volume-down:before{content:"\f027"}.fa-volume-up:before{content:"\f028"}.fa-qrcode:before{content:"\f029"}.fa-barcode:before{content:"\f02a"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-book:before{content:"\f02d"}.fa-bookmark:before{content:"\f02e"}.fa-print:before{content:"\f02f"}.fa-camera:before{content:"\f030"}.fa-font:before{content:"\f031"}.fa-bold:before{content:"\f032"}.fa-italic:before{content:"\f033"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-align-left:before{content:"\f036"}.fa-align-center:before{content:"\f037"}.fa-align-right:before{content:"\f038"}.fa-align-justify:before{content:"\f039"}.fa-list:before{content:"\f03a"}.fa-dedent:before,.fa-outdent:before{content:"\f03b"}.fa-indent:before{content:"\f03c"}.fa-video-camera:before{content:"\f03d"}.fa-photo:before,.fa-image:before,.fa-picture-o:before{content:"\f03e"}.fa-pencil:before{content:"\f040"}.fa-map-marker:before{content:"\f041"}.fa-adjust:before{content:"\f042"}.fa-tint:before{content:"\f043"}.fa-edit:before,.fa-pencil-square-o:before{content:"\f044"}.fa-share-square-o:before{content:"\f045"}.fa-check-square-o:before{content:"\f046"}.fa-arrows:before{content:"\f047"}.fa-step-backward:before{content:"\f048"}.fa-fast-backward:before{content:"\f049"}.fa-backward:before{content:"\f04a"}.fa-play:before{content:"\f04b"}.fa-pause:before{content:"\f04c"}.fa-stop:before{content:"\f04d"}.fa-forward:before{content:"\f04e"}.fa-fast-forward:before{content:"\f050"}.fa-step-forward:before{content:"\f051"}.fa-eject:before{content:"\f052"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-plus-circle:before{content:"\f055"}.fa-minus-circle:before{content:"\f056"}.fa-times-circle:before{content:"\f057"}.fa-check-circle:before{content:"\f058"}.fa-question-circle:before{content:"\f059"}.fa-info-circle:before{content:"\f05a"}.fa-crosshairs:before{content:"\f05b"}.fa-times-circle-o:before{content:"\f05c"}.fa-check-circle-o:before{content:"\f05d"}.fa-ban:before{content:"\f05e"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrow-down:before{content:"\f063"}.fa-mail-forward:before,.fa-share:before{content:"\f064"}.fa-expand:before{content:"\f065"}.fa-compress:before{content:"\f066"}.fa-plus:before{content:"\f067"}.fa-minus:before{content:"\f068"}.fa-asterisk:before{content:"\f069"}.fa-exclamation-circle:before{content:"\f06a"}.fa-gift:before{content:"\f06b"}.fa-leaf:before{content:"\f06c"}.fa-fire:before{content:"\f06d"}.fa-eye:before{content:"\f06e"}.fa-eye-slash:before{content:"\f070"}.fa-warning:before,.fa-exclamation-triangle:before{content:"\f071"}.fa-plane:before{content:"\f072"}.fa-calendar:before{content:"\f073"}.fa-random:before{content:"\f074"}.fa-comment:before{content:"\f075"}.fa-magnet:before{content:"\f076"}.fa-chevron-up:before{content:"\f077"}.fa-chevron-down:before{content:"\f078"}.fa-retweet:before{content:"\f079"}.fa-shopping-cart:before{content:"\f07a"}.fa-folder:before{content:"\f07b"}.fa-folder-open:before{content:"\f07c"}.fa-arrows-v:before{content:"\f07d"}.fa-arrows-h:before{content:"\f07e"}.fa-bar-chart-o:before,.fa-bar-chart:before{content:"\f080"}.fa-twitter-square:before{content:"\f081"}.fa-facebook-square:before{content:"\f082"}.fa-camera-retro:before{content:"\f083"}.fa-key:before{content:"\f084"}.fa-gears:before,.fa-cogs:before{content:"\f085"}.fa-comments:before{content:"\f086"}.fa-thumbs-o-up:before{content:"\f087"}.fa-thumbs-o-down:before{content:"\f088"}.fa-star-half:before{content:"\f089"}.fa-heart-o:before{content:"\f08a"}.fa-sign-out:before{content:"\f08b"}.fa-linkedin-square:before{content:"\f08c"}.fa-thumb-tack:before{content:"\f08d"}.fa-external-link:before{content:"\f08e"}.fa-sign-in:before{content:"\f090"}.fa-trophy:before{content:"\f091"}.fa-github-square:before{content:"\f092"}.fa-upload:before{content:"\f093"}.fa-lemon-o:before{content:"\f094"}.fa-phone:before{content:"\f095"}.fa-square-o:before{content:"\f096"}.fa-bookmark-o:before{content:"\f097"}.fa-phone-square:before{content:"\f098"}.fa-twitter:before{content:"\f099"}.fa-facebook-f:before,.fa-facebook:before{content:"\f09a"}.fa-github:before{content:"\f09b"}.fa-unlock:before{content:"\f09c"}.fa-credit-card:before{content:"\f09d"}.fa-feed:before,.fa-rss:before{content:"\f09e"}.fa-hdd-o:before{content:"\f0a0"}.fa-bullhorn:before{content:"\f0a1"}.fa-bell:before{content:"\f0f3"}.fa-certificate:before{content:"\f0a3"}.fa-hand-o-right:before{content:"\f0a4"}.fa-hand-o-left:before{content:"\f0a5"}.fa-hand-o-up:before{content:"\f0a6"}.fa-hand-o-down:before{content:"\f0a7"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-globe:before{content:"\f0ac"}.fa-wrench:before{content:"\f0ad"}.fa-tasks:before{content:"\f0ae"}.fa-filter:before{content:"\f0b0"}.fa-briefcase:before{content:"\f0b1"}.fa-arrows-alt:before{content:"\f0b2"}.fa-group:before,.fa-users:before{content:"\f0c0"}.fa-chain:before,.fa-link:before{content:"\f0c1"}.fa-cloud:before{content:"\f0c2"}.fa-flask:before{content:"\f0c3"}.fa-cut:before,.fa-scissors:before{content:"\f0c4"}.fa-copy:before,.fa-files-o:before{content:"\f0c5"}.fa-paperclip:before{content:"\f0c6"}.fa-save:before,.fa-floppy-o:before{content:"\f0c7"}.fa-square:before{content:"\f0c8"}.fa-navicon:before,.fa-reorder:before,.fa-bars:before{content:"\f0c9"}.fa-list-ul:before{content:"\f0ca"}.fa-list-ol:before{content:"\f0cb"}.fa-strikethrough:before{content:"\f0cc"}.fa-underline:before{content:"\f0cd"}.fa-table:before{content:"\f0ce"}.fa-magic:before{content:"\f0d0"}.fa-truck:before{content:"\f0d1"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-square:before{content:"\f0d3"}.fa-google-plus-square:before{content:"\f0d4"}.fa-google-plus:before{content:"\f0d5"}.fa-money:before{content:"\f0d6"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-up:before{content:"\f0d8"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-columns:before{content:"\f0db"}.fa-unsorted:before,.fa-sort:before{content:"\f0dc"}.fa-sort-down:before,.fa-sort-desc:before{content:"\f0dd"}.fa-sort-up:before,.fa-sort-asc:before{content:"\f0de"}.fa-envelope:before{content:"\f0e0"}.fa-linkedin:before{content:"\f0e1"}.fa-rotate-left:before,.fa-undo:before{content:"\f0e2"}.fa-legal:before,.fa-gavel:before{content:"\f0e3"}.fa-dashboard:before,.fa-tachometer:before{content:"\f0e4"}.fa-comment-o:before{content:"\f0e5"}.fa-comments-o:before{content:"\f0e6"}.fa-flash:before,.fa-bolt:before{content:"\f0e7"}.fa-sitemap:before{content:"\f0e8"}.fa-umbrella:before{content:"\f0e9"}.fa-paste:before,.fa-clipboard:before{content:"\f0ea"}.fa-lightbulb-o:before{content:"\f0eb"}.fa-exchange:before{content:"\f0ec"}.fa-cloud-download:before{content:"\f0ed"}.fa-cloud-upload:before{content:"\f0ee"}.fa-user-md:before{content:"\f0f0"}.fa-stethoscope:before{content:"\f0f1"}.fa-suitcase:before{content:"\f0f2"}.fa-bell-o:before{content:"\f0a2"}.fa-coffee:before{content:"\f0f4"}.fa-cutlery:before{content:"\f0f5"}.fa-file-text-o:before{content:"\f0f6"}.fa-building-o:before{content:"\f0f7"}.fa-hospital-o:before{content:"\f0f8"}.fa-ambulance:before{content:"\f0f9"}.fa-medkit:before{content:"\f0fa"}.fa-fighter-jet:before{content:"\f0fb"}.fa-beer:before{content:"\f0fc"}.fa-h-square:before{content:"\f0fd"}.fa-plus-square:before{content:"\f0fe"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angle-down:before{content:"\f107"}.fa-desktop:before{content:"\f108"}.fa-laptop:before{content:"\f109"}.fa-tablet:before{content:"\f10a"}.fa-mobile-phone:before,.fa-mobile:before{content:"\f10b"}.fa-circle-o:before{content:"\f10c"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-spinner:before{content:"\f110"}.fa-circle:before{content:"\f111"}.fa-mail-reply:before,.fa-reply:before{content:"\f112"}.fa-github-alt:before{content:"\f113"}.fa-folder-o:before{content:"\f114"}.fa-folder-open-o:before{content:"\f115"}.fa-smile-o:before{content:"\f118"}.fa-frown-o:before{content:"\f119"}.fa-meh-o:before{content:"\f11a"}.fa-gamepad:before{content:"\f11b"}.fa-keyboard-o:before{content:"\f11c"}.fa-flag-o:before{content:"\f11d"}.fa-flag-checkered:before{content:"\f11e"}.fa-terminal:before{content:"\f120"}.fa-code:before{content:"\f121"}.fa-mail-reply-all:before,.fa-reply-all:before{content:"\f122"}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:"\f123"}.fa-location-arrow:before{content:"\f124"}.fa-crop:before{content:"\f125"}.fa-code-fork:before{content:"\f126"}.fa-unlink:before,.fa-chain-broken:before{content:"\f127"}.fa-question:before{content:"\f128"}.fa-info:before{content:"\f129"}.fa-exclamation:before{content:"\f12a"}.fa-superscript:before{content:"\f12b"}.fa-subscript:before{content:"\f12c"}.fa-eraser:before{content:"\f12d"}.fa-puzzle-piece:before{content:"\f12e"}.fa-microphone:before{content:"\f130"}.fa-microphone-slash:before{content:"\f131"}.fa-shield:before{content:"\f132"}.fa-calendar-o:before{content:"\f133"}.fa-fire-extinguisher:before{content:"\f134"}.fa-rocket:before{content:"\f135"}.fa-maxcdn:before{content:"\f136"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-html5:before{content:"\f13b"}.fa-css3:before{content:"\f13c"}.fa-anchor:before{content:"\f13d"}.fa-unlock-alt:before{content:"\f13e"}.fa-bullseye:before{content:"\f140"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-rss-square:before{content:"\f143"}.fa-play-circle:before{content:"\f144"}.fa-ticket:before{content:"\f145"}.fa-minus-square:before{content:"\f146"}.fa-minus-square-o:before{content:"\f147"}.fa-level-up:before{content:"\f148"}.fa-level-down:before{content:"\f149"}.fa-check-square:before{content:"\f14a"}.fa-pencil-square:before{content:"\f14b"}.fa-external-link-square:before{content:"\f14c"}.fa-share-square:before{content:"\f14d"}.fa-compass:before{content:"\f14e"}.fa-toggle-down:before,.fa-caret-square-o-down:before{content:"\f150"}.fa-toggle-up:before,.fa-caret-square-o-up:before{content:"\f151"}.fa-toggle-right:before,.fa-caret-square-o-right:before{content:"\f152"}.fa-euro:before,.fa-eur:before{content:"\f153"}.fa-gbp:before{content:"\f154"}.fa-dollar:before,.fa-usd:before{content:"\f155"}.fa-rupee:before,.fa-inr:before{content:"\f156"}.fa-cny:before,.fa-rmb:before,.fa-yen:before,.fa-jpy:before{content:"\f157"}.fa-ruble:before,.fa-rouble:before,.fa-rub:before{content:"\f158"}.fa-won:before,.fa-krw:before{content:"\f159"}.fa-bitcoin:before,.fa-btc:before{content:"\f15a"}.fa-file:before{content:"\f15b"}.fa-file-text:before{content:"\f15c"}.fa-sort-alpha-asc:before{content:"\f15d"}.fa-sort-alpha-desc:before{content:"\f15e"}.fa-sort-amount-asc:before{content:"\f160"}.fa-sort-amount-desc:before{content:"\f161"}.fa-sort-numeric-asc:before{content:"\f162"}.fa-sort-numeric-desc:before{content:"\f163"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbs-down:before{content:"\f165"}.fa-youtube-square:before{content:"\f166"}.fa-youtube:before{content:"\f167"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-youtube-play:before{content:"\f16a"}.fa-dropbox:before{content:"\f16b"}.fa-stack-overflow:before{content:"\f16c"}.fa-instagram:before{content:"\f16d"}.fa-flickr:before{content:"\f16e"}.fa-adn:before{content:"\f170"}.fa-bitbucket:before{content:"\f171"}.fa-bitbucket-square:before{content:"\f172"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-long-arrow-down:before{content:"\f175"}.fa-long-arrow-up:before{content:"\f176"}.fa-long-arrow-left:before{content:"\f177"}.fa-long-arrow-right:before{content:"\f178"}.fa-apple:before{content:"\f179"}.fa-windows:before{content:"\f17a"}.fa-android:before{content:"\f17b"}.fa-linux:before{content:"\f17c"}.fa-dribbble:before{content:"\f17d"}.fa-skype:before{content:"\f17e"}.fa-foursquare:before{content:"\f180"}.fa-trello:before{content:"\f181"}.fa-female:before{content:"\f182"}.fa-male:before{content:"\f183"}.fa-gittip:before,.fa-gratipay:before{content:"\f184"}.fa-sun-o:before{content:"\f185"}.fa-moon-o:before{content:"\f186"}.fa-archive:before{content:"\f187"}.fa-bug:before{content:"\f188"}.fa-vk:before{content:"\f189"}.fa-weibo:before{content:"\f18a"}.fa-renren:before{content:"\f18b"}.fa-pagelines:before{content:"\f18c"}.fa-stack-exchange:before{content:"\f18d"}.fa-arrow-circle-o-right:before{content:"\f18e"}.fa-arrow-circle-o-left:before{content:"\f190"}.fa-toggle-left:before,.fa-caret-square-o-left:before{content:"\f191"}.fa-dot-circle-o:before{content:"\f192"}.fa-wheelchair:before{content:"\f193"}.fa-vimeo-square:before{content:"\f194"}.fa-turkish-lira:before,.fa-try:before{content:"\f195"}.fa-plus-square-o:before{content:"\f196"}.fa-space-shuttle:before{content:"\f197"}.fa-slack:before{content:"\f198"}.fa-envelope-square:before{content:"\f199"}.fa-wordpress:before{content:"\f19a"}.fa-openid:before{content:"\f19b"}.fa-institution:before,.fa-bank:before,.fa-university:before{content:"\f19c"}.fa-mortar-board:before,.fa-graduation-cap:before{content:"\f19d"}.fa-yahoo:before{content:"\f19e"}.fa-google:before{content:"\f1a0"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-square:before{content:"\f1a2"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-stumbleupon:before{content:"\f1a4"}.fa-delicious:before{content:"\f1a5"}.fa-digg:before{content:"\f1a6"}.fa-pied-piper:before{content:"\f1a7"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-drupal:before{content:"\f1a9"}.fa-joomla:before{content:"\f1aa"}.fa-language:before{content:"\f1ab"}.fa-fax:before{content:"\f1ac"}.fa-building:before{content:"\f1ad"}.fa-child:before{content:"\f1ae"}.fa-paw:before{content:"\f1b0"}.fa-spoon:before{content:"\f1b1"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-steam:before{content:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-recycle:before{content:"\f1b8"}.fa-automobile:before,.fa-car:before{content:"\f1b9"}.fa-cab:before,.fa-taxi:before{content:"\f1ba"}.fa-tree:before{content:"\f1bb"}.fa-spotify:before{content:"\f1bc"}.fa-deviantart:before{content:"\f1bd"}.fa-soundcloud:before{content:"\f1be"}.fa-database:before{content:"\f1c0"}.fa-file-pdf-o:before{content:"\f1c1"}.fa-file-word-o:before{content:"\f1c2"}.fa-file-excel-o:before{content:"\f1c3"}.fa-file-powerpoint-o:before{content:"\f1c4"}.fa-file-photo-o:before,.fa-file-picture-o:before,.fa-file-image-o:before{content:"\f1c5"}.fa-file-zip-o:before,.fa-file-archive-o:before{content:"\f1c6"}.fa-file-sound-o:before,.fa-file-audio-o:before{content:"\f1c7"}.fa-file-movie-o:before,.fa-file-video-o:before{content:"\f1c8"}.fa-file-code-o:before{content:"\f1c9"}.fa-vine:before{content:"\f1ca"}.fa-codepen:before{content:"\f1cb"}.fa-jsfiddle:before{content:"\f1cc"}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-saver:before,.fa-support:before,.fa-life-ring:before{content:"\f1cd"}.fa-circle-o-notch:before{content:"\f1ce"}.fa-ra:before,.fa-rebel:before{content:"\f1d0"}.fa-ge:before,.fa-empire:before{content:"\f1d1"}.fa-git-square:before{content:"\f1d2"}.fa-git:before{content:"\f1d3"}.fa-y-combinator-square:before,.fa-yc-square:before,.fa-hacker-news:before{content:"\f1d4"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-qq:before{content:"\f1d6"}.fa-wechat:before,.fa-weixin:before{content:"\f1d7"}.fa-send:before,.fa-paper-plane:before{content:"\f1d8"}.fa-send-o:before,.fa-paper-plane-o:before{content:"\f1d9"}.fa-history:before{content:"\f1da"}.fa-circle-thin:before{content:"\f1db"}.fa-header:before{content:"\f1dc"}.fa-paragraph:before{content:"\f1dd"}.fa-sliders:before{content:"\f1de"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-bomb:before{content:"\f1e2"}.fa-soccer-ball-o:before,.fa-futbol-o:before{content:"\f1e3"}.fa-tty:before{content:"\f1e4"}.fa-binoculars:before{content:"\f1e5"}.fa-plug:before{content:"\f1e6"}.fa-slideshare:before{content:"\f1e7"}.fa-twitch:before{content:"\f1e8"}.fa-yelp:before{content:"\f1e9"}.fa-newspaper-o:before{content:"\f1ea"}.fa-wifi:before{content:"\f1eb"}.fa-calculator:before{content:"\f1ec"}.fa-paypal:before{content:"\f1ed"}.fa-google-wallet:before{content:"\f1ee"}.fa-cc-visa:before{content:"\f1f0"}.fa-cc-mastercard:before{content:"\f1f1"}.fa-cc-discover:before{content:"\f1f2"}.fa-cc-amex:before{content:"\f1f3"}.fa-cc-paypal:before{content:"\f1f4"}.fa-cc-stripe:before{content:"\f1f5"}.fa-bell-slash:before{content:"\f1f6"}.fa-bell-slash-o:before{content:"\f1f7"}.fa-trash:before{content:"\f1f8"}.fa-copyright:before{content:"\f1f9"}.fa-at:before{content:"\f1fa"}.fa-eyedropper:before{content:"\f1fb"}.fa-paint-brush:before{content:"\f1fc"}.fa-birthday-cake:before{content:"\f1fd"}.fa-area-chart:before{content:"\f1fe"}.fa-pie-chart:before{content:"\f200"}.fa-line-chart:before{content:"\f201"}.fa-lastfm:before{content:"\f202"}.fa-lastfm-square:before{content:"\f203"}.fa-toggle-off:before{content:"\f204"}.fa-toggle-on:before{content:"\f205"}.fa-bicycle:before{content:"\f206"}.fa-bus:before{content:"\f207"}.fa-ioxhost:before{content:"\f208"}.fa-angellist:before{content:"\f209"}.fa-cc:before{content:"\f20a"}.fa-shekel:before,.fa-sheqel:before,.fa-ils:before{content:"\f20b"}.fa-meanpath:before{content:"\f20c"}.fa-buysellads:before{content:"\f20d"}.fa-connectdevelop:before{content:"\f20e"}.fa-dashcube:before{content:"\f210"}.fa-forumbee:before{content:"\f211"}.fa-leanpub:before{content:"\f212"}.fa-sellsy:before{content:"\f213"}.fa-shirtsinbulk:before{content:"\f214"}.fa-simplybuilt:before{content:"\f215"}.fa-skyatlas:before{content:"\f216"}.fa-cart-plus:before{content:"\f217"}.fa-cart-arrow-down:before{content:"\f218"}.fa-diamond:before{content:"\f219"}.fa-ship:before{content:"\f21a"}.fa-user-secret:before{content:"\f21b"}.fa-motorcycle:before{content:"\f21c"}.fa-street-view:before{content:"\f21d"}.fa-heartbeat:before{content:"\f21e"}.fa-venus:before{content:"\f221"}.fa-mars:before{content:"\f222"}.fa-mercury:before{content:"\f223"}.fa-intersex:before,.fa-transgender:before{content:"\f224"}.fa-transgender-alt:before{content:"\f225"}.fa-venus-double:before{content:"\f226"}.fa-mars-double:before{content:"\f227"}.fa-venus-mars:before{content:"\f228"}.fa-mars-stroke:before{content:"\f229"}.fa-mars-stroke-v:before{content:"\f22a"}.fa-mars-stroke-h:before{content:"\f22b"}.fa-neuter:before{content:"\f22c"}.fa-genderless:before{content:"\f22d"}.fa-facebook-official:before{content:"\f230"}.fa-pinterest-p:before{content:"\f231"}.fa-whatsapp:before{content:"\f232"}.fa-server:before{content:"\f233"}.fa-user-plus:before{content:"\f234"}.fa-user-times:before{content:"\f235"}.fa-hotel:before,.fa-bed:before{content:"\f236"}.fa-viacoin:before{content:"\f237"}.fa-train:before{content:"\f238"}.fa-subway:before{content:"\f239"}.fa-medium:before{content:"\f23a"}.fa-yc:before,.fa-y-combinator:before{content:"\f23b"}.fa-optin-monster:before{content:"\f23c"}.fa-opencart:before{content:"\f23d"}.fa-expeditedssl:before{content:"\f23e"}.fa-battery-4:before,.fa-battery-full:before{content:"\f240"}.fa-battery-3:before,.fa-battery-three-quarters:before{content:"\f241"}.fa-battery-2:before,.fa-battery-half:before{content:"\f242"}.fa-battery-1:before,.fa-battery-quarter:before{content:"\f243"}.fa-battery-0:before,.fa-battery-empty:before{content:"\f244"}.fa-mouse-pointer:before{content:"\f245"}.fa-i-cursor:before{content:"\f246"}.fa-object-group:before{content:"\f247"}.fa-object-ungroup:before{content:"\f248"}.fa-sticky-note:before{content:"\f249"}.fa-sticky-note-o:before{content:"\f24a"}.fa-cc-jcb:before{content:"\f24b"}.fa-cc-diners-club:before{content:"\f24c"}.fa-clone:before{content:"\f24d"}.fa-balance-scale:before{content:"\f24e"}.fa-hourglass-o:before{content:"\f250"}.fa-hourglass-1:before,.fa-hourglass-start:before{content:"\f251"}.fa-hourglass-2:before,.fa-hourglass-half:before{content:"\f252"}.fa-hourglass-3:before,.fa-hourglass-end:before{content:"\f253"}.fa-hourglass:before{content:"\f254"}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:"\f255"}.fa-hand-stop-o:before,.fa-hand-paper-o:before{content:"\f256"}.fa-hand-scissors-o:before{content:"\f257"}.fa-hand-lizard-o:before{content:"\f258"}.fa-hand-spock-o:before{content:"\f259"}.fa-hand-pointer-o:before{content:"\f25a"}.fa-hand-peace-o:before{content:"\f25b"}.fa-trademark:before{content:"\f25c"}.fa-registered:before{content:"\f25d"}.fa-creative-commons:before{content:"\f25e"}.fa-gg:before{content:"\f260"}.fa-gg-circle:before{content:"\f261"}.fa-tripadvisor:before{content:"\f262"}.fa-odnoklassniki:before{content:"\f263"}.fa-odnoklassniki-square:before{content:"\f264"}.fa-get-pocket:before{content:"\f265"}.fa-wikipedia-w:before{content:"\f266"}.fa-safari:before{content:"\f267"}.fa-chrome:before{content:"\f268"}.fa-firefox:before{content:"\f269"}.fa-opera:before{content:"\f26a"}.fa-internet-explorer:before{content:"\f26b"}.fa-tv:before,.fa-television:before{content:"\f26c"}.fa-contao:before{content:"\f26d"}.fa-500px:before{content:"\f26e"}.fa-amazon:before{content:"\f270"}.fa-calendar-plus-o:before{content:"\f271"}.fa-calendar-minus-o:before{content:"\f272"}.fa-calendar-times-o:before{content:"\f273"}.fa-calendar-check-o:before{content:"\f274"}.fa-industry:before{content:"\f275"}.fa-map-pin:before{content:"\f276"}.fa-map-signs:before{content:"\f277"}.fa-map-o:before{content:"\f278"}.fa-map:before{content:"\f279"}.fa-commenting:before{content:"\f27a"}.fa-commenting-o:before{content:"\f27b"}.fa-houzz:before{content:"\f27c"}.fa-vimeo:before{content:"\f27d"}.fa-black-tie:before{content:"\f27e"}.fa-fonticons:before{content:"\f280"} 5 | --------------------------------------------------------------------------------