6 |
7 |
8 |
9 | 文章列表
10 |
11 |
60 |
61 | {% endblock %}
--------------------------------------------------------------------------------
/wechatspider/templatetags/paginator.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | from copy import copy
4 | from django import template
5 |
6 | register = template.Library()
7 |
8 |
def paginator(context, paginated_object_list, adjacent_pages=2):
    """
    Build the context consumed by the ``paginator.html`` inclusion template.

    Meant to be used together with a paginated page object: on top of the
    usual next/previous information it exposes the window of page numbers
    surrounding the current page and whether separate "first" / "last" links
    are needed because those pages fall outside that window.

    :param context: template context of the caller; only ``params`` is read.
    :param paginated_object_list: page object exposing ``number``,
        ``paginator``, ``has_next()``, ``has_previous()``, ``start_index``
        and ``end_index``.
    :param adjacent_pages: how many neighbours to list on each side of the
        current page.
    :return: dict of pagination variables for the template.
    """
    page_obj = paginated_object_list
    pager = page_obj.paginator
    current = page_obj.number
    total_pages = pager.num_pages

    # Window of neighbouring pages, clipped to the valid range 1..total_pages.
    window_start = max(current - adjacent_pages, 1)
    window_end = min(current + adjacent_pages, total_pages)
    window = list(range(window_start, window_end + 1))

    return {
        'params': context.get('params'),
        'results_per_page': pager.per_page,
        'page': current,
        'pages': total_pages,
        'page_numbers': window,
        'next': current + 1,
        'previous': current - 1,
        'has_next': page_obj.has_next(),
        'has_previous': page_obj.has_previous(),
        # Explicit first/last links are only needed when the window does not
        # already contain those pages.
        'show_first': 1 not in window,
        'show_last': total_pages not in window,
        'count': pager.count,
        # Passed through uncalled, exactly as received from the page object.
        'start_index': page_obj.start_index,
        'end_index': page_obj.end_index,
    }
41 |
42 | register.inclusion_tag('paginator.html', takes_context=True)(paginator)
43 |
44 |
45 | @register.filter
def update_page(params, page):
    """Return a shallow copy of ``params`` whose ``'page'`` entry is ``page``.

    A falsy ``params`` (``None`` or empty) yields a fresh dict; the mapping
    passed in is never modified.
    """
    updated = copy(params) if params else {}
    updated['page'] = page
    return updated
50 |
51 | @register.filter
def update_status(params, value):
    """Return a shallow copy of ``params`` whose ``'status'`` entry is ``value``.

    A falsy ``params`` (``None`` or empty) yields a fresh dict; the mapping
    passed in is never modified.
    """
    updated = copy(params) if params else {}
    updated['status'] = value
    return updated
56 |
57 | @register.filter
def gen_get_params(params):
    """Render ``params`` as a URL query string, including the leading ``?``.

    Keys and values are interpolated with ``%s`` as-is (no URL-encoding is
    applied), e.g. ``{'page': 2}`` becomes ``'?page=2'``.

    :param params: mapping of query parameters; ``None`` or an empty mapping
        is accepted, like in the sibling filters, and yields just ``'?'``.
    :return: the query string.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` is Python 2 only.
    pairs = ['%s=%s' % (key, value) for key, value in (params or {}).items()]
    return '?' + '&'.join(pairs)
64 |
65 | @register.filter
def remove_key(params, key):
    """Return a shallow copy of ``params`` without the entry ``key``.

    A missing ``key`` is ignored, a falsy ``params`` yields a fresh empty
    dict, and the mapping passed in is never modified.
    """
    trimmed = copy(params) if params else {}
    trimmed.pop(key, None)
    return trimmed
70 |
--------------------------------------------------------------------------------
/wechat/management/commands/checkproxies.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | import time
4 | from django.core.management.base import BaseCommand
5 | from wechat.models import Proxy
6 | from wechat.util import check_proxy, check_wechat
7 |
8 |
class Command(BaseCommand):
    """Management command that re-validates stored proxies in an endless loop."""
    help = 'check proxies'

    def handle(self, *args, **options):
        """Check the download proxies, sleep 60 seconds, repeat; never returns."""
        while True:
            # check_all_proxies() is currently not part of the loop.
            self.check_wechat_proxies()
            time.sleep(60)

    def check_all_proxies(self):
        """Probe new, working and recently failed proxies with ``check_proxy``.

        A failed probe marks the proxy as failed and bumps its retry counter;
        a successful one stores the measured speed (seconds * 1000) and
        resets the counter.
        """
        lookups = (
            {'status': Proxy.STATUS_NEW},                    # new proxies
            {'status': Proxy.STATUS_SUCCESS},                # known-good proxies
            {'status': Proxy.STATUS_FAIL, 'retry__lt': 3},   # failed, still retryable
        )
        for lookup in lookups:
            for proxy in Proxy.objects.filter(**lookup):
                failed, detected, elapsed = check_proxy(proxy.host, proxy.port)
                if not failed and detected:
                    proxy.status = Proxy.STATUS_SUCCESS
                    proxy.speed = elapsed * 1000
                    proxy.retry = 0
                else:
                    proxy.status = Proxy.STATUS_FAIL
                    proxy.retry += 1
                proxy.save()

    def check_wechat_proxies(self):
        """Probe download proxies with ``check_wechat`` and drop the bad ones.

        Failed download proxies that already have at least one retry are
        deleted first; then new and working download proxies are probed, and
        any proxy that fails the probe is deleted on the spot.
        """
        # Purge download proxies already recorded as failed.
        Proxy.objects.filter(kind=Proxy.KIND_DOWNLOAD, status=Proxy.STATUS_FAIL, retry__gte=1).delete()

        # New proxies first, then the ones that worked before.
        for status in (Proxy.STATUS_NEW, Proxy.STATUS_SUCCESS):
            for proxy in Proxy.objects.filter(kind=Proxy.KIND_DOWNLOAD, status=status):
                failed, detected, elapsed = check_wechat(proxy.host, proxy.port)
                if failed or not detected:
                    proxy.delete()
                    continue
                proxy.status = Proxy.STATUS_SUCCESS
                proxy.speed = elapsed * 1000
                proxy.retry = 0
                proxy.save()
57 |
--------------------------------------------------------------------------------
/bin/scheduler.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | # 加载django环境
4 | import sys
5 | import os
6 | reload(sys)
7 | sys.setdefaultencoding("utf-8")
8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
9 | os.environ['DJANGO_SETTINGS_MODULE'] = 'wechatspider.settings'
10 | import django
11 | django.setup()
12 |
13 | import json
14 | from wechat.models import Wechat, Word
15 | from django.conf import settings
16 | import logging
17 | logger = logging.getLogger()
18 | from datetime import datetime, timedelta
19 | import time
20 | from wechatspider.util import get_redis
21 | from wechat.constants import KIND_NORMAL, KIND_KEYWORD
22 |
23 |
class Scheduler(object):
    """Pushes due crawl tasks onto the downloader queue in Redis."""

    def _enqueue(self, conn, task, record, now):
        """Queue ``task`` for the downloader and push back ``record``'s next crawl time."""
        conn.lpush(settings.CRAWLER_CONFIG["downloader"], json.dumps(task))

        # Next crawl is due ``frequency`` minutes after this scheduling pass.
        record.next_crawl_time = now + timedelta(minutes=record.frequency)
        record.save()

        logging.debug(task)

    def run(self):
        """Poll once per second for due accounts and keywords; never returns.

        When ``settings.CRAWLER_DEBUG`` is set, the downloader queue is
        emptied before polling starts.
        """
        conn = get_redis()
        if settings.CRAWLER_DEBUG:
            conn.delete(settings.CRAWLER_CONFIG["downloader"])

        while True:
            now = datetime.now()

            # Official accounts that are due for crawling.
            due_accounts = Wechat.objects.filter(
                frequency__gt=0, next_crawl_time__lt=now, status=Wechat.STATUS_DEFAULT
            ).order_by('-id')
            for account in due_accounts:
                task = {
                    'kind': KIND_NORMAL,
                    'wechat_id': account.id,
                    'wechatid': account.wechatid
                }
                self._enqueue(conn, task, account, now)

            # Keywords that are due for crawling.
            due_words = Word.objects.filter(frequency__gt=0, next_crawl_time__lt=now).order_by('-id')
            for word in due_words:
                task = {
                    'kind': KIND_KEYWORD,
                    'word': word.text
                }
                self._enqueue(conn, task, word, now)

            time.sleep(1)
68 |
# Start the scheduler loop only when this file is executed as a script.
if __name__ == '__main__':
    scheduler = Scheduler()
    scheduler.run()
72 |
--------------------------------------------------------------------------------
/supervisord.conf:
--------------------------------------------------------------------------------
1 | #[program:wechatspider.0fenbei.com]
2 | #command=/usr/bin/python /var/www/0fenbei/wechat-spider/manage.py runserver 0.0.0.0:8090
3 | #umask=022
4 | #user=ripple
5 | #startsecs=0
6 | #stopwaitsecs=0
7 | #autostart=true
8 | #autorestart=true
9 | #stdout_logfile=/var/log/wechatspider/wechatspider.stdout.log
10 | #stderr_logfile=/var/log/wechatspider/wechatspider.stderr.log
11 | #stopsignal=KILL
12 | #killasgroup=true
13 |
14 | [program:uwsgi-wechatspider.0fenbei.com]
15 | command=/usr/bin/uwsgi --ini /var/www/0fenbei/wechat-spider/uwsgi.ini
16 | directory=/var/www/0fenbei/wechat-spider
17 | umask=022
18 | user=ripple
19 | startsecs=0
20 | stopwaitsecs=0
21 | autostart=true
22 | autorestart=true
23 | stdout_logfile=/var/log/bowenpay/wechatspider.stdout.log
24 | stderr_logfile=/var/log/bowenpay/wechatspider.stderr.log
25 | stopsignal=QUIT
26 | killasgroup=true
27 |
28 | [program:wechatspider_scheduler]
29 | command=/usr/bin/python /var/www/0fenbei/wechat-spider/bin/scheduler.py
30 | umask=022
31 | user=ripple
32 | startsecs=0
33 | stopwaitsecs=0
34 | autostart=true
35 | autorestart=true
36 | stdout_logfile=/var/log/wechatspider/wechatspider_scheduler.stdout.log
37 | stderr_logfile=/var/log/wechatspider/wechatspider_scheduler.stderr.log
38 | stopsignal=KILL
39 | killasgroup=true
40 |
41 | [program:wechatspider_downloader]
42 | command=/usr/bin/python /var/www/0fenbei/wechat-spider/bin/downloader.py
43 | umask=022
44 | user=root
45 | startsecs=0
46 | stopwaitsecs=0
47 | autostart=true
48 | autorestart=true
49 | stdout_logfile=/var/log/wechatspider/wechatspider_downloader.stdout.log
50 | stderr_logfile=/var/log/wechatspider/wechatspider_downloader.stderr.log
51 | stopsignal=KILL
52 | killasgroup=true
53 | process_name=%(process_num)s
54 | numprocs=2
55 |
56 | [program:wechatspider_extractor]
57 | command=/usr/bin/python /var/www/0fenbei/wechat-spider/bin/extractor.py
58 | umask=022
59 | user=ripple
60 | startsecs=0
61 | stopwaitsecs=0
62 | autostart=true
63 | autorestart=true
64 | stdout_logfile=/var/log/wechatspider/wechatspider_extractor.stdout.log
65 | stderr_logfile=/var/log/wechatspider/wechatspider_extractor.stderr.log
66 | stopsignal=KILL
67 | killasgroup=true
68 | process_name=%(process_num)s
69 | numprocs=2
70 |
71 | [program:wechatspider_processor]
72 | command=/usr/bin/python /var/www/0fenbei/wechat-spider/bin/processor.py
73 | umask=022
74 | user=ripple
75 | startsecs=0
76 | stopwaitsecs=0
77 | autostart=true
78 | autorestart=true
79 | stdout_logfile=/var/log/wechatspider/wechatspider_processor.stdout.log
80 | stderr_logfile=/var/log/wechatspider/wechatspider_processor.stderr.log
81 | stopsignal=KILL
82 | killasgroup=true
83 |
84 |
--------------------------------------------------------------------------------
/wechatspider/templates/base.html:
--------------------------------------------------------------------------------
1 | {% load staticfiles %}
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | {% block title %}
12 |
微信爬虫
13 |
14 |
15 | {% endblock %}
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | {% block header_css %}
26 |
27 | {% endblock %}
28 |
29 |
30 |
34 |
35 |
36 |
37 | {% block nav %}
38 | {% include "nav.html" %}
39 | {% endblock %}
40 | {% block main %}
41 |
42 |
43 | {% for message in messages %}
44 |
45 |
46 | {{ message | safe }}
47 |
48 | {% endfor %}
49 | {% block main-content %}
50 |
51 | {% endblock %}
52 |
53 |
54 | {% endblock %}
55 |
56 | {% block footer %}
57 | {% include "footer.html" %}
58 | {% endblock %}
59 |
60 |
61 |
62 |
63 |
64 |
65 | {% block footer-js %}
66 |
67 | {% endblock %}
68 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/wechat/templates/wechat/topic_list.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | {% load paginator %}
3 | {% block main-content %}
4 |
5 |
6 |
20 |
21 |
22 | 文章列表
23 |
24 |
25 |
26 |
27 |
28 | | #ID |
29 | 头像 |
30 | 标题 |
31 | 原文链接 |
32 | 公众号 |
33 | 时间 |
34 | 阅读数 |
35 | 点赞数 |
36 | 是否可用 |
37 | 操作 |
38 |
39 |
40 |
41 | {% for item in topics %}
42 | {% with wechat=item.wechat %}
43 |
44 | | {{ item.id}} |
45 | |
46 | {{ item.title}} |
47 | |
48 | {{ wechat.name}} |
49 | {{ item.publish_time}} |
50 | {{ item.read_num }} |
51 | {{ item.like_num }} |
52 | {{ item.available }} |
53 |
54 | {% if wechat.status == 2 %}
55 | 公众号已删除
56 | {% else %}
57 | 删除公众号
58 | {% endif %}
59 | |
60 |
61 | {% endwith %}
62 | {% endfor %}
63 |
64 |
65 |
66 |
69 |
70 |
71 |
72 |
73 | {% endblock %}
74 |
--------------------------------------------------------------------------------
/wechatspider/static/web/js/qiniu-custom.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by yijingping on 15/11/12.
3 | */
/**
 * Create a Qiniu uploader configured for this site.
 *
 * @param {Object} options
 * @param {*} options.browse_button  Element used to pick files (required by the SDK).
 * @param {*} options.container      Upload area; forwarded as `container`.
 * @param {*} options.drop_element   Drag-and-drop target; forwarded as `drop_element`.
 * @param {Function} options.FileUploaded  Callback installed as the `FileUploaded` handler.
 * @returns {Object} The object returned by `Qiniu.uploader`, so callers can
 *     drive the uploader themselves.
 */
function getQiniuUploader(options){
    var uploader_option = {
        runtimes: 'html5,flash,html4',    // upload runtimes, tried in this order
        browse_button: options.browse_button,       // file-picker button, **required**
        uptoken_url: '/qiniu/uptoken/',            // URL queried via Ajax for the upToken (served by the backend)
        // downtoken_url: '/downtoken',
        // URL queried via Ajax for a downToken; for private buckets. The SDK POSTs the file's
        // key and domain there and the JSON reply must carry a `url` field with the download address.
        // uptoken : '<upload token>', // must be set when uptoken_url is not; generated by another program
        // unique_names: true, // default false (key is the file name). When on, the SDK generates a key per file
        // save_key: true,   // default false. Enable when the server-side upload policy sets `save_key`;
        //                   // the SDK then leaves the key untouched on the client
        domain: 'http://7oxfjw.com1.z0.glb.clouddn.com/',   // bucket domain, used when downloading, **required**
        get_new_uptoken: false,  // whether to fetch a fresh token for every upload
        container: options.container,           // upload area; defaults to the browse button's parent
        max_file_size: '100mb',           // maximum file size
        flash_swf_url: 'js/plupload/Moxie.swf',  // flash runtime, relative path
        max_retries: 3,                   // maximum retries after a failed upload
        dragdrop: true,                   // enable drag-and-drop upload
        drop_element: options.drop_element,        // drop target for files or folders
        chunk_size: '4mb',                // chunk size for chunked uploads
        auto_start: true,                 // upload right after selection; otherwise the caller must trigger it
        // x_vars: custom upload variables, see
        // http://developer.qiniu.com/docs/v6/api/overview/up/response/vars.html
        init: {
            'FilesAdded': function(up, files) {
                plupload.each(files, function(file) {
                    // hook: a file was added to the queue
                });
            },
            'BeforeUpload': function(up, file) {
                // hook: before each file is uploaded
            },
            'UploadProgress': function(up, file) {
                // hook: while a file is uploading
            },
            'FileUploaded': options.FileUploaded,
            'Error': function(up, err, errTip) {
                // hook: an upload failed
            },
            'UploadComplete': function() {
                // hook: the whole queue has been processed
            },
            'Key': function(up, file) {
                // Per-file key customisation; per the SDK notes this only takes
                // effect with unique_names: false and save_key: false.
                // The key is the current timestamp in milliseconds.
                var key = ""+new Date().getTime();
                return key;
            }
        }
    };
    // `domain` is the bucket's domain as shown in the Qiniu console.
    // The uploader is a plupload object, see http://plupload.com/docs
    var uploader = Qiniu.uploader(uploader_option);

    // Hand the uploader back; previously it was created and then discarded,
    // leaving callers no way to reach it.
    return uploader;
}
--------------------------------------------------------------------------------
/wechat/util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | import time
4 | import urllib2
5 | import requests
6 | from lxml.etree import tostring
7 | from itertools import chain
8 |
9 | ip_check_url = 'http://api.ipify.org'
10 | user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'
11 | socket_timeout = 3
12 |
13 |
14 | # Get real public IP address
def get_real_pip():
    """Return the body served by ``ip_check_url`` for a direct request.

    The request is sent without any proxy configured here and with the
    module's ``user_agent`` header; the result is used as this host's own
    public IP address.

    :return: the raw response body.
    :raises urllib2.URLError: if the service cannot be reached.
    """
    req = urllib2.Request(ip_check_url)
    req.add_header('User-agent', user_agent)
    conn = urllib2.urlopen(req)
    try:
        return conn.read()
    finally:
        # Release the connection even when reading the body fails.
        conn.close()
22 |
23 | # Set global variable containing "real" public IP address
24 | real_pip = get_real_pip()
25 |
26 |
def check_proxy(host, port):
    """Probe an HTTP proxy by fetching ``ip_check_url`` through it.

    :param host: proxy host.
    :param port: proxy port.
    :return: tuple ``(has_exception, proxy_detected, time_diff)``.
        ``has_exception`` is True when the request failed, in which case the
        tuple is ``(True, False, 999)``. ``proxy_detected`` is True when the
        IP reported through the proxy differs from ``real_pip``.
        ``time_diff`` is the time in seconds spent opening the request.
    """
    try:
        # Build an opener that routes HTTP traffic through the proxy.
        proxy_handler = urllib2.ProxyHandler({'http': '%s:%s' % (host, port)})
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', user_agent)]

        # Build, time, and execute the request. The opener is used directly
        # rather than through install_opener(): installing it would send
        # every later urllib2.urlopen() call in this process through the
        # proxy under test.
        req = urllib2.Request(ip_check_url)
        time_start = time.time()
        conn = opener.open(req, timeout=socket_timeout)
        time_end = time.time()
        try:
            detected_pip = conn.read()
        finally:
            # Do not leak the connection when reading the body fails.
            conn.close()

        # Calculate request time
        time_diff = time_end - time_start

        # The proxy works when the remote side sees a different address
        # than our own public IP.
        proxy_detected = detected_pip != real_pip

    except urllib2.HTTPError as e:
        print("ERROR: Code  %s" % (e.code,))
        return (True, False, 999)
    except Exception as detail:
        print("ERROR:  %s" % (detail,))
        return (True, False, 999)

    return (False, proxy_detected, time_diff)
62 |
63 |
def check_wechat(host, port):
    """Probe a proxy by running a Sogou WeChat search through it.

    :param host: proxy host.
    :param port: proxy port.
    :return: tuple ``(has_exception, proxy_detected, time_diff)``.
        ``has_exception`` is True when the request raised, in which case the
        tuple is ``(True, False, 999)``. ``proxy_detected`` is True when the
        response contains the expected result-page marker. ``time_diff`` is
        the duration of the request in seconds.
    """
    try:
        time_start = time.time()
        proxies = {
            'http': 'http://%s:%s' % (host, port)
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'
        }
        rsp = requests.get("http://weixin.sogou.com/weixin",
                           params={"type": 1, "query": "金融"},
                           proxies=proxies, headers=headers,
                           timeout=1
                           )
        rsp.close()
        time_end = time.time()

        # Calculate request time
        time_diff = time_end - time_start

        # The proxy is usable only when the real result page came back. The
        # page itself is no longer dumped to stdout on every success.
        proxy_detected = '金融的相关微信公众号' in rsp.content

    # requests does not raise urllib2.HTTPError, so one generic handler
    # covers every failure of the probe.
    except Exception as detail:
        print("ERROR:  %s" % (detail,))
        return (True, False, 999)

    return (False, proxy_detected, time_diff)
103 |
104 |
def stringify_children(node):
    """Return all text yielded by ``node.itertext()`` joined into one string."""
    text_parts = node.itertext()
    return "".join(text_parts)
107 |
--------------------------------------------------------------------------------
/bin/downloader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # 加载django环境
3 | import sys
4 | import os
5 | reload(sys)
6 | sys.setdefaultencoding('utf8')
7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
8 | os.environ['DJANGO_SETTINGS_MODULE'] = 'wechatspider.settings'
9 | import django
10 | django.setup()
11 |
12 | import time
13 | import json
14 | from random import randint
15 | from django.conf import settings
16 | from wechatspider.util import get_redis
17 | from wechat.proxies import MysqlProxyBackend
18 | from wechat.downloaders import SeleniumDownloaderBackend
19 | from wechat.constants import KIND_HISTORY, KIND_DETAIL, KIND_KEYWORD
20 | import logging
21 | logger = logging.getLogger()
22 |
23 | CRAWLER_CONFIG = settings.CRAWLER_CONFIG
24 | CRAWLER_GLOBAL_LIMIT_SPEED = settings.CRAWLER_GLOBAL_LIMIT_SPEED
25 |
26 |
class Downloader(object):
    """Worker that pops tasks from the downloader queue and fetches them.

    Every topic produced by a download is pushed onto the extractor queue.
    """

    def __init__(self):
        self.redis = get_redis()

    def get_proxy(self):
        """Return a new ``MysqlProxyBackend`` instance."""
        return MysqlProxyBackend()

    def check_limit_speed(self):
        """Tell whether the chosen proxy is currently rate limited.

        A marker key per proxy is kept in Redis. If it exists the proxy is
        limited; otherwise the marker is created with
        ``CRAWLER_GLOBAL_LIMIT_SPEED`` as its PSETEX expiry and the proxy may
        be used.

        :return: tuple ``(is_limited, proxy)``.
        """
        proxy = self.get_proxy()
        key = '%s:%s' % (CRAWLER_CONFIG['global_limit_speed'], proxy)
        if self.redis.exists(key):
            return True, proxy
        self.redis.psetex(key, CRAWLER_GLOBAL_LIMIT_SPEED, CRAWLER_GLOBAL_LIMIT_SPEED)
        return False, proxy

    def run(self):
        """Consume the downloader queue forever.

        Errors while waiting for a task are printed and retried after a short
        pause; errors while handling a task are printed and re-raised.
        """
        r = self.redis
        if settings.CRAWLER_DEBUG:
            r.delete(CRAWLER_CONFIG["downloader"])
        while True:
            try:
                resp_data = r.brpop(settings.CRAWLER_CONFIG["downloader"])
            except Exception as e:
                print(e)
                # Back off before retrying; without a pause an unreachable
                # Redis turns this loop into a busy spin.
                time.sleep(1)
                continue

            try:
                data = json.loads(resp_data[1])

                logger.debug(data)
                is_limited, proxy = self.check_limit_speed()
                if is_limited:
                    print('# 被限制, 放回去, 下次下载')
                    time.sleep(1)  # wait a second so the task is not re-queued immediately
                    r.lpush(CRAWLER_CONFIG["downloader"], resp_data[1])
                else:
                    print('# 未被限制,可以下载')

                    # Callback handed to the browser backend: each downloaded
                    # topic is forwarded to the extractor queue right away.
                    def process_topic(topic):
                        # Detail and keyword topics are forwarded unchanged;
                        # every other kind is tagged with the task's account id.
                        if topic.get('kind', None) not in (KIND_DETAIL, KIND_KEYWORD):
                            topic["wechat_id"] = data["wechat_id"]
                        r.lpush(CRAWLER_CONFIG["extractor"], json.dumps(topic))
                        logger.debug(topic)

                    with SeleniumDownloaderBackend(proxy=proxy) as browser:
                        kind = data.get('kind')
                        if kind == KIND_DETAIL:
                            browser.download_wechat_topic_detail(data, process_topic)
                        elif kind == KIND_HISTORY:
                            # History download is currently disabled.
                            pass
                        elif kind == KIND_KEYWORD:
                            browser.download_wechat_keyword(data, process_topic)
                        else:
                            browser.download_wechat(data, process_topic)

                    # NOTE(review): nesting reconstructed from a flattened
                    # listing; confirm this pause belongs to the download branch.
                    time.sleep(randint(1, 5))
            except Exception as e:
                print(e)
                raise
92 |
93 |
# Start the download worker only when this file is executed as a script.
if __name__ == '__main__':
    downloader = Downloader()
    downloader.run()
97 |
--------------------------------------------------------------------------------
/wechatspider/static/bootstrap-datepicker/css/bootstrap-datepicker.min.css.map:
--------------------------------------------------------------------------------
1 | {"version":3,"sources":["less/datepicker.less","build/build.less"],"names":[],"mappings":"AAAA,YACC,QAAA,ICsBC,sBAAA,IACG,mBAAA,IACK,cAAA,IDnBT,UAAA,IAHC,mBACA,MAAA,MAGC,2BACD,UAAA,IACY,4CACX,MAAA,MAGD,qBACA,IAAA,EACA,KAAA,EACC,4BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,KACA,WAAA,EACA,oBAAA,eACA,SAAA,SAEA,2BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,KACA,WAAA,EACA,SAAA,SAEuB,mDAAY,KAAA,IACZ,kDAAY,KAAA,IACX,oDAAW,MAAA,IACX,mDAAW,MAAA,IACV,qDAAU,IAAA,KACV,oDAAU,IAAA,KACb,kDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,KAEsB,iDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,KAGF,gBACC,QAAA,KAED,kBACC,OAAA,EACA,sBAAA,KACA,oBAAA,KACA,mBAAA,KACA,iBAAA,KACA,gBAAA,KACA,YAAA,KAED,eAAI,eACH,WAAA,OACA,MAAA,KACA,OAAA,KC5CA,sBAAA,IACG,mBAAA,IACK,cAAA,ID6CR,OAAA,KAKA,uCAAI,uCACH,iBAAA,YAKI,oCADA,kCAEJ,WAAA,KACA,OAAA,QAGA,4BADA,4BAEA,MAAA,KAEA,iCACS,uCACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCACA,WAAA,QACA,cAAA,EAEA,8BAEM,uCACS,6CAFT,oCC5CL,iBAAA,QACA,iBAAkB,gDAClB,iBAAkB,+CAClB,iBAA2E,8DAC3E,iBAAkB,mDAClB,iBAAkB,8CAClB,iBAAkB,2CAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,QAAA,QAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DD8DP,MAAA,KCzEmB,qCAAU,uCAAV,8CAAU,gDAApB,8CAAT,6CAAmB,oDAAU,sDAApB,oDAAT,mDAAyC,uDAAA,iDAAhC,qCAAT,oCAAmB,2CAAU,6CAApB,2CAAT,0CAAyC,8CAAA,wCACxC,iBAAA,QAGD,qCAAA,8CADA,8CACA,oDADA,oDAAA,qCACA,2CADA,2CAEC,iBAAA,UDsEW,0CAEZ,MAAA,KAEa,2CACb,MAAA,KAEA,8BAEM,uCACS,6CAFT,oCAGN,WAAA,KC/FD,sBAAA,EACG,mBAAA,EACK,cAAA,EDgGD,oCAEM,6CACS,mDAFT,0CClEX,iBAAA,QACA,iBAAkB,gDAClB,iBAAkB,+CAClB,iBAA2E,8DAC3E,iBAAkB,mDAClB,iBAAkB,8CAClB,iBAAkB,2CAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,QAAA,QAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DApBR,sBAAA,EACG,mBAAA,EACK,cAAA,EAOY,2CAAU,6CAAV,oDAAU,sDAApB,oDAAT,mDAAmB,0DAAU,4DAApB,0DAAT,yDAAyC,6DAAA,uDAAhC,2CAAT,0CAAmB,iDAAU,mDAApB,iDAAT,gDAAyC,oDAAA,8CACxC,iBAAA,QAGD,2CAAA,oDADA,oDACA,0DADA,0DAAA,2CACA,iDADA,iDAEC,iBAAA,UD4FD,iCAES,0CACS,gDAFT,uCC1ER,iBAAA,QACA,iBAAkB,6CAClB,iBAAkB,4CAClB,iBAA2E,2DAC3E,iBAAkB,gDAClB,i
BAAkB,2CAClB,iBAAkB,wCAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,KAAA,KAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DD2FP,MAAA,KACA,YAAA,EAAA,KAAA,EAAA,gBCvGmB,wCAAU,0CAAV,iDAAU,mDAApB,iDAAT,gDAAmB,uDAAU,yDAApB,uDAAT,sDAAyC,0DAAA,oDAAhC,wCAAT,uCAAmB,8CAAU,gDAApB,8CAAT,6CAAyC,iDAAA,2CACxC,iBAAA,KAGD,wCAAA,iDADA,iDACA,uDADA,uDAAA,wCACA,8CADA,8CAEC,iBAAA,ODoGD,+BAEO,wCACS,8CAFT,qCClFN,iBAAA,QACA,iBAAkB,0CAClB,iBAAkB,yCAClB,iBAA2E,wDAC3E,iBAAkB,6CAClB,iBAAkB,wCAClB,iBAAkB,qCAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,KAAA,KAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DDmGP,MAAA,KACA,YAAA,EAAA,KAAA,EAAA,gBC/GmB,sCAAU,wCAAV,+CAAU,iDAApB,+CAAT,8CAAmB,qDAAU,uDAApB,qDAAT,oDAAyC,wDAAA,kDAAhC,sCAAT,qCAAmB,4CAAU,8CAApB,4CAAT,2CAAyC,+CAAA,yCACxC,iBAAA,KAGD,sCAAA,+CADA,+CACA,qDADA,qDAAA,sCACA,4CADA,4CAEC,iBAAA,OD4GF,6BACC,QAAA,MACA,MAAA,IACA,OAAA,KACA,YAAA,KACA,MAAA,KACA,OAAA,GACA,OAAA,QCjID,sBAAA,IACG,mBAAA,IACK,cAAA,IDkIN,qCADA,mCAEA,WAAA,KAEA,sCACS,4CACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCAEO,6CACS,mDAFT,0CC7GP,iBAAA,QACA,iBAAkB,0CAClB,iBAAkB,yCAClB,iBAA2E,wDAC3E,iBAAkB,6CAClB,iBAAkB,wCAClB,iBAAkB,qCAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,KAAA,KAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DD8HN,MAAA,KACA,YAAA,EAAA,KAAA,EAAA,gBC1IkB,2CAAU,6CAAV,oDAAU,sDAApB,oDAAT,mDAAmB,0DAAU,4DAApB,0DAAT,yDAAyC,6DAAA,uDAAhC,2CAAT,0CAAmB,iDAAU,mDAApB,iDAAT,gDAAyC,oDAAA,8CACxC,iBAAA,KAGD,2CAAA,oDADA,oDACA,0DADA,0DAAA,2CACA,iDADA,iDAEC,iBAAA,ODwIA,iCADA,iCAEA,MAAA,KAKH,+BACC,MAAA,MAGD,+BAEA,kBADA,kBAES,wBACR,OAAA,QACC,qCAAA,wBAAA,wBAAA,8BACA,WAAA,KAKF,gBACC,UAAA,KACA,MAAA,KACA,QAAA,EAAA,IAAA,EAAA,IACA,eAAA,OAKM,2BAAA,4BACN,OAAA,QAEA,6BAAA,8BACC,WAAA,IAKF,uBACC,WAAA,OAEI,mCChMJ,sBAAA,IAAA,EAAA,EAAA,IACG,mBAAA,IAAA,EAAA,EAAA,IACK,cAAA,IAAA,EAAA,EAAA,IDiMJ,kCCnMJ,sBAAA,EAAA,IAAA,IAAA,EACG,mBAAA,EAAA,IAAA,IAAA,EACK,cAAA,EAAA,IAAA,IAAA,EDoMT,yBACC,QAAA,aACA,MAAA,KACA,UAAA,KACA,OAAA,KACA,QAAA,IAAA,IACA,YAAA,IACA,YAAA,KACA,WAAA,OACA,YAAA,EAAA,IAAA,EAAA,KACA,eAAA,OACA,iBAAA,KACA,OAAA,IAAA,MAAA,KACA,YAAA,KACA,aAAA"}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 注意:搜狗微信在2019.10.29下线相关功能过后,该项目废弃 !!!
2 |
3 | # 微信爬虫
4 | 一个爬取微信公众号文章的爬虫。
5 |
6 | # 微信爬虫的由来
7 | 零分贝是一家帮助中国5000万贫困人口与社会公益组织对接的公司。
8 |
9 | 我们通过国家和地方政府的“建档立卡”系统,获取到了一手的贫困户数据,目前有100万左右,总数为5000万,目前每个月都在增长。
10 |
11 | 为了帮助这部分贫困户对接公益机构,我写了这个微信爬虫,从微信公众号发布的文章中找出最新的公益项目。
12 |
13 | 这种找项目的方式的可行性,我们还在试验中。
14 |
15 | 起初,为了快速上线,本爬虫的代码是基于我的另一个 [通用爬虫项目](https://github.com/yijingping/unicrawler) 开发的,我也希望任何对本项目感兴趣的人联系我,与我一同改进这个项目。
16 |
17 | 联系方式:在本项目中新建一个issue留言。
18 |
19 | # 界面预览
20 |
21 | 1) 要爬取的微信公众号列表
22 |
23 | 
24 |
25 | 2) 要爬取的文章关键字列表
26 |
27 | 
28 |
29 | 3) 已经爬取的微信文章
30 |
31 | 
32 |
33 | 4) 查看文章,并标记是否可用
34 |
35 | 
36 |
37 | 5) 控制爬取进程数
38 |
39 | 
40 |
41 |
42 | # 安装
43 |
44 | 1)python环境, 检查python的版本,是否为2.7.x,如果不是,安装2.7.6。
45 |
46 | 如果是centos 6.x,升级python2.6到python2.7,参考教程 http://ruiaylin.github.io/2014/12/12/python%20update/
47 |
48 | 如果是centos 7.x,默认就是python2.7,不用升级
49 |
50 | 如果是mac osx,可以使用virtualenv,安装python2.7
51 |
52 | 2)安装依赖包, clone代码
53 | 安装Mysql-python依赖
54 | ```
55 | yum install python-devel mysql-devel gcc
56 | ```
57 |
58 | 安装lxml依赖
59 | ```
60 | yum install libxslt-devel libxml2-devel
61 | ```
62 |
63 | 安装浏览器环境 selenium依赖.(如果是mac环境,仅需安装firefox, 但确保版本是 firefox 36.0,使用最新的版本会报错)
64 | ```
65 | yum install xorg-x11-server-Xvfb
66 | yum upgrade glib2 # 确保glib2版本大于2.42.2,否则firefox启动会报错
67 | yum install firefox # centos下安装最新的firefox版本
68 | ```
69 |
70 | clone代码,安装依赖python库
71 | ```
72 | $ git clone https://github.com/bowenpay/wechat-spider.git
73 | $ cd wechat-spider
74 | $ pip install -r requirements.txt
75 | ```
76 |
77 | 3) 创建mysql数据库
78 |
79 | 创建数据库wechatspider,默认采用utf8编码。(如果系统支持,可以采用utf8mb4,以兼容emoji字符)
80 |
81 | ```
82 | mysql> CREATE DATABASE `wechatspider` CHARACTER SET utf8;
83 | ```
84 |
85 | 4) 安装和运行Redis
86 |
87 | ```shell
88 | $ wget http://download.redis.io/releases/redis-2.8.3.tar.gz
89 | $ tar xzvf redis-2.8.3.tar.gz
90 | $ cd redis-2.8.3
91 | $ make
92 | $ make install
93 | $ redis-server
94 | ```
95 |
96 | 5) 更新配置文件local_settings
97 |
98 | 在 wechatspider 目录下,添加 `local_settings.py` 文件,配置如下:
99 | ```
100 | # -*- coding: utf-8 -*-
101 |
102 | SECRET_KEY="xxxxxx"
103 |
104 | CRAWLER_DEBUG = True
105 |
106 | # aliyun oss2, 可以将图片和视频存储到阿里云,也可以选择不存储,爬取速度会更快。 默认不存储。
107 | #OSS2_ENABLE = True
108 | #OSS2_CONFIG = {
109 | # "ACCESS_KEY_ID": "XXXXXXXXXXXXXX",
110 | # "ACCESS_KEY_SECRET": "YYYYYYYYYYYYYYYYYYYYYY",
111 | # "ENDPOINT": "",
112 | # "BUCKET_DOMAIN": "oss-cn-hangzhou.aliyuncs.com",
113 | # "BUCKET_NAME": "XXXXX",
114 | # "IMAGES_PATH": "images/",
115 | # "VIDEOS_PATH": "videos/",
116 | # "CDN_DOMAIN": "XXXXXX.oss-cn-hangzhou.aliyuncs.com"
117 | #}
118 | # mysql 数据库配置
119 | DATABASES = {
120 | 'default': {
121 | 'ENGINE': 'django.db.backends.mysql',
122 | 'HOST': '127.0.0.1',
123 | 'NAME': 'wechatspider',
124 | 'USER': 'root',
125 | 'PASSWORD': '',
126 | 'OPTIONS':{
127 | 'charset': 'utf8mb4',
128 | },
129 | }
130 | }
131 | # redis配置,用于消息队列和k-v存储
132 | REDIS_OPTIONS = {
133 | 'host': 'localhost',
134 | 'port': 6379,
135 | 'password': '',
136 | 'db': 4
137 | }
138 |
139 | ```
140 |
141 | 6) 初始化表
142 | ```
143 | $ python manage.py migrate
144 | ```
145 |
146 | 7)启动网站
147 |
148 | ```
149 | python manage.py runserver 0.0.0.0:8001
150 | ```
151 | 访问 http://localhost:8001/。
152 |
153 |
154 | 8) 创建超级管理员账号,访问后台,并配置要爬取的公众号和关键字
155 | ```
156 | python manage.py createsuperuser
157 | ```
158 |
159 |
160 | 9)启动爬虫
161 |
162 | ```shell
163 | $ python bin/scheduler.py
164 | $ python bin/downloader.py
165 | $ python bin/extractor.py
166 | $ python bin/processor.py
167 | ```
168 |
169 | 以上步骤执行成功、并能爬取文章后,可以参考以下部分配置生产环境。
170 |
171 | # 部署nginx
172 | 前期先用nginx将域名www.mydomain.com转发到8001端口。
173 |
174 | # 部署supervisor脚本
175 | 参考文件 `supervisord.conf`
176 |
177 | # 部署crontab脚本
178 | 参考文件 `crontab`
179 |
180 | # 系统使用文档
181 |
182 |
183 | # API接口文档
184 |
185 |
--------------------------------------------------------------------------------
/wechatspider/static/bootstrap-datepicker/css/bootstrap-datepicker.standalone.min.css.map:
--------------------------------------------------------------------------------
1 | {"version":3,"sources":["less/datepicker.less","build/build.less","build/build_standalone.less"],"names":[],"mappings":"AAAA,YACC,QAAA,ICsBC,sBAAA,IACG,mBAAA,IACK,cAAA,IDnBT,UAAA,IAHC,mBACA,MAAA,MAGC,2BACD,UAAA,IACY,4CACX,MAAA,MAGD,qBACA,IAAA,EACA,KAAA,EACC,4BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,KACA,WAAA,EACA,oBAAA,eACA,SAAA,SAEA,2BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,KACA,WAAA,EACA,SAAA,SAEuB,mDAAY,KAAA,IACZ,kDAAY,KAAA,IACX,oDAAW,MAAA,IACX,mDAAW,MAAA,IACV,qDAAU,IAAA,KACV,oDAAU,IAAA,KACb,kDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,KAEsB,iDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,KAGF,gBACC,QAAA,KAED,kBACC,OAAA,EACA,sBAAA,KACA,oBAAA,KACA,mBAAA,KACA,iBAAA,KACA,gBAAA,KACA,YAAA,KAED,eAAI,eACH,WAAA,OACA,MAAA,KACA,OAAA,KC5CA,sBAAA,IACG,mBAAA,IACK,cAAA,ID6CR,OAAA,KAKA,uCAAI,uCACH,iBAAA,YAKI,oCADA,kCAEJ,WAAA,KACA,OAAA,QAGA,4BADA,4BAEA,MAAA,KAEA,iCACS,uCACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCACA,WAAA,QACA,cAAA,EAEA,8BAEM,uCACS,6CAFT,oCC5CL,iBAAA,QACA,iBAAkB,gDAClB,iBAAkB,+CAClB,iBAA2E,8DAC3E,iBAAkB,mDAClB,iBAAkB,8CAClB,iBAAkB,2CAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,QAAA,QAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DD8DP,MAAA,KCzEmB,qCAAU,uCAAV,8CAAU,gDAApB,8CAAT,6CAAmB,oDAAU,sDAApB,oDAAT,mDAAyC,uDAAA,iDAAhC,qCAAT,oCAAmB,2CAAU,6CAApB,2CAAT,0CAAyC,8CAAA,wCACxC,iBAAA,QAGD,qCAAA,8CADA,8CACA,oDADA,oDAAA,qCACA,2CADA,2CAEC,iBAAA,UDsEW,0CAEZ,MAAA,KAEa,2CACb,MAAA,KAEA,8BAEM,uCACS,6CAFT,oCAGN,WAAA,KC/FD,sBAAA,EACG,mBAAA,EACK,cAAA,EDgGD,oCAEM,6CACS,mDAFT,0CClEX,iBAAA,QACA,iBAAkB,gDAClB,iBAAkB,+CAClB,iBAA2E,8DAC3E,iBAAkB,mDAClB,iBAAkB,8CAClB,iBAAkB,2CAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,QAAA,QAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DApBR,sBAAA,EACG,mBAAA,EACK,cAAA,EAOY,2CAAU,6CAAV,oDAAU,sDAApB,oDAAT,mDAAmB,0DAAU,4DAApB,0DAAT,yDAAyC,6DAAA,uDAAhC,2CAAT,0CAAmB,iDAAU,mDAApB,iDAAT,gDAAyC,oDAAA,8CACxC,iBAAA,QAGD,2CAAA,oDADA,oDACA,0DADA,0DAAA,2CACA,iDADA,iDAEC,iBAAA,UD4FD,iCAES,0CACS,gDAFT,uCC1ER,iBAAA,QACA,iBAAkB,6CAClB,iBAAkB,4CAClB
,iBAA2E,2DAC3E,iBAAkB,gDAClB,iBAAkB,2CAClB,iBAAkB,wCAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,KAAA,KAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DD2FP,MAAA,KACA,YAAA,EAAA,KAAA,EAAA,gBCvGmB,wCAAU,0CAAV,iDAAU,mDAApB,iDAAT,gDAAmB,uDAAU,yDAApB,uDAAT,sDAAyC,0DAAA,oDAAhC,wCAAT,uCAAmB,8CAAU,gDAApB,8CAAT,6CAAyC,iDAAA,2CACxC,iBAAA,KAGD,wCAAA,iDADA,iDACA,uDADA,uDAAA,wCACA,8CADA,8CAEC,iBAAA,ODoGD,+BAEO,wCACS,8CAFT,qCClFN,iBAAA,QACA,iBAAkB,0CAClB,iBAAkB,yCAClB,iBAA2E,wDAC3E,iBAAkB,6CAClB,iBAAkB,wCAClB,iBAAkB,qCAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,KAAA,KAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DDmGP,MAAA,KACA,YAAA,EAAA,KAAA,EAAA,gBC/GmB,sCAAU,wCAAV,+CAAU,iDAApB,+CAAT,8CAAmB,qDAAU,uDAApB,qDAAT,oDAAyC,wDAAA,kDAAhC,sCAAT,qCAAmB,4CAAU,8CAApB,4CAAT,2CAAyC,+CAAA,yCACxC,iBAAA,KAGD,sCAAA,+CADA,+CACA,qDADA,qDAAA,sCACA,4CADA,4CAEC,iBAAA,OD4GF,6BACC,QAAA,MACA,MAAA,IACA,OAAA,KACA,YAAA,KACA,MAAA,KACA,OAAA,GACA,OAAA,QCjID,sBAAA,IACG,mBAAA,IACK,cAAA,IDkIN,qCADA,mCAEA,WAAA,KAEA,sCACS,4CACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCAEO,6CACS,mDAFT,0CC7GP,iBAAA,QACA,iBAAkB,0CAClB,iBAAkB,yCAClB,iBAA2E,wDAC3E,iBAAkB,6CAClB,iBAAkB,wCAClB,iBAAkB,qCAClB,kBAAA,SACA,OAAA,2GAfF,aAAA,KAAA,KAAA,QACA,aAAA,eAAA,eAAA,gBAPA,OAAQ,0DD8HN,MAAA,KACA,YAAA,EAAA,KAAA,EAAA,gBC1IkB,2CAAU,6CAAV,oDAAU,sDAApB,oDAAT,mDAAmB,0DAAU,4DAApB,0DAAT,yDAAyC,6DAAA,uDAAhC,2CAAT,0CAAmB,iDAAU,mDAApB,iDAAT,gDAAyC,oDAAA,8CACxC,iBAAA,KAGD,2CAAA,oDADA,oDACA,0DADA,0DAAA,2CACA,iDADA,iDAEC,iBAAA,ODwIA,iCADA,iCAEA,MAAA,KAKH,+BACC,MAAA,MAGD,+BAEA,kBADA,kBAES,wBACR,OAAA,QACC,qCAAA,wBAAA,wBAAA,8BACA,WAAA,KAKF,gBACC,UAAA,KACA,MAAA,KACA,QAAA,EAAA,IAAA,EAAA,IACA,eAAA,OAKM,2BAAA,4BACN,OAAA,QAEA,6BAAA,8BACC,WAAA,IAKF,uBACC,WAAA,OAEI,mCChMJ,sBAAA,IAAA,EAAA,EAAA,IACG,mBAAA,IAAA,EAAA,EAAA,IACK,cAAA,IAAA,EAAA,EAAA,IDiMJ,kCCnMJ,sBAAA,EAAA,IAAA,IAAA,EACG,mBAAA,EAAA,IAAA,IAAA,EACK,cAAA,EAAA,IAAA,IAAA,EDoMT,yBACC,QAAA,aACA,MAAA,KACA,UAAA,KACA,OAAA,KACA,QAAA,IAAA,IACA,YAAA,IACA,YAAA,KACA,WAAA,OACA,YAAA,EAAA,IAAA,EAAA,KACA,eAAA,OACA,iBAAA,KACA,OAAA,IAAA,MAAA,KACA,YAAA,KACA,aAAA,KE3MC,0BACC,SAAA,SACA
,IAAA,KACA,KAAA,EACA,QAAA,KACA,MAAA,KACA,QAAA,KACA,UAAA,MACA,WAAA,KACA,iBAAA,KACA,OAAA,IAAA,MAAA,KACA,OAAA,IAAA,MAAA,eDpBF,sBAAA,IACG,mBAAA,IACK,cAAA,ICDR,mBAAA,EAAA,IAAA,KAAA,eACG,gBAAA,EAAA,IAAA,KAAA,eACK,WAAA,EAAA,IAAA,KAAA,eAoBN,wBAAA,YACG,qBAAA,QACK,gBAAA,YACR,oBAAA,IACA,qBAAA,IAGA,MAAA,KACA,UAAA,KACA,YAAA,KAII,iCAAJ,iCAAI,6BAAJ,6BACE,QAAA,IAAA"}
--------------------------------------------------------------------------------
/wechatspider/static/bootstrap-datepicker/css/bootstrap-datepicker3.min.css.map:
--------------------------------------------------------------------------------
1 | {"version":3,"sources":["less/datepicker3.less","build/build3.less"],"names":[],"mappings":"AAAA,YACC,cAAA,IAIA,UAAA,IAHC,mBACA,MAAA,MAGC,2BACD,UAAA,IACY,4CACX,MAAA,MAGD,qBACA,IAAA,EACA,KAAA,EACA,QAAA,IACC,4BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,gBACA,WAAA,EACA,oBAAA,eACA,SAAA,SAEA,2BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,KACA,WAAA,EACA,SAAA,SAEuB,mDAAY,KAAA,IACZ,kDAAY,KAAA,IACX,oDAAW,MAAA,IACX,mDAAW,MAAA,IACV,qDAAU,IAAA,KACV,oDAAU,IAAA,KACb,kDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,gBAEsB,iDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,KAGF,gBACC,QAAA,KAED,kBACC,OAAA,EACA,sBAAA,KACA,oBAAA,KACA,mBAAA,KACA,iBAAA,KACA,gBAAA,KACA,YAAA,KAEC,wBAAI,wBACH,WAAA,OACA,MAAA,KACA,OAAA,KACA,cAAA,IACA,OAAA,KAOF,uCAAI,uCACH,iBAAA,YAKA,4BADA,4BAEA,MAAA,KAEI,kCACJ,gCACA,WAAA,KACA,OAAA,QAEA,iCACS,uCACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCC9DD,MAAA,KACA,iBAAA,QACA,aAAA,QD+DC,cAAA,EC5DA,0CADA,0CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,0CACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,2CADA,2CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,iDADA,iDADA,iDAEA,iDADA,iDADA,iDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,mDADA,mDADA,mDAEA,oDADA,oDADA,oDAEA,6DADA,6DADA,6DAGC,iBAAA,QACI,aAAA,QD+BN,4CACA,WAAA,QAGA,6CACS,oDACT,WAAA,QACA,MAAA,KAGD,8BC7ED,MAAA,KACA,iBAAA,QACA,aAAA,QAGC,oCADA,oCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,oCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,qCADA,qCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,2CADA,2CADA,2CAEA,2CADA,2CADA,2CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,6CADA,6CADA,6CAEA,8CADA,8CADA,8CAEA,uDADA,uDADA,uDAGC,iBAAA,QACI,aAAA,QD6CN,sCACA,WAAA,QAGA,uCACS,8CACT,WAAA,QACA,MAAA,KAGD,8BC3FD,MAAA,KACA,iBAAA,KACA,aAAA,KD4FC,cAAA,ECzFA,oCADA,oCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,oCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,qCADA,qCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,2CADA,2CADA,2CAEA,2CADA,2CADA,2CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,6CADA,6CADA,6CAEA,8CADA,8CADA,8CAEA,uDADA,uDADA,uDAGC,iBAAA,KACI,aAAA,KD4DN,sCACA,WAAA,QAGA,uCACS,8CACT,WAAA,KACA,MAAA,KAGK,0CC1GP,MAAA,KAC
A,iBAAA,QACA,aAAA,QAGC,gDADA,gDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,gDACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,iDADA,iDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,uDADA,uDADA,uDAEA,uDADA,uDADA,uDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,yDADA,yDADA,yDAEA,0DADA,0DADA,0DAEA,mEADA,mEADA,mEAGC,iBAAA,QACI,aAAA,QD0EN,kDACA,WAAA,QAGA,mDACS,0DACT,WAAA,QACA,MAAA,KAGK,oCCxHP,MAAA,KACA,iBAAA,QACA,aAAA,QAGC,0CADA,0CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,0CACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,2CADA,2CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,iDADA,iDADA,iDAEA,iDADA,iDADA,iDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,mDADA,mDADA,mDAEA,oDADA,oDADA,oDAEA,6DADA,6DADA,6DAGC,iBAAA,QACI,aAAA,QDwFN,6CACS,oDACT,WAAA,QACA,MAAA,KAGD,iCACS,6CCnIV,MAAA,KACA,iBAAA,KACA,aAAA,KDmIC,YAAA,EAAA,KAAA,EAAA,gBChIA,uCAAA,mDADA,mDAAA,uCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,mDAAA,uCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,wCAAA,oDADA,oDAAA,wCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,8CADA,8CADA,8CAEA,0DADA,0DADA,0DAEA,0DADA,0DADA,0DAEA,8CADA,8CADA,8CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,gDADA,gDADA,gDAEA,4DADA,4DADA,4DAEA,6DADA,6DADA,6DAEA,iDADA,iDADA,iDAEA,0DAAA,sEADA,sEADA,sEACA,0DADA,0DAGC,iBAAA,KACI,aAAA,KDmGP,+BACO,2CCxIR,MAAA,KACA,iBAAA,QACA,aAAA,QDwIC,YAAA,EAAA,KAAA,EAAA,gBCrIA,qCAAA,iDADA,iDAAA,qCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,iDAAA,qCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,sCAAA,kDADA,kDAAA,sCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,4CADA,4CADA,4CAEA,wDADA,wDADA,wDAEA,wDADA,wDADA,wDAEA,4CADA,4CADA,4CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,8CADA,8CADA,8CAEA,0DADA,0DADA,0DAEA,2DADA,2DADA,2DAEA,+CADA,+CADA,+CAEA,wDAAA,oEADA,oEADA,oEACA,wDADA,wDAGC,iBAAA,QACI,aAAA,QDwGR,6BACC,QAAA,MACA,MAAA,IACA,OAAA,KACA,YAAA,KACA,MAAA,KACA,OAAA,GACA,OAAA,QACA,cAAA,IAEC,qCADA,mCAEA,WAAA,KAEA,sCACS,4CACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCAEO,6CACS,mDAFT,0CChKT,MAAA,KACA,iBAAA,QACA,aAAA,QDkKE,YAAA,EAAA,KAAA,EAAA,gBC/JD,mDADA,mDACA,yDADA,yDACA,0CADA,0CACA,gDADA,gDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,mDAAA,yDAAA,0CAAA,gDACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,2CAAA,oDADA,oDACA,0DADA,0DAAA,2CACA,iDADA,i
DAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,iDADA,iDADA,iDAEA,0DADA,0DADA,0DAEA,0DADA,0DADA,0DAEA,gEADA,gEADA,gEAEA,gEADA,gEADA,gEAEA,iDADA,iDADA,iDAEA,uDADA,uDADA,uDAEA,uDADA,uDADA,uDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,4DADA,4DADA,4DAEA,mDADA,mDADA,mDAEA,kEADA,kEADA,kEAEA,mEADA,mEADA,mEAEA,6DADA,6DADA,6DAEA,yDADA,yDADA,yDAEA,0DADA,0DADA,0DAEA,oDADA,oDADA,oDAEA,sEADA,sEADA,sEAEA,4EADA,4EADA,4EAEA,6DADA,6DADA,6DAEA,mEADA,mEADA,mEAGC,iBAAA,QACI,aAAA,QDmIN,iCADA,iCAEA,MAAA,KAKH,+BACC,MAAA,MAGD,+BAEA,kBADA,kBAES,wBACR,OAAA,QACC,qCAAA,wBAAA,wBAAA,8BACA,WAAA,KAKF,gBACC,UAAA,KACA,MAAA,KACA,QAAA,EAAA,IAAA,EAAA,IACA,eAAA,OAGgB,qCACjB,OAAA,QAED,iBACC,MAAA,KACA,uBACC,WAAA,OAEI,mCACJ,cAAA,IAAA,EAAA,EAAA,IAEI,kCACJ,cAAA,EAAA,IAAA,IAAA,EAED,oCACC,MAAA,KACA,UAAA,KACA,QAAA,IAAA,IACA,YAAA,WACA,YAAA,EAAA,IAAA,EAAA,KACA,aAAA,IAAA,EACA,YAAA,KACA,aAAA"}
--------------------------------------------------------------------------------
/wechatspider/static/bootstrap-datepicker/css/bootstrap-datepicker3.standalone.min.css.map:
--------------------------------------------------------------------------------
1 | {"version":3,"sources":["less/datepicker3.less","build/build3.less","build/build_standalone3.less"],"names":[],"mappings":"AAAA,YACC,cAAA,IAIA,UAAA,IAHC,mBACA,MAAA,MAGC,2BACD,UAAA,IACY,4CACX,MAAA,MAGD,qBACA,IAAA,EACA,KAAA,EACA,QAAA,IACC,4BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,gBACA,WAAA,EACA,oBAAA,eACA,SAAA,SAEA,2BACA,QAAA,GACA,QAAA,aACA,YAAA,IAAA,MAAA,YACA,aAAA,IAAA,MAAA,YACA,cAAA,IAAA,MAAA,KACA,WAAA,EACA,SAAA,SAEuB,mDAAY,KAAA,IACZ,kDAAY,KAAA,IACX,oDAAW,MAAA,IACX,mDAAW,MAAA,IACV,qDAAU,IAAA,KACV,oDAAU,IAAA,KACb,kDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,gBAEsB,iDACtB,OAAA,KACA,cAAA,EACA,WAAA,IAAA,MAAA,KAGF,gBACC,QAAA,KAED,kBACC,OAAA,EACA,sBAAA,KACA,oBAAA,KACA,mBAAA,KACA,iBAAA,KACA,gBAAA,KACA,YAAA,KAEC,wBAAI,wBACH,WAAA,OACA,MAAA,KACA,OAAA,KACA,cAAA,IACA,OAAA,KAOF,uCAAI,uCACH,iBAAA,YAKA,4BADA,4BAEA,MAAA,KAEI,kCACJ,gCACA,WAAA,KACA,OAAA,QAEA,iCACS,uCACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCC9DD,MAAA,KACA,iBAAA,QACA,aAAA,QD+DC,cAAA,EC5DA,0CADA,0CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,0CACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,2CADA,2CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,iDADA,iDADA,iDAEA,iDADA,iDADA,iDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,mDADA,mDADA,mDAEA,oDADA,oDADA,oDAEA,6DADA,6DADA,6DAGC,iBAAA,QACI,aAAA,QD+BN,4CACA,WAAA,QAGA,6CACS,oDACT,WAAA,QACA,MAAA,KAGD,8BC7ED,MAAA,KACA,iBAAA,QACA,aAAA,QAGC,oCADA,oCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,oCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,qCADA,qCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,2CADA,2CADA,2CAEA,2CADA,2CADA,2CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,6CADA,6CADA,6CAEA,8CADA,8CADA,8CAEA,uDADA,uDADA,uDAGC,iBAAA,QACI,aAAA,QD6CN,sCACA,WAAA,QAGA,uCACS,8CACT,WAAA,QACA,MAAA,KAGD,8BC3FD,MAAA,KACA,iBAAA,KACA,aAAA,KD4FC,cAAA,ECzFA,oCADA,oCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,oCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,qCADA,qCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,2CADA,2CADA,2CAEA,2CADA,2CADA,2CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,6CADA,6CADA,6CAEA,8CADA,8CADA,8CAEA,uDADA,uDADA,uDAGC,iBAAA,KACI,aAAA,KD4DN,sCACA,WAAA,QAGA,uCACS,8CACT,WAAA
,KACA,MAAA,KAGK,0CC1GP,MAAA,KACA,iBAAA,QACA,aAAA,QAGC,gDADA,gDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,gDACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,iDADA,iDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,uDADA,uDADA,uDAEA,uDADA,uDADA,uDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,yDADA,yDADA,yDAEA,0DADA,0DADA,0DAEA,mEADA,mEADA,mEAGC,iBAAA,QACI,aAAA,QD0EN,kDACA,WAAA,QAGA,mDACS,0DACT,WAAA,QACA,MAAA,KAGK,oCCxHP,MAAA,KACA,iBAAA,QACA,aAAA,QAGC,0CADA,0CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,0CACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,2CADA,2CAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,iDADA,iDADA,iDAEA,iDADA,iDADA,iDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,mDADA,mDADA,mDAEA,oDADA,oDADA,oDAEA,6DADA,6DADA,6DAGC,iBAAA,QACI,aAAA,QDwFN,6CACS,oDACT,WAAA,QACA,MAAA,KAGD,iCACS,6CCnIV,MAAA,KACA,iBAAA,KACA,aAAA,KDmIC,YAAA,EAAA,KAAA,EAAA,gBChIA,uCAAA,mDADA,mDAAA,uCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,mDAAA,uCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,wCAAA,oDADA,oDAAA,wCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,8CADA,8CADA,8CAEA,0DADA,0DADA,0DAEA,0DADA,0DADA,0DAEA,8CADA,8CADA,8CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,gDADA,gDADA,gDAEA,4DADA,4DADA,4DAEA,6DADA,6DADA,6DAEA,iDADA,iDADA,iDAEA,0DAAA,sEADA,sEADA,sEACA,0DADA,0DAGC,iBAAA,KACI,aAAA,KDmGP,+BACO,2CCxIR,MAAA,KACA,iBAAA,QACA,aAAA,QDwIC,YAAA,EAAA,KAAA,EAAA,gBCrIA,qCAAA,iDADA,iDAAA,qCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,iDAAA,qCACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,sCAAA,kDADA,kDAAA,sCAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,4CADA,4CADA,4CAEA,wDADA,wDADA,wDAEA,wDADA,wDADA,wDAEA,4CADA,4CADA,4CAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,8CADA,8CADA,8CAEA,0DADA,0DADA,0DAEA,2DADA,2DADA,2DAEA,+CADA,+CADA,+CAEA,wDAAA,oEADA,oEADA,oEACA,wDADA,wDAGC,iBAAA,QACI,aAAA,QDwGR,6BACC,QAAA,MACA,MAAA,IACA,OAAA,KACA,YAAA,KACA,MAAA,KACA,OAAA,GACA,OAAA,QACA,cAAA,IAEC,qCADA,mCAEA,WAAA,KAEA,sCACS,4CACT,WAAA,IACA,MAAA,KACA,OAAA,QAEA,oCAEO,6CACS,mDAFT,0CChKT,MAAA,KACA,iBAAA,QACA,aAAA,QDkKE,YAAA,EAAA,KAAA,EAAA,gBC/JD,mDADA,mDACA,yDADA,yDACA,0CADA,0CACA,gDADA,gDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAEL,mDAAA,yDAAA,0CAAA,gDACC,MAAA,KACA,iBAAA,QACI,aAAA,QAGL,2CAAA,oDADA,
oDACA,0DADA,0DAAA,2CACA,iDADA,iDAEC,MAAA,KACA,iBAAA,QACI,aAAA,QAIH,iDADA,iDADA,iDAEA,0DADA,0DADA,0DAEA,0DADA,0DADA,0DAEA,gEADA,gEADA,gEAEA,gEADA,gEADA,gEAEA,iDADA,iDADA,iDAEA,uDADA,uDADA,uDAEA,uDADA,uDADA,uDAGC,MAAA,KACA,iBAAA,QACI,aAAA,QAQL,4DADA,4DADA,4DAEA,mDADA,mDADA,mDAEA,kEADA,kEADA,kEAEA,mEADA,mEADA,mEAEA,6DADA,6DADA,6DAEA,yDADA,yDADA,yDAEA,0DADA,0DADA,0DAEA,oDADA,oDADA,oDAEA,sEADA,sEADA,sEAEA,4EADA,4EADA,4EAEA,6DADA,6DADA,6DAEA,mEADA,mEADA,mEAGC,iBAAA,QACI,aAAA,QDmIN,iCADA,iCAEA,MAAA,KAKH,+BACC,MAAA,MAGD,+BAEA,kBADA,kBAES,wBACR,OAAA,QACC,qCAAA,wBAAA,wBAAA,8BACA,WAAA,KAKF,gBACC,UAAA,KACA,MAAA,KACA,QAAA,EAAA,IAAA,EAAA,IACA,eAAA,OAGgB,qCACjB,OAAA,QAED,iBACC,MAAA,KACA,uBACC,WAAA,OAEI,mCACJ,cAAA,IAAA,EAAA,EAAA,IAEI,kCACJ,cAAA,EAAA,IAAA,IAAA,EAED,oCACC,MAAA,KACA,UAAA,KACA,QAAA,IAAA,IACA,YAAA,WACA,YAAA,EAAA,IAAA,EAAA,KACA,aAAA,IAAA,EACA,YAAA,KACA,aAAA,KElOC,0BACC,SAAA,SACA,IAAA,KACA,KAAA,EACA,QAAA,KACA,QAAA,KACA,MAAA,KACA,UAAA,MACA,WAAA,KACA,iBAAA,KACA,OAAA,IAAA,MAAA,KACA,OAAA,IAAA,MAAA,gBACA,cAAA,IApBF,mBAAA,EAAA,IAAA,KAAA,iBACG,gBAAA,EAAA,IAAA,KAAA,iBACK,WAAA,EAAA,IAAA,KAAA,iBAoBN,wBAAA,YACG,qBAAA,QACK,gBAAA,YAGR,MAAA,KACA,UAAA,KACA,YAAA,WAII,iCAAJ,iCAAI,6BAAJ,6BACE,QAAA,EAAA"}
--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
1 | 接口文档
2 | ==========
3 |
4 | # 添加/修改公众号
5 | 功能: 输入需要爬取的公众号id和爬取频率,如果公众号不存在就添加,如果已经存在就更新爬取频率.同时更新公众号信息.包括头像,描述,二维码等.
6 |
7 | URL: http://127.0.0.1:8090/api/wechat/add/
8 |
9 | 方法: POST
10 |
11 | 参数:
12 | ```
13 | wechatid: 微信id
14 | frequency: 爬取频率, 正整数, 单位是分钟
15 | ```
16 | 返回:
17 | ```
18 | // 已存在更新返回
19 | {
20 | "message": "已更新",
21 | "ret": 0
22 | }
23 | // 不存在保存后返回
24 | {
25 | "message": "已添加",
26 | "ret": 0
27 | }
28 | // 没有找到对应的微信id
29 | {
30 | "message": "公众号不存在",
31 | "ret": 1
32 | }
33 | ```
34 |
35 |
36 | # 添加文章
37 |
38 | 功能: 输入需要爬取的文章URL.将文章加入爬取队列优先爬取.同时获取相关公众号信息,并将爬取频率设置为0
39 |
40 | URL: http://127.0.0.1:8090/api/wechat/topic/add/
41 |
42 | 方法: POST
43 |
44 | 参数:
45 | ```
46 | url: 文章链接,如http://mp.weixin.qq.com/s?__biz=MjM5NDg2NjA4MQ==&mid=402566965&idx=1&sn=616fb1ffa9afc5acc3f4f2a210f6dd83&3rd=MzA3MDU4NTYzMw==&scene=6#rd
47 | ```
48 |
49 | 返回:
50 | ```
51 | // 正确返回
52 | {
53 | 'ret': 0,
54 | 'message': '提交成功,链接已经提交给爬虫,稍后查看爬取结果'
55 | }
56 |
57 | // 错误返回
58 | {
59 | 'ret': 1,
60 | 'message': '提交失败,url必须以 http://mp.weixin.qq.com/ 开头'
61 | }
62 | ```
63 |
64 |
65 |
66 |
67 | # 搜索api
68 |
69 | 功能: 输入公众号名称或者id,返回对应的列表.
70 |
71 | URL: http://127.0.0.1:8090/api/wechat/search?query=财经
72 |
73 | 方法: GET
74 |
75 | 返回:
76 |
77 | ```
78 | {
79 | "data": [
80 | {
81 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/h3VIR0DEFX5qh1EknyCj",
82 | "wechatid": "LLDS365",
83 | "intro": "本号出租/出售.关注后,即可进入【财经论坛】,和大家互动交流! 专注分享最实用的财经消息以及股票内容!欢迎关注!合作QQ800178778",
84 | "name": "財經",
85 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFt3HcMp751wj8bt3PNRmRzHg"
86 | },
87 | {
88 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/vklgb27EEFBvh38Mnxya",
89 | "wechatid": "zdbank",
90 | "intro": "指点财经每日提供全面真实的财经快讯,发布最新银行理财、保险、资管、信托等正规金融机构发布的产品信息并高效对接理财师,成为投资者一站式理财资讯交互平台.",
91 | "name": "指点财经",
92 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFt-wxZs0pb4AGlpIKntt-xxo"
93 | },
94 | {
95 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/MHVmaV-EomHdh04KnyAU",
96 | "wechatid": "cctvyscj",
97 | "intro": "中央电视台",
98 | "name": "央视财经",
99 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFt_cUwbglodLkLT749ZABOt4"
100 | },
101 | {
102 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/dUOmqSDE0R6uhzHKnxZR",
103 | "wechatid": "tttmoney",
104 | "intro": "原创的财经评论,独立的观察视角,深度的市场剖析. 联系方式:QQ:1527356260;邮箱:1527356260@qq.com.",
105 | "name": "博闻财经",
106 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFtza9DUiaCPSyfc_MQO4N5PY"
107 | },
108 | {
109 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/H0jT3OvEsNXPh-q-nx07",
110 | "wechatid": "lianhuacaijing",
111 | "intro": "莲花APP官微,让你的财富在这里绽放.依托证券时报22年资本市场专业背景,抢先知晓上市公司公告解读、热点事件分析、主题机会挖掘;更有上市公司深度研究及最新机构调研动向.",
112 | "name": "莲花财经",
113 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFtxNDf-e3-9SNoKmaZ8B9hcw"
114 | },
115 | {
116 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/5XVNQmXEd1sIh3QhnyDB",
117 | "wechatid": "sohucaijing2013",
118 | "intro": "搜狐财经,不海量,不枯燥,财经可以很简单!这里有最及时资讯、最独家爆料、最麻辣点评,最权威的专家学者声音.",
119 | "name": "搜狐财经",
120 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFt89mqsRNr-HSk72nHeLEUls"
121 | },
122 | {
123 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/D0wGCVjEpGbbh0lqnxkr",
124 | "wechatid": "bxcjtv",
125 | "intro": "百姓财经栏目是四川广播电视台经济频道2007年强力推出的一档财经类节目.从百姓投资理财的需求出发,以中国股市为着眼点,联手全国一流券商、邀请资深专业人士,以超前的眼光、敏锐的视角、权威的分析、把握大势,...",
126 | "name": "百姓财经",
127 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFtzfSD2HictGHcf_wip7ImVU"
128 | },
129 | {
130 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/tkP59tLEEuxth8OVnxaS",
131 | "wechatid": "njue_xcb_weixin",
132 | "intro": "南京财经大学官方公众平台,传播校园文化,传递校园信息,服务广大师生校友.欢迎关注!",
133 | "name": "南京财经大学",
134 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFt2H_o--VPZokXgl-56Mhh1g"
135 | },
136 | {
137 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/UUgYF4fE-rmBh5Z0nx11",
138 | "wechatid": "bh2056",
139 | "intro": "做有趣、有料、有新意的大众金融平台",
140 | "name": "渤海财经",
141 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFtyo_x125073RKHO4j27ZEtE"
142 | },
143 | {
144 | "qrcode": "http://img03.sogoucdn.com/app/a/100520105/7HVuYcHEfv8Bh9ACnyDI",
145 | "wechatid": "wzdsbcf",
146 | "intro": "欢迎关注温州财经,本公众号由温州都市报财经部运营,为广大温都爱理财俱乐部成员提供在线搜索、便民服务、财经新闻,以及在线股票问诊、国企类理财产品服务.",
147 | "name": "温州财经",
148 | "avatar": "http://img01.sogoucdn.com/app/a/100520090/oIWsFt0G2xPqCn-pG1sAAY2n8GI4"
149 | }
150 | ],
151 | "ret": 0
152 | }
153 | ```
--------------------------------------------------------------------------------
/wechat/models.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | from django.db import models
4 | from datetime import date, datetime, timedelta
5 |
class Wechat(models.Model):
    """A WeChat official account together with its crawl schedule.

    Besides the stored fields, the model offers three helpers that count the
    ``Topic`` rows pointing at this account.
    """

    # Values stored in ``status``.
    STATUS_DEFAULT = 0
    STATUS_DISABLE = 1
    STATUS_DELETE = 2
    STATUS_CHOICES = (
        (STATUS_DEFAULT, '默认'),
        (STATUS_DISABLE, '禁用'),
        (STATUS_DELETE, '删除')
    )

    # The first positional argument of each field is its verbose_name.
    avatar = models.CharField('公众号头像', max_length=500, blank=True, default='')
    qrcode = models.CharField('二维码', max_length=500, blank=True, default='')
    name = models.CharField('公众号', max_length=100)
    wechatid = models.CharField('公众号id', max_length=100, unique=True)
    intro = models.TextField('简介', default='', blank=True)
    # Crawl interval; the label states the unit is minutes.
    frequency = models.IntegerField('爬取频率, 单位:分钟', default=0)
    next_crawl_time = models.DateTimeField('下次爬取时间', auto_now_add=True)
    create_time = models.DateTimeField('创建时间', auto_now_add=True)
    status = models.IntegerField("状态", default=STATUS_DEFAULT, choices=STATUS_CHOICES)

    def last_day_topics_count(self):
        """Count this account's topics published after 00:00 of yesterday."""
        cutoff = datetime.combine(date.today() - timedelta(days=1), datetime.min.time())
        recent = Topic.objects.filter(wechat=self, publish_time__gt=cutoff)
        return recent.count()

    def last_week_topics_count(self):
        """Count this account's topics published after 00:00 seven days ago."""
        cutoff = datetime.combine(date.today() - timedelta(days=7), datetime.min.time())
        recent = Topic.objects.filter(wechat=self, publish_time__gt=cutoff)
        return recent.count()

    def total_topics_count(self):
        """Count every topic that belongs to this account."""
        owned = Topic.objects.filter(wechat=self)
        return owned.count()

    def __unicode__(self):
        return self.name

    class Meta:
        verbose_name_plural = "公众号"
43 |
44 |
class Topic(models.Model):
    """A single crawled article that belongs to a ``Wechat`` account."""

    wechat = models.ForeignKey('Wechat', verbose_name='公众号')
    # Unique key; the label describes it as the MD5 of the article url.
    uniqueid = models.CharField('url的md5值', unique=True, max_length=100)
    words = models.IntegerField('字数', default=0)

    # Article location and headline data.
    url = models.CharField('文章的url', max_length=500, default='')
    avatar = models.CharField('缩略图地址', max_length=500, default='')
    title = models.CharField('标题', max_length=200)
    origin_title = models.CharField('原文标题', max_length=200, default='')

    # Text payloads: summary, article content, original content.
    abstract = models.TextField('内容简介', default='')
    content = models.TextField('文章内容', default='')
    source = models.TextField('文章原内容', default='')

    # Read and like counters.
    read_num = models.IntegerField('阅读数', default=0)
    like_num = models.IntegerField('点赞数', default=0)

    publish_time = models.DateTimeField('发布时间', db_index=True)
    create_time = models.DateTimeField('创建时间', auto_now_add=True)
    update_time = models.DateTimeField('更新时间', auto_now=True)
    # Stored as a string even though the label reads "is available".
    available = models.CharField('是否可用', db_index=True, max_length=100, default='')

    def __unicode__(self):
        return self.title

    class Meta:
        verbose_name_plural = "文章"
72 |
73 |
class Proxy(models.Model):
    """An HTTP proxy entry with its check status and measured speed."""

    # Values stored in ``status``: not checked / check succeeded / check failed.
    STATUS_NEW = 0
    STATUS_SUCCESS = 1
    STATUS_FAIL = 2
    STATUS_CHOICES = (
        (STATUS_NEW, '未检测'),
        (STATUS_SUCCESS, '检测成功'),
        (STATUS_FAIL, '检测失败'),
    )

    # Values stored in ``kind``: search proxy / download proxy.
    KIND_SEARCH = 0
    KIND_DOWNLOAD = 1
    KIND_CHOICES = (
        (KIND_SEARCH, '搜索代理'),
        (KIND_DOWNLOAD, '下载代理'),
    )

    kind = models.IntegerField("类型", default=KIND_DOWNLOAD, choices=KIND_CHOICES)
    # Optional credentials; both default to the empty string.
    user = models.CharField(max_length=100, blank=True, default='')
    password = models.CharField(max_length=100, blank=True, default='')
    host = models.CharField(max_length=100)
    port = models.IntegerField(default=80)
    # Connection speed; the label states the unit is ms.
    speed = models.IntegerField("连接速度(ms)", default=0)
    status = models.IntegerField("状态", default=STATUS_NEW, choices=STATUS_CHOICES)
    retry = models.IntegerField("尝试次数", default=0)
    create_time = models.DateTimeField('创建时间', auto_now_add=True)
    update_time = models.DateTimeField('更新时间', auto_now=True)

    class Meta:
        verbose_name_plural = "访问代理"
102 |
103 |
class Word(models.Model):
    """A keyword with its own crawl schedule."""

    # Only the keyword kind is active; a "topic" kind (value 1) is disabled
    # in the original source.
    KIND_KEYWORD = 0
    KIND_CHOICES = (
        (KIND_KEYWORD, '关键词'),
    )

    kind = models.IntegerField("类型", default=KIND_KEYWORD, choices=KIND_CHOICES)
    text = models.CharField('词', max_length=100)
    intro = models.TextField('简介', default='', blank=True)
    # Crawl interval; the label states the unit is minutes.
    frequency = models.IntegerField('爬取频率, 单位:分钟', default=100)
    next_crawl_time = models.DateTimeField('下次爬取时间', auto_now_add=True)
    create_time = models.DateTimeField('创建时间', auto_now_add=True)

    def __unicode__(self):
        # Rendered as "<kind> <text>".
        parts = (self.kind, self.text)
        return '%s %s' % parts

    class Meta:
        verbose_name_plural = "词"
123 |
--------------------------------------------------------------------------------
/wechat/templates/wechat/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | {% load paginator %}
3 | {% load staticfiles %}
4 | {% block main-content %}
5 |
6 |
7 |
8 |
9 |
10 | 添加公众号
11 |
12 |
13 |
22 |
23 |
24 |
25 |
26 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
55 |
95 |
96 |
99 |
100 |
101 |
102 |
103 |
104 | {% endblock %}
105 |
106 | {% block footer-js %}
107 |
108 |
109 |
110 |
111 |
135 | {% endblock %}
136 |
--------------------------------------------------------------------------------
/wechatspider/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | Django settings for wechatspider project.
5 |
6 | Generated by 'django-admin startproject' using Django 1.8.1.
7 |
8 | For more information on this file, see
9 | https://docs.djangoproject.com/en/1.8/topics/settings/
10 |
11 | For the full list of settings and their values, see
12 | https://docs.djangoproject.com/en/1.8/ref/settings/
13 | """
14 |
15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
16 | import os
17 |
# Directory two levels above this settings module.
BASE_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.8/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Left empty in this file.
SECRET_KEY = ''

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []


# Application definition: Django contrib apps first, then the project's apps.
INSTALLED_APPS = (
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',

    'wechatspider',
    'wechat',
)

MIDDLEWARE_CLASSES = (
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.middleware.security.SecurityMiddleware',
)

ROOT_URLCONF = 'wechatspider.urls'
60 |
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # Project-level template directory. It was previously declared through
        # the standalone TEMPLATE_DIRS setting, which Django 1.8 ignores once
        # TEMPLATES is defined, so the directory was never searched.
        'DIRS': [
            os.path.join(BASE_DIR, 'templates'),
        ],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'wechatspider.wsgi.application'


# Database
# https://docs.djangoproject.com/en/1.8/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    }
}


# Internationalization
# https://docs.djangoproject.com/en/1.8/topics/i18n/

LANGUAGE_CODE = 'zh-hans'

TIME_ZONE = 'Asia/Shanghai'

USE_I18N = True

USE_L10N = True

USE_TZ = False


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.8/howto/static-files/

STATIC_URL = '/static/'
STATIC_ROOT = 'static'

STATICFILES_FINDERS = (
    'django.contrib.staticfiles.finders.FileSystemFinder',
    'django.contrib.staticfiles.finders.AppDirectoriesFinder',
)
118 |
119 |
120 | # messages
# Tag messages of level ERROR as 'danger' instead of the default tag.
from django.contrib import messages
MESSAGE_TAGS = {messages.ERROR: 'danger'}
125 |
126 | # logging config
# Everything goes to one console handler using the 'verbose' format; the
# 'django' logger level can be overridden through DJANGO_LOG_LEVEL.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'verbose': {'format': '%(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s'},
        'simple': {'format': '%(levelname)s %(message)s'},
    },
    'handlers': {
        'console': {'class': 'logging.StreamHandler', 'formatter': 'verbose'},
    },
    'loggers': {
        'django': {'handlers': ['console'], 'level': os.getenv('DJANGO_LOG_LEVEL', 'INFO')},
        # Root logger.
        '': {'handlers': ['console'], 'level': 'DEBUG'},
    },
}

# redis config
REDIS_OPTIONS = dict(host='localhost', port=6379, password='', db=4)

# crawler config
CRAWLER_DEBUG = False
# Every entry maps a component name to the string 'unicrawler:<name>'.
CRAWLER_CONFIG = {
    component: 'unicrawler:' + component
    for component in (
        'scheduler',
        'downloader',
        'extractor',
        'processor',
        'global_limit_speed',
        'antispider',
    )
}
CRAWLER_GLOBAL_LIMIT_SPEED = 20 * 1000  # milliseconds

# aliyun oss2
OSS2_ENABLE = False
OSS2_CONFIG = dict(
    ACCESS_KEY_ID="",
    ACCESS_KEY_SECRET="",
    ENDPOINT="",
    BUCKET_DOMAIN="oss-cn-beijing.aliyuncs.com",
    BUCKET_NAME="pythonzone",
    IMAGES_PATH="images/",
    VIDEOS_PATH="videos/",
    CDN_DOMAIN="pystats.bowenpay.com",
)
LOGIN_URL = '/admin/login/'
189 | ## Import local settings
try:
    # Names defined in local_settings override the values set in this module.
    from local_settings import *
except ImportError:
    # The override module is optional: report the failure on stderr and keep
    # the values defined in this module.
    import sys
    import traceback
    sys.stderr.write(
        "Warning: Can't find the file 'local_settings.py' in the directory "
        "containing %r. It appears you've customized things.\n"
        "You'll have to run django-admin.py, passing it your settings module.\n"
        "(If the file settings.py does indeed exist, it's causing an "
        "ImportError somehow.)\n" % __file__
    )
    sys.stderr.write("\nFor debugging purposes, the exception was:\n\n")
    traceback.print_exc()
197 |
--------------------------------------------------------------------------------
/wechat/extractors.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | from abc import ABCMeta
4 | from abc import abstractmethod
5 | import requests
6 | import oss2
7 | import re
8 | from bs4 import BeautifulSoup
9 | from oss2.exceptions import NotFound
10 | from copy import copy
11 | from hashlib import md5
12 | from lxml import etree
13 | from io import StringIO
14 | from django.conf import settings
15 | import logging
16 | logger = logging.getLogger()
17 |
18 |
# OSS settings taken from Django settings, and the lazily created bucket.
OSS2_CONF = settings.OSS2_CONFIG
BUCKET = None


def get_bucket():
    """Return the module-wide ``oss2.Bucket``, creating it on first use."""
    global BUCKET
    if BUCKET:
        return BUCKET

    credentials = oss2.Auth(OSS2_CONF['ACCESS_KEY_ID'], OSS2_CONF['ACCESS_KEY_SECRET'])
    endpoint = 'http://%s' % OSS2_CONF['BUCKET_DOMAIN']
    BUCKET = oss2.Bucket(credentials, endpoint, OSS2_CONF['BUCKET_NAME'])
    return BUCKET
30 |
31 |
def download_to_oss(url, path):
    """Mirror the resource at *url* into OSS and return its CDN address.

    The object key is *path* followed by the MD5 hex digest of the downloaded
    body, so identical content always maps to the same key and is uploaded
    only once.

    :param url: address of the resource to download.
    :param path: key prefix inside the bucket, e.g. ``OSS2_CONF["IMAGES_PATH"]``.
    :return: *url* unchanged when ``settings.OSS2_ENABLE`` is false, otherwise
        ``http://<CDN_DOMAIN>/<key>``.
    """
    # OSS mirroring is switched off: hand back the original url.
    if not settings.OSS2_ENABLE:
        return url

    response = requests.get(url)
    # The request is not streamed, so the body is already in memory; keep the
    # bytes and upload those rather than the closed Response object.
    payload = response.content
    response.close()

    key = path + md5(payload).hexdigest()
    bucket = get_bucket()
    try:
        bucket.head_object(key)
    except NotFound:
        # Expected on first sight of this content: nothing stored under the key yet.
        logger.info('OSS object %s not found, uploading', key)
        content_type = response.headers.get('Content-Type', '')
        bucket.put_object(key, payload, headers={'Content-Type': content_type})

    return 'http://%s/%s' % (OSS2_CONF["CDN_DOMAIN"], key)
48 |
49 |
class BaseExtractor(object):
    """Common interface of all extractors: construct, then call ``extract()``."""
    __metaclass__ = ABCMeta

    @abstractmethod
    def __init__(self):
        """Set up the extractor; concrete classes define their own arguments."""
        return None

    @abstractmethod
    def extract(self):
        """Produce the extracted value; concrete classes implement this."""
        return None
60 |
61 |
def replace_all(content, srcs, new_srcs):
    """Swap each url of *srcs* found in *content* for its peer in *new_srcs*.

    Only the part of each source url before the first ``?`` is searched for,
    so a query string that follows it in *content* stays in place. Pairs are
    applied in order; surplus items of the longer list are ignored.
    """
    for old_src, replacement in zip(srcs, new_srcs):
        bare_url = old_src.partition('?')[0]
        content = content.replace(bare_url, replacement)
    return content
69 |
70 |
class ImageExtractor(BaseExtractor):
    """Mirror images through ``download_to_oss`` and return the new address(es)."""

    def __init__(self, data):
        """Remember the input.

        :param data: an image url, text containing ``<img>`` tags, or a list
            of image urls.
        """
        self.data = data

    def extract(self):
        """Return *data* with every image address replaced by its mirrored one.

        Falsy input is returned unchanged; a url yields the new url; other
        text yields the text with its image urls rewritten; a list yields a
        list of new urls; any other type yields ``None``.
        """
        payload = self.data
        if not payload:
            return payload

        if isinstance(payload, basestring):
            if payload.startswith('http'):
                # The value itself is an image address.
                return download_to_oss(payload, OSS2_CONF["IMAGES_PATH"])

            # The value is markup: collect the http(s) src / data-src of every <img>.
            parser = etree.HTMLParser()
            document = etree.parse(StringIO(payload), parser)
            src_values = document.xpath("//img[starts-with(@src,'http')]/@src")
            data_src_values = document.xpath("//img[starts-with(@data-src,'http')]/@data-src")
            originals = list(set(src_values + data_src_values))
            # Upload each distinct image, then rewrite the markup.
            mirrored = []
            for address in originals:
                mirrored.append(download_to_oss(address, OSS2_CONF["IMAGES_PATH"]))
            return replace_all(payload, originals, mirrored)

        if isinstance(payload, list):
            return [download_to_oss(address, OSS2_CONF["IMAGES_PATH"]) for address in payload]

        return None
104 |
105 |
class VideoExtractor(BaseExtractor):
    """Mirror videos through ``download_to_oss`` and return the new address(es)."""

    def __init__(self, data):
        """Remember the input.

        :param data: a video url, or a list of video urls.
        """
        self.data = data

    def extract(self):
        """Return the mirrored url for a url, or a list of them for a list.

        Falsy input is returned unchanged; any other type yields ``None``.
        """
        payload = self.data
        if not payload:
            return payload

        if isinstance(payload, basestring):
            return download_to_oss(payload, OSS2_CONF["VIDEOS_PATH"])

        if isinstance(payload, list):
            mirrored = []
            for address in payload:
                mirrored.append(download_to_oss(address, OSS2_CONF["VIDEOS_PATH"]))
            return mirrored

        return None
125 |
126 |
class XPathExtractor(BaseExtractor):
    """Evaluate an XPath expression against HTML text."""

    def __init__(self, content, rule):
        """Parse *content* with lxml's HTML parser and keep *rule* for later.

        :param content: HTML text to parse.
        :param rule: XPath expression evaluated by ``extract``.
        """
        parser = etree.HTMLParser()
        buffered = StringIO(content)
        self.tree = etree.parse(buffered, parser)
        self.rule = rule

    def extract(self):
        """Return whatever the stored XPath expression yields on the parsed tree."""
        matches = self.tree.xpath(self.rule)
        return matches
135 |
136 |
class PythonExtractor(BaseExtractor):
    """Run a snippet of Python code to transform a value.

    The snippet reads its input from the name ``in_val`` and must assign its
    result to ``out_val``.
    """

    def __init__(self, code, in_val, context):
        """Store the snippet and build its namespace.

        :param code: Python source handed to ``exec``.
        :param in_val: input value; also the fallback result.
        :param context: extra names for the snippet. It is shallow-copied, so
            the caller's mapping does not receive the ``in_val`` key.
        """
        self.code = code
        self.in_val = in_val
        self.context = copy(context)
        self.context.update({'in_val': in_val})

    def extract(self):
        """Execute the snippet and return its ``out_val``.

        :return: the value the snippet bound to ``out_val``; ``in_val`` when
            the snippet raises an ``Exception`` or never sets ``out_val``
            (the error is logged).
        """
        # NOTE(review): exec runs arbitrary code -- self.code must come from a
        # trusted source only.
        namespace = self.context
        try:
            exec(self.code, {}, namespace)
            return namespace["out_val"]
        except Exception as e:
            # No ``return`` inside ``finally`` here: that form also swallowed
            # KeyboardInterrupt and SystemExit, which must propagate.
            logger.exception(e)
            return self.in_val
154 |
155 |
156 |
class WechatContentExtractor(BaseExtractor):
    """Clean WeChat article HTML for display.

    Removes the vote area and sets image and video sizes to ``auto``.
    """

    def __init__(self, data):
        """:param data: article HTML as text."""
        self.data = data

    def extract(self):
        """Return the cleaned HTML.

        If cleaning raises an ``Exception`` it is logged and the text
        processed so far (at least the input) is returned instead.
        """
        res = self.data
        try:
            # Drop the image placeholder ("mask") assignment.
            res = res.replace('var occupyImg = ', '')
            # Name the parser explicitly: lxml is what bs4 selects by default
            # when it is installed (this module imports it), and pinning it
            # keeps the output independent of the environment.
            bs = BeautifulSoup(res, 'lxml')

            # Remove the vote area.
            vote = bs.find('span', {'class': 'vote_area'})
            if vote:
                vote.replace_with('')

            # Images: turn pixel sizes in the inline style into 'auto' and
            # load from data-src when it is present.
            for img in bs.select('img'):
                if img.get('style'):
                    img['style'] = re.sub(r'(\w+px)', 'auto', img['style'])
                if img.get('data-src'):
                    img['src'] = img.get('data-src')

            # Videos: size attributes and the size parameters inside the
            # player urls all become 'auto'.
            for frame in bs.find_all('iframe', {'class': 'video_iframe'}):
                frame['width'] = 'auto'
                frame['height'] = 'auto'
                for attr in ('src', 'data-src'):
                    link = frame.get(attr)
                    if link:
                        link = re.sub(r'(height=\d+[\.\d+]*)', 'height=auto', link)
                        link = re.sub(r'(width=\d+[\.\d+]*)', 'width=auto', link)
                        frame[attr] = link

            res = unicode(bs)
        except Exception as e:
            logger.exception(e)
        # Returned after the try block rather than from ``finally``, so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return res
--------------------------------------------------------------------------------
/wechat/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import unicode_literals
3 |
4 | from django.db import models, migrations
5 |
6 |
class Migration(migrations.Migration):
    """Initial schema migration for the ``wechat`` app.

    Creates the ``Proxy``, ``Topic``, ``Wechat`` and ``Word`` models and then
    adds the ``Topic.wechat`` foreign key pointing at ``wechat.Wechat``.
    """

    # First migration of the app: nothing has to run before it.
    dependencies = [
    ]

    operations = [
        # Proxy: one row per proxy server (host/port/credentials plus check status).
        migrations.CreateModel(
            name='Proxy',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('kind', models.IntegerField(default=1, verbose_name=b'\xe7\xb1\xbb\xe5\x9e\x8b', choices=[(0, b'\xe6\x90\x9c\xe7\xb4\xa2\xe4\xbb\xa3\xe7\x90\x86'), (1, b'\xe4\xb8\x8b\xe8\xbd\xbd\xe4\xbb\xa3\xe7\x90\x86')])),
                ('user', models.CharField(default=b'', max_length=100, blank=True)),
                ('password', models.CharField(default=b'', max_length=100, blank=True)),
                ('host', models.CharField(max_length=100)),
                ('port', models.IntegerField(default=80)),
                ('speed', models.IntegerField(default=0, verbose_name=b'\xe8\xbf\x9e\xe6\x8e\xa5\xe9\x80\x9f\xe5\xba\xa6(ms)')),
                ('status', models.IntegerField(default=0, verbose_name=b'\xe7\x8a\xb6\xe6\x80\x81', choices=[(0, b'\xe6\x9c\xaa\xe6\xa3\x80\xe6\xb5\x8b'), (1, b'\xe6\xa3\x80\xe6\xb5\x8b\xe6\x88\x90\xe5\x8a\x9f'), (2, b'\xe6\xa3\x80\xe6\xb5\x8b\xe5\xa4\xb1\xe8\xb4\xa5')])),
                ('retry', models.IntegerField(default=0, verbose_name=b'\xe5\xb0\x9d\xe8\xaf\x95\xe6\xac\xa1\xe6\x95\xb0')),
                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name=b'\xe5\x88\x9b\xe5\xbb\xba\xe6\x97\xb6\xe9\x97\xb4')),
                ('update_time', models.DateTimeField(auto_now=True, verbose_name=b'\xe6\x9b\xb4\xe6\x96\xb0\xe6\x97\xb6\xe9\x97\xb4')),
            ],
            options={
                'verbose_name_plural': '\u8bbf\u95ee\u4ee3\u7406',
            },
        ),
        # Topic: one row per article; ``uniqueid`` is unique, ``publish_time``
        # and ``available`` are indexed.
        migrations.CreateModel(
            name='Topic',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('uniqueid', models.CharField(unique=True, max_length=100, verbose_name=b'url\xe7\x9a\x84md5\xe5\x80\xbc')),
                ('words', models.IntegerField(default=0, verbose_name=b'\xe5\xad\x97\xe6\x95\xb0')),
                ('url', models.CharField(default=b'', max_length=500, verbose_name=b'\xe6\x96\x87\xe7\xab\xa0\xe7\x9a\x84url')),
                ('avatar', models.CharField(default=b'', max_length=500, verbose_name=b'\xe7\xbc\xa9\xe7\x95\xa5\xe5\x9b\xbe\xe5\x9c\xb0\xe5\x9d\x80')),
                ('title', models.CharField(max_length=200, verbose_name=b'\xe6\xa0\x87\xe9\xa2\x98')),
                ('origin_title', models.CharField(default=b'', max_length=200, verbose_name=b'\xe5\x8e\x9f\xe6\x96\x87\xe6\xa0\x87\xe9\xa2\x98')),
                ('abstract', models.TextField(default=b'', verbose_name=b'\xe5\x86\x85\xe5\xae\xb9\xe7\xae\x80\xe4\xbb\x8b')),
                ('content', models.TextField(default=b'', verbose_name=b'\xe6\x96\x87\xe7\xab\xa0\xe5\x86\x85\xe5\xae\xb9')),
                ('source', models.TextField(default=b'', verbose_name=b'\xe6\x96\x87\xe7\xab\xa0\xe5\x8e\x9f\xe5\x86\x85\xe5\xae\xb9')),
                ('read_num', models.IntegerField(default=0, verbose_name=b'\xe9\x98\x85\xe8\xaf\xbb\xe6\x95\xb0')),
                ('like_num', models.IntegerField(default=0, verbose_name=b'\xe7\x82\xb9\xe8\xb5\x9e\xe6\x95\xb0')),
                ('publish_time', models.DateTimeField(verbose_name=b'\xe5\x8f\x91\xe5\xb8\x83\xe6\x97\xb6\xe9\x97\xb4', db_index=True)),
                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name=b'\xe5\x88\x9b\xe5\xbb\xba\xe6\x97\xb6\xe9\x97\xb4')),
                ('update_time', models.DateTimeField(auto_now=True, verbose_name=b'\xe6\x9b\xb4\xe6\x96\xb0\xe6\x97\xb6\xe9\x97\xb4')),
                ('available', models.CharField(default=b'', max_length=100, verbose_name=b'\xe6\x98\xaf\xe5\x90\xa6\xe5\x8f\xaf\xe7\x94\xa8', db_index=True)),
            ],
            options={
                'verbose_name_plural': '\u6587\u7ae0',
            },
        ),
        # Wechat: one row per account; ``wechatid`` is unique.
        migrations.CreateModel(
            name='Wechat',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('avatar', models.CharField(default=b'', max_length=500, verbose_name=b'\xe5\x85\xac\xe4\xbc\x97\xe5\x8f\xb7\xe5\xa4\xb4\xe5\x83\x8f', blank=True)),
                ('qrcode', models.CharField(default=b'', max_length=500, verbose_name=b'\xe4\xba\x8c\xe7\xbb\xb4\xe7\xa0\x81', blank=True)),
                ('name', models.CharField(max_length=100, verbose_name=b'\xe5\x85\xac\xe4\xbc\x97\xe5\x8f\xb7')),
                ('wechatid', models.CharField(unique=True, max_length=100, verbose_name=b'\xe5\x85\xac\xe4\xbc\x97\xe5\x8f\xb7id')),
                ('intro', models.TextField(default=b'', verbose_name=b'\xe7\xae\x80\xe4\xbb\x8b', blank=True)),
                ('frequency', models.IntegerField(default=0, verbose_name=b'\xe7\x88\xac\xe5\x8f\x96\xe9\xa2\x91\xe7\x8e\x87, \xe5\x8d\x95\xe4\xbd\x8d:\xe5\x88\x86\xe9\x92\x9f')),
                ('next_crawl_time', models.DateTimeField(auto_now_add=True, verbose_name=b'\xe4\xb8\x8b\xe6\xac\xa1\xe7\x88\xac\xe5\x8f\x96\xe6\x97\xb6\xe9\x97\xb4')),
                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name=b'\xe5\x88\x9b\xe5\xbb\xba\xe6\x97\xb6\xe9\x97\xb4')),
                ('status', models.IntegerField(default=0, verbose_name=b'\xe7\x8a\xb6\xe6\x80\x81', choices=[(0, b'\xe9\xbb\x98\xe8\xae\xa4'), (1, b'\xe7\xa6\x81\xe7\x94\xa8'), (2, b'\xe5\x88\xa0\xe9\x99\xa4')])),
            ],
            options={
                'verbose_name_plural': '\u516c\u4f17\u53f7',
            },
        ),
        # Word: one row per crawl keyword.
        migrations.CreateModel(
            name='Word',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('kind', models.IntegerField(default=0, verbose_name=b'\xe7\xb1\xbb\xe5\x9e\x8b', choices=[(0, b'\xe5\x85\xb3\xe9\x94\xae\xe8\xaf\x8d')])),
                ('text', models.CharField(max_length=100, verbose_name=b'\xe8\xaf\x8d')),
                ('intro', models.TextField(default=b'', verbose_name=b'\xe7\xae\x80\xe4\xbb\x8b', blank=True)),
                ('frequency', models.IntegerField(default=100, verbose_name=b'\xe7\x88\xac\xe5\x8f\x96\xe9\xa2\x91\xe7\x8e\x87, \xe5\x8d\x95\xe4\xbd\x8d:\xe5\x88\x86\xe9\x92\x9f')),
                ('next_crawl_time', models.DateTimeField(auto_now_add=True, verbose_name=b'\xe4\xb8\x8b\xe6\xac\xa1\xe7\x88\xac\xe5\x8f\x96\xe6\x97\xb6\xe9\x97\xb4')),
                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name=b'\xe5\x88\x9b\xe5\xbb\xba\xe6\x97\xb6\xe9\x97\xb4')),
            ],
            options={
                'verbose_name_plural': '\u8bcd',
            },
        ),
        # Added after both models exist: Topic.wechat -> wechat.Wechat.
        migrations.AddField(
            model_name='topic',
            name='wechat',
            field=models.ForeignKey(verbose_name=b'\xe5\x85\xac\xe4\xbc\x97\xe5\x8f\xb7', to='wechat.Wechat'),
        ),
    ]
95 |
--------------------------------------------------------------------------------
/bin/extractor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
# Load the Django environment before any project module is imported.
import sys
import os
# Python 2 only: reload(sys) brings back sys.setdefaultencoding, which is then
# used to make UTF-8 the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf8')
# Make the parent of this script's directory importable.
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
os.environ['DJANGO_SETTINGS_MODULE'] = 'wechatspider.settings'
import django
django.setup()
12 |
13 | import json
14 | from django.conf import settings
15 | from wechatspider.util import get_redis, get_uniqueid
16 | from wechat.extractors import XPathExtractor, PythonExtractor, ImageExtractor, VideoExtractor, WechatContentExtractor
17 | from wechat.constants import KIND_HISTORY, KIND_DETAIL, KIND_KEYWORD
18 | import logging
19 | logger = logging.getLogger()
20 |
# Extraction pipelines used when the message kind is neither KIND_DETAIL nor
# KIND_KEYWORD. Each entry names the result column ("key") and the ordered
# steps ("rules") whose output is chained from one step to the next.
NORMAL_RULES = [
    {
        "key": "avatar",
        "rules": [
            {"kind": "python", "data": "out_val=data['avatar'];"},
            {"kind": "image", "data": ""},
        ],
    },
    {
        "key": "origin_title",
        "rules": [
            {"kind": "xpath", "data": "//title/text()"},
            {"kind": "python", "data": "out_val=in_val[0] if in_val else '';"},
        ],
    },
    {
        "key": "source",
        "rules": [
            {"kind": "image", "data": ""},
            {"kind": "WechatContent", "data": ""},
        ],
    },
    {
        "key": "content",
        "rules": [
            {"kind": "xpath", "data": "//div[@id='js_content']"},
            {"kind": "python", "data": "from lxml import html;out_val=''.join([html.tostring(child, encoding='unicode') for child in in_val])"},
            {"kind": "image", "data": ""},
            {"kind": "WechatContent", "data": ""},
        ],
    },
    {
        "key": "words",
        "rules": [
            {"kind": "xpath", "data": "//div[@id='js_content']//text()"},
            {"kind": "python", "data": "out_val=sum([len(item.strip()) for item in in_val])"},
        ],
    },
    {
        "key": "publish_time",
        "rules": [
            {"kind": "python", "data": "from datetime import datetime;out_val = str(datetime.now());"},
        ],
    },
    {
        "key": "read_num",
        "rules": [
            {"kind": "xpath", "data": "//span[@id='sg_readNum3']/text()"},
            {"kind": "python", "data": "out_val=sum([int(item.strip().strip('+')) for item in in_val])"},
        ],
    },
    {
        "key": "like_num",
        "rules": [
            {"kind": "xpath", "data": "//span[@id='sg_likeNum3']/text()"},
            {"kind": "python", "data": "out_val=sum([int(item.strip().strip('+')) for item in in_val])"},
        ],
    },
]
131 |
# Extraction pipelines used for KIND_DETAIL and KIND_KEYWORD messages. Same
# layout as the other rule table: "key" is the result column, "rules" the
# ordered steps. Order matters: "avatar" reads data['content'], so it has to
# come after the "content" entry.
DETAIL_RULES = [
    {
        "key": "title",
        "rules": [
            {"kind": "xpath", "data": "//title/text()"},
            {"kind": "python", "data": "out_val=in_val[0] if in_val else '';"},
        ],
    },
    {
        "key": "origin_title",
        "rules": [
            {"kind": "xpath", "data": "//title/text()"},
            {"kind": "python", "data": "out_val=in_val[0] if in_val else '';"},
        ],
    },
    {
        "key": "source",
        "rules": [
            {"kind": "image", "data": ""},
            {"kind": "WechatContent", "data": ""},
        ],
    },
    {
        "key": "content",
        "rules": [
            {"kind": "xpath", "data": "//div[@id='js_content']"},
            {"kind": "python", "data": "from lxml import html;out_val=''.join([html.tostring(child, encoding='unicode') for child in in_val])"},
            {"kind": "image", "data": ""},
            {"kind": "WechatContent", "data": ""},
        ],
    },
    {
        "key": "words",
        "rules": [
            {"kind": "xpath", "data": "//div[@id='js_content']//text()"},
            {"kind": "python", "data": "out_val=sum([len(item.strip()) for item in in_val])"},
        ],
    },
    {
        "key": "abstract",
        "rules": [
            {"kind": "xpath", "data": "//div[@id='js_content']//text()"},
            {"kind": "python", "data": "tmp=''.join([item.strip() for item in in_val]);out_val = '%s...' % tmp[:220] if len(tmp) > 220 else tmp;"},
        ],
    },
    {
        "key": "avatar",
        "rules": [
            {"kind": "python", "data": "out_val=data['content'];"},
            {"kind": "xpath", "data": "//img/@src"},
            {"kind": "python", "data": "out_val=in_val[1] if len(in_val) > 1 else '';"},
        ],
    },
    {
        "key": "publish_time",
        "rules": [
            {"kind": "python", "data": "from datetime import datetime;out_val = str(datetime.now());"},
        ],
    },
    {
        "key": "wechatid",
        "rules": [
            {"kind": "xpath", "data": "//span[@class='profile_meta_value']/text()"},
            {"kind": "python", "data": "out_val=in_val[0] if len(in_val) == 2 else '';"},
        ],
    },
    {
        "key": "name",
        "rules": [
            {"kind": "xpath", "data": "//strong[@class='profile_nickname']/text()"},
            {"kind": "python", "data": "out_val=in_val[0] if in_val else '';"},
        ],
    },
    {
        "key": "intro",
        "rules": [
            {"kind": "xpath", "data": "//span[@class='profile_meta_value']/text()"},
            {"kind": "python", "data": "out_val=in_val[1] if len(in_val) == 2 else '';"},
        ],
    },
    {
        "key": "qrcode",
        "rules": [
            {"kind": "xpath", "data": "//img[@id='js_pc_qr_code_img']/@src"},
            {"kind": "python", "data": "out_val='http://mp.weixin.qq.com' + in_val[0] if in_val else '';"},
        ],
    },
    {
        "key": "read_num",
        "rules": [
            {"kind": "xpath", "data": "//span[@id='sg_readNum3']/text()"},
            {"kind": "python", "data": "out_val=sum([int(item.strip().strip('+')) for item in in_val])"},
        ],
    },
    {
        "key": "like_num",
        "rules": [
            {"kind": "xpath", "data": "//span[@id='sg_likeNum3']/text()"},
            {"kind": "python", "data": "out_val=sum([int(item.strip().strip('+')) for item in in_val])"},
        ],
    },
]
class Extractor(object):
    """Worker that turns downloaded pages into structured article records.

    Messages are popped from the Redis list named by
    ``settings.CRAWLER_CONFIG["extractor"]``; the extracted record is pushed
    as JSON onto the list named by ``settings.CRAWLER_CONFIG["processor"]``.
    """

    def __init__(self):
        self.redis = get_redis()

    def extract(self, content, rules, context):
        """Run ``content`` through a chain of extraction steps.

        :param content: input of the first step
        :param rules: list of ``{"kind": ..., "data": ...}`` dicts; the output
            of each step is the input of the next
        :param context: dict handed to ``python`` steps
        :return: output of the last step (``content`` itself for no rules)
        :raises ValueError: for a rule whose ``kind`` is not one of
            ``xpath``, ``python``, ``image``, ``video``, ``WechatContent``
        """
        res = content
        for rule in rules:
            kind = rule["kind"]
            if kind == "xpath":
                extractor = XPathExtractor(res, rule["data"])
            elif kind == "python":
                extractor = PythonExtractor(rule["data"], res, context=context)
            elif kind == "image":
                extractor = ImageExtractor(res)
            elif kind == "video":
                extractor = VideoExtractor(res)
            elif kind == "WechatContent":
                extractor = WechatContentExtractor(res)
            else:
                # Fail with a message that names the bad rule instead of an
                # AttributeError on None.
                raise ValueError("unknown extractor kind: %r" % (kind,))

            res = extractor.extract()

        return res

    def get_detail(self, content, data):
        """Extract every column for one downloaded page and queue the result.

        :param content: HTML of the page
        :param data: decoded queue message; ``kind`` selects the rule table
            (``DETAIL_RULES`` for KIND_DETAIL / KIND_KEYWORD, ``NORMAL_RULES``
            otherwise)
        """
        if data.get('kind') in (KIND_DETAIL, KIND_KEYWORD):
            result = {
                "kind": data["kind"],
                "url": data["url"],
                "source": data["body"],
                "avatar": ''
            }
            rules = DETAIL_RULES
        else:
            result = {
                "wechat_id": data["wechat_id"],
                "url": data["url"],
                "title": data["title"],
                "source": data["body"],
                "avatar": data["avatar"],
                "abstract": data["abstract"]
            }
            rules = NORMAL_RULES

        # Columns are filled in rule order; later rules can read earlier
        # columns through the ``data`` name of their context.
        for item in rules:
            col = item["key"]
            logger.debug('extracting column: %s', col)
            result[col] = self.extract(content, item["rules"], {'data': result})

        # Extraction finished: hand the record to the processor queue.
        self.redis.lpush(settings.CRAWLER_CONFIG["processor"], json.dumps(result))
        # Blank the two big fields so the debug line stays readable.
        result["source"] = ""
        result["content"] = ""
        logger.debug('extracted:%s', result)

    def run(self):
        """Consume the extractor queue forever.

        A failure while popping from Redis is logged and retried after a
        short pause; a failure while decoding or extracting one message is
        logged and that message is dropped, so a single bad page cannot stop
        the worker.
        """
        import time  # local import: only needed for the retry pause

        r = self.redis
        queue = settings.CRAWLER_CONFIG["extractor"]
        if settings.CRAWLER_DEBUG:
            r.delete(queue)
        while True:
            try:
                data = r.brpop(queue)
            except Exception as e:
                logger.exception(e)
                # Avoid spinning at full speed while Redis is unreachable.
                time.sleep(1)
                continue

            try:
                data = json.loads(data[1])
                self.get_detail(data['body'], data)
            except Exception as e:
                logger.exception(e)
405 |
406 |
if __name__ == '__main__':
    # Script entry point: build the worker and enter its blocking loop.
    Extractor().run()
410 |
--------------------------------------------------------------------------------
/wechat/downloaders.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'yijingping'
3 | import time
4 | import json
5 | import platform
6 | from datetime import datetime, timedelta
7 | from dateutil.parser import parse
8 | from random import sample, randint
9 | from lxml import etree
10 | from io import StringIO
11 | from pyvirtualdisplay import Display
12 | from selenium import webdriver
13 | from selenium.webdriver.common.keys import Keys
14 | from selenium.webdriver.common.proxy import Proxy, ProxyType
15 | from wechatspider.util import get_uniqueid, get_redis
16 | from wechat.models import Topic
17 | from wechat.constants import KIND_DETAIL, KIND_KEYWORD, KIND_NORMAL
18 | from django.conf import settings
19 | from .util import stringify_children
20 |
21 | import logging
22 | logger = logging.getLogger()
23 |
24 | CRAWLER_CONFIG = settings.CRAWLER_CONFIG
25 |
26 |
27 | class SeleniumDownloaderBackend(object):
28 | headers = [
29 | {
30 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'
31 | }
32 | ]
33 |
    def __init__(self, proxy=None):
        """Remember the proxy; the browser itself is started in ``__enter__``.

        :param proxy: proxy object later handed to ``get_browser``
        """
        # Configure the proxy.
        self.proxy = proxy
37 |
38 | def __enter__(self):
39 | # 打开界面
40 | self.display = self.get_display()
41 | # 打开浏览器
42 | self.browser = self.get_browser(self.proxy)
43 | return self
44 |
45 | def __exit__(self, exc_type, exc_val, exc_tb):
46 | # 关闭浏览器
47 | try:
48 | if self.browser:
49 | self.browser.delete_all_cookies()
50 | self.browser.quit()
51 | except Exception as e:
52 | logging.exception(e)
53 | # 关闭界面
54 | try:
55 | # 关闭浏览器,关闭窗口
56 | self.display and self.display.stop()
57 | except Exception as e:
58 | logging.exception(e)
59 |
60 | def get_display(self):
61 | """ 获取操作系统桌面窗口 """
62 | if platform.system() != 'Darwin':
63 | # 不是mac系统, 启动窗口
64 | display = Display(visible=0, size=(1024, 768))
65 | display.start()
66 | else:
67 | display = None
68 | return display
69 |
70 | def get_browser(self, proxy):
71 | """ 启动并返回浏览器,使用firefox """
72 | # 启动浏览器
73 | firefox_profile = webdriver.FirefoxProfile()
74 | # 禁止加载image
75 | #firefox_profile.set_preference('permissions.default.stylesheet', 2)
76 | #firefox_profile.set_preference('permissions.default.image', 2)
77 | #firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
78 | # 代理
79 | if proxy.is_valid():
80 | myProxy = '%s:%s' % (proxy.host, proxy.port)
81 | ff_proxy = Proxy({
82 | 'proxyType': ProxyType.MANUAL,
83 | 'httpProxy': myProxy,
84 | 'ftpProxy': myProxy,
85 | 'sslProxy': myProxy,
86 | 'noProxy': ''})
87 |
88 | browser = webdriver.Firefox(firefox_profile=firefox_profile, proxy=ff_proxy)
89 | else:
90 | browser = webdriver.Firefox(firefox_profile=firefox_profile)
91 |
92 | return browser
93 |
    def download(self, url):
        """Placeholder: does nothing with ``url`` and returns ``None``."""
        pass
96 |
97 | def download_wechat(self, data, process_topic):
98 | """ 根据微信号最新文章 """
99 | wechat_id, wechatid = data['wechat_id'], data['wechatid']
100 | try:
101 | self.visit_wechat_index(wechatid)
102 | if self.visit_wechat_topic_list(wechatid):
103 | self.download_wechat_topics(wechat_id, process_topic)
104 | except Exception as e:
105 | logger.exception(e)
106 | self.log_antispider()
107 | self.retry_crawl(data)
108 |
109 | def download_wechat_keyword(self, data, process_topic):
110 | """ 爬取关键词爬取最新文章 """
111 | word = data['word']
112 | try:
113 | self.visit_wechat_index_keyword(word)
114 | self.download_wechat_keyword_topics(word, process_topic)
115 | except Exception as e:
116 | logger.exception(e)
117 | self.log_antispider()
118 | self.retry_crawl(data)
119 |
120 | def download_wechat_topic_detail(self, data, process_topic):
121 | """ 根据url爬取文章的详情页 """
122 | url = data['url']
123 | browser = self.browser
124 | try:
125 | browser.get(url)
126 | time.sleep(3)
127 |
128 | if 'antispider' in browser.current_url:
129 | """被检测出爬虫了"""
130 | self.log_antispider()
131 | self.retry_crawl(data)
132 | time.sleep(randint(1, 5))
133 | else:
134 | js = """
135 | var imgs = document.getElementsByTagName('img');
136 |
137 | for(var i = 0; i < imgs.length; i++) {
138 | var dataSrc = imgs[i].getAttribute('data-src');
139 | if (dataSrc){
140 | imgs[i].setAttribute('src', dataSrc);
141 | }
142 | }
143 | return document.documentElement.innerHTML;
144 | """
145 | body = browser.execute_script(js)
146 | process_topic({
147 | 'url': browser.current_url,
148 | 'body': body,
149 | 'avatar': '',
150 | 'title': '',
151 | 'kind': KIND_DETAIL
152 | })
153 | time.sleep(randint(1, 5))
154 |
155 | except Exception as e:
156 | logger.exception(e)
157 | self.log_antispider()
158 | self.retry_crawl(data)
159 |
160 | def visit_wechat_index(self, wechatid):
161 | """ 访问微信首页,输入微信id,点击搜公众号 """
162 | browser = self.browser
163 | browser.get("http://weixin.sogou.com/")
164 | print browser.title
165 | element_querybox = browser.find_element_by_name('query')
166 | element_querybox.send_keys(wechatid, Keys.ARROW_DOWN)
167 | element_search_btn = browser.find_element_by_xpath("//input[@value='搜公众号']")
168 | element_search_btn.click()
169 | time.sleep(3)
170 | print browser.title
171 |
172 | def visit_wechat_index_keyword(self, word):
173 | """ 访问微信首页,输入关键词,点击搜文章 """
174 | browser = self.browser
175 | browser.get("http://weixin.sogou.com/")
176 | print browser.title
177 | element_querybox = browser.find_element_by_name('query')
178 | element_querybox.send_keys(word, Keys.ARROW_DOWN)
179 | element_search_btn = browser.find_element_by_xpath("//input[@value='搜文章']")
180 | element_search_btn.click()
181 | time.sleep(3)
182 | print browser.title
183 |
184 | def visit_wechat_topic_list(self, wechatid):
185 | """ 找到微信号,并点击进入微信号的文章列表页面 """
186 | browser = self.browser
187 | # 找到搜索列表第一个微信号, 点击打开新窗口
188 | element_wechat = browser.find_element_by_xpath("//div[@class='txt-box']/p[@class='info']/label")
189 | element_wechat_title = browser.find_element_by_xpath("//div[@class='txt-box']/p[@class='tit']/a")
190 | if element_wechat and element_wechat.text == wechatid:
191 | element_wechat_title.click()
192 | time.sleep(3)
193 | # 切到当前的文章列表页窗口
194 | new_handler = browser.window_handles[-1]
195 | browser.switch_to.window(new_handler)
196 | time.sleep(3)
197 | return True
198 | else:
199 | return False
200 |
201 | def download_wechat_topics(self, wechat_id, process_topic):
202 | """ 在微信号的文章列表页面,逐一点击打开每一篇文章,并爬取 """
203 | browser = self.browser
204 | js = """ return document.documentElement.innerHTML; """
205 | body = browser.execute_script(js)
206 |
207 | htmlparser = etree.HTMLParser()
208 | tree = etree.parse(StringIO(body), htmlparser)
209 |
210 | elems = [item.strip() for item in tree.xpath("//h4[@class='weui_media_title']/text()") if item.strip()]
211 | hrefs = ['http://mp.weixin.qq.com%s' % item for item in tree.xpath("//h4[@class='weui_media_title']/@hrefs")]
212 | elems_avatars = tree.xpath("//div[@class='weui_media_box appmsg']/span/@style")
213 | avatars = [item[21:-1] for item in elems_avatars]
214 | elems_abstracts = tree.xpath("//p[@class='weui_media_desc']")
215 | abstracts = [item.text.strip() if item.text else '' for item in elems_abstracts]
216 | links = []
217 | for idx, item in enumerate(elems[:10]):
218 | title = item
219 | print title
220 | if not title:
221 | continue
222 | uniqueid = get_uniqueid('%s:%s' % (wechat_id, title))
223 | try:
224 | Topic.objects.get(uniqueid=uniqueid)
225 | except Topic.DoesNotExist:
226 | #print len(elems), len(hrefs), len(avatars), len(abstracts)
227 | #print elems, hrefs, avatars, abstracts
228 | links.append((title, hrefs[idx], avatars[idx], abstracts[idx]))
229 | logger.debug('文章不存在, title=%s, uniqueid=%s' % (title, uniqueid))
230 | for title, link, avatar, abstract in reversed(links):
231 | # 可以访问了
232 | browser.get(link)
233 | time.sleep(3)
234 |
235 | if 'antispider' in browser.current_url:
236 | """被检测出爬虫了"""
237 | self.log_antispider()
238 | time.sleep(randint(1, 5))
239 | else:
240 | js = """
241 | var imgs = document.getElementsByTagName('img');
242 |
243 | for(var i = 0; i < imgs.length; i++) {
244 | var dataSrc = imgs[i].getAttribute('data-src');
245 | if (dataSrc){
246 | imgs[i].setAttribute('src', dataSrc);
247 | }
248 | }
249 | return document.documentElement.innerHTML;
250 | """
251 | body = browser.execute_script(js)
252 | process_topic({
253 | 'url': browser.current_url,
254 | 'body': body,
255 | 'avatar': avatar,
256 | 'title': title,
257 | 'abstract': abstract,
258 | 'kind': KIND_NORMAL
259 | })
260 | time.sleep(randint(1, 5))
261 |
262 | def download_wechat_keyword_topics(self, word, process_topic):
263 | """ 在关键词下的文章列表页面,逐一点击打开每一篇文章,并爬取 """
264 | browser = self.browser
265 | js = """ return document.documentElement.innerHTML; """
266 | body = browser.execute_script(js)
267 |
268 | htmlparser = etree.HTMLParser()
269 | tree = etree.parse(StringIO(body), htmlparser)
270 |
271 | elems = [stringify_children(item).replace('red_beg', '').replace('red_end', '') for item in tree.xpath("//div[@class='txt-box']/h3/a")]
272 | hrefs = tree.xpath("//div[@class='txt-box']/h3/a/@href")
273 | #avatars = tree.xpath("//div[@class='img-box']/a/img/@src")
274 | #elems_abstracts = tree.xpath("//div[@class='txt-box']/p")
275 | #abstracts = [item.text.strip() if item.text else '' for item in elems_abstracts]
276 | avatars = [''] * len(elems)
277 | abstracts = [''] * len(elems)
278 | links = []
279 | for idx, item in enumerate(elems):
280 | title = item
281 | print title
282 | if not title:
283 | continue
284 | uniqueid = get_uniqueid('%s:%s' % (word, title))
285 | try:
286 | Topic.objects.get(uniqueid=uniqueid)
287 | except Topic.DoesNotExist:
288 | #print len(elems), len(hrefs), len(avatars), len(abstracts)
289 | print elems, hrefs, avatars, abstracts
290 | links.append((title, hrefs[idx], avatars[idx], abstracts[idx]))
291 | logger.debug('文章不存在, title=%s, uniqueid=%s' % (title, uniqueid))
292 | for title, link, avatar, abstract in reversed(links):
293 | # 可以访问了
294 | browser.get(link)
295 | time.sleep(3)
296 |
297 | if 'antispider' in browser.current_url:
298 | """被检测出爬虫了"""
299 | self.log_antispider()
300 | time.sleep(randint(1, 5))
301 | else:
302 | js = """
303 | var imgs = document.getElementsByTagName('img');
304 |
305 | for(var i = 0; i < imgs.length; i++) {
306 | var dataSrc = imgs[i].getAttribute('data-src');
307 | if (dataSrc){
308 | imgs[i].setAttribute('src', dataSrc);
309 | }
310 | }
311 | return document.documentElement.innerHTML;
312 | """
313 | body = browser.execute_script(js)
314 | process_topic({
315 | 'url': browser.current_url,
316 | 'body': body,
317 | 'avatar': avatar,
318 | 'title': title,
319 | 'abstract': abstract,
320 | 'kind': KIND_KEYWORD
321 | })
322 | time.sleep(randint(1, 5))
323 |
324 | def log_antispider(self):
325 | """ 记录1小时内的被禁爬的数量 """
326 | r = get_redis()
327 | if r.incr(CRAWLER_CONFIG['antispider']) <= 1:
328 | r.expire(CRAWLER_CONFIG['antispider'], 3600)
329 |
330 | def retry_crawl(self, data):
331 | """ 如果被禁爬,重试 """
332 | r = get_redis()
333 | retry = data.get('retry', 0)
334 |
335 | if data.get('kind') == KIND_DETAIL:
336 | if retry >= 20:
337 | return
338 | data = {
339 | 'kind': data['kind'],
340 | 'url': data['url'],
341 | 'retry': retry + 1
342 | }
343 | elif data.get('kind') == KIND_KEYWORD:
344 | if retry >= 3:
345 | return
346 | data = {
347 | 'kind': data['kind'],
348 | 'word': data['word'],
349 | 'retry': retry + 1
350 | }
351 | else:
352 | if retry >= 3:
353 | return
354 | data = {
355 | 'kind': data['kind'],
356 | 'wechat_id': data['wechat_id'],
357 | 'wechatid': data['wechatid'],
358 | 'retry': retry + 1
359 | }
360 |
361 | r.lpush(settings.CRAWLER_CONFIG["downloader"], json.dumps(data))
362 |
--------------------------------------------------------------------------------
/wechatspider/static/bootstrap-datepicker/css/bootstrap-datepicker.min.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Datepicker for Bootstrap v1.6.0 (https://github.com/eternicode/bootstrap-datepicker)
3 | *
4 | * Copyright 2012 Stefan Petre
5 | * Improvements by Andrew Rowls
6 | * Licensed under the Apache License v2.0 (http://www.apache.org/licenses/LICENSE-2.0)
7 | */
8 | .datepicker{padding:4px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;direction:ltr}.datepicker-inline{width:220px}.datepicker.datepicker-rtl{direction:rtl}.datepicker.datepicker-rtl table tr td span{float:right}.datepicker-dropdown{top:0;left:0}.datepicker-dropdown:before{content:'';display:inline-block;border-left:7px solid transparent;border-right:7px solid transparent;border-bottom:7px solid #999;border-top:0;border-bottom-color:rgba(0,0,0,.2);position:absolute}.datepicker-dropdown:after{content:'';display:inline-block;border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #fff;border-top:0;position:absolute}.datepicker-dropdown.datepicker-orient-left:before{left:6px}.datepicker-dropdown.datepicker-orient-left:after{left:7px}.datepicker-dropdown.datepicker-orient-right:before{right:6px}.datepicker-dropdown.datepicker-orient-right:after{right:7px}.datepicker-dropdown.datepicker-orient-bottom:before{top:-7px}.datepicker-dropdown.datepicker-orient-bottom:after{top:-6px}.datepicker-dropdown.datepicker-orient-top:before{bottom:-7px;border-bottom:0;border-top:7px solid #999}.datepicker-dropdown.datepicker-orient-top:after{bottom:-6px;border-bottom:0;border-top:6px solid #fff}.datepicker>div{display:none}.datepicker table{margin:0;-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.datepicker td,.datepicker th{text-align:center;width:20px;height:20px;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px;border:none}.table-striped .datepicker table tr td,.table-striped .datepicker table tr th{background-color:transparent}.datepicker table tr td.day.focused,.datepicker table tr td.day:hover{background:#eee;cursor:pointer}.datepicker table tr td.new,.datepicker table tr td.old{color:#999}.datepicker table tr td.disabled,.datepicker table tr td.disabled:hover{background:0 0;color:#999;cursor:default}.datepicker 
table tr td.highlighted{background:#d9edf7;border-radius:0}.datepicker table tr td.today,.datepicker table tr td.today.disabled,.datepicker table tr td.today.disabled:hover,.datepicker table tr td.today:hover{background-color:#fde19a;background-image:-moz-linear-gradient(to bottom,#fdd49a,#fdf59a);background-image:-ms-linear-gradient(to bottom,#fdd49a,#fdf59a);background-image:-webkit-gradient(linear,0 0,0 100%,from(#fdd49a),to(#fdf59a));background-image:-webkit-linear-gradient(to bottom,#fdd49a,#fdf59a);background-image:-o-linear-gradient(to bottom,#fdd49a,#fdf59a);background-image:linear-gradient(to bottom,#fdd49a,#fdf59a);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#fdd49a', endColorstr='#fdf59a', GradientType=0);border-color:#fdf59a #fdf59a #fbed50;border-color:rgba(0,0,0,.1) rgba(0,0,0,.1) rgba(0,0,0,.25);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);color:#000}.datepicker table tr td.today.active,.datepicker table tr td.today.disabled,.datepicker table tr td.today.disabled.active,.datepicker table tr td.today.disabled.disabled,.datepicker table tr td.today.disabled:active,.datepicker table tr td.today.disabled:hover,.datepicker table tr td.today.disabled:hover.active,.datepicker table tr td.today.disabled:hover.disabled,.datepicker table tr td.today.disabled:hover:active,.datepicker table tr td.today.disabled:hover:hover,.datepicker table tr td.today.disabled:hover[disabled],.datepicker table tr td.today.disabled[disabled],.datepicker table tr td.today:active,.datepicker table tr td.today:hover,.datepicker table tr td.today:hover.active,.datepicker table tr td.today:hover.disabled,.datepicker table tr td.today:hover:active,.datepicker table tr td.today:hover:hover,.datepicker table tr td.today:hover[disabled],.datepicker table tr td.today[disabled]{background-color:#fdf59a}.datepicker table tr td.today.active,.datepicker table tr td.today.disabled.active,.datepicker table tr 
td.today.disabled:active,.datepicker table tr td.today.disabled:hover.active,.datepicker table tr td.today.disabled:hover:active,.datepicker table tr td.today:active,.datepicker table tr td.today:hover.active,.datepicker table tr td.today:hover:active{background-color:#fbf069\9}.datepicker table tr td.today:hover:hover{color:#000}.datepicker table tr td.today.active:hover{color:#fff}.datepicker table tr td.range,.datepicker table tr td.range.disabled,.datepicker table tr td.range.disabled:hover,.datepicker table tr td.range:hover{background:#eee;-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.datepicker table tr td.range.today,.datepicker table tr td.range.today.disabled,.datepicker table tr td.range.today.disabled:hover,.datepicker table tr td.range.today:hover{background-color:#f3d17a;background-image:-moz-linear-gradient(to bottom,#f3c17a,#f3e97a);background-image:-ms-linear-gradient(to bottom,#f3c17a,#f3e97a);background-image:-webkit-gradient(linear,0 0,0 100%,from(#f3c17a),to(#f3e97a));background-image:-webkit-linear-gradient(to bottom,#f3c17a,#f3e97a);background-image:-o-linear-gradient(to bottom,#f3c17a,#f3e97a);background-image:linear-gradient(to bottom,#f3c17a,#f3e97a);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#f3c17a', endColorstr='#f3e97a', GradientType=0);border-color:#f3e97a #f3e97a #edde34;border-color:rgba(0,0,0,.1) rgba(0,0,0,.1) rgba(0,0,0,.25);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);-webkit-border-radius:0;-moz-border-radius:0;border-radius:0}.datepicker table tr td.range.today.active,.datepicker table tr td.range.today.disabled,.datepicker table tr td.range.today.disabled.active,.datepicker table tr td.range.today.disabled.disabled,.datepicker table tr td.range.today.disabled:active,.datepicker table tr td.range.today.disabled:hover,.datepicker table tr td.range.today.disabled:hover.active,.datepicker table tr 
td.range.today.disabled:hover.disabled,.datepicker table tr td.range.today.disabled:hover:active,.datepicker table tr td.range.today.disabled:hover:hover,.datepicker table tr td.range.today.disabled:hover[disabled],.datepicker table tr td.range.today.disabled[disabled],.datepicker table tr td.range.today:active,.datepicker table tr td.range.today:hover,.datepicker table tr td.range.today:hover.active,.datepicker table tr td.range.today:hover.disabled,.datepicker table tr td.range.today:hover:active,.datepicker table tr td.range.today:hover:hover,.datepicker table tr td.range.today:hover[disabled],.datepicker table tr td.range.today[disabled]{background-color:#f3e97a}.datepicker table tr td.range.today.active,.datepicker table tr td.range.today.disabled.active,.datepicker table tr td.range.today.disabled:active,.datepicker table tr td.range.today.disabled:hover.active,.datepicker table tr td.range.today.disabled:hover:active,.datepicker table tr td.range.today:active,.datepicker table tr td.range.today:hover.active,.datepicker table tr td.range.today:hover:active{background-color:#efe24b\9}.datepicker table tr td.selected,.datepicker table tr td.selected.disabled,.datepicker table tr td.selected.disabled:hover,.datepicker table tr td.selected:hover{background-color:#9e9e9e;background-image:-moz-linear-gradient(to bottom,#b3b3b3,grey);background-image:-ms-linear-gradient(to bottom,#b3b3b3,grey);background-image:-webkit-gradient(linear,0 0,0 100%,from(#b3b3b3),to(grey));background-image:-webkit-linear-gradient(to bottom,#b3b3b3,grey);background-image:-o-linear-gradient(to bottom,#b3b3b3,grey);background-image:linear-gradient(to bottom,#b3b3b3,grey);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#b3b3b3', endColorstr='#808080', GradientType=0);border-color:grey grey #595959;border-color:rgba(0,0,0,.1) rgba(0,0,0,.1) rgba(0,0,0,.25);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);color:#fff;text-shadow:0 -1px 
0 rgba(0,0,0,.25)}.datepicker table tr td.selected.active,.datepicker table tr td.selected.disabled,.datepicker table tr td.selected.disabled.active,.datepicker table tr td.selected.disabled.disabled,.datepicker table tr td.selected.disabled:active,.datepicker table tr td.selected.disabled:hover,.datepicker table tr td.selected.disabled:hover.active,.datepicker table tr td.selected.disabled:hover.disabled,.datepicker table tr td.selected.disabled:hover:active,.datepicker table tr td.selected.disabled:hover:hover,.datepicker table tr td.selected.disabled:hover[disabled],.datepicker table tr td.selected.disabled[disabled],.datepicker table tr td.selected:active,.datepicker table tr td.selected:hover,.datepicker table tr td.selected:hover.active,.datepicker table tr td.selected:hover.disabled,.datepicker table tr td.selected:hover:active,.datepicker table tr td.selected:hover:hover,.datepicker table tr td.selected:hover[disabled],.datepicker table tr td.selected[disabled]{background-color:grey}.datepicker table tr td.selected.active,.datepicker table tr td.selected.disabled.active,.datepicker table tr td.selected.disabled:active,.datepicker table tr td.selected.disabled:hover.active,.datepicker table tr td.selected.disabled:hover:active,.datepicker table tr td.selected:active,.datepicker table tr td.selected:hover.active,.datepicker table tr td.selected:hover:active{background-color:#666\9}.datepicker table tr td.active,.datepicker table tr td.active.disabled,.datepicker table tr td.active.disabled:hover,.datepicker table tr td.active:hover{background-color:#006dcc;background-image:-moz-linear-gradient(to bottom,#08c,#04c);background-image:-ms-linear-gradient(to bottom,#08c,#04c);background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(to bottom,#08c,#04c);background-image:-o-linear-gradient(to bottom,#08c,#04c);background-image:linear-gradient(to 
bottom,#08c,#04c);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#0088cc', endColorstr='#0044cc', GradientType=0);border-color:#04c #04c #002a80;border-color:rgba(0,0,0,.1) rgba(0,0,0,.1) rgba(0,0,0,.25);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,.25)}.datepicker table tr td.active.active,.datepicker table tr td.active.disabled,.datepicker table tr td.active.disabled.active,.datepicker table tr td.active.disabled.disabled,.datepicker table tr td.active.disabled:active,.datepicker table tr td.active.disabled:hover,.datepicker table tr td.active.disabled:hover.active,.datepicker table tr td.active.disabled:hover.disabled,.datepicker table tr td.active.disabled:hover:active,.datepicker table tr td.active.disabled:hover:hover,.datepicker table tr td.active.disabled:hover[disabled],.datepicker table tr td.active.disabled[disabled],.datepicker table tr td.active:active,.datepicker table tr td.active:hover,.datepicker table tr td.active:hover.active,.datepicker table tr td.active:hover.disabled,.datepicker table tr td.active:hover:active,.datepicker table tr td.active:hover:hover,.datepicker table tr td.active:hover[disabled],.datepicker table tr td.active[disabled]{background-color:#04c}.datepicker table tr td.active.active,.datepicker table tr td.active.disabled.active,.datepicker table tr td.active.disabled:active,.datepicker table tr td.active.disabled:hover.active,.datepicker table tr td.active.disabled:hover:active,.datepicker table tr td.active:active,.datepicker table tr td.active:hover.active,.datepicker table tr td.active:hover:active{background-color:#039\9}.datepicker table tr td span{display:block;width:23%;height:54px;line-height:54px;float:left;margin:1%;cursor:pointer;-webkit-border-radius:4px;-moz-border-radius:4px;border-radius:4px}.datepicker table tr td span.focused,.datepicker table tr td span:hover{background:#eee}.datepicker table tr td 
span.disabled,.datepicker table tr td span.disabled:hover{background:0 0;color:#999;cursor:default}.datepicker table tr td span.active,.datepicker table tr td span.active.disabled,.datepicker table tr td span.active.disabled:hover,.datepicker table tr td span.active:hover{background-color:#006dcc;background-image:-moz-linear-gradient(to bottom,#08c,#04c);background-image:-ms-linear-gradient(to bottom,#08c,#04c);background-image:-webkit-gradient(linear,0 0,0 100%,from(#08c),to(#04c));background-image:-webkit-linear-gradient(to bottom,#08c,#04c);background-image:-o-linear-gradient(to bottom,#08c,#04c);background-image:linear-gradient(to bottom,#08c,#04c);background-repeat:repeat-x;filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#0088cc', endColorstr='#0044cc', GradientType=0);border-color:#04c #04c #002a80;border-color:rgba(0,0,0,.1) rgba(0,0,0,.1) rgba(0,0,0,.25);filter:progid:DXImageTransform.Microsoft.gradient(enabled=false);color:#fff;text-shadow:0 -1px 0 rgba(0,0,0,.25)}.datepicker table tr td span.active.active,.datepicker table tr td span.active.disabled,.datepicker table tr td span.active.disabled.active,.datepicker table tr td span.active.disabled.disabled,.datepicker table tr td span.active.disabled:active,.datepicker table tr td span.active.disabled:hover,.datepicker table tr td span.active.disabled:hover.active,.datepicker table tr td span.active.disabled:hover.disabled,.datepicker table tr td span.active.disabled:hover:active,.datepicker table tr td span.active.disabled:hover:hover,.datepicker table tr td span.active.disabled:hover[disabled],.datepicker table tr td span.active.disabled[disabled],.datepicker table tr td span.active:active,.datepicker table tr td span.active:hover,.datepicker table tr td span.active:hover.active,.datepicker table tr td span.active:hover.disabled,.datepicker table tr td span.active:hover:active,.datepicker table tr td span.active:hover:hover,.datepicker table tr td span.active:hover[disabled],.datepicker 
table tr td span.active[disabled]{background-color:#04c}.datepicker table tr td span.active.active,.datepicker table tr td span.active.disabled.active,.datepicker table tr td span.active.disabled:active,.datepicker table tr td span.active.disabled:hover.active,.datepicker table tr td span.active.disabled:hover:active,.datepicker table tr td span.active:active,.datepicker table tr td span.active:hover.active,.datepicker table tr td span.active:hover:active{background-color:#039\9}.datepicker table tr td span.new,.datepicker table tr td span.old{color:#999}.datepicker .datepicker-switch{width:145px}.datepicker .datepicker-switch,.datepicker .next,.datepicker .prev,.datepicker tfoot tr th{cursor:pointer}.datepicker .datepicker-switch:hover,.datepicker .next:hover,.datepicker .prev:hover,.datepicker tfoot tr th:hover{background:#eee}.datepicker .cw{font-size:10px;width:12px;padding:0 2px 0 5px;vertical-align:middle}.input-append.date .add-on,.input-prepend.date .add-on{cursor:pointer}.input-append.date .add-on i,.input-prepend.date .add-on i{margin-top:3px}.input-daterange input{text-align:center}.input-daterange input:first-child{-webkit-border-radius:3px 0 0 3px;-moz-border-radius:3px 0 0 3px;border-radius:3px 0 0 3px}.input-daterange input:last-child{-webkit-border-radius:0 3px 3px 0;-moz-border-radius:0 3px 3px 0;border-radius:0 3px 3px 0}.input-daterange .add-on{display:inline-block;width:auto;min-width:16px;height:18px;padding:4px 5px;font-weight:400;line-height:18px;text-align:center;text-shadow:0 1px 0 #fff;vertical-align:middle;background-color:#eee;border:1px solid #ccc;margin-left:-5px;margin-right:-5px}
9 | /*# sourceMappingURL=bootstrap-datepicker.min.css.map */
--------------------------------------------------------------------------------