├── changelog.md
├── log.txt
├── requirements.txt
├── screenshot
├── wx.png
└── zfb.png
├── wechatsogou
├── db.pyc
├── api.pyc
├── base.pyc
├── basic.pyc
├── tools.pyc
├── config.pyc
├── __init__.pyc
├── exceptions.pyc
├── filecache.pyc
├── ruokuaicode.pyc
├── __pycache__
│ ├── db.cpython-35.pyc
│ ├── api.cpython-35.pyc
│ ├── base.cpython-35.pyc
│ ├── basic.cpython-35.pyc
│ ├── config.cpython-35.pyc
│ ├── tools.cpython-35.pyc
│ ├── __init__.cpython-35.pyc
│ ├── filecache.cpython-35.pyc
│ ├── exceptions.cpython-35.pyc
│ └── ruokuaicode.cpython-35.pyc
├── base.py
├── __init__.py
├── config.py
├── tools.py
├── exceptions.py
├── filecache.py
├── ruokuaicode.py
├── db.py
├── basic.py
└── api.py
├── cache
├── 2029240f6d1128be89ddc32729463129
└── 8f0f136a8d509c9a5f221e61e813c820
├── test.py
├── logging.conf
├── auto_add_mp_logging.conf
├── README.md
├── auto_add_mp.py
├── cookies.txt
├── updatewenzhang.py
├── updatemp.py
├── jubang.sql
└── auto_add_mp_log.txt
/changelog.md:
--------------------------------------------------------------------------------
1 | # 1.0.0
2 |
3 | - 重写项目
--------------------------------------------------------------------------------
/log.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/log.txt
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | bs4
2 | requests
3 | PyMySQL
4 | lxml
5 | pillow
6 | werkzeug
7 |
--------------------------------------------------------------------------------
/screenshot/wx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/screenshot/wx.png
--------------------------------------------------------------------------------
/screenshot/zfb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/screenshot/zfb.png
--------------------------------------------------------------------------------
/wechatsogou/db.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/db.pyc
--------------------------------------------------------------------------------
/wechatsogou/api.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/api.pyc
--------------------------------------------------------------------------------
/wechatsogou/base.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/base.pyc
--------------------------------------------------------------------------------
/wechatsogou/basic.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/basic.pyc
--------------------------------------------------------------------------------
/wechatsogou/tools.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/tools.pyc
--------------------------------------------------------------------------------
/wechatsogou/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/config.pyc
--------------------------------------------------------------------------------
/wechatsogou/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__init__.pyc
--------------------------------------------------------------------------------
/wechatsogou/exceptions.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/exceptions.pyc
--------------------------------------------------------------------------------
/wechatsogou/filecache.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/filecache.pyc
--------------------------------------------------------------------------------
/wechatsogou/ruokuaicode.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/ruokuaicode.pyc
--------------------------------------------------------------------------------
/cache/2029240f6d1128be89ddc32729463129:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/cache/2029240f6d1128be89ddc32729463129
--------------------------------------------------------------------------------
/cache/8f0f136a8d509c9a5f221e61e813c820:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/cache/8f0f136a8d509c9a5f221e61e813c820
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/db.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/db.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/api.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/api.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/base.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/base.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
class WechatSogouBase(object):
    """Common base class of the Sogou-search-based WeChat official-account crawler API."""
7 |
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/basic.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/basic.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/config.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/tools.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/tools.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/filecache.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/filecache.cpython-35.pyc
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #查找公众号最新文章
3 | import sys
4 |
# Show the interpreter's major version, then whether it is Python 3 or newer.
is_python3 = sys.version_info[0] > 2
print(sys.version_info[0])
print(is_python3)
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/exceptions.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/exceptions.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__pycache__/ruokuaicode.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jaryee/wechat_sogou_crawl/HEAD/wechatsogou/__pycache__/ruokuaicode.cpython-35.pyc
--------------------------------------------------------------------------------
/wechatsogou/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from wechatsogou.api import WechatSogouApi
4 | from wechatsogou.db import mysql
5 | from wechatsogou.filecache import WechatCache
6 |
7 | __all__ = ['WechatSogouApi', 'WechatCache', 'mysql']
8 |
9 | __version__ = "1.1.7"
10 |
--------------------------------------------------------------------------------
/wechatsogou/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Cache settings
4 | cache_dir = 'cache'
5 | cache_session_name = 'requests_wechatsogou_session'
6 |
7 | # MySQL database settings
8 | host = '127.0.0.1'
9 | user = 'sougou' # database user name
10 | passwd = '123456' # database password
11 | db = 'jubang' # default database
12 | charset = 'utf8mb4'
13 | prefix = '' # default table-name prefix; may be left empty
14 |
15 | # Settings for the ruokuai captcha-solving service: http://www.ruokuai.com/
16 | # After registering and topping up the account it can be used directly; solving one captcha costs about 0.008 yuan.
17 | # Sogou WeChat sometimes rejects an answer even though the captcha was solved correctly; nothing can be done about it, but fortunately this is very rare.
18 | dama_name = 'xxx' # user name
19 | dama_pswd = 'xxx' # password
20 |
--------------------------------------------------------------------------------
/logging.conf:
--------------------------------------------------------------------------------
1 | [loggers]
2 | keys=root
3 |
4 | [handlers]
5 | keys=rotateFileHandler
6 |
7 | [formatters]
8 | keys=simpleFormatter
9 |
10 | [logger_root]
11 | level=WARNING
12 | handlers=rotateFileHandler
13 | qualname=simpleExample
14 | propagate=0
15 |
16 | [handler_rotateFileHandler]
17 | class=handlers.RotatingFileHandler
18 | level=WARNING
19 | formatter=simpleFormatter
20 | args=('log.txt', 'a+', 200000, 9)
21 |
22 | [formatter_simpleFormatter]
23 | format=%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s
24 | datefmt=
--------------------------------------------------------------------------------
/auto_add_mp_logging.conf:
--------------------------------------------------------------------------------
1 | [loggers]
2 | keys=root
3 |
4 | [handlers]
5 | keys=rotateFileHandler
6 |
7 | [formatters]
8 | keys=simpleFormatter
9 |
10 | [logger_root]
11 | level=WARNING
12 | handlers=rotateFileHandler
13 | qualname=simpleExample
14 | propagate=0
15 |
16 | [handler_rotateFileHandler]
17 | class=handlers.RotatingFileHandler
18 | level=WARNING
19 | formatter=simpleFormatter
20 | args=('auto_add_mp_log.txt', 'a+', 200000, 9)
21 |
22 | [formatter_simpleFormatter]
23 | format=%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s
24 | datefmt=
--------------------------------------------------------------------------------
/wechatsogou/tools.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import json
4 |
def prdict(content):
    """Print ``content`` as JSON, indented by one space, without escaping non-ASCII text."""
    print(json.dumps(content, ensure_ascii=False, indent=1))
8 |
def list_or_empty(content, contype=None):
    """Return the first item of a list, or an empty default when the list is empty.

    Args:
        content: The list to read from.
        contype: Optional callable applied to the first item of a non-empty
            list. For an empty list it selects the default that is returned
            and must then be ``int``, ``str`` or ``list``.

    Returns:
        ``content[0]`` (passed through ``contype`` when one is given) for a
        non-empty list. For an empty list: ``0`` for ``int``, ``''`` for
        ``str`` or when no ``contype`` is given, and a new ``[]`` for ``list``.

    Raises:
        TypeError: ``content`` is not a list.
        ValueError: ``content`` is empty and ``contype`` is not one of
            ``int``, ``str`` or ``list``.
    """
    # Both exceptions below subclass Exception, so callers that caught the
    # plain Exception raised previously keep working.
    if not isinstance(content, list):
        raise TypeError('need list')
    if content:
        return contype(content[0]) if contype else content[0]
    if not contype:
        return ''
    if contype in (int, str, list):
        # int() == 0, str() == '' and list() == [] are exactly the defaults,
        # and calling the type yields a fresh list every time.
        return contype()
    raise ValueError('only can deal int str list')
--------------------------------------------------------------------------------
/wechatsogou/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
class WechatSogouException(Exception):
    """Base exception of the Sogou-search-based WeChat official-account crawler API."""
7 |
8 |
class WechatSogouVcodeException(WechatSogouException):
    """Crawler exception for the case where a captcha (verification code) appears."""
13 |
14 |
class WechatSogouJsonException(WechatSogouException):
    """Crawler exception for data that is not standard JSON."""
19 |
20 |
class WechatSogouEndException(WechatSogouException):
    """Crawler exception signalling that data processing has finished."""
25 |
class WechatSogouBreakException(WechatSogouException):
    """Crawler exception signalling an interruption."""
30 |
class WechatSogouHistoryMsgException(WechatSogouException):
    """Crawler exception described by the original author as "data processing finished"."""
    # NOTE(review): the class name suggests this concerns history messages;
    # confirm the intended meaning against the code that raises it.
35 |
class ConfigException(WechatSogouException):
    """Crawler exception for configuration errors."""
40 |
class WechatSogouRequestsException(WechatSogouException):
    """Crawler exception for failures while fetching a page.

    Attributes:
        status_code: The status code supplied by the code raising the
            exception.
    """

    def __init__(self, errmsg, status_code):
        """Initialise the exception.

        Args:
            errmsg: Human-readable error message; becomes ``str(exc)``.
            status_code: Status code stored on ``self.status_code``.
        """
        # The previous code built a separate, discarded
        # WechatSogouException(errmsg) instead of initialising this instance,
        # so str(exc) rendered the (errmsg, status_code) tuple. The two-
        # argument super() form keeps Python 2 compatibility.
        super(WechatSogouRequestsException, self).__init__(errmsg)
        self.status_code = status_code
48 |
--------------------------------------------------------------------------------
/wechatsogou/filecache.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from werkzeug.contrib.cache import FileSystemCache
4 |
5 | from .base import WechatSogouBase
6 |
class WechatCache(WechatSogouBase):
    """File-based cache: a thin wrapper around werkzeug's ``FileSystemCache``."""

    def __init__(self, cache_dir='cache', default_timeout=300):
        """Create the backing ``FileSystemCache``.

        cache_dir: directory in which cache files are kept.
        default_timeout: forwarded to ``FileSystemCache`` as ``default_timeout``.
        """
        self.cache = FileSystemCache(cache_dir, default_timeout=default_timeout)

    def clear(self):
        """Empty the cache and return the backend's result."""
        backend = self.cache
        return backend.clear()

    def get(self, key):
        """Return the value cached under ``key``.

        The original documentation states that ``None`` is returned when
        nothing is cached for ``key``.
        """
        backend = self.cache
        return backend.get(key)

    def add(self, key, value, timeout=None):
        """Store ``value`` under ``key`` only if ``key`` is not cached yet.

        ``timeout`` is forwarded to the backend unchanged. The original
        documentation states that an existing entry is not overwritten and
        ``False`` is returned in that case.
        """
        backend = self.cache
        return backend.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Store ``value`` under ``key``; ``timeout`` is forwarded unchanged."""
        backend = self.cache
        return backend.set(key, value, timeout)

    def delete(self, key):
        """Remove the entry stored under ``key`` and return the backend's result."""
        backend = self.cache
        return backend.delete(key)
53 |
54 |
if __name__ == '__main__':
    # Manual smoke test: store a requests session in the cache and read it back.
    import requests

    cache = WechatCache()
    session = requests.session()
    print(cache.set('1', session))
    print(cache.get('1'), type(cache.get('1')))
62 |
--------------------------------------------------------------------------------
/wechatsogou/ruokuaicode.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import requests
4 | from hashlib import md5
5 |
6 | from .base import WechatSogouBase
7 |
class RClient(WechatSogouBase):
    """HTTP client for the ruokuai (api.ruokuai.com) captcha-recognition API."""

    def __init__(self, username, password, soft_id, soft_key):
        """Remember the account and software credentials sent with every call.

        Only the hex MD5 digest of the UTF-8 encoded ``password`` is stored.
        """
        password_digest = md5(password.encode('utf-8')).hexdigest()
        self.username = username
        self.password = password_digest
        self.soft_id = soft_id
        self.soft_key = soft_key
        # Form fields shared by every request.
        self.base_params = {
            'username': username,
            'password': password_digest,
            'softid': soft_id,
            'softkey': soft_key,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'Expect': '100-continue',
            'User-Agent': 'ben',
        }

    def create(self, im, im_type, timeout=60):
        """Upload an image for recognition and return the decoded JSON reply.

        im: image bytes
        im_type: question type id, sent as ``typeid``
        timeout: sent to the service as the ``timeout`` form field
        """
        form = {'typeid': im_type, 'timeout': timeout}
        form.update(self.base_params)
        response = requests.post(
            'http://api.ruokuai.com/create.json',
            data=form,
            files={'image': ('a.jpg', im)},
            headers=self.headers,
        )
        return response.json()

    def report_error(self, im_id):
        """Report a wrongly answered question and return the decoded JSON reply.

        im_id: id of the question being reported
        """
        form = {'id': im_id}
        form.update(self.base_params)
        response = requests.post(
            'http://api.ruokuai.com/reporterror.json',
            data=form,
            headers=self.headers,
        )
        return response.json()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 基于搜狗微信搜索的微信公众号爬虫
2 | ===
3 |
4 | [wechat_sogou_crawl](https://github.com/jaryee/wechat_sogou_crawl)
5 |
6 | 2019-03-30 适应搜狗2019-03-29规则变化
7 |
8 | 2019-03-07 增加对py3的支持,同时支持py2和py3
9 |
10 | 2017-04-27 搜狗微信取消了阅读、点赞及评论数据,所以无法再通过搜狗获取这些数据了。
11 |
12 | # 项目简介
13 | 基于搜狗微信搜索的微信公众号爬虫
14 | 可以抓取指定公众号的文章信息
15 |
16 | # 赞助作者
17 | 俺是自由职业者,好汉们如果可能的话赞助一些让俺将开源事业进行到底,谢谢!!!
18 |
19 |
20 |
21 |
22 | 兄弟我弄了个淘宝店,有时间的兄弟给捧个场啊,新店需要信誉积分,跪谢!只要一块钱,就能温暖你我他
23 | https://item.taobao.com/item.htm?spm=a230r.1.14.16.PRhaio&id=543333631871&ns=1&abbucket=6#detail
24 |
25 |
26 |
27 | 使用教程大家可以去我的博客查看:
28 | http://blog.csdn.net/niuxiaojia09/article/details/55260770
29 |
30 |
31 | 2017-1-20 增加如何使程序进入搜狗微信登录状态的说明,在updatemp.py和updatewenzhang.py中都有操作说明
32 | 2017-3-21 在wechatsogou/api.py中增加把文章本地化的函数,可以根据自己的需要把文章下载到本地
33 |
34 | # 项目使用
35 |
36 | 一、使用说明
37 |
38 | 1、在mysql数据库中创建数据库,数据库命名为jubang(与config.py中的db设置保持一致),字符集为utf8mb4,然后导入jubang.sql文件,创建对应的数据库表
39 |
40 | 2、修改config.py文件中对应的设置,打码平台配置ruokuai这个一定要设置,否则出现验证码就不能正常工作了
41 |
42 | 3、执行:pip install -r requirements.txt 安装所需要的第三方包
43 |
44 | 4、手动或自动在add_mp_list表中增加数据,然后运行auto_add_mp.py文件。
45 | 比如可以这样用:给auto_add_mp.py设定一个定时任务,5分钟或10分钟,然后前台页面文件让使用者添加待抓取的
46 | 公众号信息,然后定时任务执行时就可以把这些公众号加入待抓取列表了
47 | add_mp_list中
48 | name字段是模糊抓取,会根据输入的名称模糊加入10个公众号
49 | wx_hao字段是精确抓取,这个是公众号的微信号,只抓取一个
50 | 这两个字段可以任意填入一个就行
51 |
52 | 5、执行updatemp.py文件,文件说明看后面。使用中可以给该文件设定定时任务30分钟或其它间隔,每隔一定时间,运行该
53 | 文件就会抓取已添加的公众号是否有新文章发出来。
54 | 第一次使用会抓取公众号的最近10条群发数据
55 |
56 | 6、执行updatewenzhang.py文件,该文件是抓取文章阅读数及点赞数的。最新的数据会写入wenzhang_info表中,并且会在表wenzhang_statistics中
57 | 添加增量记录,可以根据wenzhang_statistics表中的数据生成曲线图
58 | 使用中可以给该文件添加5分钟或其它时间的定时任务,这样就可以来生成对应的阅读曲线图了
59 |
60 | 二、文件说明
61 |
62 | 1、updatemp.py
63 | 该文件遍历待抓取列表(数据库表:mp_info),查询表中的公众号是否有新文章发布,如果有,就抓取新的文章信息并
64 | 放入数据库表wenzhang_info中
65 |
66 | 2、updatewenzhang.py
67 | 该文件遍历文章表,然后抓取24小时之内的文章阅读数据存入表wenzhang_info和表wenzhang_statistics中
68 |
69 | 3、 auto_add_mp.py
70 | 该文件将指定的公众号添加到待抓取列表中
71 | 该文件读取数据库表(add_mp_list)中的内容,然后将其中指定的公众号填入数据库表(mp_info)中
72 |
73 |
74 |
75 | # TODO
76 | - [x] 使用py2.7
77 | - [x] 获取指定公众号文章
78 | - [x] 文章详情页信息
79 | - [x] 验证码自动识别
80 |
81 | ---
82 |
--------------------------------------------------------------------------------
/auto_add_mp.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #添加指定公众号到爬虫数据库
3 |
4 | # 导入包
5 | from wechatsogou.tools import *
6 | from wechatsogou import *
7 | from PIL import Image
8 | import datetime
9 | import time
10 | import sys,locale
11 | import logging
12 | import logging.config
13 |
# Logging: handlers and format are read from auto_add_mp_logging.conf.
logging.config.fileConfig('auto_add_mp_logging.conf')
logger = logging.getLogger()

# Search API instance.
wechats = WechatSogouApi()

# Database helper, initially bound to the add_mp_list table.
# NOTE: this rebinds the imported ``mysql`` name from the class to an instance.
mysql = mysql('add_mp_list')


def _mp_info_row(info):
    """Build the mp_info row for one account-info dict returned by the search API."""
    return {
        'name': info['name'],
        'wx_hao': info['wechatid'],
        'company': info['renzhen'],
        'description': info['jieshao'],
        'logo_url': info['img'],
        'qr_url': info['qrcode'],
        'wz_url': info['url'],
        'last_qunfa_id': 0,
        'create_time': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())),
    }


# Tolerate a helper that returns None (rather than an empty list) when the
# table holds no pending requests.
add_list = mysql.find(0) or []

for add_item in add_list:
    try:
        print(add_item)
        if add_item['wx_hao']:
            # Exact match: add the single account with this WeChat id.
            print("add by wx_hao")
            # NOTE(review): the value is concatenated into SQL unescaped; if
            # add_mp_list can be filled from a user-facing page, switch to an
            # escaping/parameterised query of the db helper.
            mysql.where_sql = "wx_hao ='" + add_item['wx_hao'] + "'"
            mp_data = mysql.table('mp_info').find(1)
            if mp_data:
                print(u"已经存在的公众号")
            else:
                wechat_info = wechats.get_gzh_info(add_item['wx_hao'])
                time.sleep(1)
                if wechat_info != "":
                    mysql.table('mp_info').add(_mp_info_row(wechat_info))
        elif add_item['name']:
            # Fuzzy match: add every account returned by a name search.
            print("add by name")
            wechat_infos = wechats.search_gzh_info(add_item['name'].encode('utf8'))
            time.sleep(1)
            for wx_item in wechat_infos:
                print(wx_item['name'])
                # Skip accounts that are already stored in mp_info.
                # NOTE(review): same unescaped concatenation as above.
                mysql.where_sql = "wx_hao ='" + wx_item['wechatid'] + "'"
                print(mysql.where_sql)
                mp_data = mysql.table('mp_info').find(1)
                if mp_data:
                    print(u"已经存在的公众号")
                else:
                    # The previous ``.decode("utf-8")`` call here raised
                    # AttributeError on Python 3 (str has no decode), which
                    # the blanket handler swallowed, so nothing was ever
                    # added by name.
                    print(wx_item['name'])
                    mysql.table('mp_info').add(_mp_info_row(wx_item))

        # Remove the processed request from the queue table.
        mysql.table('add_mp_list').where({'_id': add_item['_id']}).delete()
    except KeyboardInterrupt:
        # Let Ctrl-C stop the run, as the other update scripts do.
        break
    except Exception:
        # Best effort: record the traceback in the configured log file and
        # carry on with the next request instead of hiding the failure.
        logger.exception("failed to process add_mp_list entry %r", add_item)
        print(u"出错,继续")
        continue


print("success")
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/cookies.txt:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "domain": ".sogou.com",
4 | "expirationDate": 1585149164.37005,
5 | "hostOnly": false,
6 | "httpOnly": false,
7 | "name": "IPLOC",
8 | "path": "/",
9 | "sameSite": "no_restriction",
10 | "secure": false,
11 | "session": false,
12 | "storeId": "0",
13 | "value": "CN1100",
14 | "id": 1
15 | },
16 | {
17 | "domain": ".sogou.com",
18 | "expirationDate": 1595289600,
19 | "hostOnly": false,
20 | "httpOnly": false,
21 | "name": "sct",
22 | "path": "/",
23 | "sameSite": "no_restriction",
24 | "secure": false,
25 | "session": false,
26 | "storeId": "0",
27 | "value": "39",
28 | "id": 2
29 | },
30 | {
31 | "domain": ".sogou.com",
32 | "expirationDate": 1585148845,
33 | "hostOnly": false,
34 | "httpOnly": false,
35 | "name": "SNUID",
36 | "path": "/",
37 | "sameSite": "no_restriction",
38 | "secure": false,
39 | "session": false,
40 | "storeId": "0",
41 | "value": "327715CDBFBA3ABE12793907BFD60EC1",
42 | "id": 3
43 | },
44 | {
45 | "domain": ".sogou.com",
46 | "expirationDate": 2181949036.801871,
47 | "hostOnly": false,
48 | "httpOnly": false,
49 | "name": "SUID",
50 | "path": "/",
51 | "sameSite": "no_restriction",
52 | "secure": false,
53 | "session": false,
54 | "storeId": "0",
55 | "value": "6078AB732320940A000000005C75E06B",
56 | "id": 4
57 | },
58 | {
59 | "domain": ".sogou.com",
60 | "expirationDate": 1866589037.181813,
61 | "hostOnly": false,
62 | "httpOnly": false,
63 | "name": "SUV",
64 | "path": "/",
65 | "sameSite": "no_restriction",
66 | "secure": false,
67 | "session": false,
68 | "storeId": "0",
69 | "value": "00E11DBD73AB78605C75E06C33CEE997",
70 | "id": 5
71 | },
72 | {
73 | "domain": ".weixin.sogou.com",
74 | "expirationDate": 2181949036.693053,
75 | "hostOnly": false,
76 | "httpOnly": false,
77 | "name": "SUID",
78 | "path": "/",
79 | "sameSite": "no_restriction",
80 | "secure": false,
81 | "session": false,
82 | "storeId": "0",
83 | "value": "6078AB737D29990A000000005C75E06B",
84 | "id": 6
85 | },
86 | {
87 | "domain": "weixin.sogou.com",
88 | "expirationDate": 1553821036.664727,
89 | "hostOnly": true,
90 | "httpOnly": false,
91 | "name": "ABTEST",
92 | "path": "/",
93 | "sameSite": "no_restriction",
94 | "secure": false,
95 | "session": false,
96 | "storeId": "0",
97 | "value": "0|1551229035|v1",
98 | "id": 7
99 | },
100 | {
101 | "domain": "weixin.sogou.com",
102 | "hostOnly": true,
103 | "httpOnly": false,
104 | "name": "JSESSIONID",
105 | "path": "/",
106 | "sameSite": "no_restriction",
107 | "secure": false,
108 | "session": true,
109 | "storeId": "0",
110 | "value": "aaataTUG3ggNN7aMO65Mw",
111 | "id": 8
112 | },
113 | {
114 | "domain": "weixin.sogou.com",
115 | "hostOnly": true,
116 | "httpOnly": false,
117 | "name": "PHPSESSID",
118 | "path": "/",
119 | "sameSite": "no_restriction",
120 | "secure": false,
121 | "session": true,
122 | "storeId": "0",
123 | "value": "hipbln966cc23kddoj9qb54385",
124 | "id": 9
125 | },
126 | {
127 | "domain": "weixin.sogou.com",
128 | "expirationDate": 1559869037,
129 | "hostOnly": true,
130 | "httpOnly": false,
131 | "name": "weixinIndexVisited",
132 | "path": "/",
133 | "sameSite": "no_restriction",
134 | "secure": false,
135 | "session": false,
136 | "storeId": "0",
137 | "value": "1",
138 | "id": 10
139 | }
140 | ]
--------------------------------------------------------------------------------
/updatewenzhang.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #更新文章阅读数据,目前一篇文章只监控24小时
3 |
4 | # 导入包
5 | from wechatsogou.tools import *
6 | from wechatsogou import *
7 | from PIL import Image
8 | import datetime
9 | import time
10 | import logging
11 | import logging.config
12 |
# Logging: handlers and format are read from logging.conf.
logging.config.fileConfig('logging.conf')
logger = logging.getLogger()


# Search API instance.
wechats = WechatSogouApi()

# To use external cookies (mainly to run in a logged-in Sogou WeChat state),
# install the Chrome browser and its EditThisCookie extension, then:
# 1. Log in to Sogou WeChat with Chrome.
# 2. Copy the current cookies with the EditThisCookie extension.
# 3. Paste them into cookies.txt in the code directory.
# 4. Enable the statement below.
#wechats = WechatSogouApi(cookies_file={'file_name':'cookies.txt'})


# Database helper bound to mp_info. The ordering is set on the instance after
# it exists (as updatemp.py does); it used to be assigned on the class before
# instantiation.
mysql = mysql('mp_info')
mysql.order_sql = " order by _id desc"

# All official accounts stored in the database; tolerate a None result.
mp_list = mysql.find(0) or []


now_time = datetime.datetime.now()
# Only articles from the last day are refreshed; use days=-2 for two days.
yes_time = now_time + datetime.timedelta(days=-1)
# Loop-invariant cutoff as a Unix timestamp, compared against each article.
cutoff_ts = time.mktime(yes_time.timetuple())
succ_count = 1

for item in mp_list:
    try:
        # For efficiency, first check whether the account has any stored
        # article newer than the cutoff.
        mysql.where_sql = "mp_id=%d and date_time >'%s'" % (item['_id'], yes_time)
        if not mysql.table('wenzhang_info').find(1):
            continue

        print(item['name'])

        # ``in`` replaces dict.has_key(), which no longer exists on Python 3;
        # the resulting AttributeError used to be swallowed below, so every
        # account was silently skipped.
        if 'wz_url' in item:
            wz_url = item['wz_url']
        else:
            wechat_info = wechats.get_gzh_info(item['wx_hao'])
            if 'url' not in wechat_info:
                continue
            wz_url = wechat_info['url']

        wz_list = wechats.get_gzh_message(url=wz_url)
        if u'链接已过期' in wz_list:
            # The stored article-list link has expired: look the account up
            # again, retry, and persist the fresh URLs.
            wechat_info = wechats.get_gzh_info(item['wx_hao'])
            print(wechat_info)
            if 'url' not in wechat_info:
                continue
            print('guo qi sz chong xin huo qu success')
            wz_url = wechat_info['url']
            wz_list = wechats.get_gzh_message(url=wz_url)
            mysql.where_sql = " _id=%s" % (item['_id'])
            mysql.table('mp_info').save({
                'wz_url': wechat_info['url'],
                'logo_url': wechat_info['img'],
                'qr_url': wechat_info['qrcode'],
            })

        for wz_item in wz_list:
            # Only articles inside the monitoring window are tracked.
            if wz_item['datetime'] < cutoff_ts:
                break
            # type == 49 means an image-and-text message; skip everything else.
            if wz_item['type'] != '49':
                continue

            # Fetch the article data.
            time.sleep(0.5)
            article_info = wechats.deal_article(url=wz_item['content_url'])
            mysql.where_sql = " mp_id=%d and qunfa_id=%d and msg_index=%d" % (
                item['_id'], wz_item['qunfa_id'], wz_item['main'])
            wz_data = mysql.table('wenzhang_info').find(1)
            if not wz_data:
                print(u"公众号有新文章了,请执行updatemp.py进行抓取")
                continue

            print(succ_count)
            succ_count += 1

            # Convert the fetched counters once so the %d formatting and the
            # arithmetic below work whether the API returns them as int or str.
            stats = article_info['comment']
            new_read = int(stats['read_num'])
            new_like = int(stats['like_num'])
            new_comment = int(stats['elected_comment_total_cnt'])

            # Counters currently stored for the article.
            read_count = wz_data['read_count']
            like_count = wz_data['like_count']
            comment_count = wz_data['comment_count']
            print("%d new_read:%d new_like:%d read:%d like:%d" % (
                wz_data['_id'], new_read, new_like, read_count, like_count))

            # Record the increment since the previous run.
            mysql.table('wenzhang_statistics').add({
                'wz_id': wz_data['_id'],
                'create_time': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())),
                'read_count': new_read - read_count,
                'like_count': new_like - like_count,
                'comment_count': new_comment - comment_count,
            })

            # Store the new totals on the article itself.
            mysql.where_sql = " _id=%s" % (wz_data['_id'])
            mysql.table('wenzhang_info').save({
                'read_count': new_read,
                'like_count': new_like,
                'comment_count': new_comment,
            })
    except KeyboardInterrupt:
        break
    except Exception:
        # Keep going on errors, but record the traceback in the configured
        # log file instead of discarding it.
        logger.exception("failed to update article statistics for %r", item)
        print(u"出错,继续")
        continue

print('success')
117 |
118 |
--------------------------------------------------------------------------------
/updatemp.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #查找公众号最新文章
3 |
4 | # 导入包
5 | from wechatsogou.tools import *
6 | from wechatsogou import *
7 | from PIL import Image
8 | import datetime
9 | import time
10 | import logging
11 | import logging.config
12 | import random
13 |
14 | # Logging
15 | logging.config.fileConfig('logging.conf')
16 | logger = logging.getLogger()
17 |
18 | # Search API instance
19 | wechats = WechatSogouApi() # without external cookies
20 |
21 |
22 | # To use external cookies (mainly to run in a logged-in Sogou WeChat state):
23 | # install the Chrome browser and its EditThisCookie extension
24 | # 1. Log in to Sogou WeChat with Chrome
25 | # 2. Copy the current cookies with the EditThisCookie extension
26 | # 3. Paste the cookies into cookies.txt in the code directory
27 | # 4. Enable the statement below
28 | #wechats = WechatSogouApi(cookies_file={'file_name':'cookies.txt'}) # use external cookies
29 |
30 |
31 | # Database instance
32 | mysql = mysql('mp_info')
33 |
34 | # Loop over all official accounts stored in the database
35 | mysql.order_sql = " order by _id desc"
36 | mp_list = mysql.find(0)
37 |
38 | succ_count = 0
39 |
40 | now_time = datetime.datetime.today()
41 | now_time = datetime.datetime(now_time.year, now_time.month, now_time.day, 0, 0, 0)
42 | #now_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(now_time))
43 |
44 | for item in mp_list:
45 | try:
46 | time.sleep(random.randrange(1,3))
47 | # Check whether this account has already published today
48 | last_qunfa_id = item['last_qunfa_id']
49 | last_qunfa_time = item['last_qufa_time']
50 |
51 | cur_qunfa_id = last_qunfa_id
52 | wz_url = item['wz_url']
53 |
54 | print(item['name'])
55 |
56 | # Fetch the most recent article list
57 | wz_list = wechats.get_gzh_message(url=wz_url)
58 | if u'链接已过期' in wz_list:
59 | wechat_info = wechats.get_gzh_info(item['wx_hao'])
60 | if 'url' not in wechat_info :
61 | continue
62 | print('guo qi sz chong xin huo qu success')
63 | wz_url = wechat_info['url'];
64 | wz_list = wechats.get_gzh_message(url=wz_url)
65 | mysql.where_sql = " _id=%s" %(item['_id'])
66 | mysql.table('mp_info').where({'_id':item['_id']}).save({'wz_url':wechat_info['url'],'logo_url':wechat_info['img'],'qr_url':wechat_info['qrcode']})
67 | # type == 49 means an image-and-text message
68 | qunfa_time = ''
69 | for wz_item in wz_list :
70 | temp_qunfa_id = int(wz_item['qunfa_id'])
71 | if(last_qunfa_id >= temp_qunfa_id):
72 | print(u"没有更新文章")
73 | print(u"")
74 | break
75 | if(cur_qunfa_id < temp_qunfa_id):
76 | cur_qunfa_id = temp_qunfa_id
77 | qunfa_time = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(wz_item['datetime']))
78 | succ_count += 1
79 | if wz_item['type'] == '49':
80 | # Write the article to the database
81 | # Update the article count
82 | print(succ_count)
83 | print(wz_item['content_url'])
84 | if not wz_item['content_url'] :
85 | continue
86 |
87 | sourceurl = wz_item['source_url']
88 | if len(sourceurl) >= 300 :
89 | sourceurl = ''
90 |
91 | # To download articles locally, enable the statement below; make sure urllib2, httplib2 and BeautifulSoup4 are installed
92 | # The return value is the path of the downloaded HTML file, which you can store in the database yourself
93 | #index_html_path = wechats.down_html(wz_item['content_url'],wz_item['title'])
94 |
95 | # Fetch the article body
96 | wz_content = wechats.deal_article_content(url=wz_item['content_url'])
97 |
98 | mysql.table('wenzhang_info').add({'title':wz_item['title'],
99 | 'source_url':sourceurl,
100 | 'content_url':wz_item['content_url'],
101 | 'cover_url':wz_item['cover'],
102 | 'description':wz_item['digest'],
103 | 'date_time': time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(wz_item['datetime'])),
104 | 'mp_id':item['_id'],
105 | 'author':wz_item['author'],
106 | 'msg_index':wz_item['main'],
107 | 'copyright_stat':wz_item['copyright_stat'],
108 | 'qunfa_id':wz_item['qunfa_id'],
109 | 'type':wz_item['type'],
110 | 'like_count':0,
111 | 'read_count':0,
112 | 'comment_count':0,
113 | 'content':wz_content})
114 |
115 |
116 |
117 | # Update the latest broadcast (qunfa) id
118 | if(last_qunfa_id < cur_qunfa_id):
119 | mysql.where_sql = " _id=%s" %(item['_id'])
120 | mysql.table('mp_info').save({'last_qunfa_id':cur_qunfa_id,'last_qufa_time':qunfa_time,'update_time':time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))})
121 | except KeyboardInterrupt:
122 | break
123 | # except: # enable this handler if an error should not make the program exit
124 | # print(u"出错,继续")
125 | # continue
126 |
127 | print('success')
--------------------------------------------------------------------------------
/jubang.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Navicat MySQL Data Transfer
3 |
4 | Source Server : 47.105.144.60
5 | Source Server Version : 50723
6 | Source Host : 47.105.144.60:3306
7 | Source Database : test
8 |
9 | Target Server Type : MYSQL
10 | Target Server Version : 50723
11 | File Encoding : 65001
12 |
13 | Date: 2019-03-07 20:26:19
14 | */
15 |
16 | SET FOREIGN_KEY_CHECKS=0;
17 |
18 | -- ----------------------------
19 | -- Table structure for `add_mp_list`
20 | -- ----------------------------
21 | DROP TABLE IF EXISTS `add_mp_list`;
22 | CREATE TABLE `add_mp_list` (
23 | `_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
24 | `name` varchar(50) DEFAULT '' COMMENT '要添加的公众号名称',
25 | `wx_hao` varchar(50) DEFAULT '' COMMENT '公众号的微信号',
26 | PRIMARY KEY (`_id`)
27 | ) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4;
28 |
29 | -- ----------------------------
30 | -- Records of add_mp_list
31 | -- ----------------------------
32 |
33 | -- ----------------------------
34 | -- Table structure for `mp_info`
35 | -- ----------------------------
36 | DROP TABLE IF EXISTS `mp_info`;
37 | CREATE TABLE `mp_info` (
38 | `_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
39 | `name` varchar(50) DEFAULT '' COMMENT '公众号名称',
40 | `wx_hao` varchar(20) DEFAULT '' COMMENT '公众号的微信号',
41 | `company` varchar(100) DEFAULT '' COMMENT '主体名称',
42 | `description` varchar(200) DEFAULT '' COMMENT '功能简介',
43 | `logo_url` varchar(200) DEFAULT '' COMMENT 'logo url',
44 | `qr_url` varchar(200) DEFAULT '' COMMENT '二维码URL',
45 | `create_time` datetime DEFAULT NULL COMMENT '加入牛榜时间',
46 | `update_time` datetime DEFAULT NULL COMMENT '最后更新时间',
47 | `rank_article_release_count` int(11) DEFAULT '0' COMMENT '群发次数',
48 | `rank_article_count` int(11) DEFAULT '0' COMMENT '群发篇数',
49 | `last_qunfa_id` int(30) DEFAULT '0' COMMENT '最后的群发ID',
50 | `last_qufa_time` datetime DEFAULT NULL COMMENT '最后一次群发的时间',
51 | `wz_url` varchar(300) DEFAULT '' COMMENT '最近文章URL',
52 | PRIMARY KEY (`_id`)
53 | ) ENGINE=InnoDB AUTO_INCREMENT=287 DEFAULT CHARSET=utf8mb4;
54 |
55 | -- ----------------------------
56 | -- Records of mp_info
57 | -- ----------------------------
58 | INSERT INTO `mp_info` VALUES ('266', '今日头条', 'headline_today', '北京字节跳动科技有限公司', '今日头条官方帐号', 'http://img01.sogoucdn.com/app/a/100520090/oIWsFt3Om27KzYpmW9LaBGPCUxaU', '', '2017-02-16 17:15:09', null, '0', '0', '0', null, 'http://mp.weixin.qq.com/profile?src=3&timestamp=1487236535&ver=1&signature=nDdjBk7tfBptUPQVaSHn*uoQ9hysPGOoChQf5umkzBbz3PSaIHThKmZzsU23I7vU1tNr6R6t8eQS6lC586yDLQ==');
59 | INSERT INTO `mp_info` VALUES ('276', '新榜', 'newrankcn', '上海看榜信息科技有限公司', '涨粉、变现、运营、观察,新榜给你不一样的新思路.新榜——内容创业服务平台,www.newrank.cn', 'http://img01.sogoucdn.com/app/a/100520090/oIWsFt3CUA6HniQM4e_i7zncqWkk', '', '2017-02-16 17:16:04', null, '0', '0', '0', null, 'http://mp.weixin.qq.com/profile?src=3&timestamp=1487236590&ver=1&signature=A38golU5GzltuG*u78AoIZkLnJS--EsX4PCDJyq3coRVjU3ZoBZ9UUWZNyOHDzCFw1Q34XVteeqgSGthakK1Ig==');
60 | INSERT INTO `mp_info` VALUES ('278', '娱乐新榜', 'yulexinbang', '北京快络科技有限公司', '娱乐新人第一自媒体平台,为导演找新人,为新人找发展.深度开挖新人潜力与特色,助力新人演艺事业快速起步.向导演制片等影视从业人员提供第一手新晋艺人资料,实现艺人资源与影视需求的完美对接.', '//img01.sogoucdn.com/app/a/100520090/oIWsFt8lrEWgjvNDVlT1S7wL5Nyw', '', '2017-02-16 17:16:04', null, '0', '0', '0', null, 'http://mp.weixin.qq.com/profile?src=3&timestamp=1551960049&ver=1&signature=fd*NZOcIHHxSZQ6Y44LFP1WmzZvhuKe0sJd2PpGunRcPNotPrCVBSO7sVIDjNkOkF8MkVzv35-iroU38v0GQww==');
61 | INSERT INTO `mp_info` VALUES ('286', '人民日报', 'rmrbwx', '人民日报社', '参与、沟通、记录时代.', '//img01.sogoucdn.com/app/a/100520090/oIWsFt8_jYUmdw1PQgNVhH9vOEvI', '', '2019-03-07 19:54:26', '2019-03-07 19:58:58', '0', '0', '1000008043', '2019-03-07 18:57:13', 'http://mp.weixin.qq.com/profile?src=3&timestamp=1551959664&ver=1&signature=bSSQMK1LY77M4O22qTi37cbhjhwNV7C9V4aor9HLhAt-Wdr*jWO2gFh3jN4KhPmYamKHzx9fg9SuHxCB1nGehg==');
62 |
63 | -- ----------------------------
64 | -- Table structure for `wenzhang_info`
65 | -- ----------------------------
66 | DROP TABLE IF EXISTS `wenzhang_info`;
67 | CREATE TABLE `wenzhang_info` (
68 | `_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
69 | `title` text COMMENT '文章标题',
70 | `source_url` text COMMENT '原文地址',
71 | `cover_url` text COMMENT '封面图URL',
72 | `description` text COMMENT '文章摘要',
73 | `date_time` datetime DEFAULT NULL COMMENT '文章推送时间',
74 | `mp_id` int(11) DEFAULT '0' COMMENT '对应的公众号ID',
75 | `read_count` int(11) DEFAULT '0' COMMENT '阅读数',
76 | `like_count` int(11) DEFAULT '0' COMMENT '点攒数',
77 | `comment_count` int(11) DEFAULT '0' COMMENT '评论数',
78 | `content_url` text COMMENT '文章临时地址',
79 | `author` varchar(50) DEFAULT '' COMMENT '作者',
80 | `msg_index` int(11) DEFAULT '0' COMMENT '一次群发中的图文顺序 1是头条 ',
81 | `copyright_stat` int(1) DEFAULT '0' COMMENT '11表示原创 其它表示非原创',
82 | `qunfa_id` int(30) DEFAULT '0' COMMENT '群发消息ID',
83 | `type` int(11) DEFAULT '0' COMMENT '消息类型',
84 | `content` longtext COMMENT '文章正文',
85 | PRIMARY KEY (`_id`)
86 | ) ENGINE=InnoDB AUTO_INCREMENT=6579 DEFAULT CHARSET=utf8mb4;
87 |
88 | -- ----------------------------
89 | -- Records of wenzhang_info
90 | -- ----------------------------
91 |
92 | -- ----------------------------
93 | -- Table structure for `wenzhang_statistics`
94 | -- ----------------------------
95 | DROP TABLE IF EXISTS `wenzhang_statistics`;
96 | CREATE TABLE `wenzhang_statistics` (
97 | `_id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
98 | `wz_id` int(11) DEFAULT '0' COMMENT '对应的文章ID',
99 | `create_time` datetime DEFAULT NULL COMMENT '统计时间',
100 | `read_count` int(11) DEFAULT '0' COMMENT '新增阅读数',
101 | `like_count` int(11) DEFAULT '0' COMMENT '新增点攒数',
102 | `comment_count` int(11) DEFAULT '0' COMMENT '新增评论数',
103 | PRIMARY KEY (`_id`)
104 | ) ENGINE=InnoDB AUTO_INCREMENT=4006 DEFAULT CHARSET=utf8mb4;
105 |
106 | -- ----------------------------
107 | -- Records of wenzhang_statistics
108 | -- ----------------------------
109 |
--------------------------------------------------------------------------------
/wechatsogou/db.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import pymysql
4 | from . import config
5 |
6 |
class DbException(Exception):
    """Base class for every error raised by the database layer."""
11 |
12 |
class MysqlDbException(DbException):
    """Error raised by the MySQL helper class (bad arguments, missing state)."""
17 |
18 |
class mysql():
    """Small chainable SQL helper bound to a single PyMySQL connection.

    The chain setters (``table``, ``where``, ``field``, ``order``, ``limit``)
    store SQL fragments on the instance and return ``self``.  The fragments
    are NOT cleared after a statement runs, so they carry over to the next
    statement until they are overwritten.

    Example:
        m = mysql('user')
        m.table('user').add({})                    # insert
        m.table('user').where({}).save({})         # update
        m.table('user').field(['id']).where({}).order({'id': 'desc'}).find()  # select; 'asc' or 'desc'
        m.where({}).delete()                       # delete
    """

    def __init__(self, table='', prefix='', host='', user='', passwd='', db='', charset=''):
        """Open the connection and optionally select an initial table.

        table   -- initial table name; can be changed later with ``table()``.
        prefix  -- table-name prefix (joined with ``_``); falls back to
                   ``config.prefix`` when empty.
        host, user, passwd, db, charset -- connection settings; every empty
                   value falls back to the matching attribute of ``config``.
        """
        self.host = host or config.host
        self.user = user or config.user
        self.passwd = passwd or config.passwd
        self.db = db or config.db
        self.charset = charset or config.charset

        if prefix:
            self.prefix = prefix + '_'
        elif config.prefix:
            self.prefix = config.prefix + '_'
        else:
            self.prefix = ''
        if table:
            self.tablename = self.prefix + table
        self.__conn()

    def __conn(self):
        """Connect to the server and create the shared dict cursor."""
        self.conn = pymysql.connect(host=self.host, user=self.user, passwd=self.passwd, db=self.db,
                                    charset=self.charset, cursorclass=pymysql.cursors.DictCursor)
        self.cur = self.conn.cursor()
        return self

    def __update(self, sqls):
        """Execute one statement (str) or several (list) and commit.

        Returns the cursor's ``lastrowid``.  Raises MysqlDbException when
        ``sqls`` is neither a str nor a list.  If any statement fails the
        transaction is rolled back before the error propagates, so a
        half-applied batch is never committed by a later call.
        """
        if isinstance(sqls, str):
            statements = [sqls]
        elif isinstance(sqls, list):
            statements = sqls
        else:
            raise MysqlDbException('更新语句参数错误 - Model.__update')
        try:
            for sql in statements:
                self.cur.execute(sql)
        except Exception:
            self.conn.rollback()
            raise
        self.conn.commit()

        return self.cur.lastrowid

    def __delete(self, sql):
        """Execute a DELETE, commit it, and return the affected row count."""
        affected = self.cur.execute(sql)
        # Commit right away; otherwise the delete stays pending until some
        # later commit (or is lost if the process dies first).
        self.conn.commit()
        return affected

    def __query(self, sql):
        """Execute a SELECT and return what ``cursor.execute`` returns."""
        return self.cur.execute(sql)

    def __close(self):
        """Close the cursor and the connection."""
        self.cur.close()
        self.conn.close()

    def __del__(self):
        """Commit pending work and close the connection on finalization."""
        conn = getattr(self, 'conn', None)
        if conn is None:
            # __init__ failed before the connection existed; nothing to clean up.
            return
        try:
            conn.commit()
            self.__close()
        except Exception:
            # A finalizer cannot report errors usefully (the connection may
            # already be closed or the interpreter may be shutting down).
            pass

    @staticmethod
    def _quote_name(name):
        """Return ``name`` as a backtick-quoted identifier (backticks doubled)."""
        return "`" + str(name).replace("`", "``") + "`"

    def _quote_value(self, value):
        """Return ``str(value)`` as an escaped, single-quoted SQL string literal."""
        return "'" + self.conn.escape_string(str(value)) + "'"

    def _build_select_sql(self):
        """Assemble the SELECT statement from the stored chain fragments."""
        parts = ["select " + getattr(self, 'field_sql', '*') + " from `" + self.tablename + "`"]
        if hasattr(self, 'where_sql'):
            parts.append("where " + self.where_sql.strip())
        for clause in (getattr(self, 'order_sql', ''), getattr(self, 'limit_sql', '')):
            clause = clause.strip()
            if clause:
                parts.append(clause)
        # Joining with a single space keeps "order by ... desc" and
        # "limit a,b" from running together.
        return " ".join(parts)

    # ------------------------------------------------------------------
    # Public, chainable API
    # ------------------------------------------------------------------

    def table(self, table, prefix=''):
        """Select the table for the following statements; returns ``self``.

        An explicit ``prefix`` (joined with ``_``) overrides the instance
        prefix for this call.
        """
        if prefix:
            prefix = prefix + '_'
        elif hasattr(self, 'prefix'):
            prefix = self.prefix
        else:
            prefix = ''
        self.tablename = prefix + table
        return self

    def limit(self, pre, count):
        """Store a ``limit pre,count`` clause; returns ``self``."""
        self.limit_sql = 'limit ' + str(pre) + ',' + str(count)
        return self

    def where(self, where):
        """Store the WHERE condition built from a dict; returns ``self``.

        Every ``column: value`` pair becomes ``column LIKE 'value'`` and the
        pairs are joined with ``and``.  Because LIKE is used, ``%`` and ``_``
        inside a value act as wildcards.  Passing a str raises
        MysqlDbException; any other non-dict argument leaves the stored
        condition untouched.
        """
        if isinstance(where, str):
            raise MysqlDbException('请输入字典 - Model.where')
        if isinstance(where, dict):
            self.where_sql = ' and '.join(
                self._quote_name(k) + " LIKE " + self._quote_value(v)
                for k, v in where.items())
        return self

    def field(self, field):
        """Store the column list for ``find``; returns ``self``.

        ``field`` is ``'*'``, a single column name, or a list of column
        names.  Any other type raises MysqlDbException.
        """
        if isinstance(field, str):
            self.field_sql = "*" if field == '*' else self._quote_name(field)
        elif isinstance(field, list):
            self.field_sql = ','.join(self._quote_name(f) for f in field)
        else:
            raise MysqlDbException('field参数不是字符或者列表 - Model.field')
        return self

    def order(self, order):
        """Store an ORDER BY clause from ``{column: direction}``; returns ``self``.

        Only the first item of the dict is used.  A non-dict argument raises
        MysqlDbException.
        """
        if not isinstance(order, dict):
            raise MysqlDbException('排序参数不是字典 - Model.order')
        for column, direction in order.items():
            self.order_sql = " order by " + self._quote_name(column) + " " + direction
            break
        return self

    def add(self, data):
        """Insert one row built from the ``data`` dict.

        Returns the new row id, or ``None`` when the insert violates an
        integrity constraint (e.g. a duplicate key), which is deliberately
        treated as "already present".  Raises MysqlDbException when no table
        has been selected.
        """
        if not hasattr(self, 'tablename'):
            raise MysqlDbException('缺少数据表 - Model.add')
        columns = ','.join(self._quote_name(k) for k in data)
        values = ','.join(self._quote_value(v) for v in data.values())
        sql = "insert into `" + self.tablename + "` (" + columns + ") values (" + values + ")"
        try:
            return self.__update(sql)
        except pymysql.err.IntegrityError:
            return None

    def save(self, data):
        """Update the rows matching the stored WHERE condition with ``data``.

        Raises MysqlDbException when no WHERE condition or no table has been
        set.
        """
        if not hasattr(self, 'where_sql'):
            raise MysqlDbException('缺少where语句 - Model.save')
        if not hasattr(self, 'tablename'):
            raise MysqlDbException('缺少tablename - Model.save')
        assignments = ','.join(
            self._quote_name(k) + " = " + self._quote_value(v)
            for k, v in data.items())
        sql = "update `" + self.tablename + "` set " + assignments + " where " + self.where_sql + ";"
        self.__update(sql)

    def find(self, size=25):
        """Run the SELECT described by the stored fragments.

        ``size == 0`` returns all rows, ``size == 1`` returns a single row
        (``fetchone``), any other value returns at most ``size`` rows.
        """
        self.__query(self._build_select_sql())
        if size == 0:
            return self.cur.fetchall()
        elif size == 1:
            return self.cur.fetchone()
        else:
            return self.cur.fetchmany(size)

    def delete(self):
        """Delete the rows matching the stored WHERE condition.

        Without a stored condition every row of the table is deleted.
        Returns the affected row count.
        """
        where_sql = " where " + self.where_sql if hasattr(self, 'where_sql') else ""
        sql = "delete from `" + self.tablename + "`" + where_sql
        return self.__delete(sql)
219 |
220 |
# No-op entry point: running this module directly does nothing.
if __name__ == '__main__':
    pass
223 |
--------------------------------------------------------------------------------
/auto_add_mp_log.txt:
--------------------------------------------------------------------------------
1 | 2016-10-20 10:35:14,977 - [basic.py:158] - ERROR -
2 |
3 |
IP:36.110.68.16
访问时间:2016.10.20 10:35:37
用户您好,您的访问过于频繁,为确认本次访问为正常用户行为,需要您协助验证。
79 | 80 | 95 | 99 |IP:36.110.68.19
访问时间:2016.10.20 10:46:22
用户您好,您的访问过于频繁,为确认本次访问为正常用户行为,需要您协助验证。
184 | 185 | 200 | 204 |