├── scrapeCenter
│   ├── spa14
│   │   ├── Wasm.wasm
│   │   └── crawl.py
│   ├── spa7
│   │   └── crawl.py
│   ├── spa5
│   │   └── crawl.py
│   ├── spa3
│   │   └── crawl.py
│   ├── spa15
│   │   ├── crawl.py
│   │   └── demo.js
│   ├── spa1
│   │   └── crawl.py
│   ├── ssr2
│   │   └── crawl.py
│   ├── spa16
│   │   └── crawl.py
│   ├── ssr3
│   │   └── crawl.py
│   ├── spa2
│   │   └── crawl.py
│   ├── spa6
│   │   ├── demo.js
│   │   └── crawl.py
│   ├── ssr1
│   │   └── crawl.py
│   └── ssr4
│       └── crawl.py
├── cninfo
│   ├── crawler.py
│   └── demo.js
├── tweet
│   ├── GetToken.py
│   └── Tweet.py
├── dzdp_svg
│   └── dzdp_svg.py
├── README.md
├── weibo
│   ├── weibo_comment.py
│   ├── search_all.py
│   ├── get_fans_info.py
│   └── search.py
├── jijin
│   └── TTJJ.py
├── youdao
│   └── yd_tran.py
├── .gitignore
├── qcc
│   └── qcc.py
├── music163
│   ├── Music.py
│   └── Music.js
├── weather
│   └── weather.py
├── bilibili
│   └── bilibili_user.py
├── lianjia
│   └── lianjia.py
├── biqu
│   └── biqu.py
├── ziru
│   └── zr.py
├── lagou
│   └── lg.py
└── zhihu
    └── public_func.py
/scrapeCenter/spa14/Wasm.wasm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monkey-hjy/python-spider/HEAD/scrapeCenter/spa14/Wasm.wasm
--------------------------------------------------------------------------------
/scrapeCenter/spa7/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/14 11:27
4 | # Auth: HJY
5 |
6 | """数据存储在js中"""
7 |
8 | import requests
9 |
10 | url = 'https://spa7.scrape.center/js/main.js'
11 | response = requests.get(url).text
12 | print(response)
13 |
--------------------------------------------------------------------------------
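Note: the script above only prints the raw JS. A minimal follow-up sketch for pulling records out of main.js; the shape of the embedded data is an assumption here (the pattern is hypothetical), so adjust it after inspecting the printed source:

import json
import re

import requests

js_text = requests.get('https://spa7.scrape.center/js/main.js').text
# hypothetical pattern: grab {...} object literals that contain a "name" key
for match in re.findall(r'\{[^{}]*"name"[^{}]*\}', js_text):
    try:
        print(json.loads(match))
    except json.JSONDecodeError:
        pass  # not strict JSON (single quotes / bare keys); tweak as needed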
/scrapeCenter/spa5/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 18:44
4 | # Auth: HJY
5 |
6 | """动态渲染"""
7 |
8 | import requests
9 |
10 | url = 'https://spa5.scrape.center/api/book/?limit=5000&offset=0'
11 | response = requests.get(url).json()
12 | print(len(response['results']))
13 |
--------------------------------------------------------------------------------
/scrapeCenter/spa3/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 18:40
4 | # Auth: HJY
5 |
6 | """下滑页面获取新数据"""
7 |
8 | import requests
9 |
10 | url = 'https://spa3.scrape.center/api/movie/?limit=100&offset=0'
11 | response = requests.get(url).json()
12 | print(len(response['results']))
13 |
--------------------------------------------------------------------------------
/scrapeCenter/spa15/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/14 11:53
4 | # Auth: HJY
5 |
6 | """
7 | WASM signing; the page builds the token via ccall (no Python port here yet):
8 | this.$wasm.ccall("encrypt", "string", ["string", "string"], [this.$store.state.url.index, Math.round((new Date).getTime() / 1e3).toString()]);
9 | this.$wasm.ccall;
10 | """
11 |
12 |
--------------------------------------------------------------------------------
/scrapeCenter/spa1/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 18:10
4 | # Auth: HJY
5 |
6 | """Ajax请求返回数据"""
7 |
8 | import requests
9 | from loguru import logger
10 |
11 | url = 'https://spa1.scrape.center/api/movie/?limit=100&offset=0'
12 | response = requests.get(url).json()
13 | for info in response['results']:
14 | logger.info(f'name: {info["name"]}, published_at: {info["published_at"]}, score: {info["score"]}')
15 |
--------------------------------------------------------------------------------
/scrapeCenter/ssr2/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 17:45
4 | # Auth: HJY
5 |
6 | """无证书。关闭证书验证即可"""
7 |
8 | import requests
9 |
10 | url = 'https://ssr2.scrape.center/'
11 | headers = {
12 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36'
13 | }
14 | response = requests.get(url, headers=headers, verify=False)
15 | print(response.text)
16 |
--------------------------------------------------------------------------------
/scrapeCenter/spa16/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/18 15:14
4 | # Auth: HJY
5 |
6 | """http2协议"""
7 |
8 | import httpx
9 | client = httpx.Client(http2=True)
10 | url = 'https://spa16.scrape.center/api/book/?limit=18&offset=0'
11 | headers = {
12 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
13 | }
14 | response = client.get(url, headers=headers)
15 | print(response.text)
16 |
--------------------------------------------------------------------------------
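Note: httpx exposes the negotiated protocol on the response, so you can verify HTTP/2 was actually used; a quick self-contained check against the same endpoint:

import httpx

with httpx.Client(http2=True) as client:
    response = client.get('https://spa16.scrape.center/api/book/?limit=18&offset=0')
    print(response.http_version)  # 'HTTP/2' when negotiation succeeded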
/scrapeCenter/ssr3/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 17:46
4 | # Auth: HJY
5 |
6 | """加http验证"""
7 |
8 | import requests
9 |
10 | url = 'https://ssr3.scrape.center/'
11 | headers = {
12 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
13 | 'Authorization': 'Basic YWRtaW46YWRtaW4='
14 | }
15 | response = requests.get(url, headers=headers)
16 | print(response.text)
17 |
--------------------------------------------------------------------------------
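Note: the hard-coded header above is just base64('admin:admin'). An equivalent version that lets requests build the Authorization header itself:

import requests

# requests generates 'Authorization: Basic YWRtaW46YWRtaW4=' from this tuple
response = requests.get('https://ssr3.scrape.center/', auth=('admin', 'admin'))
print(response.status_code)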
/cninfo/crawler.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawler.py
3 | # Date: 2022/05/30 23:05
4 | # Auth: HJY
5 | import requests
6 | import execjs
7 |
8 | ctx = execjs.compile(open('./demo.js', encoding='utf-8').read())
9 | url = 'https://webapi.cninfo.com.cn/api/sysapi/p_sysapi1007'
10 | headers = {
11 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
12 | 'mcode': ctx.call('getResCode')
13 | }
14 | data = {
15 | 'tdate': '2022-05-27',
16 | 'market': 'SZE'
17 | }
18 | response = requests.post(url, json=data, headers=headers)
19 | print(response.json())
20 |
--------------------------------------------------------------------------------
/scrapeCenter/spa14/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/14 11:35
4 | # Auth: HJY
5 |
6 | """
7 | WASM signing: the page computes the sign parameter as
8 | e = this.$wasm.asm.encrypt(offset, time);
9 | """
10 |
11 | import requests
12 | import pywasm
13 |
14 | import time
15 | import os
16 |
17 | wasm_fun = pywasm.load('scrapeCenter/spa14/Wasm.wasm')
18 | res = wasm_fun.exec('encrypt', [0, int(time.time())])
19 |
20 | url = 'https://spa14.scrape.center/api/movie/'
21 | headers = {
22 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36'
23 | }
24 | params = {
25 | 'limit': 100,
26 | 'offset': 0,
27 | 'sign': res
28 | }
29 | response = requests.get(url, headers=headers, params=params).json()
30 | print(len(response['results']))
31 |
--------------------------------------------------------------------------------
/scrapeCenter/spa2/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 18:13
4 | # Auth: HJY
5 |
6 | """有token参数加密"""
7 |
8 | import requests
9 |
10 | from loguru import logger
11 | import hashlib
12 | import base64
13 | import time
14 |
15 |
16 | def get_token(offset):
17 | now_t = str(int(time.time()))
18 | res = hashlib.sha1(f'/api/movie,{offset},{now_t}'.encode('utf8')).hexdigest()
19 | res += f',{now_t}'
20 | res = base64.b64encode(res.encode('utf8')).decode()
21 | return res
22 |
23 | url = 'https://spa2.scrape.center/api/movie/'
24 | params = {
25 | 'limit': 100,
26 | 'offset': 0,
27 | 'token': get_token(0)
28 | }
29 | response = requests.get(url, params=params).json()
30 | for info in response['results']:
31 | logger.info(f'name: {info["name"]}, published_at: {info["published_at"]}, score: {info["score"]}')
32 |
--------------------------------------------------------------------------------
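Note: decoding a token shows the layout the server verifies: a SHA-1 hex digest over '/api/movie,<offset>,<timestamp>', a comma, and the timestamp itself. A standalone round-trip of the same logic:

import base64
import hashlib
import time

now_t = str(int(time.time()))
digest = hashlib.sha1(f'/api/movie,0,{now_t}'.encode('utf8')).hexdigest()
token = base64.b64encode(f'{digest},{now_t}'.encode('utf8')).decode()
print(base64.b64decode(token).decode())  # '<40-char sha1 hex>,<timestamp>'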
/scrapeCenter/spa6/demo.js:
--------------------------------------------------------------------------------
1 |
2 |
3 | function _0x456254() {
4 | var _0x189cbb = f_3452();
5 | for (var _0x5da681 = Math['round'](new Date()['getTime']() / 0x3e8)['toString'](), _0x2a83dd = arguments['length'], _0x31a891 = new Array(_0x2a83dd), _0x596a02 = 0x0; _0x596a02 < _0x2a83dd; _0x596a02++)
6 | _0x31a891[_0x596a02] = arguments[_0x596a02];
7 | _0x31a891['push'](_0x5da681);
8 | console.log(_0x31a891);
9 | var _0xf7c3c7 = _0x189cbb['SHA1'](_0x31a891['join'](','))['toString'](_0x189cbb['enc']['Hex'])
10 | , _0x3c8435 = [_0xf7c3c7, _0x5da681]['join'](',')
11 | , _0x104b5b = _0x358b1f['encode'](_0x3c8435);
12 | return _0x104b5b;
13 | }
14 |
15 |
16 | _0x358b1f['encode'](_0x3c8435)
17 |
18 | _0x3c8435 = [_0xf7c3c7, _0x5da681]['join'](',')
19 |
20 | _0xf7c3c7 = sha1(_0x31a891['join'](','))['toString'](_0x189cbb['enc']['Hex'])
21 | _0x31a891 = ['/api/movie', time]
22 | _0x5da681 = time
23 |
--------------------------------------------------------------------------------
/scrapeCenter/spa6/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/14 10:48
4 | # Auth: HJY
5 |
6 | """js加密。有混淆"""
7 |
8 | import hashlib
9 | import requests
10 | import time
11 | import base64
12 |
13 |
14 | def get_token():
15 | now_t = str(int(time.time()))
16 | _0x189cbb = ['/api/movie', now_t]
17 | _0xf7c3c7 = hashlib.sha1(','.join(_0x189cbb).encode('utf8')).hexdigest()
18 | _0x3c8435 = _0xf7c3c7 + ',' + now_t
19 | _0x104b5b = base64.b64encode(_0x3c8435.encode('utf8')).decode('utf8')
20 | return _0x104b5b
21 |
22 | url = 'https://spa6.scrape.center/api/movie/'
23 | headers = {
24 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36'
25 | }
26 | params = {
27 | 'limit': 10,
28 | 'offset': 10,
29 | 'token': get_token(),
30 | }
31 | response = requests.get(url=url, headers=headers, params=params).json()
32 | print(len(response['results']))
33 |
--------------------------------------------------------------------------------
/cninfo/demo.js:
--------------------------------------------------------------------------------
1 | function getResCode(){
2 | var time=Math.floor(new Date().getTime()/1000);
3 | return missjson(""+time);
4 | }
5 |
6 | function missjson(input) {
7 | var keyStr = "ABCDEFGHIJKLMNOP" + "QRSTUVWXYZabcdef" + "ghijklmnopqrstuv" + "wxyz0123456789+/" + "=";
8 | var output = "";
9 | var chr1, chr2, chr3 = "";
10 | var enc1, enc2, enc3, enc4 = "";
11 | var i = 0;
12 | do {
13 | chr1 = input.charCodeAt(i++);
14 | chr2 = input.charCodeAt(i++);
15 | chr3 = input.charCodeAt(i++);
16 | enc1 = chr1 >> 2;
17 | enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
18 | enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
19 | enc4 = chr3 & 63;
20 | if (isNaN(chr2)) {
21 | enc3 = enc4 = 64;
22 | } else if (isNaN(chr3)) {
23 | enc4 = 64;
24 | }
25 | output = output + keyStr.charAt(enc1) + keyStr.charAt(enc2) + keyStr.charAt(enc3) + keyStr.charAt(enc4);
26 | chr1 = chr2 = chr3 = "";
27 | enc1 = enc2 = enc3 = enc4 = "";
28 | } while (i < input.length);
29 | return output;
30 | }
31 |
--------------------------------------------------------------------------------
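Note: missjson above is the standard Base64 alphabet and bit-packing applied to the epoch-seconds string, so the mcode header can be computed in pure Python with no execjs dependency. A minimal sketch:

import base64
import time

def get_res_code() -> str:
    # equivalent of demo.js: plain Base64 over the ASCII epoch-seconds string
    return base64.b64encode(str(int(time.time())).encode()).decode()

print(get_res_code())  # e.g. 'MTY1NTUzNDkwOA==' for t=1655534908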
/scrapeCenter/ssr1/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 17:37
4 | # Auth: HJY
5 |
6 | """静态网站。直接请求"""
7 |
8 | import requests
9 | from lxml import etree
10 |
11 |
12 | def parse_page(page):
13 | url = 'https://ssr1.scrape.center/page/{}'.format(page)
14 | headers = {
15 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36'
16 | }
17 | response = requests.get(url=url, headers=headers)
18 | response.encoding = response.apparent_encoding
19 | html = etree.HTML(response.text)
20 | info_element = html.xpath('//div[@class="el-col el-col-18 el-col-offset-3"]/div')
21 | for info in info_element:
22 | title = info.xpath('.//h2/text()')[0]
23 | types = ','.join(info.xpath('.//div[@class="categories"]//span/text()'))
24 | score = info.xpath('.//p[@class="score m-t-md m-b-n-sm"]/text()')[0].strip()
25 | item = {'title': title, 'type': types, 'score': score}
26 | print(f'page: {page}, item: {item}')
27 | if info_element:
28 | parse_page(page + 1)
29 |
30 |
31 | parse_page(1)
32 |
33 |
--------------------------------------------------------------------------------
/scrapeCenter/ssr4/crawl.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # NAME: crawl.py
3 | # Date: 2022/06/13 17:49
4 | # Auth: HJY
5 |
6 | """做延时。异步加快速度"""
7 |
8 | import requests
9 | import asyncio
10 | import aiohttp
11 | from loguru import logger
12 |
13 | import time
14 |
15 |
16 | start_time = time.time()
17 |
18 |
19 | async def get(url):
20 | session = aiohttp.ClientSession()
21 | headers = {
22 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
23 | }
24 | response = await session.get(url, headers=headers, verify_ssl=False)
25 | await response.text()
26 | await session.close()
27 | return response
28 |
29 |
30 | async def start(page):
31 | url = f'https://ssr4.scrape.center/page/{page}'
32 | logger.info(f'get {url}')
33 | response = await get(url)
34 | logger.info(f'get {url} done, response.status={response.status}')
35 |
36 |
37 | tasks = [asyncio.ensure_future(start(page)) for page in range(1, 10)]
38 | loop = asyncio.get_event_loop()
39 | loop.run_until_complete(asyncio.wait(tasks))
40 | end_time = time.time()
41 | logger.info(f'elapsed: {end_time - start_time}')
42 |
43 |
--------------------------------------------------------------------------------
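Note: the version above opens a new ClientSession per request and uses the legacy get_event_loop pattern. A sketch of the more idiomatic shape (one shared session, asyncio.run), fetching the same pages:

import asyncio

import aiohttp

async def main():
    # one shared session for all requests instead of one per call
    async with aiohttp.ClientSession() as session:

        async def fetch(page: int) -> int:
            url = f'https://ssr4.scrape.center/page/{page}'
            async with session.get(url, ssl=False) as resp:
                await resp.text()
                return resp.status

        statuses = await asyncio.gather(*(fetch(p) for p in range(1, 10)))
        print(statuses)

asyncio.run(main())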
/tweet/GetToken.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-02-24 17:20:13
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-02-24 17:20:32
6 | import requests
7 |
8 |
9 | class GetToken(object):
10 | """获取到游客token"""
11 | def __init__(self):
12 | self.get_token_url = 'https://api.twitter.com/1.1/guest/activate.json'
13 | self.get_token_headers = {
14 | "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
15 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
16 | }
17 |
18 | def get_token(self, proxies_ip):
19 | proxies = {
20 | 'http': 'http://{}'.format(proxies_ip),
21 | 'https': 'http://{}'.format(proxies_ip),
22 | }
23 | err_count = 0
24 | while err_count < 5:
25 | try:
26 | response = requests.request(url=self.get_token_url, method="POST", headers=self.get_token_headers,
27 | proxies=proxies, timeout=15)
28 | response.close()
29 | return response.json().get('guest_token')
30 | except Exception as e:
31 | print(e)
32 | err_count += 1
33 |
--------------------------------------------------------------------------------
/dzdp_svg/dzdp_svg.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | # @File : dzdp_svg.py
4 | # @Author : Monkey
5 | # @DATE : 2021/5/13 4:54 PM
6 | import re
7 | import requests
8 |
9 |
10 | class DZDP(object):
11 | """大众点评"""
12 |
13 | def __init__(self):
14 | self.css_url = 'https://s3plus.meituan.net/v1/mss_0a06a471f9514fc79c981b5466f56b91/svgtextcss/80da73cea991b1dac8e6c3eb8cfe7461.css'
15 | self.svg_url = 'https://s3plus.meituan.net/v1/mss_0a06a471f9514fc79c981b5466f56b91/svgtextcss/20609a5f67dfd9a34fd762ac63e59960.svg'
16 | self.css_text = requests.get(self.css_url).text
17 | self.svg_info = {int(info.split('">')[0]): info.split('">')[1] for info in re.findall(r'y="(.*?)</text>', requests.get(self.svg_url).text)}
18 |
19 | def get_txt(self, code):
20 | """获取到编码对应的文字"""
21 | try:
22 | patt = '%s{background:(.*?);' % code
23 | index = re.findall(patt, self.css_text)[0].replace('px', '').replace('-', '').split(' ')
24 | index_x, index_y = int(index[0][:-2]), int(index[1][:-2])
25 | for key in self.svg_info:
26 | if key >= index_y:
27 | return self.svg_info[key][index_x // 14]
28 | except Exception:
29 | return code
30 |
31 |
32 | if __name__ == '__main__':
33 | t = DZDP()
34 | print(t.get_txt(code='swnbb'))
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
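Note: how the lookup works, with hypothetical numbers. Suppose the CSS for a class reads .swnbb{background:-196.0px -141.0px;}:

# hypothetical values from a rule like .swnbb{background:-196.0px -141.0px;}
index_x, index_y = 196, 141
column = index_x // 14  # glyphs in each SVG text row are 14px wide
print(column)           # 14 -> the 15th character of the selected row
# the row is the first <text> element whose y attribute is >= index_y,
# which is exactly the `if key >= index_y` scan in get_txt() above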
/README.md:
--------------------------------------------------------------------------------
1 | # python-spider
2 | Small Python web-scraping projects.
3 | Contents:
4 | 1. [Biquge novel downloader](https://github.com/monkey-hjy/python-spider/tree/master/biqu)
5 | 2. [Tweet scraper](https://github.com/monkey-hjy/python-spider/tree/master/tweet)
6 | 3. [China Weather Network lookup](https://github.com/monkey-hjy/python-spider/tree/master/weather)
7 | 4. [NetEase Cloud Music reverse-engineering crawler](https://github.com/monkey-hjy/python-spider/tree/master/music163)
8 | 5. [Tiantian Fund data scraper](https://github.com/monkey-hjy/python-spider/tree/master/jijin)
9 | 6. [Weibo scraper](https://github.com/monkey-hjy/python-spider/tree/master/weibo)
10 | 7. [Youdao Translate reverse engineering](https://github.com/monkey-hjy/python-spider/tree/master/youdao)
11 | 8. [Lianjia nationwide rental listings](https://github.com/monkey-hjy/python-spider/tree/master/lianjia)
12 | 9. [Qichacha login-free crawler](https://github.com/monkey-hjy/python-spider/tree/master/qcc)
13 | 10. [Dianping SVG obfuscation](https://github.com/monkey-hjy/python-spider/tree/master/dzdp_svg)
14 | 11. [Bilibili user crawler](https://github.com/monkey-hjy/python-spider/tree/master/bilibili)
15 | 12. [Lagou login-free crawler](https://github.com/monkey-hjy/python-spider/blob/master/lagou)
16 | 13. [Ziroom rental font obfuscation](https://github.com/monkey-hjy/python-spider/tree/master/ziru)
17 | 14. [Zhihu Q&A scraper](https://github.com/monkey-hjy/python-spider/tree/master/zhihu_answer)
18 | 15. [CNINFO data service platform](https://github.com/monkey-hjy/python-spider/tree/master/cninfo)
19 |
20 |
21 | - Articles are posted to CSDN from time to time: [https://blog.csdn.net/qq_42452095](https://blog.csdn.net/qq_42452095)
22 | - Videos are posted to Bilibili from time to time: [https://space.bilibili.com/347405521/channel/detail?cid=181641](https://space.bilibili.com/347405521/channel/detail?cid=181641)
23 | - Questions about the code can be raised in this project's [Issues](https://github.com/monkey-hjy/python-spider/issues)
24 | - For custom work, contact QQ 847703187 or WeChat 847703187
25 |
--------------------------------------------------------------------------------
/weibo/weibo_comment.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Author: 玛卡巴卡
3 | # Date: 2021/4/19 17:10
4 |
5 | import requests
6 | import time
7 |
8 |
9 | class WBComment(object):
10 | """抓取微博全量评论。但是需要登录"""
11 |
12 | def __init__(self):
13 | self.comment_url = 'https://m.weibo.cn/comments/hotflow'
14 | self._headers = {
15 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36',
16 | 'cookie': 'cookie of a logged-in user goes here',
17 | }
18 |
19 | def get_response(self, url, params=None):
20 | """发起请求"""
21 | response = requests.get(url=url, headers=self._headers, params=params)
22 | if response.status_code == 200:
23 | return response
24 | else:
25 | print('Request failed, status code: {}'.format(response.status_code))
26 | return None
27 |
28 | def start(self, wb_id):
29 | """启动函数,接受微博ID参数"""
30 | # 初始页码的ID。下一页的ID会存放在返回的数据中
31 | max_id = 0
32 | while True:
33 | params = {
34 | 'id': wb_id,
35 | 'mid': wb_id,
36 | 'max_id': max_id,
37 | 'max_id_type': 1,
38 | }
39 | response = self.get_response(url=self.comment_url, params=params)
40 | if response is None:
41 | print('{} failed'.format(wb_id))
42 | return
43 | response = response.json()['data']
44 | print(max_id, len(response['data']), response['data'][0]['text'])
45 | # cursor for the next page; the API returns max_id == 0 on the last page
46 | max_id = response['max_id']
47 | if max_id == 0:
48 | break
49 | time.sleep(1)
--------------------------------------------------------------------------------
/scrapeCenter/spa15/demo.js:
--------------------------------------------------------------------------------
1 | t = {}
2 | t["_encrypt"] = function() {
3 | return (t["_encrypt"] = t["asm"]["encrypt"]).apply(null, arguments)
4 | }
5 | t["stackSave"] = function() {
6 | return (bt = t["stackSave"] = t["asm"]["stackSave"]).apply(null, arguments)
7 | }
8 |
9 | function ot(n) {
10 | t["onAbort"] && t["onAbort"](n),
11 | n += "",
12 | S(n),
13 | j = !0,
14 | 1,
15 | n = "abort(" + n + "). Build with -s ASSERTIONS=1 for more info.";
16 | var e = new WebAssembly.RuntimeError(n);
17 | throw c(e),
18 | e
19 | }
20 |
21 | function k(t, n) {
22 | t || ot("Assertion failed: " + n)
23 | }
24 |
25 | function I(n) {
26 | var e = t["_" + n];
27 | return k(e, "Cannot call unknown function " + n + ", make sure it is exported"),
28 | e
29 | }
30 |
31 | function L(t, n, e, r, i) {
32 | var o = {
33 | string: function(t) {
34 | var n = 0;
35 | if (null !== t && void 0 !== t && 0 !== t) {
36 | var e = 1 + (t.length << 2);
37 | n = xt(e),
38 | N(t, n, e)
39 | }
40 | return n
41 | },
42 | array: function(t) {
43 | var n = xt(t.length);
44 | return D(t, n),
45 | n
46 | }
47 | };
48 | function a(t) {
49 | return "string" === n ? W(t) : "boolean" === n ? Boolean(t) : t
50 | }
51 | var c = I(t)
52 | , u = []
53 | , s = 0;
54 | if (r)
55 | for (var f = 0; f < r.length; f++) {
56 | var l = o[e[f]];
57 | console.log('l: ', l)
58 | l ? (0 === s && (s = bt()),
59 | u[f] = l(r[f])) : u[f] = r[f]
60 | }
61 | var h = c.apply(null, u);
62 | return h = a(h),
63 | 0 !== s && _t(s),
64 | h
65 | }
66 | L("encrypt", "string", ["string", "string"], ['/api/movie', 1655534908]);
--------------------------------------------------------------------------------
/jijin/TTJJ.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-03-04 11:18:58
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-03-04 11:19:17
6 | # Tiantian Fund (fund.eastmoney.com) data scraper
7 |
8 | import requests
9 | import time
10 | import re
11 | import json
12 | import pandas as pd
13 | import random
14 |
15 | file_path = '基金查询.xlsx'
16 | fund_codes = ['001606', '000924', '005962', '004997', '006751']
17 | start_date = '2019-01-01'
18 | end_date = '2021-10-30'
19 | url = 'http://api.fund.eastmoney.com/f10/lsjz'
20 | headers = {
21 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
22 | 'Referer': 'http://fundf10.eastmoney.com/',
23 | }
24 | result = dict()
25 | result_fsrq = []
26 | for fund_code in fund_codes:
27 | params = {
28 | "callback": f"jQuery183{''.join([str(random.randrange(0, 10)) for _ in range(17)])}_{int(time.time() * 1000)}",
29 | "fundCode": fund_code,
30 | "pageIndex": "1",
31 | "pageSize": "100000",
32 | "startDate": start_date,
33 | "endDate": end_date,
34 | "_": str(int(time.time() * 1000)),
35 | }
36 | response = json.loads(re.findall(r'\((.*)\)', requests.get(url, headers=headers, params=params).text, re.S)[0])
37 | # trade date (FSRQ)
38 | FSRQ = []
39 | # net asset value per unit (DWJZ)
40 | DWJZ = []
41 | fund_info = response['Data']['LSJZList']
42 | for i in range(len(fund_info)):
43 | # FSRQ.append(datetime.datetime.strptime(fund_info[i]['FSRQ'], '%Y-%m-%d'))
44 | FSRQ.append(fund_info[i]['FSRQ'])
45 | DWJZ.append(fund_info[i]['DWJZ'])
46 | result_fsrq = FSRQ if len(FSRQ) > len(result_fsrq) else result_fsrq
47 | result[fund_code] = DWJZ
48 | max_len = 0
49 | for key in result:
50 | max_len = len(result[key]) if len(result[key]) > max_len else max_len
51 | for key in result:
52 | result[key] += [None] * (max_len - len(result[key]))
53 | result = pd.DataFrame(result)
54 | result.index = result_fsrq
55 | result.to_excel(file_path, encoding='ANSI')
56 |
--------------------------------------------------------------------------------
/youdao/yd_tran.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-04-27 11:35:40
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-04-27 11:36:08
6 | import requests
7 | import hashlib
8 | import time
9 | import random
10 |
11 |
12 | class YDDict(object):
13 | """有道翻译"""
14 |
15 | @staticmethod
16 | def get_data(keyword):
17 | """获取到其余的加密参数"""
18 | md = hashlib.md5()
19 | t = str(int(time.time() * 1000))
20 | i = t + str(random.randrange(10))
21 | md.update('fanyideskweb{}{}Tbh5E8=q6U3EXe+&L[4c@'.format(keyword, i).encode('utf8'))
22 | sign = md.hexdigest()
23 | return t, i, sign
24 |
25 | def translate(self, keyword='你好', data_from='AUTO', data_to='AUTO'):
26 | """
27 | Translate keyword
28 | :param data_from: source language of the text
29 | :param data_to: target language
30 | """
31 | url = 'https://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
32 | headers = {
33 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
34 | 'Referer': 'https://fanyi.youdao.com/?keyfrom=fanyi-new.logo',
35 | 'Host': 'fanyi.youdao.com',
36 | 'Origin': 'https://fanyi.youdao.com',
37 | 'Cache-Control': 'no-cache',
38 | 'Connection': 'keep-alive',
39 | }
40 | t, i, sign = self.get_data(keyword)
41 | data = {
42 | "i": keyword,
43 | "from": data_from,
44 | "to": data_to,
45 | "smartresult": "dict",
46 | "client": "fanyideskweb",
47 | "salt": i,
48 | "sign": sign,
49 | "lts": t,
50 | # bv is a hash of the User-Agent, so it is hard-coded to match the UA above
51 | "bv": "62c1eba97402d4ff4eb261254e974c27",
52 | "doctype": "json",
53 | "version": "2.1",
54 | "keyfrom": "fanyi.web",
55 | "action": "FY_BY_REALTlME",
56 | }
57 | response = requests.post(url, headers=headers, data=data)
58 | # the JSON contains the result; parse it as needed
59 | print(response.json())
60 |
61 |
62 | if __name__ == '__main__':
63 | t = YDDict()
64 | t.translate(keyword='中国')
65 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | .idea/
132 | /weibo/exist.txt
133 | .DS_Store
134 |
135 |
--------------------------------------------------------------------------------
/qcc/qcc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | # @File : qcc.py
4 | # @Author : Monkey
5 | # @DATE : 2021/5/11 5:13 PM
6 |
7 | import requests
8 | import re
9 | from lxml import etree
10 |
11 |
12 | class QCC(object):
13 | """企查查爬虫"""
14 |
15 | def __init__(self):
16 | self._headers = {
17 | 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
18 | }
19 |
20 | def get_cookie(self):
21 | """发起一次测试请求,获取到搜索的cookie"""
22 | url = 'https://www.qcc.com/web/search?key=测试'
23 | response = requests.get(url, headers=self._headers, allow_redirects=False)
24 | response.encoding = 'utf8'
25 | result = re.findall(r'div>您的请求ID是: \n(.*?)</div>', response.text)
26 | if result:
27 | return result[0]
28 |
29 | def search(self, search_keyword):
30 | """搜索"""
31 | url = 'https://www.qcc.com/web/search?key={}'.format(search_keyword)
32 | headers = self._headers
33 | headers['cookie'] = 'acw_tc={}'.format(self.get_cookie())
34 | response = requests.get(url, headers=headers)
35 | response.encoding = 'utf8'
36 | html = etree.HTML(response.text)
37 | com_url = html.xpath('//a[@class="title"]/@href')
38 | print('Found {} results; fetching details...'.format(len(com_url)))
39 | for url in com_url:
40 | self.get_com_info(url)
41 |
42 | def get_com_info(self, url):
43 | """获取公司的详细信息"""
44 | response = requests.get(url, headers=self._headers)
45 | html = etree.HTML(response.text)
46 | info_elements = html.xpath('//table[@class="ntable"]/tr')
47 | item = {'url': url}
48 | flag = True
49 | for element in info_elements:
50 | if not flag:
51 | break
52 | for index in range(0, len(element.xpath('./td')), 2):
53 | try:
54 | key = element.xpath('./td[{}]/text()'.format(index+1))[0].strip()
55 | if key == '公司介绍:' or key == '经营范围':
56 | flag = False
57 | if key == '法定代表人':
58 | item[key] = element.xpath('./td[{}]//h2/text()'.format(index+2))[0].strip()
59 | else:
60 | item[key] = element.xpath('./td[{}]//text()'.format(index+2))[0].strip()
61 | except Exception:
62 | pass
63 | print(item)
64 |
65 | def run(self):
66 | """启动函数"""
67 | self.search(search_keyword='腾讯')
68 |
69 |
70 | if __name__ == '__main__':
71 | t = QCC()
72 | t.run()
73 |
74 |
75 |
--------------------------------------------------------------------------------
/music163/Music.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-02-24 17:42:40
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-02-25 10:49:45
6 | import requests
7 | import execjs
8 | import json
9 |
10 |
11 | class Music(object):
12 | """破解网易云音乐JS加密获取数据"""
13 |
14 | def __init__(self):
15 | self.get_comment_url = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token='
16 |
17 | @staticmethod
18 | def get_response(method=None, url=None, headers=None, data=None):
19 | """
20 | Send a request
21 | :params: method request type: GET/POST
22 | :params: url request URL
23 | :params: headers request headers
24 | :params: data POST form data
25 | """
26 | if method is None:
27 | return 'Invalid arguments -- method is None'
28 | if url is None:
29 | return 'Invalid URL --- url is None'
30 | if headers is None:
31 | headers = {
32 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
33 | "Chrome/88.0.4324.182 Safari/537.36",
34 | }
35 | if method == 'GET':
36 | response = requests.get(url=url, headers=headers)
37 | elif method == 'POST':
38 | response = requests.post(url=url, headers=headers, data=data)
39 | else:
40 | return 'Invalid arguments -- method undefined'
41 | response.encoding = 'utf8'
42 | if response.status_code == 200:
43 | return response
44 | else:
45 | return 'Request failed, status code %d' % response.status_code
46 |
47 | @staticmethod
48 | def get_token(music_id):
49 | """
50 | Get the encrypted request parameters for a song ID
51 | :param music_id: ID of the song to fetch
52 | """
53 | js_file = open('Music.js', encoding='utf8').read()
54 | ctx = execjs.compile(js_file, cwd=r'C:\Users\Spider\AppData\Roaming\npm\node_modules')
55 | token = ctx.call('start', music_id)
56 | return {
57 | 'params': token['encText'],
58 | 'encSecKey': token['encSecKey']
59 | }
60 |
61 | def get_comment(self, music_id):
62 | """
63 | Fetch comment data
64 | :params music_id song ID
65 | """
66 | comment_response = self.get_response(method='POST', url=self.get_comment_url.format(music_id),
67 | data=self.get_token(music_id=music_id)).json()
68 | # parse this JSON to get the comment data
69 | print(json.dumps(comment_response))
70 |
71 | def run(self):
72 | """启动函数"""
73 | test_music_id = 1366216050
74 | self.get_comment(music_id=test_music_id)
75 |
76 |
77 | if __name__ == '__main__':
78 | m = Music()
79 | m.run()
80 |
--------------------------------------------------------------------------------
/weather/weather.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-02-24 17:28:36
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-02-24 17:29:00
6 | # China Weather Network (weather.com.cn) interface...
7 | import requests
8 | from lxml import etree
9 | import pandas as pd
10 | from prettytable import PrettyTable
11 | import os
12 |
13 |
14 | def get_html(url):
15 | # request headers
16 | headers = {'user-agent': 'Mozilla/5.0'}
17 | # send the request
18 | response = requests.get(url, headers=headers)
19 | # fix the encoding
20 | response.encoding = 'utf8'
21 | # parse into an HTML tree
22 | html = etree.HTML(response.text)
23 | return html
24 |
25 |
26 | # fetch the city info and save it locally
27 | def get_cityinfo_write(html):
28 | print('Fetching city info')
29 | city_info = {}
30 | # collect the province links
31 | province_url = html.xpath('//div[@class="lqcontentBoxheader"]//ul//li/a/@href')
32 | for i in range(len(province_url)):
33 | # build each province's URL and fetch its HTML
34 | the_html = get_html('http://www.weather.com.cn' + province_url[i])
35 | # parse out the city names
36 | city_name = the_html.xpath('//div[@class="conMidtab3"]//tr//td[position()<3]/a/text()')
37 | # parse out the city links
38 | city_url = the_html.xpath('//div[@class="conMidtab3"]//tr//td[position()<3]/a/@href')
39 | # store the city info into city_info
40 | for j in range(len(city_name)):
41 | if j != 0 and city_name[j] == city_name[0]:
42 | break
43 | else:
44 | city_info[city_name[j]] = city_url[j]
45 | # set the column names
46 | data = pd.DataFrame(columns=['city_name', 'city_url'])
47 | # fill in the data
48 | data['city_name'] = city_info.keys()
49 | data['city_url'] = city_info.values()
50 | # save locally
51 | data.to_csv(file_path, index=False, encoding='utf8')
52 |
53 |
54 | if __name__ == '__main__':
55 | # instantiate the pretty-printer
56 | p = PrettyTable()
57 | # entry URL
58 | url = 'http://www.weather.com.cn/textFC/hb.shtml'
59 | # fetch and parse the page
60 | html = get_html(url)
61 | file_path = '/home/monkey/File/中国天气网城市信息.csv'
62 | # if the city-info file doesn't exist yet, build it with get_cityinfo_write
63 | if not os.path.exists(file_path):
64 | get_cityinfo_write(html)
65 | # load the city info
66 | data = pd.read_csv(file_path, encoding='utf8')
67 | # city names
68 | city_name = data['city_name'].tolist()
69 | # city URLs
70 | city_url = data['city_url'].tolist()
71 | # ask the user which city to look up
72 | name = input('Enter the city name to look up: ')
73 | # query only if the name is valid
74 | if name in city_name:
75 | # fetch the weather page for this city
76 | city_html = get_html(city_url[city_name.index(name)])
77 | # parse out the dates
78 | date = city_html.xpath('//ul[@class="t clearfix"]//li//h1/text()')
79 | # parse out the weather
80 | wea = city_html.xpath('//ul[@class="t clearfix"]//li/p[@class="wea"]/text()')
81 | # parse out the raw temperature list
82 | tem_list = ''.join(city_html.xpath('//ul[@class="t clearfix"]//li/p[@class="tem"]//text()')).split('\n')
83 | # keep only the valid entries
84 | tem = [tem_list[i] for i in range(len(tem_list)) if i % 2 != 0]
85 | # parse out the wind
86 | win = city_html.xpath('//ul[@class="t clearfix"]//li/p[@class="win"]/i/text()')
87 | print('Weather for {}:'.format(name))
88 | # fill the table for pretty output
89 | p.add_column('date', date)
90 | p.add_column('weather', wea)
91 | p.add_column('temperature', tem)
92 | p.add_column('wind', win)
93 | print(p)
94 | else:
95 | print('Invalid city name!')
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/bilibili/bilibili_user.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | # @File : bilibili_user.py
4 | # @Author : Monkey
5 | # @DATE : 2021/5/17 10:04
6 | from gevent import monkey; monkey.patch_all()
7 | import gevent.pool
8 | import requests
9 | import pymysql
10 | import datetime
11 |
12 |
13 | class BiliUser(object):
14 | """B站用户"""
15 |
16 | def __init__(self):
17 | self.pool = gevent.pool.Pool(size=50)
18 | # 10**7: ten million user IDs
19 | self.mid_list = list(range(1, pow(10, 7)))
20 | # self.mid_list = list(range(1, pow(10, 3)))
21 | self.conn = pymysql.Connect(host='localhost', user='root', password='root', port=3306, database='demo')
22 | self.cursor = self.conn.cursor()
23 | self.proxies = dict()
24 | self._headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'}
25 | self.data = []
26 | self.ips = []
27 | self.set_proxies()
28 |
29 | def set_proxies(self):
30 | """设置代理"""
31 | ip = "需要在这里填写上自己获取代理IP的方法"
32 | self.proxies = {
33 | 'http': 'http://{}'.format(ip),
34 | 'https': 'http://{}'.format(ip),
35 | }
36 |
37 | def get_fans_count(self, mid):
38 | """获取粉丝数量"""
39 | url = 'https://api.bilibili.com/x/relation/stat?vmid={}&jsonp=jsonp'.format(mid)
40 | response = requests.get(url, headers=self._headers, proxies=self.proxies).json()
41 | follower = response['data']['follower']
42 | following = response['data']['following']
43 | return follower, following
44 |
45 | def get_user_info(self, mid):
46 | """获取用户信息"""
47 | url = 'https://api.bilibili.com/x/space/acc/info?mid={}&jsonp=jsonp'.format(mid)
48 | err_count = 0
49 | while err_count < 5:
50 | try:
51 | response = requests.get(url, headers=self._headers, proxies=self.proxies, timeout=10).json()
52 | if response['code'] == 0:
53 | nike_name = response['data']['name']
54 | sex = response['data']['sex']
55 | level = response['data']['level']
56 | sign = response['data']['sign']
57 | birthday = response['data']['birthday']
58 | follower, following = self.get_fans_count(mid)
59 | self.data.append([mid, nike_name, sex, level, sign, birthday, follower, following])
60 | print('mid:{}\tdata:{}'.format(mid, len(self.data)))
61 | if len(self.data) >= 100:
62 | data, self.data = self.data, []
63 | self.save_data(data)
64 | break
65 | elif response['code'] == -412:
66 | raise Exception
67 | else:
68 | print(datetime.datetime.now(), response, mid)
69 | break
70 | except Exception as e:
71 | err_count += 1
72 | self.set_proxies()
73 | # print(err_count, self.proxies, e)
74 |
75 | def save_data(self, data):
76 | """保存数据"""
77 | sql = "INSERT INTO bili (mid, nike_name, sex, level, sign, birthday, follower, following) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
78 | self.cursor.executemany(sql, data)
79 | self.conn.commit()
80 | print('{}\tsaved --- {} rows'.format(datetime.datetime.now(), len(data)))
81 |
82 | def __del__(self):
83 | self.conn.close()
84 |
85 | def run(self):
86 | """启动函数"""
87 | self.pool.map(self.get_user_info, self.mid_list)
88 | if self.data:
89 | self.save_data(self.data)
90 |
91 |
92 | if __name__ == '__main__':
93 | t = BiliUser()
94 | t.run()
95 |
--------------------------------------------------------------------------------
/lianjia/lianjia.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Author: 玛卡巴卡
3 | # Date: 2021/5/6 14:39
4 |
5 | import requests
6 | from lxml import etree
7 | import pymysql
8 |
9 |
10 | class Lianjia(object):
11 | """抓取链家租房信息"""
12 |
13 | def __init__(self):
14 | self.conn = pymysql.Connect(host='localhost', port=3306, user='root', password='root', database='demo')
15 | self.cursor = self.conn.cursor()
16 | self._headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'}
17 |
18 | def __del__(self):
19 | self.conn.close()
20 |
21 | def get_response(self, url):
22 | """发起请求"""
23 | response = requests.get(url, headers=self._headers)
24 | if response.status_code == 200:
25 | response.encoding = 'utf8'
26 | return response
27 | else:
28 | print('url:{}\tresponse:{}'.format(url, response))
29 |
30 | def get_city_url(self):
31 | """获取到城市的链接"""
32 | url = 'https://www.lianjia.com/city/'
33 | html = etree.HTML(self.get_response(url).text)
34 | city_url = html.xpath('//ul[@class="city_list_ul"]//a/@href')
35 | for url in city_url:
36 | self.get_district_url(city_url=url)
37 |
38 | def get_district_url(self, city_url):
39 | """获取到区的链接"""
40 | html = etree.HTML(self.get_response(city_url + 'zufang/').text)
41 | district_url = html.xpath('//li[@class="filter__item--level2 "]/a/@href')
42 | for url in district_url:
43 | self.get_house_count(url=city_url[:-1] + url)
44 |
45 | def get_house_count(self, url):
46 | """获取当前区的房子数量"""
47 | html = etree.HTML(self.get_response(url).text)
48 | count = int(html.xpath('//span[@class="content__title--hl"]/text()')[0])
49 | if count:
50 | if count >= 3000:
51 | filter_next_url = html.xpath('//li[@class="filter__item--level3 "]/a/@href')
52 | for filter_url in filter_next_url:
53 | the_filter_url = '/'.join(url.split('/')[:3]) + filter_url
54 | html = etree.HTML(self.get_response(the_filter_url).text)
55 | count = min(int(html.xpath('//span[@class="content__title--hl"]/text()')[0]), 3000)
56 | self.start(the_filter_url, count // 30 + 1)
57 | else:
58 | self.start(url, count // 30 + 1)
59 | else:
60 | print('{}: no listings'.format(url))
61 |
62 | def start(self, url, end_page):
63 | """开始抓取数据"""
64 | for page in range(1, end_page+1):
65 | self.get_page_info(url='{}pg{}/'.format(url, page))
66 |
67 | def get_page_info(self, url):
68 | """获取当前页房源信息"""
69 | print(url, end='\t')
70 | err_count = 0
71 | response = self.get_response(url)
72 | html = etree.HTML(response.text)
73 | house_element = html.xpath('//div[@class="content__list--item"]')
74 | for element in house_element:
75 | try:
76 | house_url = '/'.join(url.split('/')[:3]) + element.xpath('./a/@href')[0]
77 | house_code = element.xpath('./@data-house_code')[0]
78 | title = element.xpath('./a/@title')[0]
79 | des = ''.join(element.xpath('./div/p[2]//text()')).replace('\n', '').replace(' ', ' ')
80 | price = int(element.xpath('./div/span/em/text()')[0])
81 | sql = "INSERT INTO lianjia (id, url, title, des, price) values ('%s', '%s', '%s', '%s', %d);" % (house_code, house_url, title, des, price)
82 | self.cursor.execute(sql)
83 | self.conn.commit()
84 | except Exception as e:
85 | err_count += 1
86 | print('errors: {}/{}'.format(err_count, len(house_element)))
87 |
88 | def run(self):
89 | """启动函数"""
90 | self.get_city_url()
91 |
92 |
93 | if __name__ == '__main__':
94 | t = Lianjia()
95 | t.run()
96 |
--------------------------------------------------------------------------------
/biqu/biqu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-02-24 17:12:52
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-02-24 17:16:23
6 | import requests
7 | from lxml import etree
8 | import random
9 | from datetime import datetime, time
10 |
11 | # pool of random User-Agent strings
12 | USER_AGENT = [
13 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
14 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
15 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
16 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
17 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
18 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
19 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
20 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
21 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
22 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
23 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
24 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
25 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
26 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
27 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
28 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
29 | ]
30 |
31 |
32 | class SpiderBook(object):
33 |
34 | def __init__(self):
35 | self.search_url = 'https://www.biqooge.com/modules/article/search.php'
36 | self._headers = {'user-agent': random.choice(USER_AGENT)}
37 |
38 | def search_book(self):
39 | book_name = self.book_name
40 | data = {
41 | 'searchtype': 'articlename',
42 | 'searchkey': book_name.encode('gbk'),
43 | }
44 | response = requests.post(self.search_url, headers=self._headers, data=data)
45 | response.encoding = response.apparent_encoding
46 | html = etree.HTML(response.text)
47 | name = html.xpath('//tr[@id="nr"]/td[1]/a/text()')
48 | book_url = html.xpath('//tr[@id="nr"]/td[1]/a/@href')
49 | author = html.xpath('//tr[@id="nr"]/td[3]/text()')
50 | for i in range(len(name)):
51 | print('#{}: author-{}\ttitle-{}'.format(i, author[i], name[i]))
52 | need_id = int(input('Enter the number of the book to download: '))
53 | self.download_book(book_url[need_id])
54 |
55 | def download_book(self, book_url):
56 | response = requests.get(book_url, headers=self._headers)
57 | response.encoding = response.apparent_encoding
58 | html = etree.HTML(response.text)
59 | zj_info = html.xpath('//dt[contains(text(), "章节目录")]/following-sibling::dd')
60 | for i in range(len(zj_info)):
61 | info = zj_info[i]
62 | zj_name = info.xpath('./a/text()')[0]
63 | zj_url = 'https://www.biqooge.com' + info.xpath('./a/@href')[0]
64 | zj_response = requests.get(zj_url, headers=self._headers)
65 | zj_response.encoding = zj_response.apparent_encoding
66 | zj_html = etree.HTML(zj_response.text)
67 | content = ''.join(zj_html.xpath('//div[@id="content"]/text()'))
68 | print('{}/{}\tname:{}\turl:{}'.format(i+1, len(zj_info), zj_name, zj_url))
69 | with open('{}.txt'.format(self.book_name), 'a', encoding='utf8') as f:
70 | f.write(zj_name + '\n')
71 | f.write(content + '\n\n')
72 |
73 | def run(self):
74 | self.book_name = '完美世界'
75 | self.search_book()
76 |
77 |
78 | if __name__ == '__main__':
79 | s = SpiderBook()
80 | s.run()
81 |
--------------------------------------------------------------------------------
/weibo/search_all.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 |
4 | import pandas as pd
5 | import requests
6 | import random
7 | import re
8 | import datetime
9 |
10 | from lxml import etree
11 |
12 |
13 | class GetFansInfo(object):
14 | """搜索微博"""
15 |
16 | def __init__(self):
17 | self._headers = {
18 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36",
19 | }
20 | self.wb_id = list()
21 | self.user_name = list()
22 | self.content = list()
23 | self.create_date = list()
24 | self.img_list = list()
25 |
26 | @staticmethod
27 | def get_tid():
28 | """获取TID参数"""
29 | url = 'https://passport.weibo.com/visitor/genvisitor?cb=gen_callback&fp={"os":"1","browser":"Chrome89,0,4389,128","fonts":"undefined","screenInfo":"1920*1080*24","plugins":"Portable Document Format::internal-pdf-viewer::Chrome PDF Plugin|::mhjfbmdgcfjbbpaeojofohoefgiehjai::Chrome PDF Viewer|::internal-nacl-plugin::Native Client"}'
30 | response = requests.get(url).text
31 | tid = re.findall(r'"tid":"(.*?)"', response)[0]
32 | return tid
33 |
34 | def get_cookie(self):
35 | """获取 SUB 和 SUBP """
36 | tid = self.get_tid()
37 | while True:
38 | url = 'https://passport.weibo.com/visitor/visitor?a=incarnate&t={}&w=3&c=95&gc=&cb=cross_domain&from=weibo&_rand={}'.format(
39 | tid, random.random())
40 | response = json.loads(re.findall(r'\((.*?)\)', requests.get(url).text)[0])
41 | if response.get('retcode') == 20000000 and response.get('data').get('sub'):
42 | cookie = ''
43 | for key in response.get('data'):
44 | cookie += '{}={};'.format(key.upper(), response.get('data').get(key))
45 | self._headers['cookie'] = cookie.rstrip(';')
46 | return response.get('data')
47 | else:
48 | tid = self.get_tid()
49 |
50 | def search(self):
51 | start_date = datetime.datetime.strptime('2020-12-11', '%Y-%m-%d')
52 | end_date = datetime.datetime.now() - datetime.timedelta(days=1)
53 | while start_date <= end_date:
54 | timescope1 = '{}-{}'.format(str(start_date).split()[0], start_date.hour)
55 | start_date += datetime.timedelta(hours=6)
56 | timescope2 = '{}-{}'.format(str(start_date).split()[0], start_date.hour)
57 | timescope = 'custom:{}:{}'.format(timescope1, timescope2)
58 | url = 'https://s.weibo.com/weibo'
59 | params = {
60 | 'q': '华夏家博会',
61 | 'typeall': '1',
62 | 'suball': '1',
63 | 'timescope': timescope,
64 | 'Refer': 'g',
65 | 'page': '1',
66 | }
67 | response = requests.get(url, headers=self._headers, params=params)
68 | response.encoding = 'utf8'
69 | if '未找到“华夏家博会”相关结果' in response.text:
70 | print(timescope, 'no results')
71 | continue
72 | html = etree.HTML(response.content)
73 | wb_info = html.xpath('//div[@action-type="feed_list_item"]')
74 | wb_id = html.xpath('//div[@action-type="feed_list_item"]/@mid')
75 | print(timescope, len(wb_info))
76 | for i in range(len(wb_info)):
77 | info = wb_info[i]
78 | user_name = info.xpath('.//a[@class="name"]/text()')
79 | content = ''.join(info.xpath('.//p[@class="txt"]//text()'))
80 | img_url = info.xpath('.//div[@node-type="feed_list_media_prev"]//img/@src')
81 | create_date = info.xpath('.//p[@class="from"]/a[1]/text()')
82 | if not user_name:
83 | continue
84 | self.wb_id.append(wb_id[i])
85 | self.user_name.append(user_name[0].strip())
86 | self.content.append(content)
87 | self.img_list.append(img_url)
88 | self.create_date.append(create_date[0].strip())
89 | # item = {
90 | # 'ID': wb_id[i],
91 | # '用户名': user_name[0].strip(),
92 | # '内容': content,
93 | # '图片链接': img_url,
94 | # '时间': create_date[0].strip(),
95 | # }
96 | # print(item)
97 | time.sleep(3)
98 | data = pd.DataFrame({
99 | 'ID': self.wb_id,
100 | '用户名': self.user_name,
101 | '内容': self.content,
102 | '图片链接': self.img_list,
103 | '时间': self.create_date,
104 | })
105 | data.to_excel('微博.xlsx', encoding='ANSI', index=False)
106 |
107 | def run(self):
108 | """启动函数"""
109 | self.search()
110 |
111 |
112 | if __name__ == '__main__':
113 | t = GetFansInfo()
114 | t.run()
115 |
--------------------------------------------------------------------------------
/ziru/zr.py:
--------------------------------------------------------------------------------
1 | from pytesseract.pytesseract import image_to_string
2 | import requests
3 | from lxml import etree
4 | from PIL import Image
5 | import pytesseract
6 | import re
7 | import time
8 | import os
9 | import pymysql
10 |
11 |
12 | class Ziru(object):
13 |
14 | def __init__(self):
15 | self._headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'}
16 | self.city_info = dict()
17 | self.cwd = '/'.join(__file__.split('/')[:-1])
18 | self.conn = pymysql.Connection(host='localhost', user='root', password='root', database='demo', port=3306)
19 | self.cursor = self.conn.cursor()
20 |
21 | def __del__(self):
22 | self.conn.close()
23 |
24 | def get_response(self, url):
25 | response = requests.get(url, headers=self._headers)
26 | if response.status_code == 200:
27 | response.encoding = response.apparent_encoding
28 | return response
29 | else:
30 | print(response.status_code)
31 | return None
32 |
33 | def get_city_info(self):
34 | response = self.get_response(url='https://www.ziroom.com/')
35 | if response is None:
36 | return
37 | html = etree.HTML(response.text)
38 | city_name = html.xpath('//a[@class="Z_city_option ani"]/text()')
39 | city_url = html.xpath('//a[@class="Z_city_option ani"]/@href')
40 | self.city_info = dict(zip(city_name, city_url))
41 |
42 | @staticmethod
43 | def image_identification(img_path):
44 | the_img = Image.open(img_path)
45 | result = pytesseract.image_to_string(the_img, config='--psm 7')
46 | os.remove(img_path)
47 | return list(result.strip())
48 |
49 | def get_zone_info(self, city_url):
50 | response = self.get_response(city_url + 'z/')
51 | if response is None:
52 | return
53 | html = etree.HTML(response.text)
54 | zone_url = html.xpath('//a[text()="区域"]/following-sibling::div/a/@href')
55 | zone_name = html.xpath('//a[text()="区域"]/following-sibling::div/a/text()')
56 | zone_info = dict(zip(zone_name, zone_url))
57 | for key in zone_info:
58 | print('Fetching data for {}'.format(key))
59 | self.get_room_info('https:{}'.format(zone_info[key]))
60 |
61 | def get_room_info(self, url):
62 | response = self.get_response(url)
63 | if response is None:
64 | print('failed to fetch {}'.format(url))
65 | return
66 | print(url)
67 | html = etree.HTML(response.text)
68 | title = html.xpath('//h5[starts-with(@class, "title")]/a/text()')
69 | room_url = ['https:{}'.format(info) for info in html.xpath('//h5[starts-with(@class, "title")]/a/@href')]
70 | desc = html.xpath('//div[@class="desc"]/div[1]/text()')
71 | location = [info.strip() for info in html.xpath('//div[@class="location"]/text()')]
72 | room_price = list()
73 | room_element = html.xpath('//div[@class="Z_list"]/div[2]/div')
74 | for element in room_element:
75 | price = ''
76 | img_url = element.xpath('.//span[@class="num"]/@style')
77 | if not img_url:
78 | continue
79 | img_url = re.findall(r'url\((.*?)\)', img_url[0])[0]
80 | price_position = [float(re.findall(r'position: -(.*?)px', info)[0]) for info in element.xpath('.//span[@class="num"]/@style')]
81 | img_path = os.path.join(self.cwd, img_url.split('/')[-1])
82 | with open(img_path, 'wb') as f:
83 | f.write(self.get_response('https:{}'.format(img_url)).content)
84 | img_nums = self.image_identification(img_path)
85 | for position in price_position:
86 | price += img_nums[int(position / 20)]
87 | try:
88 | room_price.append(int(price))
89 | except ValueError:
90 | room_price.append(None)
91 | data = {
92 | '标题': title,
93 | '链接': room_url,
94 | '信息': desc,
95 | '地址': location,
96 | '价格': room_price,
97 | }
98 | self.save_data(data)
99 | next_url = html.xpath('//a[@class="next"]/@href')
100 | if next_url:
101 | self.get_room_info('https:{}'.format(next_url[0]))
102 |
103 | def save_data(self, item):
104 | data = list()
105 | for i in range(len(item['标题'])):
106 | info = list()
107 | for key in item.keys():
108 | info.append(item[key][i])
109 | data.append(info)
110 | sql = 'INSERT INTO ziru (title, url, info, location, price) VALUES (%s, %s, %s, %s, %s);'
111 | # print(data)
112 | self.cursor.executemany(sql, data)
113 | self.conn.commit()
114 | print('Saved {} rows'.format(len(data)))
115 |
116 | def run(self):
117 | self.get_zone_info('https://sh.ziroom.com/')
118 |
119 |
120 | if __name__ == '__main__':
121 | s = Ziru()
122 | s.run()
123 |
124 |
--------------------------------------------------------------------------------
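The crawler above defeats Ziroom's price obfuscation: every digit of a price is cut from one shared sprite image, and each <span class="num"> selects its glyph with a background-position offset of 20 px per digit. A minimal sketch of just the decoding step, assuming the sprite has already been OCR'd into a digit list (all values below are illustrative):

    # digits: what pytesseract read off the sprite, left to right (illustrative)
    digits = list('4071865329')
    # positions: background-position offsets of one room's digit spans, in px (illustrative)
    positions = [40.0, 0.0, 120.0, 20.0]
    # each glyph is 20 px wide, so offset / 20 indexes into the OCR result
    price = ''.join(digits[int(p / 20)] for p in positions)
    print(price)  # -> '7450'
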
/tweet/Tweet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-02-24 17:18:02
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-02-24 17:23:17
6 | from datetime import datetime
7 | import requests
8 | from GetToken import GetToken
9 | import random
10 | from prettytable import PrettyTable
11 |
12 | # Pool of User-Agent strings to pick from at random
13 | USER_AGENT = [
14 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
15 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
16 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
17 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
18 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
19 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
20 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
21 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
22 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
23 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
24 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
25 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
26 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
27 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
28 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
29 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
30 | ]
31 |
32 |
33 | class SearchTweet(GetToken):
34 | """
35 |     Search for tweets or user accounts by keyword.
36 |     Scrapes with a guest token, so there is no request limit,
37 |     but an IP outside mainland China is required...
38 | """
39 |
40 | def __init__(self):
41 | super().__init__()
42 | self.start = datetime.now()
43 |         # Request headers. The guest token has to be fetched as in the code below.
44 | self.headers = {
45 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs'
46 | '%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
47 | 'user-agent': random.choice(USER_AGENT),
48 | 'x-guest-token': self.get_token(proxies_ip='127.0.0.1:10809'),
49 | }
50 |         # Endpoint that returns the search data
51 | self.url = 'https://twitter.com/i/api/2/search/adaptive.json'
52 |
53 | def start_requests(self, search_key, search_type='tweet'):
54 | """
55 |         Start the search.
56 |         :param search_key: keyword to search for
57 |         :param search_type: search category: 'tweet' for tweets, 'account' for user accounts
58 | :return:
59 | """
60 | params = {
61 | "q": search_key,
62 | "count": 20,
63 | }
64 | if search_type == 'account':
65 | params['result_filter'] = 'user'
66 | response = requests.get(url=self.url, headers=self.headers, params=params, timeout=10)
67 | if response.status_code != 200:
68 | return f'{search_key} ERR === {response}'
69 | tweets = response.json().get('globalObjects').get('tweets')
70 | users = response.json().get('globalObjects').get('users')
71 | if not len(tweets) and not len(users):
72 |             return f'No data captured for {search_key}'
73 | p = PrettyTable()
74 | if search_type == 'tweet':
75 | tweet_id = []
76 | create_time = []
77 | full_text = []
78 | user_name = []
79 | screen_name = []
80 | for key in tweets:
81 | tweet_id.append(key)
82 | create_time.append(tweets.get(key).get('created_at'))
83 | full_text.append(tweets.get(key).get('text'))
84 | user_id = tweets.get(key).get('user_id_str')
85 | user_name.append(users.get(user_id).get('name'))
86 | screen_name.append(users.get(user_id).get('screen_name'))
87 |             p.add_column(fieldname='Tweet ID', column=tweet_id)
88 |             p.add_column(fieldname='Posted At', column=create_time)
89 |             p.add_column(fieldname='Text', column=full_text)
90 |             p.add_column(fieldname='Name', column=user_name)
91 |             p.add_column(fieldname='Handle', column=screen_name)
92 | else:
93 | user_name = []
94 | screen_name = []
95 | description = []
96 | for key in users:
97 | user_name.append(users.get(key).get('name'))
98 | screen_name.append(users.get(key).get('screen_name'))
99 | description.append(users.get(key).get('description'))
100 |             p.add_column(fieldname='Name', column=user_name)
101 |             p.add_column(fieldname='Handle', column=screen_name)
102 |             p.add_column(fieldname='Bio', column=description)
103 | return p
104 |
105 | def run(self):
106 | search_key = ['葫芦娃', '奥特曼']
107 | for key in search_key:
108 | result = self.start_requests(search_key=key, search_type='account')
109 | print(result)
110 |
111 | def __del__(self):
112 | end = datetime.now()
113 |         print(f'Started: {self.start}, finished: {end}\nElapsed: {end-self.start}')
114 |
115 |
116 | if __name__ == '__main__':
117 | t = SearchTweet()
118 | t.run()
119 |
--------------------------------------------------------------------------------
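GetToken, imported above, is not reproduced in this listing. Historically a guest token could be minted by POSTing to Twitter's guest/activate endpoint with the same public Bearer value the web client ships; a hedged sketch of that flow (the endpoint's behavior has changed over the years and may no longer work, and the proxies_ip handling mirrors the parameter used above):

    import requests

    # Same public Bearer as in SearchTweet.__init__ above
    BEARER = ('Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs'
              '%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA')

    def get_guest_token(proxies_ip=None):
        # Historically answered with {"guest_token": "..."}.
        proxies = {'http': f'http://{proxies_ip}', 'https': f'http://{proxies_ip}'} if proxies_ip else None
        resp = requests.post('https://api.twitter.com/1.1/guest/activate.json',
                             headers={'authorization': BEARER}, proxies=proxies, timeout=10)
        return resp.json().get('guest_token')
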
/weibo/get_fans_info.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Author: monkey-hjy
3 | # @Date: 2021-04-22 11:32:21
4 | # @Last Modified by: monkey-hjy
5 | # @Last Modified time: 2021-04-22 16:57:22
6 | from gevent import monkey; monkey.patch_all()
7 | import gevent.pool
8 | import json
9 | import requests
10 | import random
11 | import re
12 | import pymongo
13 | import datetime
14 | import redis
15 |
16 |
17 | class GetFansInfo(object):
18 |     """Collect profile info for a given account's fans"""
19 |
20 | def __init__(self):
21 | self.mongo_conf = pymongo.MongoClient(host='127.0.0.1', port=27017)
22 | self.mongo_db = self.mongo_conf['data']['weibo']
23 | self.redis_conf = redis.StrictRedis()
24 |         # Arg 1: user ID.
25 |         # Arg 2: starting cursor; the cursor for the next page is returned by this request
26 | self.get_fans_url = "https://m.weibo.cn/api/container/getIndex?containerid=231051_-_fans_-_{}&since_id={}"
27 |         # Arg 1: user ID
28 | self.get_info_url = "https://weibo.com/p/100505{}/info?mod=pedit_more"
29 | self._headers = {
30 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36",
31 | }
32 | self.get_cookie()
33 | self.err_count = 0
34 |
35 | def __del__(self):
36 | self.redis_conf.close()
37 | self.mongo_conf.close()
38 |
39 | def get_response(self, url):
40 |         """Fetch and return the response for the given URL"""
41 | err_count = 0
42 | while err_count < 5:
43 | try:
44 | response = requests.get(url, headers=self._headers)
45 | if response.status_code == 200:
46 | response.encoding = 'utf8'
47 |                     if 'Sina Visitor System' in response.text:
48 | raise Exception
49 | return response
50 | else:
51 | raise Exception
52 | except:
53 | err_count += 1
54 | self.get_cookie()
55 | return None
56 |
57 | def get_fans_info(self, user_info):
58 |         """Fetch one fan's profile info"""
59 | user_info = user_info['user']
60 | response = self.get_response(url=self.get_info_url.format(user_info['id']))
61 | if response is None:
62 |             print('Error === {}'.format(user_info))
63 | return
64 | city = re.findall(r'所在地:.*?pt_detail\\">(.*?)<', response.text)
65 | city = city[0] if city else '其他'
66 | gender = re.findall(r'性别:.*?pt_detail\\">(.*?)<', response.text)
67 | gender = gender[0] if gender else '未知'
68 | reg_date = re.findall(r'注册时间:.*?pt_detail\\">(.*?)<', response.text)
69 | reg_date = reg_date[0].replace('\\n', '').replace('\\r', '').strip() if reg_date else '未知'
70 | item = {
71 | "the_fans_id": user_info['id'],
72 | "screen_name": user_info['screen_name'],
73 | "followers_count": user_info['followers_count'],
74 | "follow_count": user_info['follow_count'],
75 | "gender": gender,
76 | "city": city,
77 | "reg_date": reg_date
78 | }
79 | self.mongo_db.insert_one(item)
80 |
81 | def get_fans_id(self, user_id, since_id=0):
82 | """获取到某个用户的粉丝"""
83 | print(datetime.datetime.now(), user_id, since_id)
84 | if since_id >= 4999:
85 | return
86 | response = self.get_response(url=self.get_fans_url.format(user_id, since_id))
87 | if response is None:
88 |             print('Hmm, parsing this user seems to have gone wrong....\t{} is None'.format(user_id))
89 |             return
90 |         elif response.json()['ok'] == 0:
91 |             print('Hmm, parsing this user seems to have gone wrong....\t{}\t{}\t{}'.format(self.err_count, user_id, response.json()))
92 | if self.err_count < 10:
93 | self.err_count += 1
94 | self.get_fans_id(user_id, since_id)
95 | else:
96 | pip = self.redis_conf.pipeline()
97 | [pip.sadd('new_wb_user', info['user']['id']) for info in response.json()['data']['cards'][-1]['card_group']]
98 | pip.execute()
99 | try:
100 | next_since_id = response.json()['data']['cardlistInfo']['since_id']
101 | if next_since_id:
102 | self.err_count = 0
103 | self.get_fans_id(user_id=user_id, since_id=next_since_id)
104 | except Exception as e:
105 | print(e, user_id, since_id, response.json())
106 |
107 | @staticmethod
108 | def get_tid():
109 |         """Fetch the tid parameter"""
110 | url = 'https://passport.weibo.com/visitor/genvisitor?cb=gen_callback&fp={"os":"1","browser":"Chrome89,0,4389,128","fonts":"undefined","screenInfo":"1920*1080*24","plugins":"Portable Document Format::internal-pdf-viewer::Chrome PDF Plugin|::mhjfbmdgcfjbbpaeojofohoefgiehjai::Chrome PDF Viewer|::internal-nacl-plugin::Native Client"}'
111 | response = requests.get(url).text
112 | tid = re.findall(r'"tid":"(.*?)"', response)[0]
113 | return tid
114 |
115 | def get_cookie(self):
116 |         """Fetch the SUB and SUBP visitor cookies"""
117 | tid = self.get_tid()
118 | while True:
119 | url = 'https://passport.weibo.com/visitor/visitor?a=incarnate&t={}&w=3&c=95&gc=&cb=cross_domain&from=weibo&_rand={}'.format(
120 | tid, random.random())
121 | response = json.loads(re.findall(r'\((.*?)\)', requests.get(url).text)[0])
122 | if response.get('retcode') == 20000000 and response.get('data').get('sub'):
123 | cookie = ''
124 | for key in response.get('data'):
125 | cookie += '{}={};'.format(key.upper(), response.get('data').get(key))
126 | self._headers['cookie'] = cookie.rstrip(';')
127 | return response.get('data')
128 | else:
129 | tid = self.get_tid()
130 |
131 | def run(self):
132 |         """Entry point"""
133 | user_ids = list(set([line.replace('\n', '') for line in open('大V.txt', encoding='utf8').readlines()]))
134 | exist = [line.replace('\n', '') for line in open('exist.txt', encoding='utf8').readlines()]
135 |         # # # 1. High-concurrency mode. Risks IP bans; choose at your own discretion...
136 |         # pool = gevent.pool.Pool(50)
137 |         # pool.map(self.get_fans_id, user_ids)
138 |
139 |         # 2. Single-threaded mode. No IP bans, but not exactly fast.
140 | for user_id in user_ids:
141 | if user_id in exist:
142 | continue
143 | self.get_fans_id(user_id)
144 | with open('exist.txt', encoding='utf8', mode='a') as f:
145 | f.write('{}\n'.format(user_id))
146 |
147 |
148 | if __name__ == '__main__':
149 | t = GetFansInfo()
150 | t.run()
151 | """
152 | As a kid
153 | I was always longing
154 | longing for pocket money of my own
155 | longing for a bicycle of my own
156 | longing to play until dark without going home
157 | longing for mom to stop nagging me
158 |
159 | Grown up
160 | I keep wishing
161 | wishing I didn't have to count every penny
162 | wishing I could take a real, unhurried walk
163 | wishing I could rest at home for a whole day
164 | wishing I could chat with mom every day
165 |
166 | They say
167 | at 20 you miss childhood
168 | at 40 you miss your youth
169 | at 60 you miss your prime
170 | only children keep asking
171 | Mom
172 | when will I grow up
173 | ---- H 2021/4/26 Shanghai
174 | ---- closing lines adapted from a top comment on 《儿时的夏日》
175 | """
176 |
--------------------------------------------------------------------------------
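The fans crawler above pages with a server-issued cursor: each getIndex response carries the since_id for the next page, and the listing is capped near 5000 entries (hence the since_id >= 4999 guard). The cursor loop in isolation, with a hypothetical fetch_page helper standing in for get_response plus JSON parsing:

    def crawl_fans(fetch_page, user_id):
        """Cursor pagination: each response returns the next since_id; a falsy value means done."""
        since_id = 0
        while since_id < 4999:                    # the fan listing is capped near 5000
            data = fetch_page(user_id, since_id)  # hypothetical helper
            yield from data['data']['cards'][-1]['card_group']
            since_id = data['data']['cardlistInfo'].get('since_id')
            if not since_id:                      # no cursor on the last page
                break
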
/lagou/lg.py:
--------------------------------------------------------------------------------
1 | from gevent import monkey; monkey.patch_all()
2 | import gevent.pool
3 | import json
4 | import random
5 | import re
6 |
7 | from lxml import etree
8 | import execjs
9 | import requests
10 | from sns_spider.config.settings import USER_AGENTS
11 | import pymongo
12 |
13 |
14 | class LG(object):
15 |     """Lagou JS reverse engineering"""
16 |
17 | def __init__(self):
18 | self.client = pymongo.MongoClient(host='localhost', port=27017)
19 | self.mongo_col = self.client['demo']['lagou']
20 | self.js_file = open('lg.js', encoding='utf8').read()
21 | self._headers = {
22 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
23 | 'referer': 'https://www.lagou.com/jobs/list_java/p-city_3?px=default',
24 | }
25 | self.token = ''
26 | self.proxies = dict()
27 | self.set_proxies()
28 | self.get_token()
29 | self.city_info = dict()
30 |
31 | def set_proxies(self):
32 |         """Set up the proxy"""
33 |         ip = "put a proxy IP here"
34 | self.proxies = {
35 | 'http': 'http://{}'.format(ip),
36 | 'https': 'http://{}'.format(ip),
37 | }
38 |
39 | def get_response(self, url, params=None, data=None, method='GET'):
40 | while True:
41 | try:
42 | if method == 'GET':
43 | response = requests.get(url, params=params, headers=self._headers, proxies=self.proxies)
44 | else:
45 | response = requests.post(url, params=params, data=data, headers=self._headers, proxies=self.proxies)
46 | response.encoding = response.apparent_encoding
47 | return response
48 | except:
49 | self.set_proxies()
50 | self.get_token()
51 |
52 | def get_token(self):
53 |         """Fetch a guest cookie"""
54 | url = 'https://www.lagou.com/gongsi/allCity.html'
55 | while True:
56 | headers = {'user-agent': random.choice(USER_AGENTS)}
57 | try:
58 | response = requests.get(url, headers=headers, allow_redirects=False, proxies=self.proxies, timeout=10)
59 | response.encoding = response.apparent_encoding
60 | user_trace_token = re.findall(r'user_trace_token=(.*?);', response.headers['Set-Cookie'])[0]
61 | x_http_token = re.findall(r'X_HTTP_TOKEN=(.*?);', response.headers['Set-Cookie'])[0]
62 | href = response.headers['Location']
63 | ctx = execjs.compile(self.js_file, cwd='/opt/homebrew/Cellar/node/16.3.0/bin/')
64 | self.token = ctx.call('window.gt.prototype.a',
65 | json.dumps({"href": href, "search": href.split('check.html')[1]}))
66 | self._headers['cookie'] = 'user_trace_token={};X_HTTP_TOKEN={};__lg_stoken__={}'.format(
67 | user_trace_token, x_http_token, self.token)
68 | return
69 | except Exception as e:
70 |                 print('Failed to get token\tproxies:{}\te:{}'.format(self.proxies, e))
71 | self.set_proxies()
72 |
73 | def get_city_info(self):
74 |         """Fetch the city list"""
75 | url = 'https://www.lagou.com/jobs/allCity.html'
76 | html = etree.HTML(self.get_response(url).text)
77 | city_url = html.xpath('//ul[@class="city_list"]/li/a/@href')
78 | city_name = html.xpath('//ul[@class="city_list"]/li/a/text()')
79 | self.city_info = {city_name[i]: city_url[i] for i in range(len(city_url))}
80 |
81 | def get_job_info(self, input_item):
82 |         """Fetch job listings"""
83 | url = 'https://www.lagou.com/jobs/positionAjax.json'
84 | params = {
85 | "px": "default",
86 | "city": input_item['city_name'],
87 | "district": input_item['district'],
88 | "needAddtionalResult": "false",
89 | }
90 | sid = ''
91 | page = 1
92 | while True:
93 | data = {
94 | "first": "true",
95 | "pn": page,
96 | "kd": input_item['keyword'],
97 | "sid": sid,
98 | }
99 | job_info = self.get_response(url, params=params, data=data, method='POST').json()
100 | if 'success' in job_info:
101 | sid = job_info['content']['showId']
102 | job_info = job_info['content']['positionResult']['result']
103 | if not job_info or page == 30:
104 | break
105 | self.parse_info(job_info, input_item)
106 |             print('{}\tpage:{}\trows:{}'.format(input_item, page, len(job_info)))
107 | page += 1
108 |
109 | def parse_info(self, job_info, input_item):
110 |         """Parse the result payload"""
111 | items = list()
112 | for info in job_info:
113 | item = {
114 | '_id': info['positionId'],
115 | 'job_name': info['positionName'],
116 | 'job_url': 'https://www.lagou.com/jobs/{}.html'.format(info['positionId']),
117 | 'company_name': info['companyFullName'],
118 | 'company_size': info['companySize'],
119 | 'industry_field': info['industryField'],
120 | 'finance_stage': info['financeStage'],
121 | 'company_label': info['companyLabelList'],
122 | 'skill_label': info['skillLables'],
123 | 'position_label': info['positionLables'],
124 | 'create_time': info['createTime'],
125 | 'city': info['city'],
126 | 'district': info['district'],
127 | 'salary': info['salary'],
128 | 'work_year': info['workYear'],
129 | 'job_nature': info['jobNature'],
130 | 'education': info['education'],
131 | 'position_advantage': info['positionAdvantage'],
132 | 'position_detail': info['positionDetail'],
133 | 'position_address': info['positionAddress']
134 | }
135 | items.append(item)
136 | try:
137 | self.mongo_col.insert_many(items)
138 |             # print('{}\tinserted {} rows'.format(input_item, len(items)))
139 | except:
140 | for item in items:
141 | try:
142 | self.mongo_col.insert_one(item)
143 | except:
144 | pass
145 |
146 | def run(self):
147 |         """Entry point"""
148 | self.get_city_info()
149 | # print(self.city_info)
150 | for city_name, city_url in self.city_info.items():
151 | # for city_name in ['郑州', '北京', '上海', '广州', '深圳']:
152 | city_url = self.city_info[city_name]
153 | if '-zhaopin' not in city_url:
154 | city_url = city_url.rstrip('/') + '-zhaopin/'
155 | response = self.get_response(url=city_url, method='GET')
156 | html = etree.HTML(response.text)
157 | district_name = html.xpath('//div[@data-type="district"]/a[position()>1]/text()')
158 | item = [{'city_name': city_name, 'district': name, 'keyword': 'python'} for name in district_name]
159 | print(item)
160 | pool = gevent.pool.Pool(size=1)
161 | pool.map(self.get_job_info, item)
162 |
163 |
164 | if __name__ == '__main__':
165 | t = LG()
166 | t.run()
167 |
168 |
--------------------------------------------------------------------------------
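The __lg_stoken__ cookie above is produced by running the site's own obfuscated JS (lg.js) under Node through PyExecJS; the Python side only supplies the redirect URL taken from the allCity.html 302. The call pattern in isolation (the href value is a placeholder; a Node runtime must be available, either on PATH or via the cwd argument as in the listing):

    import json
    import execjs

    js_src = open('lg.js', encoding='utf8').read()
    ctx = execjs.compile(js_src)
    href = '.../check.html?...'  # Location header from the 302 (placeholder)
    token = ctx.call('window.gt.prototype.a',
                     json.dumps({'href': href, 'search': href.split('check.html')[1]}))
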
/weibo/search.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Author: 玛卡巴卡
3 | # Date: 2021/4/20 10:34
4 | import datetime
5 | import logging
6 | import re
7 | import time
8 | from multiprocessing.dummy import Pool as ThreadPool
9 | import requests
10 | import pandas as pd
11 | import random
12 | import os
13 | requests.packages.urllib3.disable_warnings()
14 |
15 | USER_AGENTS = [
16 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
17 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
18 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
19 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
20 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
21 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
22 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
23 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
24 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
25 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
26 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
27 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
28 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
29 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
30 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
31 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
32 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
33 | ]
34 |
35 |
36 | class WeiBo(object):
37 | """
38 |     Search with a fixed set of keywords and
39 |     collect every matching post and all of its comments
40 | """
41 |
42 | def __init__(self):
43 | self.get_wb_url = 'https://m.weibo.cn/api/container/getIndex'
44 | self.comment_url = 'https://m.weibo.cn/comments/hotflow'
45 | self._headers = {'user-agent': ''}
46 | self.wb_info_list = dict()
47 | self.content_id = list()
48 | self.content = list()
49 | self.comment_id = list()
50 | self.comment = list()
51 | logging.basicConfig(level=logging.INFO,
52 | format='%(asctime)s %(filename)s [line:%(lineno)d] %(levelname)s %(message)s',
53 | datefmt='%Y-%m-%d %H:%M:%S',
54 | filename='f:/PDemo/spider_log/{}-{}.log'.format(__file__.split('/')[-1].split('.')[0], str(datetime.datetime.now()).split(" ")[0]),
55 | filemode='a')
56 |
57 | def get_response(self, url, params=None, cookie=None):
58 |         """Issue a request"""
59 | err_count = 0
60 | while err_count < 5:
61 | try:
62 | time.sleep(1)
63 | if cookie is not None:
64 | self._headers['cookie'] = cookie
65 | else:
66 | self._headers = {'user-agent': random.choice(USER_AGENTS)}
67 | response = requests.get(url, params=params, headers=self._headers)
68 | if response.status_code == 200:
69 | return response
70 | else:
71 | err_count += 1
72 | time.sleep(30)
73 | except:
74 | err_count += 1
75 | return None
76 |
77 | def get_wb_id(self, keyword, page):
78 |         """Collect Weibo post IDs"""
79 | wb_id_list = []
80 | params = {
81 | 'containerid': '100103type=1&q={}'.format(keyword),
82 | 'page_type': 'searchall',
83 | 'page': page,
84 | }
85 | response = self.get_response(url=self.get_wb_url, params=params)
86 | if response is None:
87 |             logging.error('- keyword:{}, page:{}\tfailed'.format(keyword, page))
88 | return
89 | response = response.json()['data']['cards']
90 | for info in response:
91 | try:
92 | try:
93 | self.wb_info_list[info['mblog']['id']] = info['mblog']['comments_count']
94 | wb_id_list.append([info['mblog']['id'], info['mblog']['comments_count']])
95 | except:
96 | self.wb_info_list[info['card_group'][0]['mblog']['id']] = info['card_group'][0]['mblog'][
97 | 'comments_count']
98 | wb_id_list.append([info['card_group'][0]['mblog']['id'], info['card_group'][0]['mblog']['comments_count']])
99 | except Exception as e:
100 | pass
101 | logging.info('{}\t{}\t{}'.format(keyword, page, len(wb_id_list)))
102 | if wb_id_list:
103 | return True
104 | else:
105 | return False
106 |
107 | def get_wb_content(self, id):
108 |         """Fetch the full text of a post"""
109 | url = 'https://m.weibo.cn/statuses/extend?id={}'.format(id)
110 | response = self.get_response(url=url)
111 | if response is None:
112 | return
113 | try:
114 | content = re.sub('<.*?>', '', response.json()['data']['longTextContent'])
115 | self.content_id.append(id)
116 | self.content.append(content)
117 | logging.info('- {}\t{}'.format(id, len(content)))
118 | except Exception as e:
119 | logging.error('- {}\t{}'.format(e, id))
120 |
121 | def get_wb_comment(self, wb_id):
122 |         """Fetch a post's comments"""
123 | max_id = 0
124 | max_id_type = 0
125 | while True:
126 | time.sleep(2)
127 | params = {
128 | 'id': wb_id,
129 | 'mid': wb_id,
130 | 'max_id': max_id,
131 | 'max_id_type': max_id_type,
132 | }
133 | err_count = 0
134 | while err_count < 4:
135 |                 response = self.get_response(url=self.comment_url, params=params, cookie='cookie of a user logged in to m.weibo.cn')
136 | if response is None:
137 | logging.error('{}出错'.format(wb_id))
138 | return
139 | try:
140 | response.json()
141 | except:
142 | logging.error('转JSON失败 --- {}'.format(response.text))
143 | return None
144 | if response.json()['ok']:
145 | try:
146 | response = response.json()['data']
147 | logging.info('- {}\t{}\t{}'.format(wb_id, max_id, len(response['data'])))
148 | for info in response['data']:
149 | self.comment_id.append(wb_id)
150 | self.comment.append(re.sub('<.*?>', '', info['text']))
151 |                         # Cursor for the next page, passed as a parameter on the next request
152 | next_max_id = response['max_id']
153 | max_id_type = response['max_id_type']
154 | if next_max_id == 0:
155 | return
156 |                         logging.info('- next page {}'.format(next_max_id))
157 | max_id = next_max_id
158 | time.sleep(1)
159 | break
160 | except Exception as e:
161 | err_count += 1
162 | time.sleep(5)
163 | logging.error('- {}\t{}\t{}'.format(wb_id, err_count, e))
164 | if err_count == 4:
165 | time.sleep(30)
166 | return
167 | else:
168 | logging.error('- {}\t{}'.format(response.json(), params))
169 | return
170 |
171 | def run(self):
172 |         """Entry point"""
173 |         keyword_list = ['put the keywords to search for here']
174 | for keyword in keyword_list:
175 | self.__init__()
176 | logging.info('=== {} ==='.format(keyword))
177 | flag = True
178 | page = 1
179 | while flag:
180 | the_page_wb_id = self.get_wb_id(keyword=keyword, page=page)
181 | if the_page_wb_id:
182 | page += 1
183 | else:
184 | break
185 | logging.info(len(self.wb_info_list))
186 | pool = ThreadPool(20)
187 | pool.map(self.get_wb_content, list(self.wb_info_list.keys()))
188 | for key in self.wb_info_list.keys():
189 | if self.wb_info_list[key]:
190 | self.get_wb_comment(wb_id=key)
191 |
192 | content_data = pd.DataFrame({
193 |                 'weibo_id': self.content_id,
194 |                 'content': self.content
195 | })
196 |
197 | comment_data = pd.DataFrame({
198 |                 'weibo_id': self.comment_id,
199 |                 'comment': self.comment
200 | })
201 |
202 | """
203 |             Persist the collected DataFrames here as needed
204 | """
205 |
206 |
207 | if __name__ == '__main__':
208 | t = WeiBo()
209 | t.run()
210 |
--------------------------------------------------------------------------------
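Comment paging in get_wb_comment above uses a two-part cursor: hotflow returns a max_id plus a max_id_type, both of which must be echoed back on the next request, and max_id == 0 marks the last page (a logged-in m.weibo.cn cookie is required). The loop in isolation, with a hypothetical fetch helper:

    def crawl_comments(fetch, wb_id):
        """(max_id, max_id_type) cursor pair; max_id == 0 ends the listing."""
        max_id, max_id_type = 0, 0
        while True:
            data = fetch(wb_id, max_id, max_id_type)  # hypothetical helper
            yield from data['data']['data']
            max_id = data['data']['max_id']
            max_id_type = data['data']['max_id_type']
            if max_id == 0:
                break
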
/zhihu/public_func.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File: public_func.py
3 | # Date: 2024/1/5 11:03
4 | # Auth: HJY
5 | # Desc:
6 | import ctypes
7 | import os
8 | import random
9 | import time
10 | from datetime import datetime
11 | from hashlib import md5
12 | from urllib.parse import urlparse, parse_qs, urlencode
13 |
14 | import requests
15 | from requests import utils
16 | from loguru import logger
17 |
18 | # h: fixed lookup tables the signature depends on
19 | h = {
20 | "zb": [20, 223, 245, 7, 248, 2, 194, 209, 87, 6, 227, 253, 240, 128, 222, 91, 237, 9, 125, 157, 230, 93, 252,
21 | 205, 90, 79, 144, 199, 159, 197, 186, 167, 39, 37, 156, 198, 38, 42, 43, 168, 217, 153, 15, 103, 80, 189,
22 | 71, 191, 97, 84, 247, 95, 36, 69, 14, 35, 12, 171, 28, 114, 178, 148, 86, 182, 32, 83, 158, 109, 22, 255,
23 | 94, 238, 151, 85, 77, 124, 254, 18, 4, 26, 123, 176, 232, 193, 131, 172, 143, 142, 150, 30, 10, 146, 162,
24 | 62, 224, 218, 196, 229, 1, 192, 213, 27, 110, 56, 231, 180, 138, 107, 242, 187, 54, 120, 19, 44, 117,
25 | 228, 215, 203, 53, 239, 251, 127, 81, 11, 133, 96, 204, 132, 41, 115, 73, 55, 249, 147, 102, 48, 122,
26 | 145, 106, 118, 74, 190, 29, 16, 174, 5, 177, 129, 63, 113, 99, 31, 161, 76, 246, 34, 211, 13, 60, 68,
27 | 207, 160, 65, 111, 82, 165, 67, 169, 225, 57, 112, 244, 155, 51, 236, 200, 233, 58, 61, 47, 100, 137,
28 | 185, 64, 17, 70, 234, 163, 219, 108, 170, 166, 59, 149, 52, 105, 24, 212, 78, 173, 45, 0, 116, 226, 119,
29 | 136, 206, 135, 175, 195, 25, 92, 121, 208, 126, 139, 3, 75, 141, 21, 130, 98, 241, 40, 154, 66, 184, 49,
30 | 181, 46, 243, 88, 101, 183, 8, 23, 72, 188, 104, 179, 210, 134, 250, 201, 164, 89, 216, 202, 220, 50,
31 | 221, 152, 140, 33, 235, 214],
32 | "zk": [1170614578, 1024848638, 1413669199, -343334464, -766094290, -1373058082, -143119608, -297228157,
33 | 1933479194, -971186181, -406453910, 460404854, -547427574, -1891326262, -1679095901, 2119585428,
34 | -2029270069, 2035090028, -1521520070, -5587175, -77751101, -2094365853, -1243052806, 1579901135,
35 | 1321810770, 456816404, -1391643889, -229302305, 330002838, -788960546, 363569021, -1947871109],
36 | "zm": [120, 50, 98, 101, 99, 98, 119, 100, 103, 107, 99, 119, 97, 99, 110, 111]
37 | }
38 | # salt: the final lookup string used to encode the signature
39 | salt = '6fpLRqJO8M/c3jnYxFkUVC4ZIG12SiH=5v0mXDazWBTsuw7QetbKdoPyAl+hN9rgE'
40 | # base_list: fixed array used in the second (XOR) offset pass
41 | base_list = [48, 53, 57, 48, 53, 51, 102, 55, 100, 49, 53, 101, 48, 49, 100, 55]
42 |
43 |
44 | class PublicFunc:
45 |
46 | def __init__(self, log_name='default') -> None:
47 | self.now_date = datetime.now().strftime('%Y%m%d')
48 | log_path = '/data/log' if os.path.exists('/data/log') else '/Users/monkey/Documents/log'
49 | logger.add(os.path.join(log_path, f'{log_name}_{self.now_date}.log'), encoding='utf-8',
50 | enqueue=True, retention='10 days')
51 | self._headers = {
52 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
53 | }
54 |
55 | @staticmethod
56 | def parse_params(url):
57 | url = urlparse(url)
58 | params = {k: v[0] for k, v in parse_qs(url.query).items()}
59 | return params
60 |
61 | @staticmethod
62 | def get_proxies():
63 | return {
64 | 'http': 'xxx',
65 | 'https': 'xxx',
66 | }
67 |
68 | def get_response(self, url, params=None, data=None, headers=None, method='get', cookies=None):
69 | err_count = 0
70 | e_res = None
71 | while err_count < 5:
72 | proxies = self.get_proxies()
73 | proxy_name = proxies.get('http').split('@')[-1].split('.')[1]
74 | try:
75 | headers = self._headers if headers is None else headers
76 | if method == 'get':
77 | response = requests.get(url, params=params, timeout=15, headers=headers, proxies=proxies, cookies=cookies)
78 | elif method == 'post':
79 | response = requests.post(url, data=data, timeout=15, headers=headers, cookies=cookies)
80 | else:
81 | return None
82 | if response.status_code == 200:
83 | response.encoding = 'utf8'
84 | if '网络不给力,请稍后重试' in response.text and 'paging' not in response.text:
85 | raise Exception('网络不给力,请稍后重试')
86 | if '安全验证' in response.text and 'paging' not in response.text:
87 | raise Exception('安全验证')
88 | return response
89 | if '"code":4041,"name":"NotFoundError","message":"资源不存在"' in response.text:
90 | return response
91 | raise Exception(response.status_code)
92 | except Exception as e:
93 | err_count += 1
94 | e_res = e
95 | return e_res
96 |
97 | @staticmethod
98 | def encrypt_md5(md5_str):
99 |         """MD5 hash"""
100 | md5_obj = md5()
101 | md5_obj.update(md5_str.encode())
102 | return md5_obj.hexdigest()
103 |
104 | @staticmethod
105 | def str_to_unicode(translate_str):
106 |         """Convert a str into a list of ints via ord()"""
107 | ord_list = list()
108 | for str_ in translate_str:
109 | ord_list.append(ord(str_))
110 | return ord_list
111 |
112 | @staticmethod
113 | def add_params_to_list(ord_list):
114 | """
115 |         Pad out the data in ord_list:
116 |         part 1 is a random int below 127
117 |         part 2 is a 0
118 |         part 3 is ord_list itself
119 |         the three parts above form a 34-element array
120 |         part 4 is [14,14,14,14,14,14,14,14,14,14,14,14,14,14]
121 |         which brings the final array to 48 elements
122 | :param ord_list:
123 | :return:
124 | """
125 | params_list = list()
126 |         random_num = int(random.random() * 127)  # random value, makes every signature different
127 | params_list.append(random_num)
128 | params_list.append(0)
129 | params_list.extend(ord_list)
130 | params_list.extend([14 for i in range(14)])
131 | return params_list
132 |
133 | @staticmethod
134 | def get_head_16(params_list):
135 | """
136 |         Take the first 16 elements of params_list
137 |         and XOR them with base_list (and 42):
138 | base_list=[48,53,57,48,53,51,102,55,100,49,53,101,48,49,100,55]
139 | :param params_list:
140 | :return:
141 | """
142 | head_16_list = [params_list[index] ^ base_list[index] ^ 42 for index in range(16)]
143 | return head_16_list
144 |
145 | def js_func_g_x(self, e, t):
146 | """
147 |         Port of the JS function __g.x
148 | :param e:
149 | :param t:
150 | :return:
151 | """
152 | n = list()
153 | r = len(e) // 16
154 |         # step in blocks of 16
155 | for i in range(0, r):
156 |             a = [0 for i in range(16)] # 16-element list
157 | o = e[16 * i: 16 * (i + 1)]
158 | for c in range(16):
159 | a[c] = o[c] ^ t[c]
160 | t = self.js_func_g_r(a)
161 | n.extend(t)
162 | return n
163 |
164 | def js_func_g_r(self, e):
165 | """
166 |         Port of the JS function __g.r
167 | :param e:
168 | :return:
169 | """
170 |         t = [0 for i in range(16)] # 16-element list
171 |         n = [0 for j in range(36)] # 36-element list
172 | n[0] = self.js_func_b(e, 0)
173 | n[1] = self.js_func_b(e, 4)
174 | n[2] = self.js_func_b(e, 8)
175 | n[3] = self.js_func_b(e, 12)
176 | for r in range(32):
177 | o = self.js_func_g(n[r + 1] ^ n[r + 2] ^ n[r + 3] ^ h.get('zk')[r])
178 | n[r + 4] = n[r] ^ o
179 | self.js_func_i(n[35], t, 0)
180 | self.js_func_i(n[34], t, 4)
181 | self.js_func_i(n[33], t, 8)
182 | self.js_func_i(n[32], t, 12)
183 | return t
184 |
185 | @staticmethod
186 | def js_func_b(e, t):
187 | """
188 |         Port of the JS function B
189 | :param e:
190 | :param t:
191 | :return:
192 | """
193 | return (255 & e[t]) << 24 | (255 & e[t + 1]) << 16 | (255 & e[t + 2]) << 8 | 255 & e[t + 3]
194 |
195 | def js_func_g(self, e):
196 | """
197 |         Port of the JS function G
198 | :param e:
199 | :return:
200 | """
201 |
202 |         t = [0 for i in range(4)] # 4-element list
203 |         n = [0 for j in range(4)] # 4-element list
204 |         self.js_func_i(e, t, 0) # call js_func_i to seed the initial bytes
205 | n[0] = h.get('zb')[255 & t[0]]
206 | n[1] = h.get('zb')[255 & t[1]]
207 | n[2] = h.get('zb')[255 & t[2]]
208 | n[3] = h.get('zb')[255 & t[3]]
209 | r = self.js_func_b(n, 0)
210 | res = r ^ self.js_func_q(r, 2) ^ self.js_func_q(r, 10) ^ self.js_func_q(r, 18) ^ self.js_func_q(r, 24)
211 | return res
212 |
213 | def js_func_q(self, e, t):
214 | """
215 |         Port of the JS function Q
216 | :param e:
217 | :param t:
218 | :return:
219 | """
220 |         res = (4294967295 & e) << t | self.unsigned_right_shift(e, 32 - t)
221 | return res
222 |
223 | def js_func_i(self, e, t, n):
224 | """
225 |         Port of the JS function i
226 | :param e:
227 | :param t:
228 | :param n:
229 | :return:
230 | """
231 |         t[n] = 255 & self.unsigned_right_shift(e, 24)
232 |         t[n + 1] = 255 & self.unsigned_right_shift(e, 16)
233 |         t[n + 2] = 255 & self.unsigned_right_shift(e, 8)
234 | t[n + 3] = 255 & e
235 |
236 |     def unsigned_right_shift(self, n, i):
237 |         # If the number is negative, first coerce it to a 32-bit unsigned uint
238 |         if n < 0:
239 |             n = ctypes.c_uint32(n).value
240 |         # Shift counts are normally positive; for JS-style compatibility, a negative count turns the right shift into a left shift
241 | if i < 0:
242 | return -self.int_overflow(n << abs(i))
243 | return self.int_overflow(n >> i)
244 |
245 | @staticmethod
246 | def int_overflow(val):
247 | maxint = 2147483647
248 | if not -maxint - 1 <= val <= maxint:
249 | val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1
250 | return val
251 |
252 | @staticmethod
253 | def get_result_value_list(new_48_list):
254 |         """Transform the value list"""
255 |         # Slice the list into [i:i+3] chunks and reverse them
256 | result_value_list = list()
257 | split_list = [new_48_list[i:i + 3] for i in range(0, len(new_48_list), 3)]
258 | split_list.reverse()
259 | for i in range(len(split_list)):
260 | _temp_list = split_list[i]
261 | _temp_list.reverse()
262 | _val = i % 4
263 | if _val == 0:
264 | temp_value_1 = _temp_list[_val] ^ 58
265 | temp_value_2 = _temp_list[1] << 8
266 | temp_value_3 = _temp_list[2] << 16
267 | elif _val == 1:
268 | temp_value_1 = _temp_list[0]
269 | temp_value_2 = (_temp_list[_val] ^ 58) << 8
270 | temp_value_3 = _temp_list[2] << 16
271 | elif _val == 2:
272 | temp_value_1 = _temp_list[0]
273 | temp_value_2 = _temp_list[1] << 8
274 | temp_value_3 = (_temp_list[_val] ^ 58) << 16
275 | else:
276 | temp_value_1 = _temp_list[0]
277 | temp_value_2 = _temp_list[1] << 8
278 | temp_value_3 = _temp_list[2] << 16
279 | value = temp_value_1 | temp_value_2 | temp_value_3
280 | result_value_list.append(value)
281 | return result_value_list
282 |
283 | @staticmethod
284 | def make_zhihu_sign(result_value_list):
285 |         """Encode the signature string via the salt alphabet"""
286 | sign_str = ''
287 | for _value in result_value_list:
288 | sign_str += salt[_value & 63]
289 | sign_str += salt[_value >> 6 & 63]
290 | sign_str += salt[_value >> 12 & 63]
291 | sign_str += salt[_value >> 18 & 63]
292 | return sign_str
293 |
294 | def test_case(self, url, d_c0):
295 | md5_str = '101_3_3.0+' + url + d_c0
296 | md5_res = self.encrypt_md5(md5_str)
297 | ord_list = self.str_to_unicode(md5_res)
298 | params_list = self.add_params_to_list(ord_list)
299 | head_16_list = self.get_head_16(params_list)
300 | end_32_list = params_list[16:]
301 | new_16_list = self.js_func_g_r(head_16_list)
302 | new_32_list = self.js_func_g_x(end_32_list, new_16_list)
303 | new_48_list = list()
304 | new_48_list.extend(new_16_list)
305 | new_48_list.extend(new_32_list)
306 | result_value_list = self.get_result_value_list(new_48_list)
307 | sign_str = self.make_zhihu_sign(result_value_list)
308 | return sign_str
309 |
310 | def get_cookie_d_c0(self, proxies=None):
311 | end_sign = self.test_case('/udid', '')
312 | headers = {
313 | 'x-zse-93': '101_3_3.0',
314 | 'x-api-version': '3.0.91',
315 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
316 | 'x-zse-96': '2.0_' + end_sign,
317 | 'accept': '*/*',
318 | }
319 | d_c0 = None
320 | err_count = 0
321 | while err_count <= 10:
322 | try:
323 | first_res = requests.post('https://www.zhihu.com/udid', data={}, headers=headers, proxies=proxies,
324 | timeout=60)
325 | cookie_t = utils.dict_from_cookiejar(first_res.cookies)
326 | d_c0 = cookie_t.get('d_c0')
327 | return d_c0
328 | except Exception as e:
329 | err_count += 1
330 | time.sleep(random.randint(1, 10))
331 | logger.error(f'get_cookie_d_c0 err_count:{err_count}, proxies: {proxies}, e: {e}')
332 | return d_c0
333 |
334 | def _get_end_sign(self, md5_str):
335 | # md5_str = '101_3_3.0+'+url+d_c0
336 | md5_res = self.encrypt_md5(md5_str)
337 | ord_list = self.str_to_unicode(md5_res)
338 | params_list = self.add_params_to_list(ord_list)
339 | head_16_list = self.get_head_16(params_list)
340 | end_32_list = params_list[16:]
341 | new_16_list = self.js_func_g_r(head_16_list)
342 | new_32_list = self.js_func_g_x(end_32_list, new_16_list)
343 | new_48_list = list()
344 | new_48_list.extend(new_16_list)
345 | new_48_list.extend(new_32_list)
346 | result_value_list = self.get_result_value_list(new_48_list)
347 | sign_str = self.make_zhihu_sign(result_value_list)
348 | return sign_str
349 |
350 | @staticmethod
351 | def get_headers(d_c0, end_sign):
352 | headers = {
353 | "cookie": f"d_c0={d_c0};",
354 | 'x-zse-93': '101_3_3.0',
355 | 'x-api-version': '3.0.91',
356 | 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
357 | 'x-zse-96': '2.0_' + end_sign,
358 | 'accept': '*/*',
359 | # 'referer': 'https://www.zhihu.com/search?q=%E6%B5%B7%E8%B4%BC%E7%8E%8B%E7%B4%A2%E9%9A%86%E8%BA%AB%E4%B8%96%E6%8F%AD%E7%A7%98&type=zvideo&utm_content=search_hot',
360 | 'accept-encoding': 'gzip, deflate',
361 | 'accept-language': 'zh-CN,zh;q=0.9',
362 | }
363 | return headers
364 |
365 | def run(self, keyword):
366 | url = f'https://www.zhihu.com/api/v4/search_v3?gk_version=gz-gaokao&t=general&q={keyword}&correction=1&offset=0&limit=20&filter_fields=&lc_idx=0&show_all_topics=0&search_source=Filter&vertical=answer&time_interval=a_week'
367 |         url_params = self.parse_params(url)
368 | params = url_params
369 | offset = url_params.get('offset', 0)
370 | req_url = 'https://www.zhihu.com/api/v4/search_v3'
371 | reply_num = 0
372 | while True:
373 |             d_c0 = self.get_cookie_d_c0()
374 |             end_sign = self._get_end_sign(f'101_3_3.0+/api/v4/search_v3?{urlencode(params)}+{d_c0}')
375 |             headers = self.get_headers(d_c0, end_sign)
376 |             response = self.get_response(url=req_url, headers=headers, params=params)
377 | if isinstance(response, requests.Response):
378 | break
379 | reply_num += 1
380 | logger.error(
381 | f'search keyword reply {reply_num} times! keyword: {keyword}, offset: {offset}, e: {response}')
382 | if reply_num > 50:
383 | return
384 | response = response.json()
385 | return response
386 |
387 |
388 | if __name__ == '__main__':
389 | t = PublicFunc()
390 | keyword = '海贼王'
391 | t.run(keyword)
392 |
393 |
--------------------------------------------------------------------------------
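Putting public_func.py together: the x-zse-96 header is '2.0_' + sign, where the sign is the MD5 of '101_3_3.0' + '+' + path-with-query + '+' + d_c0 pushed through the custom cipher above and encoded against salt. A usage sketch that mirrors run():

    from urllib.parse import urlencode

    pf = PublicFunc()
    d_c0 = pf.get_cookie_d_c0()             # anonymous udid cookie from /udid
    params = {'t': 'general', 'q': '海贼王', 'offset': 0, 'limit': 20}
    path = f'/api/v4/search_v3?{urlencode(params)}'
    sign = pf._get_end_sign(f'101_3_3.0+{path}+{d_c0}')
    headers = pf.get_headers(d_c0, sign)    # carries x-zse-96: '2.0_' + sign
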
/music163/Music.js:
--------------------------------------------------------------------------------
1 | /*
2 | * @Author: monkey-hjy
3 | * @Date: 2021-02-24 17:42:52
4 | * @Last Modified by: monkey-hjy
5 | * @Last Modified time: 2021-02-24 17:48:21
6 | */
7 | var CryptoJS = CryptoJS || function(u, p) {
8 | var d = {}
9 | , l = d.lib = {}
10 | , s = function() {}
11 | , t = l.Base = {
12 | extend: function(a) {
13 | s.prototype = this;
14 | var c = new s;
15 | a && c.mixIn(a);
16 | c.hasOwnProperty("init") || (c.init = function() {
17 | c.$super.init.apply(this, arguments)
18 | }
19 | );
20 | c.init.prototype = c;
21 | c.$super = this;
22 | return c
23 | },
24 | create: function() {
25 | var a = this.extend();
26 | a.init.apply(a, arguments);
27 | return a
28 | },
29 | init: function() {},
30 | mixIn: function(a) {
31 | for (var c in a)
32 | a.hasOwnProperty(c) && (this[c] = a[c]);
33 | a.hasOwnProperty("toString") && (this.toString = a.toString)
34 | },
35 | clone: function() {
36 | return this.init.prototype.extend(this)
37 | }
38 | }
39 | , r = l.WordArray = t.extend({
40 | init: function(a, c) {
41 | a = this.words = a || [];
42 | this.sigBytes = c != p ? c : 4 * a.length
43 | },
44 | toString: function(a) {
45 | return (a || v).stringify(this)
46 | },
47 | concat: function(a) {
48 | var c = this.words
49 | , e = a.words
50 | , j = this.sigBytes;
51 | a = a.sigBytes;
52 | this.clamp();
53 | if (j % 4)
54 | for (var k = 0; k < a; k++)
55 | c[j + k >>> 2] |= (e[k >>> 2] >>> 24 - 8 * (k % 4) & 255) << 24 - 8 * ((j + k) % 4);
56 | else if (65535 < e.length)
57 | for (k = 0; k < a; k += 4)
58 | c[j + k >>> 2] = e[k >>> 2];
59 | else
60 | c.push.apply(c, e);
61 | this.sigBytes += a;
62 | return this
63 | },
64 | clamp: function() {
65 | var a = this.words
66 | , c = this.sigBytes;
67 | a[c >>> 2] &= 4294967295 << 32 - 8 * (c % 4);
68 | a.length = u.ceil(c / 4)
69 | },
70 | clone: function() {
71 | var a = t.clone.call(this);
72 | a.words = this.words.slice(0);
73 | return a
74 | },
75 | random: function(a) {
76 | for (var c = [], e = 0; e < a; e += 4)
77 | c.push(4294967296 * u.random() | 0);
78 | return new r.init(c,a)
79 | }
80 | })
81 | , w = d.enc = {}
82 | , v = w.Hex = {
83 | stringify: function(a) {
84 | var c = a.words;
85 | a = a.sigBytes;
86 | for (var e = [], j = 0; j < a; j++) {
87 | var k = c[j >>> 2] >>> 24 - 8 * (j % 4) & 255;
88 | e.push((k >>> 4).toString(16));
89 | e.push((k & 15).toString(16))
90 | }
91 | return e.join("")
92 | },
93 | parse: function(a) {
94 | for (var c = a.length, e = [], j = 0; j < c; j += 2)
95 | e[j >>> 3] |= parseInt(a.substr(j, 2), 16) << 24 - 4 * (j % 8);
96 | return new r.init(e,c / 2)
97 | }
98 | }
99 | , b = w.Latin1 = {
100 | stringify: function(a) {
101 | var c = a.words;
102 | a = a.sigBytes;
103 | for (var e = [], j = 0; j < a; j++)
104 | e.push(String.fromCharCode(c[j >>> 2] >>> 24 - 8 * (j % 4) & 255));
105 | return e.join("")
106 | },
107 | parse: function(a) {
108 | for (var c = a.length, e = [], j = 0; j < c; j++)
109 | e[j >>> 2] |= (a.charCodeAt(j) & 255) << 24 - 8 * (j % 4);
110 | return new r.init(e,c)
111 | }
112 | }
113 | , x = w.Utf8 = {
114 | stringify: function(a) {
115 | try {
116 | return decodeURIComponent(escape(b.stringify(a)))
117 | } catch (c) {
118 | throw Error("Malformed UTF-8 data")
119 | }
120 | },
121 | parse: function(a) {
122 | return b.parse(unescape(encodeURIComponent(a)))
123 | }
124 | }
125 | , q = l.BufferedBlockAlgorithm = t.extend({
126 | reset: function() {
127 | this.i9b = new r.init;
128 | this.ty5D = 0
129 | },
130 | vb6V: function(a) {
131 | "string" == typeof a && (a = x.parse(a));
132 | this.i9b.concat(a);
133 | this.ty5D += a.sigBytes
134 | },
135 | kY3x: function(a) {
136 | var c = this.i9b
137 | , e = c.words
138 | , j = c.sigBytes
139 | , k = this.blockSize
140 | , b = j / (4 * k)
141 | , b = a ? u.ceil(b) : u.max((b | 0) - this.JP1x, 0);
142 | a = b * k;
143 | j = u.min(4 * a, j);
144 | if (a) {
145 | for (var q = 0; q < a; q += k)
146 | this.qL5Q(e, q);
147 | q = e.splice(0, a);
148 | c.sigBytes -= j
149 | }
150 | return new r.init(q,j)
151 | },
152 | clone: function() {
153 | var a = t.clone.call(this);
154 | a.i9b = this.i9b.clone();
155 | return a
156 | },
157 | JP1x: 0
158 | });
159 | l.Hasher = q.extend({
160 | cfg: t.extend(),
161 | init: function(a) {
162 | this.cfg = this.cfg.extend(a);
163 | this.reset()
164 | },
165 | reset: function() {
166 | q.reset.call(this);
167 | this.lt3x()
168 | },
169 | update: function(a) {
170 | this.vb6V(a);
171 | this.kY3x();
172 | return this
173 | },
174 | finalize: function(a) {
175 | a && this.vb6V(a);
176 | return this.mA4E()
177 | },
178 | blockSize: 16,
179 | lS3x: function(a) {
180 | return function(b, e) {
181 | return (new a.init(e)).finalize(b)
182 | }
183 | },
184 | vl6f: function(a) {
185 | return function(b, e) {
186 | return (new n.HMAC.init(a,e)).finalize(b)
187 | }
188 | }
189 | });
190 | var n = d.algo = {};
191 | return d
192 | }(Math);
193 | (function() {
194 | var u = CryptoJS
195 | , p = u.lib.WordArray;
196 | u.enc.Base64 = {
197 | stringify: function(d) {
198 | var l = d.words
199 | , p = d.sigBytes
200 | , t = this.bA0x;
201 | d.clamp();
202 | d = [];
203 | for (var r = 0; r < p; r += 3)
204 | for (var w = (l[r >>> 2] >>> 24 - 8 * (r % 4) & 255) << 16 | (l[r + 1 >>> 2] >>> 24 - 8 * ((r + 1) % 4) & 255) << 8 | l[r + 2 >>> 2] >>> 24 - 8 * ((r + 2) % 4) & 255, v = 0; 4 > v && r + .75 * v < p; v++)
205 | d.push(t.charAt(w >>> 6 * (3 - v) & 63));
206 | if (l = t.charAt(64))
207 | for (; d.length % 4; )
208 | d.push(l);
209 | return d.join("")
210 | },
211 | parse: function(d) {
212 | var l = d.length
213 | , s = this.bA0x
214 | , t = s.charAt(64);
215 | t && (t = d.indexOf(t),
216 | -1 != t && (l = t));
217 | for (var t = [], r = 0, w = 0; w < l; w++)
218 | if (w % 4) {
219 | var v = s.indexOf(d.charAt(w - 1)) << 2 * (w % 4)
220 | , b = s.indexOf(d.charAt(w)) >>> 6 - 2 * (w % 4);
221 | t[r >>> 2] |= (v | b) << 24 - 8 * (r % 4);
222 | r++
223 | }
224 | return p.create(t, r)
225 | },
226 | bA0x: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
227 | }
228 | }
229 | )();
230 | (function(u) {
231 | function p(b, n, a, c, e, j, k) {
232 | b = b + (n & a | ~n & c) + e + k;
233 | return (b << j | b >>> 32 - j) + n
234 | }
235 | function d(b, n, a, c, e, j, k) {
236 | b = b + (n & c | a & ~c) + e + k;
237 | return (b << j | b >>> 32 - j) + n
238 | }
239 | function l(b, n, a, c, e, j, k) {
240 | b = b + (n ^ a ^ c) + e + k;
241 | return (b << j | b >>> 32 - j) + n
242 | }
243 | function s(b, n, a, c, e, j, k) {
244 | b = b + (a ^ (n | ~c)) + e + k;
245 | return (b << j | b >>> 32 - j) + n
246 | }
247 | for (var t = CryptoJS, r = t.lib, w = r.WordArray, v = r.Hasher, r = t.algo, b = [], x = 0; 64 > x; x++)
248 | b[x] = 4294967296 * u.abs(u.sin(x + 1)) | 0;
249 | r = r.MD5 = v.extend({
250 | lt3x: function() {
251 | this.cN1x = new w.init([1732584193, 4023233417, 2562383102, 271733878])
252 | },
253 | qL5Q: function(q, n) {
254 | for (var a = 0; 16 > a; a++) {
255 | var c = n + a
256 | , e = q[c];
257 | q[c] = (e << 8 | e >>> 24) & 16711935 | (e << 24 | e >>> 8) & 4278255360
258 | }
259 | var a = this.cN1x.words
260 | , c = q[n + 0]
261 | , e = q[n + 1]
262 | , j = q[n + 2]
263 | , k = q[n + 3]
264 | , z = q[n + 4]
265 | , r = q[n + 5]
266 | , t = q[n + 6]
267 | , w = q[n + 7]
268 | , v = q[n + 8]
269 | , A = q[n + 9]
270 | , B = q[n + 10]
271 | , C = q[n + 11]
272 | , u = q[n + 12]
273 | , D = q[n + 13]
274 | , E = q[n + 14]
275 | , x = q[n + 15]
276 | , f = a[0]
277 | , m = a[1]
278 | , g = a[2]
279 | , h = a[3]
280 | , f = p(f, m, g, h, c, 7, b[0])
281 | , h = p(h, f, m, g, e, 12, b[1])
282 | , g = p(g, h, f, m, j, 17, b[2])
283 | , m = p(m, g, h, f, k, 22, b[3])
284 | , f = p(f, m, g, h, z, 7, b[4])
285 | , h = p(h, f, m, g, r, 12, b[5])
286 | , g = p(g, h, f, m, t, 17, b[6])
287 | , m = p(m, g, h, f, w, 22, b[7])
288 | , f = p(f, m, g, h, v, 7, b[8])
289 | , h = p(h, f, m, g, A, 12, b[9])
290 | , g = p(g, h, f, m, B, 17, b[10])
291 | , m = p(m, g, h, f, C, 22, b[11])
292 | , f = p(f, m, g, h, u, 7, b[12])
293 | , h = p(h, f, m, g, D, 12, b[13])
294 | , g = p(g, h, f, m, E, 17, b[14])
295 | , m = p(m, g, h, f, x, 22, b[15])
296 | , f = d(f, m, g, h, e, 5, b[16])
297 | , h = d(h, f, m, g, t, 9, b[17])
298 | , g = d(g, h, f, m, C, 14, b[18])
299 | , m = d(m, g, h, f, c, 20, b[19])
300 | , f = d(f, m, g, h, r, 5, b[20])
301 | , h = d(h, f, m, g, B, 9, b[21])
302 | , g = d(g, h, f, m, x, 14, b[22])
303 | , m = d(m, g, h, f, z, 20, b[23])
304 | , f = d(f, m, g, h, A, 5, b[24])
305 | , h = d(h, f, m, g, E, 9, b[25])
306 | , g = d(g, h, f, m, k, 14, b[26])
307 | , m = d(m, g, h, f, v, 20, b[27])
308 | , f = d(f, m, g, h, D, 5, b[28])
309 | , h = d(h, f, m, g, j, 9, b[29])
310 | , g = d(g, h, f, m, w, 14, b[30])
311 | , m = d(m, g, h, f, u, 20, b[31])
312 | , f = l(f, m, g, h, r, 4, b[32])
313 | , h = l(h, f, m, g, v, 11, b[33])
314 | , g = l(g, h, f, m, C, 16, b[34])
315 | , m = l(m, g, h, f, E, 23, b[35])
316 | , f = l(f, m, g, h, e, 4, b[36])
317 | , h = l(h, f, m, g, z, 11, b[37])
318 | , g = l(g, h, f, m, w, 16, b[38])
319 | , m = l(m, g, h, f, B, 23, b[39])
320 | , f = l(f, m, g, h, D, 4, b[40])
321 | , h = l(h, f, m, g, c, 11, b[41])
322 | , g = l(g, h, f, m, k, 16, b[42])
323 | , m = l(m, g, h, f, t, 23, b[43])
324 | , f = l(f, m, g, h, A, 4, b[44])
325 | , h = l(h, f, m, g, u, 11, b[45])
326 | , g = l(g, h, f, m, x, 16, b[46])
327 | , m = l(m, g, h, f, j, 23, b[47])
328 | , f = s(f, m, g, h, c, 6, b[48])
329 | , h = s(h, f, m, g, w, 10, b[49])
330 | , g = s(g, h, f, m, E, 15, b[50])
331 | , m = s(m, g, h, f, r, 21, b[51])
332 | , f = s(f, m, g, h, u, 6, b[52])
333 | , h = s(h, f, m, g, k, 10, b[53])
334 | , g = s(g, h, f, m, B, 15, b[54])
335 | , m = s(m, g, h, f, e, 21, b[55])
336 | , f = s(f, m, g, h, v, 6, b[56])
337 | , h = s(h, f, m, g, x, 10, b[57])
338 | , g = s(g, h, f, m, t, 15, b[58])
339 | , m = s(m, g, h, f, D, 21, b[59])
340 | , f = s(f, m, g, h, z, 6, b[60])
341 | , h = s(h, f, m, g, C, 10, b[61])
342 | , g = s(g, h, f, m, j, 15, b[62])
343 | , m = s(m, g, h, f, A, 21, b[63]);
344 | a[0] = a[0] + f | 0;
345 | a[1] = a[1] + m | 0;
346 | a[2] = a[2] + g | 0;
347 | a[3] = a[3] + h | 0
348 | },
349 | mA4E: function() {
350 | var b = this.i9b
351 | , n = b.words
352 | , a = 8 * this.ty5D
353 | , c = 8 * b.sigBytes;
354 | n[c >>> 5] |= 128 << 24 - c % 32;
355 | var e = u.floor(a / 4294967296);
356 | n[(c + 64 >>> 9 << 4) + 15] = (e << 8 | e >>> 24) & 16711935 | (e << 24 | e >>> 8) & 4278255360;
357 | n[(c + 64 >>> 9 << 4) + 14] = (a << 8 | a >>> 24) & 16711935 | (a << 24 | a >>> 8) & 4278255360;
358 | b.sigBytes = 4 * (n.length + 1);
359 | this.kY3x();
360 | b = this.cN1x;
361 | n = b.words;
362 | for (a = 0; 4 > a; a++)
363 | c = n[a],
364 | n[a] = (c << 8 | c >>> 24) & 16711935 | (c << 24 | c >>> 8) & 4278255360;
365 | return b
366 | },
367 | clone: function() {
368 | var b = v.clone.call(this);
369 | b.cN1x = this.cN1x.clone();
370 | return b
371 | }
372 | });
373 | t.MD5 = v.lS3x(r);
374 | t.HmacMD5 = v.vl6f(r)
375 | }
376 | )(Math);
377 | (function() {
378 | var u = CryptoJS
379 | , p = u.lib
380 | , d = p.Base
381 | , l = p.WordArray
382 | , p = u.algo
383 | , s = p.EvpKDF = d.extend({
384 | cfg: d.extend({
385 | keySize: 4,
386 | hasher: p.MD5,
387 | iterations: 1
388 | }),
389 | init: function(d) {
390 | this.cfg = this.cfg.extend(d)
391 | },
392 | compute: function(d, r) {
393 | for (var p = this.cfg, s = p.hasher.create(), b = l.create(), u = b.words, q = p.keySize, p = p.iterations; u.length < q; ) {
394 | n && s.update(n);
395 | var n = s.update(d).finalize(r);
396 | s.reset();
397 | for (var a = 1; a < p; a++)
398 | n = s.finalize(n),
399 | s.reset();
400 | b.concat(n)
401 | }
402 | b.sigBytes = 4 * q;
403 | return b
404 | }
405 | });
406 | u.EvpKDF = function(d, l, p) {
407 | return s.create(p).compute(d, l)
408 | }
409 | }
410 | )();
411 | CryptoJS.lib.Cipher || function(u) {
412 | var p = CryptoJS
413 | , d = p.lib
414 | , l = d.Base
415 | , s = d.WordArray
416 | , t = d.BufferedBlockAlgorithm
417 | , r = p.enc.Base64
418 | , w = p.algo.EvpKDF
419 | , v = d.Cipher = t.extend({
420 | cfg: l.extend(),
421 | createEncryptor: function(e, a) {
422 | return this.create(this.JY1x, e, a)
423 | },
424 | createDecryptor: function(e, a) {
425 | return this.create(this.bqV9M, e, a)
426 | },
427 | init: function(e, a, b) {
428 | this.cfg = this.cfg.extend(b);
429 | this.Qq2x = e;
430 | this.L0x = a;
431 | this.reset()
432 | },
433 | reset: function() {
434 | t.reset.call(this);
435 | this.lt3x()
436 | },
437 | process: function(e) {
438 | this.vb6V(e);
439 | return this.kY3x()
440 | },
441 | finalize: function(e) {
442 | e && this.vb6V(e);
443 | return this.mA4E()
444 | },
445 | keySize: 4,
446 | ivSize: 4,
447 | JY1x: 1,
448 | bqV9M: 2,
449 | lS3x: function(e) {
450 | return {
451 | encrypt: function(b, k, d) {
452 | return ("string" == typeof k ? c : a).encrypt(e, b, k, d)
453 | },
454 | decrypt: function(b, k, d) {
455 | return ("string" == typeof k ? c : a).decrypt(e, b, k, d)
456 | }
457 | }
458 | }
459 | });
460 | d.StreamCipher = v.extend({
461 | mA4E: function() {
462 | return this.kY3x(!0)
463 | },
464 | blockSize: 1
465 | });
466 | var b = p.mode = {}
467 | , x = function(e, a, b) {
468 | var c = this.tw5B;
469 | c ? this.tw5B = u : c = this.DB9s;
470 | for (var d = 0; d < b; d++)
471 | e[a + d] ^= c[d]
472 | }
473 | , q = (d.BlockCipherMode = l.extend({
474 | createEncryptor: function(e, a) {
475 | return this.Encryptor.create(e, a)
476 | },
477 | createDecryptor: function(e, a) {
478 | return this.Decryptor.create(e, a)
479 | },
480 | init: function(e, a) {
481 | this.vw6q = e;
482 | this.tw5B = a
483 | }
484 | })).extend();
485 | q.Encryptor = q.extend({
486 | processBlock: function(e, a) {
487 | var b = this.vw6q
488 | , c = b.blockSize;
489 | x.call(this, e, a, c);
490 | b.encryptBlock(e, a);
491 | this.DB9s = e.slice(a, a + c)
492 | }
493 | });
494 | q.Decryptor = q.extend({
495 | processBlock: function(e, a) {
496 | var b = this.vw6q
497 | , c = b.blockSize
498 | , d = e.slice(a, a + c);
499 | b.decryptBlock(e, a);
500 | x.call(this, e, a, c);
501 | this.DB9s = d
502 | }
503 | });
504 | b = b.CBC = q;
505 | q = (p.pad = {}).Pkcs7 = {
506 | pad: function(a, b) {
507 | for (var c = 4 * b, c = c - a.sigBytes % c, d = c << 24 | c << 16 | c << 8 | c, l = [], n = 0; n < c; n += 4)
508 | l.push(d);
509 | c = s.create(l, c);
510 | a.concat(c)
511 | },
512 | unpad: function(a) {
513 | a.sigBytes -= a.words[a.sigBytes - 1 >>> 2] & 255
514 | }
515 | };
516 | d.BlockCipher = v.extend({
517 | cfg: v.cfg.extend({
518 | mode: b,
519 | padding: q
520 | }),
521 | reset: function() {
522 | v.reset.call(this);
523 | var a = this.cfg
524 | , b = a.iv
525 | , a = a.mode;
526 | if (this.Qq2x == this.JY1x)
527 | var c = a.createEncryptor;
528 | else
529 | c = a.createDecryptor,
530 | this.JP1x = 1;
531 | this.eT2x = c.call(a, this, b && b.words)
532 | },
533 | qL5Q: function(a, b) {
534 | this.eT2x.processBlock(a, b)
535 | },
536 | mA4E: function() {
537 | var a = this.cfg.padding;
538 | if (this.Qq2x == this.JY1x) {
539 | a.pad(this.i9b, this.blockSize);
540 | var b = this.kY3x(!0)
541 | } else
542 | b = this.kY3x(!0),
543 | a.unpad(b);
544 | return b
545 | },
546 | blockSize: 4
547 | });
548 | var n = d.CipherParams = l.extend({
549 | init: function(a) {
550 | this.mixIn(a)
551 | },
552 | toString: function(a) {
553 | return (a || this.formatter).stringify(this)
554 | }
555 | })
556 | , b = (p.format = {}).OpenSSL = {
557 | stringify: function(a) {
558 | var b = a.ciphertext;
559 | a = a.salt;
560 | return (a ? s.create([1398893684, 1701076831]).concat(a).concat(b) : b).toString(r)
561 | },
562 | parse: function(a) {
563 | a = r.parse(a);
564 | var b = a.words;
565 | if (1398893684 == b[0] && 1701076831 == b[1]) {
566 | var c = s.create(b.slice(2, 4));
567 | b.splice(0, 4);
568 | a.sigBytes -= 16
569 | }
570 | return n.create({
571 | ciphertext: a,
572 | salt: c
573 | })
574 | }
575 | }
576 | , a = d.SerializableCipher = l.extend({
577 | cfg: l.extend({
578 | format: b
579 | }),
580 | encrypt: function(a, b, c, d) {
581 | d = this.cfg.extend(d);
582 | var l = a.createEncryptor(c, d);
583 | b = l.finalize(b);
584 | l = l.cfg;
585 | return n.create({
586 | ciphertext: b,
587 | key: c,
588 | iv: l.iv,
589 | algorithm: a,
590 | mode: l.mode,
591 | padding: l.padding,
592 | blockSize: a.blockSize,
593 | formatter: d.format
594 | })
595 | },
596 | decrypt: function(a, b, c, d) {
597 | d = this.cfg.extend(d);
598 | b = this.Hj0x(b, d.format);
599 | return a.createDecryptor(c, d).finalize(b.ciphertext)
600 | },
601 | Hj0x: function(a, b) {
602 | return "string" == typeof a ? b.parse(a, this) : a
603 | }
604 | })
605 | , p = (p.kdf = {}).OpenSSL = {
606 | execute: function(a, b, c, d) {
607 | d || (d = s.random(8));
608 | a = w.create({
609 | keySize: b + c
610 | }).compute(a, d);
611 | c = s.create(a.words.slice(b), 4 * c);
612 | a.sigBytes = 4 * b;
613 | return n.create({
614 | key: a,
615 | iv: c,
616 | salt: d
617 | })
618 | }
619 | }
620 | , c = d.PasswordBasedCipher = a.extend({
621 | cfg: a.cfg.extend({
622 | kdf: p
623 | }),
624 | encrypt: function(b, c, d, l) {
625 | l = this.cfg.extend(l);
626 | d = l.kdf.execute(d, b.keySize, b.ivSize);
627 | l.iv = d.iv;
628 | b = a.encrypt.call(this, b, c, d.key, l);
629 | b.mixIn(d);
630 | return b
631 | },
632 | decrypt: function(b, c, d, l) {
633 | l = this.cfg.extend(l);
634 | c = this.Hj0x(c, l.format);
635 | d = l.kdf.execute(d, b.keySize, b.ivSize, c.salt);
636 | l.iv = d.iv;
637 | return a.decrypt.call(this, b, c, d.key, l)
638 | }
639 | })
640 | }();
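
One detail worth decoding in the block above: the OpenSSL formatter's magic constants `1398893684` and `1701076831` are simply the big-endian ASCII words "Salt" and "ed__", i.e. the `Salted__` header OpenSSL prepends to salted ciphertext before the 8-byte salt. A quick Python check of that fact, plus a sketch of the corresponding parse step:

```python
import base64
import struct

# The two 32-bit words used by the OpenSSL formatter above.
MAGIC = struct.pack(">II", 1398893684, 1701076831)
assert MAGIC == b"Salted__"

def parse_openssl(b64: str):
    """Split OpenSSL-formatted ciphertext into (salt, ciphertext).

    Mirrors the parse() above: if the decoded bytes start with the
    'Salted__' header, the next 8 bytes are the salt; otherwise there
    is no salt and the whole payload is ciphertext.
    """
    raw = base64.b64decode(b64)
    if raw.startswith(b"Salted__"):
        return raw[8:16], raw[16:]
    return None, raw
```
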
641 | (function() {
642 | for (var u = CryptoJS, p = u.lib.BlockCipher, d = u.algo, l = [], s = [], t = [], r = [], w = [], v = [], b = [], x = [], q = [], n = [], a = [], c = 0; 256 > c; c++)
643 | a[c] = 128 > c ? c << 1 : c << 1 ^ 283;
644 | for (var e = 0, j = 0, c = 0; 256 > c; c++) {
645 | var k = j ^ j << 1 ^ j << 2 ^ j << 3 ^ j << 4
646 | , k = k >>> 8 ^ k & 255 ^ 99;
647 | l[e] = k;
648 | s[k] = e;
649 | var z = a[e]
650 | , F = a[z]
651 | , G = a[F]
652 | , y = 257 * a[k] ^ 16843008 * k;
653 | t[e] = y << 24 | y >>> 8;
654 | r[e] = y << 16 | y >>> 16;
655 | w[e] = y << 8 | y >>> 24;
656 | v[e] = y;
657 | y = 16843009 * G ^ 65537 * F ^ 257 * z ^ 16843008 * e;
658 | b[k] = y << 24 | y >>> 8;
659 | x[k] = y << 16 | y >>> 16;
660 | q[k] = y << 8 | y >>> 24;
661 | n[k] = y;
662 | e ? (e = z ^ a[a[a[G ^ z]]],
663 | j ^= a[a[j]]) : e = j = 1
664 | }
665 | var H = [0, 1, 2, 4, 8, 16, 32, 64, 128, 27, 54]
666 | , d = d.AES = p.extend({
667 | lt3x: function() {
668 | for (var a = this.L0x, c = a.words, d = a.sigBytes / 4, a = 4 * ((this.beT6N = d + 6) + 1), e = this.bqT9K = [], j = 0; j < a; j++)
669 | if (j < d)
670 | e[j] = c[j];
671 | else {
672 | var k = e[j - 1];
673 | j % d ? 6 < d && 4 == j % d && (k = l[k >>> 24] << 24 | l[k >>> 16 & 255] << 16 | l[k >>> 8 & 255] << 8 | l[k & 255]) : (k = k << 8 | k >>> 24,
674 | k = l[k >>> 24] << 24 | l[k >>> 16 & 255] << 16 | l[k >>> 8 & 255] << 8 | l[k & 255],
675 | k ^= H[j / d | 0] << 24);
676 | e[j] = e[j - d] ^ k
677 | }
678 | c = this.bqS9J = [];
679 | for (d = 0; d < a; d++)
680 | j = a - d,
681 | k = d % 4 ? e[j] : e[j - 4],
682 | c[d] = 4 > d || 4 >= j ? k : b[l[k >>> 24]] ^ x[l[k >>> 16 & 255]] ^ q[l[k >>> 8 & 255]] ^ n[l[k & 255]]
683 | },
684 | encryptBlock: function(a, b) {
685 | this.DA9r(a, b, this.bqT9K, t, r, w, v, l)
686 | },
687 | decryptBlock: function(a, c) {
688 | var d = a[c + 1];
689 | a[c + 1] = a[c + 3];
690 | a[c + 3] = d;
691 | this.DA9r(a, c, this.bqS9J, b, x, q, n, s);
692 | d = a[c + 1];
693 | a[c + 1] = a[c + 3];
694 | a[c + 3] = d
695 | },
696 | DA9r: function(a, b, c, d, e, j, l, f) {
697 | for (var m = this.beT6N, g = a[b] ^ c[0], h = a[b + 1] ^ c[1], k = a[b + 2] ^ c[2], n = a[b + 3] ^ c[3], p = 4, r = 1; r < m; r++)
698 | var q = d[g >>> 24] ^ e[h >>> 16 & 255] ^ j[k >>> 8 & 255] ^ l[n & 255] ^ c[p++]
699 | , s = d[h >>> 24] ^ e[k >>> 16 & 255] ^ j[n >>> 8 & 255] ^ l[g & 255] ^ c[p++]
700 | , t = d[k >>> 24] ^ e[n >>> 16 & 255] ^ j[g >>> 8 & 255] ^ l[h & 255] ^ c[p++]
701 | , n = d[n >>> 24] ^ e[g >>> 16 & 255] ^ j[h >>> 8 & 255] ^ l[k & 255] ^ c[p++]
702 | , g = q
703 | , h = s
704 | , k = t;
705 | q = (f[g >>> 24] << 24 | f[h >>> 16 & 255] << 16 | f[k >>> 8 & 255] << 8 | f[n & 255]) ^ c[p++];
706 | s = (f[h >>> 24] << 24 | f[k >>> 16 & 255] << 16 | f[n >>> 8 & 255] << 8 | f[g & 255]) ^ c[p++];
707 | t = (f[k >>> 24] << 24 | f[n >>> 16 & 255] << 16 | f[g >>> 8 & 255] << 8 | f[h & 255]) ^ c[p++];
708 | n = (f[n >>> 24] << 24 | f[g >>> 16 & 255] << 16 | f[h >>> 8 & 255] << 8 | f[k & 255]) ^ c[p++];
709 | a[b] = q;
710 | a[b + 1] = s;
711 | a[b + 2] = t;
712 | a[b + 3] = n
713 | },
714 | keySize: 8
715 | });
716 | u.AES = p.lS3x(d)
717 | }
718 | )();
719 | function RSAKeyPair(a, b, c) {
720 | this.e = biFromHex(a),
721 | this.d = biFromHex(b),
722 | this.m = biFromHex(c),
723 | this.chunkSize = 2 * biHighIndex(this.m),
724 | this.radix = 16,
725 | this.barrett = new BarrettMu(this.m)
726 | }
727 | function twoDigit(a) {
728 | return (10 > a ? "0" : "") + String(a)
729 | }
730 | function encryptedString(a, b) {
731 | for (var f, g, h, i, j, k, l, c = new Array, d = b.length, e = 0; d > e; )
732 | c[e] = b.charCodeAt(e),
733 | e++;
734 | for (; 0 != c.length % a.chunkSize; )
735 | c[e++] = 0;
736 | for (f = c.length,
737 | g = "",
738 | e = 0; f > e; e += a.chunkSize) {
739 | for (j = new BigInt,
740 | h = 0,
741 | i = e; i < e + a.chunkSize; ++h)
742 | j.digits[h] = c[i++],
743 | j.digits[h] += c[i++] << 8;
744 | k = a.barrett.powMod(j, a.e),
745 | l = 16 == a.radix ? biToHex(k) : biToString(k, a.radix),
746 | g += l + " "
747 | }
748 | return g.substring(0, g.length - 1)
749 | }
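
`encryptedString` above is textbook (unpadded) RSA: the character codes are packed little-endian into 16-bit digits, so each zero-padded chunk is read as a little-endian integer, raised to `e` mod `m` via `powMod`, and emitted as hex. For the single short chunk this file actually encrypts later (a 16-character key), reading the bytes little-endian is the same as byte-reversing the input and reading it big-endian, which is why Python replications reverse the string. A sketch of that equivalence, assuming a 1024-bit modulus and a single-chunk input:

```python
def rsa_encrypt(text: str, e_hex: str, n_hex: str) -> str:
    """Replicates encryptedString() for one chunk: pack the char codes
    little-endian (equivalently, reverse the bytes and read big-endian),
    then compute c = m**e mod n with no padding scheme at all."""
    e, n = int(e_hex, 16), int(n_hex, 16)
    num = int.from_bytes(text.encode()[::-1], "big")
    # biToHex() does not left-pad to the modulus width, so its output can be
    # shorter than 256 hex chars; zfill(256) is the usual normalization and
    # is an assumption here, not something this file enforces.
    return format(pow(num, e, n), "x").zfill(256)
```
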
750 | function decryptedString(a, b) {
751 | var e, f, g, h, c = b.split(" "), d = "";
752 | for (e = 0; e < c.length; ++e)
753 | for (h = 16 == a.radix ? biFromHex(c[e]) : biFromString(c[e], a.radix),
754 | g = a.barrett.powMod(h, a.d),
755 | f = 0; f <= biHighIndex(g); ++f)
756 | d += String.fromCharCode(255 & g.digits[f], g.digits[f] >> 8);
757 | return 0 == d.charCodeAt(d.length - 1) && (d = d.substring(0, d.length - 1)),
758 | d
759 | }
760 | function setMaxDigits(a) {
761 | maxDigits = a,
762 | ZERO_ARRAY = new Array(maxDigits);
763 | for (var b = 0; b < ZERO_ARRAY.length; b++)
764 | ZERO_ARRAY[b] = 0;
765 | bigZero = new BigInt,
766 | bigOne = new BigInt,
767 | bigOne.digits[0] = 1
768 | }
769 | function BigInt(a) {
770 | this.digits = "boolean" == typeof a && 1 == a ? null : ZERO_ARRAY.slice(0),
771 | this.isNeg = !1
772 | }
773 | function biFromDecimal(a) {
774 | for (var d, e, f, b = "-" == a.charAt(0), c = b ? 1 : 0; c < a.length && "0" == a.charAt(c); )
775 | ++c;
776 | if (c == a.length)
777 | d = new BigInt;
778 | else {
779 | for (e = a.length - c,
780 | f = e % dpl10,
781 | 0 == f && (f = dpl10),
782 | d = biFromNumber(Number(a.substr(c, f))),
783 | c += f; c < a.length; )
784 | d = biAdd(biMultiply(d, lr10), biFromNumber(Number(a.substr(c, dpl10)))),
785 | c += dpl10;
786 | d.isNeg = b
787 | }
788 | return d
789 | }
790 | function biCopy(a) {
791 | var b = new BigInt(!0);
792 | return b.digits = a.digits.slice(0),
793 | b.isNeg = a.isNeg,
794 | b
795 | }
796 | function biFromNumber(a) {
797 | var c, b = new BigInt;
798 | for (b.isNeg = 0 > a,
799 | a = Math.abs(a),
800 | c = 0; a > 0; )
801 | b.digits[c++] = a & maxDigitVal,
802 | a >>= biRadixBits;
803 | return b
804 | }
805 | function reverseStr(a) {
806 | var c, b = "";
807 | for (c = a.length - 1; c > -1; --c)
808 | b += a.charAt(c);
809 | return b
810 | }
811 | function biToString(a, b) {
812 | var d, e, c = new BigInt;
813 | for (c.digits[0] = b,
814 | d = biDivideModulo(a, c),
815 | e = hexatrigesimalToChar[d[1].digits[0]]; 1 == biCompare(d[0], bigZero); )
816 | d = biDivideModulo(d[0], c),
817 | digit = d[1].digits[0],
818 | e += hexatrigesimalToChar[d[1].digits[0]];
819 | return (a.isNeg ? "-" : "") + reverseStr(e)
820 | }
821 | function biToDecimal(a) {
822 | var c, d, b = new BigInt;
823 | for (b.digits[0] = 10,
824 | c = biDivideModulo(a, b),
825 | d = String(c[1].digits[0]); 1 == biCompare(c[0], bigZero); )
826 | c = biDivideModulo(c[0], b),
827 | d += String(c[1].digits[0]);
828 | return (a.isNeg ? "-" : "") + reverseStr(d)
829 | }
830 | function digitToHex(a) {
831 | var b = 15
832 | , c = "";
833 | for (i = 0; 4 > i; ++i)
834 | c += hexToChar[a & b],
835 | a >>>= 4;
836 | return reverseStr(c)
837 | }
838 | function biToHex(a) {
839 | var d, b = "";
840 | for (biHighIndex(a),
841 | d = biHighIndex(a); d > -1; --d)
842 | b += digitToHex(a.digits[d]);
843 | return b
844 | }
845 | function charToHex(a) {
846 | var h, b = 48, c = b + 9, d = 97, e = d + 25, f = 65, g = 90;
847 | return h = a >= b && c >= a ? a - b : a >= f && g >= a ? 10 + a - f : a >= d && e >= a ? 10 + a - d : 0
848 | }
849 | function hexToDigit(a) {
850 | var d, b = 0, c = Math.min(a.length, 4);
851 | for (d = 0; c > d; ++d)
852 | b <<= 4,
853 | b |= charToHex(a.charCodeAt(d));
854 | return b
855 | }
856 | function biFromHex(a) {
857 | var d, e, b = new BigInt, c = a.length;
858 | for (d = c,
859 | e = 0; d > 0; d -= 4,
860 | ++e)
861 | b.digits[e] = hexToDigit(a.substr(Math.max(d - 4, 0), Math.min(d, 4)));
862 | return b
863 | }
864 | function biFromString(a, b) {
865 | var g, h, i, j, c = "-" == a.charAt(0), d = c ? 1 : 0, e = new BigInt, f = new BigInt;
866 | for (f.digits[0] = 1,
867 | g = a.length - 1; g >= d; g--)
868 | h = a.charCodeAt(g),
869 | i = charToHex(h),
870 | j = biMultiplyDigit(f, i),
871 | e = biAdd(e, j),
872 | f = biMultiplyDigit(f, b);
873 | return e.isNeg = c,
874 | e
875 | }
876 | function biDump(a) {
877 | return (a.isNeg ? "-" : "") + a.digits.join(" ")
878 | }
879 | function biAdd(a, b) {
880 | var c, d, e, f;
881 | if (a.isNeg != b.isNeg)
882 | b.isNeg = !b.isNeg,
883 | c = biSubtract(a, b),
884 | b.isNeg = !b.isNeg;
885 | else {
886 | for (c = new BigInt,
887 | d = 0,
888 | f = 0; f < a.digits.length; ++f)
889 | e = a.digits[f] + b.digits[f] + d,
890 | c.digits[f] = 65535 & e,
891 | d = Number(e >= biRadix);
892 | c.isNeg = a.isNeg
893 | }
894 | return c
895 | }
896 | function biSubtract(a, b) {
897 | var c, d, e, f;
898 | if (a.isNeg != b.isNeg)
899 | b.isNeg = !b.isNeg,
900 | c = biAdd(a, b),
901 | b.isNeg = !b.isNeg;
902 | else {
903 | for (c = new BigInt,
904 | e = 0,
905 | f = 0; f < a.digits.length; ++f)
906 | d = a.digits[f] - b.digits[f] + e,
907 | c.digits[f] = 65535 & d,
908 | c.digits[f] < 0 && (c.digits[f] += biRadix),
909 | e = 0 - Number(0 > d);
910 | if (-1 == e) {
911 | for (e = 0,
912 | f = 0; f < a.digits.length; ++f)
913 | d = 0 - c.digits[f] + e,
914 | c.digits[f] = 65535 & d,
915 | c.digits[f] < 0 && (c.digits[f] += biRadix),
916 | e = 0 - Number(0 > d);
917 | c.isNeg = !a.isNeg
918 | } else
919 | c.isNeg = a.isNeg
920 | }
921 | return c
922 | }
923 | function biHighIndex(a) {
924 | for (var b = a.digits.length - 1; b > 0 && 0 == a.digits[b]; )
925 | --b;
926 | return b
927 | }
928 | function biNumBits(a) {
929 | var e, b = biHighIndex(a), c = a.digits[b], d = (b + 1) * bitsPerDigit;
930 | for (e = d; e > d - bitsPerDigit && 0 == (32768 & c); --e)
931 | c <<= 1;
932 | return e
933 | }
934 | function biMultiply(a, b) {
935 | var d, h, i, k, c = new BigInt, e = biHighIndex(a), f = biHighIndex(b);
936 | for (k = 0; f >= k; ++k) {
937 | for (d = 0,
938 | i = k,
939 | j = 0; e >= j; ++j,
940 | ++i)
941 | h = c.digits[i] + a.digits[j] * b.digits[k] + d,
942 | c.digits[i] = h & maxDigitVal,
943 | d = h >>> biRadixBits;
944 | c.digits[k + e + 1] = d
945 | }
946 | return c.isNeg = a.isNeg != b.isNeg,
947 | c
948 | }
949 | function biMultiplyDigit(a, b) {
950 | var c, d, e, f;
951 | for (result = new BigInt,
952 | c = biHighIndex(a),
953 | d = 0,
954 | f = 0; c >= f; ++f)
955 | e = result.digits[f] + a.digits[f] * b + d,
956 | result.digits[f] = e & maxDigitVal,
957 | d = e >>> biRadixBits;
958 | return result.digits[1 + c] = d,
959 | result
960 | }
961 | function arrayCopy(a, b, c, d, e) {
962 | var g, h, f = Math.min(b + e, a.length);
963 | for (g = b,
964 | h = d; f > g; ++g,
965 | ++h)
966 | c[h] = a[g]
967 | }
968 | function biShiftLeft(a, b) {
969 | var e, f, g, h, c = Math.floor(b / bitsPerDigit), d = new BigInt;
970 | for (arrayCopy(a.digits, 0, d.digits, c, d.digits.length - c),
971 | e = b % bitsPerDigit,
972 | f = bitsPerDigit - e,
973 | g = d.digits.length - 1,
974 | h = g - 1; g > 0; --g,
975 | --h)
976 | d.digits[g] = d.digits[g] << e & maxDigitVal | (d.digits[h] & highBitMasks[e]) >>> f;
977 | return d.digits[0] = d.digits[g] << e & maxDigitVal,
978 | d.isNeg = a.isNeg,
979 | d
980 | }
981 | function biShiftRight(a, b) {
982 | var e, f, g, h, c = Math.floor(b / bitsPerDigit), d = new BigInt;
983 | for (arrayCopy(a.digits, c, d.digits, 0, a.digits.length - c),
984 | e = b % bitsPerDigit,
985 | f = bitsPerDigit - e,
986 | g = 0,
987 | h = g + 1; g < d.digits.length - 1; ++g,
988 | ++h)
989 | d.digits[g] = d.digits[g] >>> e | (d.digits[h] & lowBitMasks[e]) << f;
990 | return d.digits[d.digits.length - 1] >>>= e,
991 | d.isNeg = a.isNeg,
992 | d
993 | }
994 | function biMultiplyByRadixPower(a, b) {
995 | var c = new BigInt;
996 | return arrayCopy(a.digits, 0, c.digits, b, c.digits.length - b),
997 | c
998 | }
999 | function biDivideByRadixPower(a, b) {
1000 | var c = new BigInt;
1001 | return arrayCopy(a.digits, b, c.digits, 0, c.digits.length - b),
1002 | c
1003 | }
1004 | function biModuloByRadixPower(a, b) {
1005 | var c = new BigInt;
1006 | return arrayCopy(a.digits, 0, c.digits, 0, b),
1007 | c
1008 | }
1009 | function biCompare(a, b) {
1010 | if (a.isNeg != b.isNeg)
1011 | return 1 - 2 * Number(a.isNeg);
1012 | for (var c = a.digits.length - 1; c >= 0; --c)
1013 | if (a.digits[c] != b.digits[c])
1014 | return a.isNeg ? 1 - 2 * Number(a.digits[c] > b.digits[c]) : 1 - 2 * Number(a.digits[c] < b.digits[c]);
1015 | return 0
1016 | }
1017 | function biDivideModulo(a, b) {
1018 | var f, g, h, i, j, k, l, m, n, o, p, q, r, s, c = biNumBits(a), d = biNumBits(b), e = b.isNeg;
1019 | if (d > c)
1020 | return a.isNeg ? (f = biCopy(bigOne),
1021 | f.isNeg = !b.isNeg,
1022 | a.isNeg = !1,
1023 | b.isNeg = !1,
1024 | g = biSubtract(b, a),
1025 | a.isNeg = !0,
1026 | b.isNeg = e) : (f = new BigInt,
1027 | g = biCopy(a)),
1028 | new Array(f,g);
1029 | for (f = new BigInt,
1030 | g = a,
1031 | h = Math.ceil(d / bitsPerDigit) - 1,
1032 | i = 0; b.digits[h] < biHalfRadix; )
1033 | b = biShiftLeft(b, 1),
1034 | ++i,
1035 | ++d,
1036 | h = Math.ceil(d / bitsPerDigit) - 1;
1037 | for (g = biShiftLeft(g, i),
1038 | c += i,
1039 | j = Math.ceil(c / bitsPerDigit) - 1,
1040 | k = biMultiplyByRadixPower(b, j - h); -1 != biCompare(g, k); )
1041 | ++f.digits[j - h],
1042 | g = biSubtract(g, k);
1043 | for (l = j; l > h; --l) {
1044 | for (m = l >= g.digits.length ? 0 : g.digits[l],
1045 | n = l - 1 >= g.digits.length ? 0 : g.digits[l - 1],
1046 | o = l - 2 >= g.digits.length ? 0 : g.digits[l - 2],
1047 | p = h >= b.digits.length ? 0 : b.digits[h],
1048 | q = h - 1 >= b.digits.length ? 0 : b.digits[h - 1],
1049 | f.digits[l - h - 1] = m == p ? maxDigitVal : Math.floor((m * biRadix + n) / p),
1050 | r = f.digits[l - h - 1] * (p * biRadix + q),
1051 | s = m * biRadixSquared + (n * biRadix + o); r > s; )
1052 | --f.digits[l - h - 1],
1053 | r = f.digits[l - h - 1] * (p * biRadix | q),
1054 | s = m * biRadix * biRadix + (n * biRadix + o);
1055 | k = biMultiplyByRadixPower(b, l - h - 1),
1056 | g = biSubtract(g, biMultiplyDigit(k, f.digits[l - h - 1])),
1057 | g.isNeg && (g = biAdd(g, k),
1058 | --f.digits[l - h - 1])
1059 | }
1060 | return g = biShiftRight(g, i),
1061 | f.isNeg = a.isNeg != e,
1062 | a.isNeg && (f = e ? biAdd(f, bigOne) : biSubtract(f, bigOne),
1063 | b = biShiftRight(b, i),
1064 | g = biSubtract(b, g)),
1065 | 0 == g.digits[0] && 0 == biHighIndex(g) && (g.isNeg = !1),
1066 | new Array(f,g)
1067 | }
1068 | function biDivide(a, b) {
1069 | return biDivideModulo(a, b)[0]
1070 | }
1071 | function biModulo(a, b) {
1072 | return biDivideModulo(a, b)[1]
1073 | }
1074 | function biMultiplyMod(a, b, c) {
1075 | return biModulo(biMultiply(a, b), c)
1076 | }
1077 | function biPow(a, b) {
1078 | for (var c = bigOne, d = a; ; ) {
1079 | if (0 != (1 & b) && (c = biMultiply(c, d)),
1080 | b >>= 1,
1081 | 0 == b)
1082 | break;
1083 | d = biMultiply(d, d)
1084 | }
1085 | return c
1086 | }
1087 | function biPowMod(a, b, c) {
1088 | for (var d = bigOne, e = a, f = b; ; ) {
1089 | if (0 != (1 & f.digits[0]) && (d = biMultiplyMod(d, e, c)),
1090 | f = biShiftRight(f, 1),
1091 | 0 == f.digits[0] && 0 == biHighIndex(f))
1092 | break;
1093 | e = biMultiplyMod(e, e, c)
1094 | }
1095 | return d
1096 | }
1097 | function BarrettMu(a) {
1098 | this.modulus = biCopy(a),
1099 | this.k = biHighIndex(this.modulus) + 1;
1100 | var b = new BigInt;
1101 | b.digits[2 * this.k] = 1,
1102 | this.mu = biDivide(b, this.modulus),
1103 | this.bkplus1 = new BigInt,
1104 | this.bkplus1.digits[this.k + 1] = 1,
1105 | this.modulo = BarrettMu_modulo,
1106 | this.multiplyMod = BarrettMu_multiplyMod,
1107 | this.powMod = BarrettMu_powMod
1108 | }
1109 | function BarrettMu_modulo(a) {
1110 | var i, b = biDivideByRadixPower(a, this.k - 1), c = biMultiply(b, this.mu), d = biDivideByRadixPower(c, this.k + 1), e = biModuloByRadixPower(a, this.k + 1), f = biMultiply(d, this.modulus), g = biModuloByRadixPower(f, this.k + 1), h = biSubtract(e, g);
1111 | for (h.isNeg && (h = biAdd(h, this.bkplus1)),
1112 | i = biCompare(h, this.modulus) >= 0; i; )
1113 | h = biSubtract(h, this.modulus),
1114 | i = biCompare(h, this.modulus) >= 0;
1115 | return h
1116 | }
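
`BarrettMu` implements Barrett reduction: with radix `b = 65536` and `k` the digit count of the modulus, the constant `mu = floor(b**(2k) / m)` is divided out once in the constructor, after which every `modulo()` call needs only multiplications and digit shifts, no long division. The quotient estimate is at most 2 short, hence the final subtraction loop. The same algorithm in plain Python integers, as a small self-check:

```python
B = 1 << 16  # digit radix, matching biRadix above

def barrett_reduce(x: int, m: int, k: int, mu: int) -> int:
    """x mod m via Barrett reduction, for x < B**(2*k); mu = B**(2*k) // m."""
    q = ((x >> (16 * (k - 1))) * mu) >> (16 * (k + 1))  # quotient estimate
    r = (x - q * m) % (B ** (k + 1))                    # short of x mod m by at most 2*m
    while r >= m:                                       # at most two corrections
        r -= m
    return r

m = 0xFFF1
k = 1                       # m fits in one 16-bit digit
mu = B ** (2 * k) // m
assert barrett_reduce(123456789, m, k, mu) == 123456789 % m
```
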
1117 | function BarrettMu_multiplyMod(a, b) {
1118 | var c = biMultiply(a, b);
1119 | return this.modulo(c)
1120 | }
1121 | function BarrettMu_powMod(a, b) {
1122 | var d, e, c = new BigInt;
1123 | for (c.digits[0] = 1,
1124 | d = a,
1125 | e = b; ; ) {
1126 | if (0 != (1 & e.digits[0]) && (c = this.multiplyMod(c, d)),
1127 | e = biShiftRight(e, 1),
1128 | 0 == e.digits[0] && 0 == biHighIndex(e))
1129 | break;
1130 | d = this.multiplyMod(d, d)
1131 | }
1132 | return c
1133 | }
1134 | var maxDigits, ZERO_ARRAY, bigZero, bigOne, dpl10, lr10, hexatrigesimalToChar, hexToChar, highBitMasks, lowBitMasks, biRadixBase = 2, biRadixBits = 16, bitsPerDigit = biRadixBits, biRadix = 65536, biHalfRadix = biRadix >>> 1, biRadixSquared = biRadix * biRadix, maxDigitVal = biRadix - 1, maxInteger = 9999999999999998;
1135 | setMaxDigits(20),
1136 | dpl10 = 15,
1137 | lr10 = biFromNumber(1e15),
1138 | hexatrigesimalToChar = new Array("0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"),
1139 | hexToChar = new Array("0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"),
1140 | highBitMasks = new Array(0,32768,49152,57344,61440,63488,64512,65024,65280,65408,65472,65504,65520,65528,65532,65534,65535),
1141 | lowBitMasks = new Array(0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535);
1142 |
1143 |
1144 |
1145 | !function() {
1146 | function a(a) {
1147 | var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
1148 | for (d = 0; a > d; d += 1)
1149 | e = Math.random() * b.length,
1150 | e = Math.floor(e),
1151 | c += b.charAt(e);
1152 | return c
1153 | }
1154 | function b(a, b) {
1155 | var c = CryptoJS.enc.Utf8.parse(b)
1156 | , d = CryptoJS.enc.Utf8.parse("0102030405060708")
1157 | , e = CryptoJS.enc.Utf8.parse(a)
1158 | , f = CryptoJS.AES.encrypt(e, c, {
1159 | iv: d,
1160 | mode: CryptoJS.mode.CBC
1161 | });
1162 | return f.toString()
1163 | }
1164 | function c(a, b, c) {
1165 | var d, e;
1166 | return setMaxDigits(131),
1167 | d = new RSAKeyPair(b,"",c),
1168 | e = encryptedString(d, a)
1169 | }
1170 | function d(d, e, f, g) {
1171 | var h = {}
1172 | , i = a(16);
1173 | return h.encText = b(d, g),
1174 | h.encText = b(h.encText, i),
1175 | h.encSecKey = c(i, e, f),
1176 | h
1177 | }
1178 | function e(a, b, d, e) {
1179 | var f = {};
1180 | return f.encText = c(a + e, b, d),
1181 | f
1182 | }
1183 | asrsea = d,
1184 | ecnonasr = e
1185 | }();
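
The exported `asrsea(text, e, n, nonce)` (function `d` above) is the whole weapi parameter pipeline: generate a random 16-character alphanumeric key, AES-CBC-encrypt the JSON payload twice (first with the fixed `nonce` key, then with the random key, IV `0102030405060708`, PKCS#7 padding, Base64 output, all CryptoJS defaults), and RSA-encrypt the random key itself as `encSecKey`. A minimal Python replication of that pipeline, assuming `pycryptodome` is installed (the RSA helper mirrors the sketch after `encryptedString` above):

```python
import base64
import secrets
import string

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

IV = b"0102030405060708"
ALPHABET = string.ascii_letters + string.digits

def aes_cbc_b64(text: str, key: str) -> str:
    """function b above: AES-CBC with PKCS#7 padding, Base64-encoded."""
    cipher = AES.new(key.encode(), AES.MODE_CBC, IV)
    return base64.b64encode(cipher.encrypt(pad(text.encode(), 16))).decode()

def rsa_encrypt(text: str, e_hex: str, n_hex: str) -> str:
    """function c above: unpadded textbook RSA on the byte-reversed input."""
    num = int.from_bytes(text.encode()[::-1], "big")
    return format(pow(num, int(e_hex, 16), int(n_hex, 16)), "x").zfill(256)

def asrsea(text: str, e_hex: str, n_hex: str, nonce: str) -> dict:
    """function d above: double AES pass plus RSA-wrapped random key."""
    rand_key = "".join(secrets.choice(ALPHABET) for _ in range(16))
    return {
        "encText": aes_cbc_b64(aes_cbc_b64(text, nonce), rand_key),
        "encSecKey": rsa_encrypt(rand_key, e_hex, n_hex),
    }
```

Because the random key changes per call, outputs are not byte-identical across runs, only interchangeable: any `(encText, encSecKey)` pair produced this way decrypts to the same payload on the server side.
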
1186 |
1187 | // Entry point: takes a song ID and builds the corresponding encrypted request parameters
1188 | function start(music_id) {
1189 | var i9b = {
1190 | "rid":"R_SO_4_" + music_id,
1191 | // Offset into the comment list, i.e. the starting index for pagination
1192 | "offset": 0,
1193 | "total":"false",
1194 | // Number of comments requested per page
1195 | "limit": 100,
1196 | "csrf_token":""
1197 | };
1198 | var bYf7Y = asrsea(JSON.stringify(i9b), "010001", "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7", "0CoJUm6Qyw8W8jud");
1199 | return bYf7Y;
1200 | }
1201 |
1202 |
--------------------------------------------------------------------------------
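
For completeness, a hedged sketch of how `start()` is typically driven from Python: evaluate this file with PyExecJS and POST the two returned fields. The endpoint URL, the form-field names (`params`, `encSecKey`), and the sample song ID below follow the commonly documented weapi convention and are assumptions for illustration, not something this file itself pins down:

```python
# Hypothetical driver, shown for illustration only.
import execjs
import requests

with open("Music.js", encoding="utf-8") as f:
    ctx = execjs.compile(f.read())

music_id = "186016"                    # placeholder song ID
enc = ctx.call("start", music_id)      # {'encText': ..., 'encSecKey': ...}

# Assumed endpoint and field names per the usual weapi convention.
url = f"https://music.163.com/weapi/v1/resource/comments/R_SO_4_{music_id}"
data = {"params": enc["encText"], "encSecKey": enc["encSecKey"]}
resp = requests.post(url, data=data, headers={"User-Agent": "Mozilla/5.0"})
print(resp.json())
```
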