├── .gitattributes
├── .gitignore
├── README.md
├── 【51Job】查岗位
└── select_job.py
├── 【bilibili】自动登录
├── README.md
├── __init__.py
└── login.py
├── 【bilibili】视频下载
├── __init__.py
└── video_download.py
├── 【双色球】头奖分布
├── main.py
├── result.jpg
└── 近期记录.xlsx
├── 【壁纸】美女壁纸下载器
└── bg_down.py
├── 【大众点评】字体反爬、坐标反爬
├── 参数生成
│ ├── encryp.js
│ └── uid.py
├── 旧版
│ ├── __init__.py
│ ├── parse_address_poi.py
│ └── parse_font_css.py
└── 最新版7月
│ ├── README.md
│ ├── font.json
│ └── main.py
├── 【天眼查】字体加密
├── num.woff
└── tyc.py
├── 【抖音】无水印视频解析
├── README.md
├── __init__.py
└── parse.py
├── 【拼多多】登陆参数生成
├── PinDuoDuo.py
├── README.md
├── __init__.py
└── encryp.js
├── 【文书】app查询接口
└── main.py
├── 【淘宝】自动登陆
├── auto_login_pyppeteer.py
└── login_for_sina.py
├── 其他实战
├── __init__.py
├── 【5173网】自动登录
│ ├── auto_login.py
│ ├── encryp.js
│ └── logOK.png
├── 【9377网】自动登录
│ ├── 9377login.py
│ └── __init__.py
├── 【DNS】自动登录
│ ├── Login.py
│ ├── __init__.py
│ └── dns.js
├── 【GitHub】自动登录
│ ├── __init__.py
│ └── login.py
├── 【Glidedsky】自动登陆
│ └── login.py
├── 【Python加密库】Demo
│ ├── __init__.py
│ └── encryption.py
├── 【TCL金融】自动登录
│ ├── __init__.py
│ ├── auto_login.py
│ ├── encryp.js
│ └── ok.png
├── 【steam】自动登录
│ ├── execute.js
│ └── login.py
├── 【万创帮】自动登录
│ ├── __init__.py
│ ├── encryp.js
│ ├── login_ok.png
│ └── spider_login.py
├── 【中关村】自动登录
│ ├── README.md
│ └── login.py
├── 【京东】商品数据爬取
│ ├── __init__.py
│ ├── geckodriver
│ └── selenium抓取.py
├── 【人人网】自动登录
│ ├── enc.js
│ └── login.py
├── 【企业名片】企业查询
│ ├── encryp.js
│ └── qi_ming.py
├── 【国鑫所】自动登录
│ ├── Login.py
│ ├── __init__.py
│ ├── encryp.js
│ └── login_ok.png
├── 【天眼查】模拟登录
│ ├── __init__.py
│ └── login.py
├── 【天翼】登录
│ ├── login.py
│ └── v1.js
├── 【好莱客】参数解析
│ ├── __init__.py
│ ├── encryp.js
│ ├── holike.py
│ └── ok.png
├── 【小牛在线】登录参数生成
│ ├── __init__.py
│ ├── encryp.js
│ └── make_param.py
├── 【开鑫贷】登陆参数生成
│ ├── KaiXinDai.py
│ ├── __init__.py
│ └── encryp.js
├── 【微信】登录参数生成
│ ├── __init__.py
│ ├── encryp.js
│ └── make_pwd.py
├── 【房价】房价获取
│ ├── README.md
│ ├── __pycache__
│ │ └── util.cpython-37.pyc
│ ├── main.py
│ └── util.py
├── 【房天下】自动登录
│ ├── encryp.js
│ ├── login.py
│ └── ok.png
├── 【新浪微博】密码解密
│ ├── execute.js
│ └── main.py
├── 【时光网】登陆参数生成
│ ├── encryp.js
│ └── login.py
├── 【易通贷】自动登录
│ ├── __init__.py
│ ├── auto_login.py
│ └── encryp.js
├── 【汽车之家】参数解密
│ ├── execute.js
│ └── main.py
├── 【满级网】自动登录
│ ├── auto_login.py
│ └── encryp.js
├── 【百度】wap端sig生成
│ ├── make_sig.py
│ └── v3_update.js
├── 【百度】网页找回密码
│ ├── __pycache__
│ │ └── header.cpython-37.pyc
│ ├── demo.py
│ ├── dv.js
│ ├── encryp.js
│ ├── header.py
│ └── 验证码.png
├── 【百度】翻译
│ ├── __init__.py
│ ├── translate.js
│ └── translation.py
├── 【百度】自动登录
│ ├── README.md
│ ├── encryp.js
│ └── login.py
├── 【百度街拍】图片下载
│ └── get_image.py
├── 【移动】登录参数生成
│ ├── MakeParam.py
│ ├── __init__.py
│ ├── encryp.js
│ └── make_params.png
├── 【空中网】自动登录
│ ├── __init__.py
│ ├── encryp.js
│ └── spider_login.py
├── 【美团】数据解析、token生成
│ ├── README.md
│ ├── __init__.py
│ ├── create_food_token.py
│ ├── get_login_cookies.py
│ ├── parse_food_comments.py
│ ├── parse_food_info.py
│ ├── parse_hotel_comments.py
│ ├── parse_hotel_info.py
│ ├── parse_play_areas.py
│ └── parse_play_info.py
├── 【试客联盟】登录
│ ├── execute.js
│ └── login.py
├── 【谷雨】数字解密
│ └── GuYu.py
├── 【豆瓣】自动登录
│ └── DouBan.py
├── 【逗游】自动登录
│ ├── __init__.py
│ ├── douyou.py
│ └── encryp.js
├── 【金逸电影】自动注册
│ ├── __init__.py
│ ├── encryp.js
│ ├── register.png
│ └── register.py
├── 【青海移动】登陆参数生成
│ ├── __init__.py
│ ├── encryp.js
│ └── make_param.py
└── 【餐饮】查询信息
│ ├── FoodInfo.py
│ ├── __init__.py
│ └── t.html
├── 原创爬虫工具
├── Cookies
│ ├── MeiTuan
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── db.py
│ │ ├── generator.py
│ │ └── 账号.txt
│ ├── README.md
│ └── __init__.py
├── DataMigration
│ ├── README.md
│ ├── __init__.py
│ ├── config.py
│ ├── db
│ │ ├── MongoDB.py
│ │ ├── Mysql.py
│ │ └── __init__.py
│ └── migration
│ │ ├── __init__.py
│ │ ├── mongo_to_mysql.py
│ │ └── mysql_to_mongo.py
├── Decode
│ ├── README.md
│ ├── __init__.py
│ └── translation.py
├── Jsencrypt
│ ├── __init__.py
│ └── make_encrypt.py
├── OSS
│ ├── __init__.py
│ └── push_to_oss.py
├── Proxy
│ ├── KDLProxyPool.py
│ ├── README.md
│ ├── XDLProxyPool.py
│ ├── XDLProxyUseDemo.py
│ ├── ZhiMaProxyPool.py
│ └── ZhiMaProxyUseDemo.py
├── README.md
├── Register
│ ├── MessageCode.py
│ ├── README.md
│ └── __init__.py
└── zok
│ ├── README.md
│ ├── __init__.py
│ ├── get_db
│ ├── __init__.py
│ ├── from_mongodb.py
│ └── from_mysql.py
│ ├── proxies
│ ├── __init__.py
│ └── proxies.py
│ ├── random_UA
│ ├── __init__.py
│ ├── fake_useragent.json
│ └── ua_random.py
│ ├── repetition
│ ├── __init__.py
│ └── update_cache.py
│ ├── save
│ ├── __init__.py
│ └── to_mysql.py
│ └── zok_config.py
├── 滑动验证码
├── 【w3c】滑块验证
│ ├── __init__.py
│ ├── bg.png
│ ├── chache.png
│ ├── hk.png
│ ├── img
│ │ ├── 0.png
│ │ ├── 1.png
│ │ ├── 2.png
│ │ └── 3.png
│ └── w3c.py
└── 【腾讯】滑块验证
│ ├── bg.jpeg
│ ├── discriminate.py
│ └── sel.py
└── 项目
├── HouseScrapy
├── requirements
├── scrapy.cfg
├── settings.py
├── spiders
│ ├── __init__.py
│ └── house.py
└── toolkits
│ ├── __init__.py
│ ├── fake_useragent.json
│ ├── items.py
│ ├── make_ua.py
│ ├── middlewares.py
│ ├── pipelines.py
│ └── proxies.py
├── HouseSpider
├── README.md
├── config.py
├── db
│ └── __init__.py
├── main.py
└── tool
│ ├── __init__.py
│ ├── parse.py
│ ├── proxy.py
│ └── toolkit.py
├── MeiTuanArea
├── MeiTuanArea
│ ├── __init__.py
│ ├── items.py
│ ├── middlewares.py
│ ├── pipelines.py
│ ├── settings.py
│ └── spiders
│ │ ├── __init__.py
│ │ ├── area_coord.py
│ │ └── areas.py
├── README.md
├── __init__.py
├── scrapy.cfg
└── 初始化.sql
└── README.md
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.js linguist-language=python
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | */.DS_Store
3 | /.idea
4 | */.idea
5 | .vscode
6 | /.vscode
7 | */.vscode
8 | /__pycache__
9 | */__pycache__
10 |
11 | .README.md
--------------------------------------------------------------------------------
/【51Job】查岗位/select_job.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-15 Python: 3.7
4 |
5 | import requests
6 | from lxml import etree
7 |
# Search-result URL template; ``{key}`` is replaced with the search keyword.
# The query string uses ``&degreefrom=99`` (the ``&deg`` prefix had been
# mis-decoded into a degree sign, which corrupted the parameter).
Format_str = 'https://search.51job.com/list/000000,000000,0000,00,9,99,{key},2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
# Headers sent with the search request.
Headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
}
12 |
13 |
class GetJob(object):
    """Search 51job for a keyword and append the hits to ``job.csv``.

    Creating an instance immediately runs the search (see ``get_info``).

    :param job_name: keyword substituted into the search URL template.
    """

    def __init__(self, job_name):
        self.job = job_name

        self.get_info()

    def get_info(self):
        """Request the result page for ``self.job`` and pass it to ``parse``."""
        target_url = Format_str.format(key=self.job)
        response = requests.get(target_url, headers=Headers)
        # Decode the body with the encoding detected from its content
        response.encoding = response.apparent_encoding
        root = etree.HTML(response.text)
        self.parse(root)

    @staticmethod
    def parse(root):
        """Print every job row under ``root`` and append it to ``job.csv``.

        Each CSV row is ``title, date, salary, link`` followed by an empty
        field, which keeps the trailing comma the file has always had.

        :param root: parsed document (anything exposing ``xpath``).
        """
        import csv

        div_list = root.xpath("//div[@class='dw_table']/div[@class='el']")
        if not div_list:
            # Nothing to record; do not create an empty file
            return

        # Open the output once for the whole page instead of once per row
        with open('job.csv', 'a', encoding='gb18030') as f:
            # csv.writer quotes fields that contain commas or quotes, so a
            # title such as "Python, Go" no longer shifts the columns
            writer = csv.writer(f, lineterminator='\n')
            for div in div_list:
                anchors = div.xpath("p/span/a")
                if not anchors:
                    # Row without a title link: skip it rather than crash
                    continue
                a = anchors[0]
                titles = a.xpath("text()")
                hrefs = a.xpath("@href")
                job_name = titles[0].strip() if titles else ""
                job_href = hrefs[0] if hrefs else ""

                money = div.xpath("span[@class='t4']/text()")
                money = money[0] if money else "面议"
                date_time = div.xpath("span[@class='t5']/text()")
                date_time = date_time[0] if date_time else "没有时间"

                print(job_name, money, date_time, job_href)
                writer.writerow([job_name, date_time, money, job_href, ''])
45 |
46 |
if __name__ == "__main__":
    # Prompt for the keyword and run the search straight away
    GetJob(input("请输入关键词"))
50 |
--------------------------------------------------------------------------------
/【bilibili】自动登录/README.md:
--------------------------------------------------------------------------------
1 | ## B站自动登录
2 |
3 | 本案例根据 `selenium` 实现。
4 |
5 | ## 效果图
6 |
7 | 
8 |
9 | ## Q&A
10 |
11 | > ChromeDriver - WebDriver for Chrome
12 |
13 | ```
14 | 因为是通过 selenium 模拟点击,所以需要下载 ChromeDriver 驱动。
15 |
16 | 点击下方链接即可跳转至下载界面。
17 | ```
18 |
19 | > 为什么要模拟滑动多次?
20 |
21 | ```
22 | 获取滑块偏移量后进行模拟操作时,程序控制的滑动速度过于均匀,容易被判定为机器操作,因此可能需要多次尝试滑动。
23 |
24 | 当然了,我们会在今后给予更好的滑动支持~
25 | ```
26 |
27 | [下载ChromeDriver](https://chromedriver.chromium.org/downloads)
28 |
29 | ## Support
30 |
31 | ```
32 | 案例于 2020-04-23 前均可用,如有疑问请联系作者。
33 | ```
34 |
35 | ## Donate
36 |
37 | Thanks ~
38 |
--------------------------------------------------------------------------------
/【bilibili】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Date: 2020/4/23
4 |
--------------------------------------------------------------------------------
/【bilibili】视频下载/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-05-09 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/【双色球】头奖分布/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-11-08 Python: 3.7
4 | import requests
5 | import json
6 | import pandas as pd
7 | import openpyxl
8 | import jieba
9 | import wordcloud
10 | import matplotlib.pyplot as plt
11 |
12 |
class SSQ:
    """Download recent 双色球 draw records, store them and draw a word cloud.

    :param file: path of the Excel file the records are written to.
    :param font: font file path handed to ``wordcloud.WordCloud``.
    """

    def __init__(self, file, font):
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36',
            'Host': 'www.cwl.gov.cn',
            'Referer': 'http://www.cwl.gov.cn/kjxx/ssq/kjgg/'
        }
        self.file = file
        self.font = font
        self.get_history_url = 'http://www.cwl.gov.cn/cwl_admin/kjxx/findDrawNotice?name=ssq&issueCount=100'
        self.session = requests.session()

    def history(self):
        """Fetch the latest 100 draws.

        :return: the decoded JSON payload, or ``None`` when the request or
            the JSON decoding fails.
        """
        try:
            # Visit the listing page first (presumably so the session picks
            # up whatever cookies the API expects), then query the API
            self.session.get('http://www.cwl.gov.cn/kjxx/ssq/kjgg/')
            response = self.session.get(self.get_history_url, headers=self.header)
            return json.loads(response.text)
        except (OSError, ValueError, TypeError):
            # requests' exceptions derive from OSError and invalid JSON raises
            # ValueError. Other exceptions now propagate instead of being
            # swallowed by a ``return`` inside ``finally``.
            print('获取历史记录失败')
            return None

    def clean_data(self, data):
        """
        Flatten the API payload into a DataFrame, then save and plot it.

        :param data: payload returned by ``history``; its ``result`` list
            holds one dict per draw.
        :return:
        """
        rows = []

        for item in data.get('result') or []:
            grades = item.get('prizegrades')
            row = [
                item.get('code'),
                item.get('date'),
                item.get('week'),
                item.get('red').split(','),
                item.get('blue'),
                item.get('sales'),
                item.get('poolmoney'),
                item.get('content'),
            ]
            # First three prize grades: prize money and number of winners
            for index in range(3):
                row.append(grades[index].get('typemoney'))
                row.append(grades[index].get('typenum'))
            rows.append(row)

        df = pd.DataFrame(
            rows,
            columns=["期数", "开奖日期", "星期数", "红球", "蓝球", "销售金额", "奖池", "中奖地区", "一等奖金", "一等奖人数", "二等奖金", "二等奖人数", "三等奖金", "三等奖人数"],  # column names
        )
        self.save(df)
        self.set_data(df)

    def save(self, df):
        """Write the DataFrame to ``self.file`` as an Excel sheet."""
        df.to_excel(self.file)

    def set_data(self, df):
        """
        Build a word cloud of winning regions and save it as ``result.jpg``.

        Each cell of the ``中奖地区`` column is split on ``,``; the last piece
        is dropped and the last character of every remaining piece is cut
        off. What is left is expected to be ``<region><count>`` (assumed from
        the slicing; confirm against live data) and the region is repeated
        ``count`` times so the cloud is weighted by winners.

        :param df: DataFrame produced by ``clean_data``.
        :return:
        """
        import re

        # Non-digit region name followed by the winner count
        entry = re.compile(r'(\D+)(\d+)')
        cut_text = []
        for content in df['中奖地区']:
            for addr in content.split(',')[:-1]:
                matched = entry.fullmatch(addr[:-1])
                if matched is None:
                    # Unexpected shape: skip it rather than abort the plot
                    continue
                name, num = matched.groups()
                cut_text.extend([name] * int(num))
        print(" ".join(cut_text))

        w = wordcloud.WordCloud(font_path=self.font, background_color="white", scale=4)
        w.generate(" ".join(cut_text))
        plt.imshow(w, interpolation="bilinear")
        plt.axis("off")
        # plt.show()
        # Save the generated image
        w.to_file('result.jpg')

    def parse_history(self):
        """
        Fetch the history and, when that succeeded, process it.

        :return:
        """
        data = self.history()
        if data is None:
            # history() already reported the failure
            return
        self.clean_data(data)
101 |
102 |
if __name__ == "__main__":
    # Prepare a font file yourself and pass its path as the second argument
    SSQ('近期记录.xlsx', '你自己准备的字库路径').parse_history()
109 |
--------------------------------------------------------------------------------
/【双色球】头奖分布/result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/【双色球】头奖分布/result.jpg
--------------------------------------------------------------------------------
/【双色球】头奖分布/近期记录.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/【双色球】头奖分布/近期记录.xlsx
--------------------------------------------------------------------------------
/【壁纸】美女壁纸下载器/bg_down.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-11-06 Python: 3.7
4 |
5 | from requests import get
6 | from filetype import guess
7 | from os import rename
8 | from os import makedirs
9 | from os.path import exists
10 | from json import loads
11 | from contextlib import closing
12 |
13 |
class DownBg:
    """
    Downloader for high-resolution wallpapers.
    """
    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
        }

    def down_load(self, file_url, file_full_name, now_photo_count, all_photo_count):
        """Stream one image to disk with a progress bar, then add its extension.

        :param file_url: URL of the image.
        :param file_full_name: target path without extension.
        :param now_photo_count: 1-based index of this image (display only).
        :param all_photo_count: total number of images (display only).
        """
        bar_width = 50
        with closing(get(file_url, headers=self.headers, stream=True)) as response:
            chunk_size = 1024  # bytes requested per chunk
            # The header may be missing; 0 means "size unknown"
            content_size = int(response.headers.get('content-length', 0))
            data_count = 0  # bytes written so far
            with open(file_full_name, "wb") as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    data_count += len(data)
                    # Clamp: decoded bytes can exceed the advertised length
                    ratio = min(data_count / content_size, 1) if content_size else 0
                    done_block = int(ratio * bar_width)
                    print("\r %s:[%s%s] %d%% %d/%d" % (
                        file_full_name, done_block * '█', ' ' * (bar_width - done_block),
                        ratio * 100, now_photo_count, all_photo_count), end=" ")

        # Detect the image type from the saved bytes and append the extension;
        # guess() yields None for unknown content, in which case the file
        # keeps its extension-less name
        file_type = guess(file_full_name)
        if file_type is not None:
            rename(file_full_name, file_full_name + '.' + file_type.extension)

    def crawler_photo(self, type_id, photo_count):
        """
        Download ``photo_count`` wallpapers of one category into ``./<type_id>``.

        :param type_id: newest 1, hottest 2, girls 3, starry sky 4
        :param photo_count: number of wallpapers to download
        :raises ValueError: ``type_id`` is not one of the known categories
        :return:
        """
        type_dict = {
            '1': '5c68ffb9463b7fbfe72b0db0',
            '2': '5c69251c9b1c011c41bb97be',
            '3': '5c81087e6aee28c541eefc26',
            '4': '5c81f64c96fad8fe211f5367'
        }
        column_key = type_dict.get(str(type_id))
        if column_key is None:
            # Fail early instead of requesting a URL that contains "None"
            raise ValueError('unknown type_id: %r' % (type_id,))

        url = 'https://service.paper.meiyuan.in/api/v2/columns/flow/{key}?page=1&per_page='.format(
            key=column_key) + str(photo_count)

        # Fetch the list of pictures
        respond = get(url, headers=self.headers)
        photo_data = loads(respond.content)

        # Total number of pictures
        all_photo_count = len(photo_data)

        # One folder per category; created once, and only when needed
        folder = './' + str(type_id)
        if photo_data and not exists(folder):
            makedirs(folder)

        for now_photo_count, photo in enumerate(photo_data, start=1):
            # URL of the picture to download
            file_url = photo['urls']['raw']

            # Last URL segment is the file name (no extension yet)
            file_name_only = file_url.split('/')[-1]

            # Full local path
            file_full_name = folder + '/' + file_name_only

            self.down_load(file_url, file_full_name, now_photo_count, all_photo_count)
89 |
90 |
if __name__ == '__main__':
    dg = DownBg()

    # Keep asking for a category and an amount until the process is stopped
    while True:
        wall_paper_id = input("\n\n壁纸类型:最新壁纸 1, 最热壁纸 2, 女生壁纸 3, 星空壁纸 4\n请输入编号以便选择5K超清壁纸类型:")
        wall_paper_count = input("请输入要下载的5K超清壁纸的数量:")

        if wall_paper_id in ['1', '2', '3', '4'] and wall_paper_count.isdigit():
            print("正在下载5K超清壁纸,请稍等……")
            dg.crawler_photo(int(wall_paper_id), int(wall_paper_count))
            print('\n下载5K高清壁纸成功!')
        else:
            print('输入有误')
107 |
--------------------------------------------------------------------------------
/【大众点评】字体反爬、坐标反爬/参数生成/encryp.js:
--------------------------------------------------------------------------------
/**
 * Return the current millisecond timestamp in hex, followed by a hex counter
 * of how many times the loop spun (at most 200) while the clock still showed
 * that same millisecond.
 */
function make() {
    var stamp = 1 * new Date;
    var spins = 0;
    while (stamp === 1 * new Date && spins < 200) {
        spins++;
    }
    return stamp.toString(16) + spins.toString(16);
}
5 |
/**
 * Build a dash-separated identifier made of five parts:
 *   make() - random hex - hash of `love` - hex of (you * babby) - make()
 *
 * @param love   string whose character codes are folded into the hash part
 * @param you    number, multiplied with `babby`
 * @param babby  number, multiplied with `you`
 * @returns the assembled identifier string
 */
function test(love, you, babby) {
    // Product of the two numeric arguments, rendered in hex
    var t = (you * babby).toString(16);
    return make() + "-" + Math.random().toString(16).replace(".", "") + "-" + function () {
        var t = love,
            n = void 0,
            e = void 0,
            i = [],
            r = 0;

        // XOR `t` with the bytes held in the outer array `i`, where the byte
        // at index e is shifted left by 8 * e bits. The loop bound comes from
        // the argument `n`, which the callers always pass as `i` itself.
        function o(t, n) {
            var e = void 0,
                r = 0;
            for (e = 0; e < n.length; e++) r |= i[e] << 8 * e;
            return t ^ r
        }

        // Push the low byte of each character to the front of `i`; every time
        // four bytes are collected, fold them into `r` and start a new group.
        for (n = 0; n < t.length; n++) e = t.charCodeAt(n), i.unshift(255 & e), 4 <= i.length && (r = o(r, i), i = []);
        // Fold any leftover bytes (fewer than four), then render `r` in hex
        return 0 < i.length && (r = o(r, i)), r.toString(16)
    }() + "-" + t + "-" + make()
}
26 |
/**
 * Return four random lowercase hex characters.
 */
function now_uu() {
    // A value in [65536, 131072) is five hex digits with a leading "1"
    var scaled = 65536 * (1 + Math.random());
    var hex = (scaled | 0).toString(16);
    // Drop the leading "1" to keep exactly four digits
    return hex.substring(1);
}
/**
 * Return "owl-" followed by five dash-separated groups of random hex made
 * of 2, 1, 1, 1 and 3 now_uu() chunks respectively.
 */
function puid() {
    var layout = [2, 1, 1, 1, 3];
    var out = "owl-";
    for (var g = 0; g < layout.length; g++) {
        if (g > 0) {
            out += "-";
        }
        for (var k = 0; k < layout[g]; k++) {
            out += now_uu();
        }
    }
    return out;
}
--------------------------------------------------------------------------------
/【大众点评】字体反爬、坐标反爬/参数生成/uid.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-11-15 Python: 3.7
4 | import execjs.runtime_names
5 | import random
6 | import requests
7 | import time
8 | from faker import Faker
9 |
10 |
# One of several fixed number pairs (presumably screen dimensions; confirm);
# both numbers are passed to the JS ``test`` function below
info = random.choice([[800, 1024], [900, 1440], [1050, 1680], [1200, 1920], [1200, 1600]])

with open("encryp.js", "r", encoding="utf-8") as f:
    js = execjs.compile(f.read())

print('引擎', execjs.get().name)
# Identifiers generated by the JavaScript helpers
uid = js.call('test', Faker().user_agent(), info[0], info[1])
page_id = js.call('puid')


# Headers for the catfront reporting endpoint
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
    'Host': 'catfront.dianping.com',
    'Referer': 'http://www.dianping.com/shop/97789651',
    'Origin': 'http://www.dianping.com',
}

# Headers for the shop page request
headers2 = {
    'Cookie': "_lxsdk_cuid=16e8184bc7cc8-00733806cb0caf-d087704-13c680-16e8184bc7cc8;",
    'Referer': 'http://www.dianping.com/shop/76311084',
    'Host': 'www.dianping.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
}
# ``&timestamp`` and ``&region`` are spelled out in full; they had been
# mis-decoded into the multiplication and registered-trademark signs,
# which corrupted both query parameters
sign_url = 'http://catfront.dianping.com/api/pv?v=1&sdk=1.8.13&project=app-pc-main-shop&pageurl=main-shop&pageId={pageId}&timestamp={timestamp}&region=&operator=&network=&container=&os=&unionid={unionid}'
session = requests.session()
session.get('http://www.dianping.com/shop/76311084', headers=headers2)
# Millisecond timestamp, as a string
timestamp = str(int(round(time.time() * 1000)))
response = session.post(sign_url.format(pageId=page_id, unionid=uid, timestamp=timestamp), headers=headers)
print(uid, page_id)
print(response)
40 |
41 |
42 |
--------------------------------------------------------------------------------
/【大众点评】字体反爬、坐标反爬/旧版/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-12 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/【大众点评】字体反爬、坐标反爬/旧版/parse_address_poi.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-03-27 Python: 3.7
4 |
5 |
def to_base36(value):
    """Return the lowercase base-36 text of an integer.

    :param value: integer to convert; negative values get a ``-`` prefix.
    :raises TypeError: ``value`` is not an ``int``.
    """
    if not isinstance(value, int):
        raise TypeError("expected int, got %s: %r" % (value.__class__.__name__, value))

    if value == 0:
        return "0"

    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    prefix = "-" if value < 0 else ""
    remaining = -value if value < 0 else value

    # Peel off the least significant digit each round and prepend it
    digits = ""
    while remaining:
        remaining, index = divmod(remaining, 36)
        digits = alphabet[index] + digits

    return prefix + digits
27 |
28 |
def decode(C):
    """Decode an obfuscated POI token into a longitude/latitude dict.

    The last character of ``C`` contributes its code point; every other
    character is read as a base-36 digit, shifted down by 10 (and by a
    further 7 when the result is still >= 10) and re-encoded with
    ``to_base36``. The position of the largest shifted value splits the
    resulting text into two hexadecimal numbers, from which the two
    coordinates are derived.

    :param C: encoded token string.
    :return: ``{'lng': float, 'lat': float}``
    """
    last_code = ord(C[-1])
    body = C[:-1]

    hex_text = ''
    split_pos = -1
    peak = 0
    for pos, char in enumerate(body):
        digit = int(char, 36) - 10
        if digit >= 10:
            digit -= 7
        hex_text += to_base36(digit)
        # Remember where the largest value sits: that character is the
        # separator between the two hex numbers
        if digit > peak:
            split_pos, peak = pos, digit

    first = int(hex_text[:split_pos], 16)
    second = int(hex_text[split_pos + 1:], 16)
    lng_raw = (first + second - int(last_code)) / 2
    lat = float(second - lng_raw) / 100000
    lng = float(lng_raw) / 100000
    return {'lng': lng, 'lat': lat}
58 |
59 |
if __name__ == '__main__':
    # Decode one sample token and show the resulting coordinates
    sample_result = decode('HFHSGGZTWSATFG')
    print(sample_result)
62 |
--------------------------------------------------------------------------------
/【大众点评】字体反爬、坐标反爬/最新版7月/README.md:
--------------------------------------------------------------------------------
1 | # 仅限学术交流
2 | # 如有冒犯请立即联系作者删除
3 |
4 | # 安装
5 | **`pip3 install fontTools`**
6 |
7 | **`pip3 install requests`**
8 |
9 | **`pip3 install redis`**
10 |
11 |
12 | # 使用
13 | 1. 需要先启动 redis 服务并完成配置,默认连接本机的 redis
14 | 2. 参考 `main.py` 中的调用代码
15 |
16 | **[参考博客链接](https://www.zhangkunzhi.com/archives/72)**
--------------------------------------------------------------------------------
/【天眼查】字体加密/num.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/【天眼查】字体加密/num.woff
--------------------------------------------------------------------------------
/【天眼查】字体加密/tyc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-12-06 Python: 3.7
4 |
5 | """
6 | 从网页下载一个字体文件获取对应推导式,动态获取请自行拓展
7 | """
8 |
9 | from fontTools.ttLib import TTFont
10 | import re
11 |
font = TTFont('num.woff')  # open the downloaded font file
font.saveXML('tyc-num.xml')  # dump it as XML
with open('tyc-num.xml', 'r', encoding='utf-8') as f:
    xml = f.read()  # read the XML dump back as text
# (glyph id, glyph name) pairs for the glyphs whose name is numeric.
# NOTE(review): the pattern was empty in this copy of the file; it is restored
# here to match the <GlyphID id=".." name=".."/> entries of the dump; confirm
# against the generated tyc-num.xml
GlyphID = re.findall(r'<GlyphID id="(.*?)" name="(\d+)"/>', xml)
print(GlyphID)
# Distinct numeric glyph names, sorted so that the positional mapping below
# does not depend on set iteration order
GlyphIDNameLists = sorted({int(Gname) for Gid, Gname in GlyphID})
print(GlyphIDNameLists)
# Keys "2", "3", ... map to the names at positions 0, 1, ... of the list above
DigitalDicts = {str(i): str(GlyphIDNameLists[i - 2]) for i in range(2, len(GlyphIDNameLists)+2)}
print(DigitalDicts)
# Glyph name -> value looked up through the glyph's id
GlyphIDDicts = {str(Gname): DigitalDicts[Gid] for Gid, Gname in GlyphID}
print('-' * 39 + '数字对应关系的字典推导式' + '-' * 39)
print(DigitalDicts)
print('-' * 27 + '通过数字对应关系生成源代码跟页面显示的字典推导式' + '-' * 27)
print(GlyphIDDicts)
27 |
--------------------------------------------------------------------------------
/【抖音】无水印视频解析/README.md:
--------------------------------------------------------------------------------
1 | 这是一份抖音无水印单个视频的解析代码
2 |
3 |
--------------------------------------------------------------------------------
/【抖音】无水印视频解析/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # Auth: Zok Email: 362416272@qq.com
3 | # Date: 2020/3/6
4 |
5 |
--------------------------------------------------------------------------------
/【抖音】无水印视频解析/parse.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # Auth: Zok Email: 362416272@qq.com
3 | # Date: 2020/3/6
4 |
5 | import re
6 | import requests
7 | import json
8 |
9 |
class ParseVideo:
    """Resolve a Douyin share text to video details and a watermark-free URL.

    :param share: share text containing a ``https://v.douyin.com/<code>/`` link.
    :raises ValueError: the share text contains no such link.
    """

    def __init__(self, share):
        path = self.get_url(share)
        self.url = 'https://v.douyin.com/' + path + '/'
        self.headers = {
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        }
        self.session = requests.session()
        self.first_url = None

    @staticmethod
    def get_url(share_url):
        """Extract the short-link code from the share text.

        :param share_url: text that contains the share link.
        :return: the path segment between ``v.douyin.com/`` and the next ``/``.
        :raises ValueError: no share link is present.
        """
        match = re.search(r'https://v\.douyin\.com/(.*?)/', share_url)
        if match is None:
            # Clear error instead of AttributeError on None.group
            raise ValueError('no v.douyin.com share link found in: %r' % (share_url,))
        return match.group(1)

    @staticmethod
    def _fail():
        """Report a parse failure and stop the program."""
        print('解析失败')
        # Same effect as exit(), without relying on the site module
        raise SystemExit

    def go_location(self):
        """Open the short link and pull the ids out of the landing page.

        :return: regex match with groups (itemId, uid, authorName, dytk),
            or ``None`` when the page does not contain them.
        """
        response = self.session.get(self.url, headers=self.headers)
        self.first_url = response.url
        result = re.search(r'itemId: "(.*?)",[\s\S]*?uid: "(.*?)",[\s\S]*?authorName: "(.*?)",[\s\S]*?dytk: "(.*?)"',
                           response.text)
        return result

    def go_message(self, ret):
        """Query the item-info API and print the video details.

        :param ret: match object returned by ``go_location``.
        """
        if ret is None:
            # Landing page did not match; nothing to query
            self._fail()

        url = 'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=' + ret.group(1) + '&dytk=' + ret.group(4)
        response = self.session.get(url, headers=self.headers)
        json_data = json.loads(response.text)
        user_id = ret.group(2)
        # The name is turned from backslash escapes into real characters
        user_name = ret.group(3).encode('utf-8').decode('unicode_escape')

        if json_data.get('status_code') != 0:
            self._fail()
        items = json_data.get('item_list')
        if not items:
            self._fail()
        item = items[0]
        aweme_id = item.get('aweme_id')
        desc = item.get('desc')
        comment_count = item.get('statistics').get('comment_count')
        digg_count = item.get('statistics').get('digg_count')

        video = item.get('video')
        cover = video.get('origin_cover').get('url_list')[0]
        play_addr = video.get('play_addr_lowbr').get('url_list')[0]

        # Redirects are not followed: the redirect target is the final URL
        play_addr_response = self.session.get(play_addr, headers=self.headers, allow_redirects=False)
        # Without a redirect, fall back to the address that was requested
        final_addr = play_addr_response.headers.get('location', play_addr)
        msg = """
        用户id:{user_id}
        用户名:{user_name}
        作品id:{aweme_id}
        标题: {desc}
        评论数: {comment_count}
        点赞数: {digg_count}
        封面地址:{cover}
        无水印视频:{addr}
        """.format(
            user_id=user_id,
            user_name=user_name,
            aweme_id=aweme_id,
            desc=desc,
            comment_count=comment_count,
            digg_count=digg_count,
            cover=cover,
            addr=final_addr
        )
        print(msg)

    def start(self):
        """Run the two steps: resolve the link, then print the details."""
        result = self.go_location()
        self.go_message(result)
77 |
78 |
if __name__ == '__main__':
    # Example share text:
    # '#在抖音,记录美好生活#要逆天!北京地坛医院证实新冠病毒攻击中枢神经系统 https://v.douyin.com/tW7qrw/ 复制此链接,打开【抖音短视频】,直接观看视频!'
    ParseVideo(input('请输入分享链接>>>')).start()
84 |
--------------------------------------------------------------------------------
/【拼多多】登陆参数生成/PinDuoDuo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-07-23 Python: 3.7
4 |
5 |
6 | import execjs.runtime_names
7 |
8 | """
9 | pip3 install PyExecJS
10 | npm i jsdom -g
11 | """
12 |
13 |
class PingDuoDuoSpider(object):
    """
    Generate the encrypted Pinduoduo login parameter via ``encryp.js``.

    :param password: plain string handed to the JS ``test`` function.
    """

    def __init__(self, password):
        # Show which JavaScript runtime execjs picked
        print('引擎', execjs.get().name)
        self.password = password

    def make(self):
        """Run the JS ``test`` function on the password.

        The result is printed as before and is now also returned, so callers
        can use the value instead of reading it off stdout.

        :return: whatever the JS ``test`` function returns.
        """
        # encryp.js is resolved relative to the current working directory
        with open("encryp.js", "r", encoding="utf-8") as f:
            ctx = execjs.compile(f.read())

        ret = ctx.call("test", self.password)
        print(ret)
        return ret
30 |
31 |
if __name__ == '__main__':
    # Read the string to encrypt, then print its encrypted form
    PingDuoDuoSpider(input("输入字符串")).make()
36 |
37 |
--------------------------------------------------------------------------------
/【拼多多】登陆参数生成/README.md:
--------------------------------------------------------------------------------
1 | # 解密过程参考博客
2 |
3 | [博客链接](https://www.zhangkunzhi.com/archives/67)
--------------------------------------------------------------------------------
/【拼多多】登陆参数生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-07-23 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/【淘宝】自动登陆/login_for_sina.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-11 Python: 3.7
4 |
5 | from selenium import webdriver
6 | from selenium.webdriver.common.by import By
7 | from selenium.webdriver.support.ui import WebDriverWait
8 | from selenium.webdriver.support import expected_conditions as EC
9 |
10 |
class TB_Spider:
    """Log in to Taobao through its Sina Weibo login option, driving Chrome.

    :param username: Weibo user name.
    :param password: Weibo password.
    """

    def __init__(self, username, password):
        """Create the Chrome driver and store the credentials."""
        url = 'https://login.taobao.com/member/login.jhtml'
        self.url = url

        options = webdriver.ChromeOptions()
        # Do not load images, to speed up page loads
        options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
        # Drop the automation switch to make detection less likely
        options.add_experimental_option('excludeSwitches',
                                        ['enable-automation'])
        self.browser = webdriver.Chrome(executable_path='./chromedriver', options=options)
        self.wait = WebDriverWait(self.browser, 40)
        # Weibo user name
        self.username = username
        # Weibo password
        self.password = password

    def _wait_css(self, selector):
        """Wait until the element matching ``selector`` is present and return it."""
        return self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, selector)))

    def run(self):
        """Perform the login and print the Taobao nickname on success."""
        self.browser.get(self.url)
        try:
            # Leave the QR-code view for the password login form
            self._wait_css('.qrcode-login > .login-links > .forget-pwd').click()

            # Choose the Weibo login option
            self._wait_css('.weibo-login').click()

            self._wait_css('.username > .W_input').send_keys(self.username)
            self._wait_css('.password > .W_input').send_keys(self.password)
            self._wait_css('.btn_tip > a > span').click()

            taobao_name = self._wait_css(
                '.site-nav-bd > ul.site-nav-bd-l > li#J_SiteNavLogin > div.site-nav-menu-hd > div.site-nav-user > a.site-nav-login-info-nick ')
            # Login succeeded: show the nickname
            print("登陆成功:%s" % taobao_name.text)
        except Exception as err:
            # quit() ends the driver session; close() only closed the window
            # and left the driver process running
            self.browser.quit()
            print("登陆失败")
            # Show the cause instead of hiding it
            print(repr(err))
59 |
60 |
if __name__ == "__main__":
    # Ask for the Weibo credentials (user name first), then log in
    TB_Spider(input("请输入你的微博用户名:"), input("请输入密码:")).run()
66 |
--------------------------------------------------------------------------------
/其他实战/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-11 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【5173网】自动登录/auto_login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-26 Python: 3.7
4 |
5 | import re
6 | import requests
7 | import execjs.runtime_names
8 |
9 |
class YX(object):
    """
    Automatic login for passport.5173.com.

    :param user: account name.
    :param pwd: plain-text password (encrypted by ``encryp.js`` before sending).
    """

    def __init__(self, user, pwd):
        self.user = user
        self.pwd = pwd
        self.session = requests.session()
        self.url = 'https://passport.5173.com/'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
            'Host': 'passport.5173.com',
        }
        print('引擎', execjs.get().name)

    def make_pwd(self, key):
        """Encrypt ``self.pwd`` with the JS ``make_js`` function.

        :param key: the ``PasswordKey`` value scraped from the login page.
        """
        with open("encryp.js", "r", encoding="utf-8") as f:
            ctx = execjs.compile(f.read())
        return ctx.call("make_js", self.pwd, key)

    def make_data(self, token, key):
        """Build the login form payload.

        :param token: ``SecurityToken`` scraped from the login page.
        :param key: ``PasswordKey`` scraped from the login page.
        """
        data = {
            'userName': self.user,
            'password': self.make_pwd(key),
            'mobileNo': '',
            'captcha': '',
            'smsCaptcha': '',
            'category': '',
            'passpod': '',
            'smsLogin': '0',
            '__validationToken__': token,
            '__validationDna__': '',
        }
        return data

    def login(self):
        """Fetch the login page, submit the credentials and report the result.

        :return: ``True`` when the response cookies contain ``5173auth``,
            otherwise ``False`` (also when the page exposes no token/key).
        """
        response = self.session.get(self.url)
        info = re.search(r'SecurityToken:"(.*?)",[\s\S]*?PasswordKey:"(.*?)",', response.text)
        if info is None:
            # Only a missing token/key is reported as such; errors raised
            # while encrypting or posting are no longer mislabelled
            print('【获取key失败】')
            return False

        token, key = info.groups()
        data = self.make_data(token, key)
        result = self.session.post(self.url, data=data, headers=self.headers)
        if '5173auth' in str(result.cookies):
            print(result.cookies)
            print('【登陆成功】')
            return True
        print('【登陆失败】')
        return False
63 |
64 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    username = input('请输入账号')
    password = input('密码')
    yx = YX(username, password)
    yx.login()
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/其他实战/【5173网】自动登录/logOK.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【5173网】自动登录/logOK.png
--------------------------------------------------------------------------------
/其他实战/【9377网】自动登录/9377login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-07-23 Python: 3.7
4 |
5 | import requests
6 |
7 |
class Login9377:
    """Automated login for the 9377 game platform."""

    def __init__(self, username, password):
        """Validate and store the credentials and create the HTTP session.

        :param username: account name (at least 6 characters)
        :param password: password (at least 6 characters)
        :raises SystemExit: when either value is shorter than 6 characters
        """
        self.headers = {
            'Upgrade-Insecure-Requests': '1',
            'Host': 'wvw.9377.com',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
        }
        if len(password) < 6 or len(username) < 6:
            print('请输入正确账号密码!')
            # Raise SystemExit directly: the ``exit()`` helper is injected by
            # the ``site`` module for interactive use and may be missing.
            raise SystemExit
        self.username = username
        self.password = password
        self.login_url = 'http://wvw.9377.com/login.php'
        self.host = 'https://www.9377.com/'
        self.session = requests.session()

    def login(self):
        """Open the login page, post the credentials and report the result."""
        data = {
            'do': 'login',
            'gourl': self.host,
            'login_save': '1',
            'username': self.username,
            'password': self.password
        }
        # The initial GET lets the session pick up cookies before the POST.
        self.session.get(self.login_url, headers=self.headers)
        result = self.session.post(self.login_url, headers=self.headers, data=data)
        self.check(result)

    def check(self, result):
        """Print whether the login worked.

        The login counts as successful when the user name appears in the
        string form of the cookies on *result*.
        """
        if self.username in str(result.cookies):
            print('登陆成功')
        else:
            print('用户名或密码错误')
48 |
49 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    name = input('输入账号')
    word = input('输入密码')
    lg = Login9377(name, word)
    lg.login()
55 |
--------------------------------------------------------------------------------
/其他实战/【9377网】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-07-24 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【DNS】自动登录/Login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-08 Python: 3.7
4 | import requests
5 | import re
6 | import execjs.runtime_names
7 |
8 |
class DNS:
    """Automated login for https://www.dns.com/login.html.

    The e-mail address and password are encrypted by the ``aes`` function of
    ``dns.js`` and posted together with the ``_token`` read from the login page.
    """

    # NOTE(review): the pattern in the original source was empty (apparently
    # lost during extraction), so ``.group(1)`` could never succeed. This one
    # assumes a hidden ``_token`` input on the page -- confirm against the site.
    TOKEN_PATTERN = r'name="_token"[^>]*?value="(.*?)"'

    def __init__(self, user, pwd):
        """Store the credentials, create the session and load dns.js.

        :param user: login e-mail address
        :param pwd: plain-text password
        """
        self.user = user
        self.pwd = pwd
        self.js = None
        self.url = 'https://www.dns.com/login.html'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
            'Host': 'www.dns.com'
        }
        # One session for both requests so cookies set by the login page are
        # sent with the POST; presumably the token is bound to that session.
        self.session = requests.session()
        self.read_js()

    @staticmethod
    def _extract_token(html):
        """Return the ``_token`` value found in *html*, or ``None``."""
        match = re.search(DNS.TOKEN_PATTERN, html)
        return match.group(1) if match else None

    def get_token(self):
        """Fetch the login page and return its token (``None`` when missing)."""
        response = self.session.get(self.url, headers=self.headers)
        token = self._extract_token(response.text)
        if token is None:
            print('token 捕获失败')
        else:
            print(token)
        # The original never returned the token, so ``_token`` was always None.
        return token

    def read_js(self):
        """Compile dns.js (read from the current working directory)."""
        with open("dns.js", "r", encoding="utf-8") as f:
            self.js = execjs.compile(f.read())

    def login(self):
        """Post the encrypted credentials and print the response object."""
        token = self.get_token()
        if token is None:
            # Nothing useful can be submitted without the token.
            return
        data = {
            '_token': token,
            'password': self.js.call('aes', self.pwd),
            'email': self.js.call('aes', self.user),
            'redirectTo': 'https://www.dns.com/dashboard',
        }
        response = self.session.post(self.url, data=data, headers=self.headers)
        print(response)
42 |
43 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    username = input('请输入账号')
    password = input('密码')
    dns = DNS(username, password)
    dns.login()
49 |
--------------------------------------------------------------------------------
/其他实战/【DNS】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【GitHub】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-06-18 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【GitHub】自动登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-11 Python: 3.7
4 | """
5 | 1. get login html token
6 | 2. login
7 | """
8 |
9 | import requests
10 | from lxml import etree
11 |
12 |
class Login(object):
    """GitHub form login: fetch the authenticity token, then post the session form."""

    def __init__(self, username, password):
        """Remember the credentials and set up the session, URLs and headers."""
        self.username = username
        self.password = password

        self.session = requests.Session()
        self.login_url = 'https://github.com/login'
        self.post_url = 'https://github.com/session'

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
            'Referer': 'https://github.com/',
            'Host': 'github.com'
        }

    def login_GitHub(self):
        """
        Submit the login form and print the outcome.

        On success the project names scraped from the returned page are printed.
        :return: None
        """
        form = dict(
            commit='Sign in',
            utf8='✓',
            authenticity_token=self.get_token(),
            login=self.username,
            password=self.password,
        )
        reply = self.session.post(self.post_url, data=form, headers=self.headers)

        if reply.status_code != 200:
            print("登录失败!")
            return

        page = etree.HTML(reply.content.decode())
        # The account menu in the header is used as the "logged in" marker.
        if not page.xpath('/html/body/div[1]/header/div[7]/details/summary'):
            print('账号或密码错误')
            return

        repo_names = page.xpath('//ul[@class="list-style-none"]/li/div/a/span[2]/text()')
        print("登录成功!正在拉取你的所有项目..")
        print(repo_names)

    def get_token(self):
        """
        Load the login page and read the hidden ``authenticity_token`` field.

        :return: the token string
        """
        login_page = self.session.get(self.login_url, headers=self.headers)
        tree = etree.HTML(login_page.content.decode())
        return tree.xpath('//input[@name="authenticity_token"]/@value')[0]
69 |
70 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    user = input('请输入您的账号: ')
    key = input('请输入您的密码: ')

    login = Login(user, key)
    login.login_GitHub()
77 |
--------------------------------------------------------------------------------
/其他实战/【Glidedsky】自动登陆/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-25 Python: 3.7
4 |
5 | import requests
6 | import re
7 | import json
8 |
9 |
class Gli:
    """
    Automated login for Glidedsky.
    http://www.glidedsky.com/login

    After logging in, the session cookies are written to
    ``toolkit/cookies.json`` as a JSON object.
    """

    def __init__(self, user, pwd):
        """Store the credentials and prepare the session and headers.

        :param user: login e-mail address
        :param pwd: password
        """
        self.user = user
        self.pwd = pwd
        self.url = 'http://www.glidedsky.com/login'
        self.session = requests.session()
        self.headers = {
            'Host': 'www.glidedsky.com',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
        }

    def get_token(self):
        """Fetch the login page and return the value of its csrf-token meta tag.

        :raises AttributeError: when the page contains no such tag
        """
        response = self.session.get(self.url, headers=self.headers)
        _token = re.search(r'name="csrf-token" content="(.*?)">', response.text).group(1)
        return _token

    @staticmethod
    def _save_cookies(cookies, path='toolkit/cookies.json'):
        """Write *cookies* (a plain dict) to *path* as JSON.

        The parent directory is created first; previously a missing
        ``toolkit/`` folder made the write fail with FileNotFoundError.
        """
        import os

        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(cookies))

    def login(self):
        """Post the credentials and save the resulting session cookies."""
        data = {'_token': self.get_token(), 'email': self.user, 'password': self.pwd}
        # Send the same headers as the GET so both requests look alike.
        self.session.post(self.url, data=data, headers=self.headers)
        cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        self._save_cookies(cookies)
39 |
40 |
# Script entry point: read the user name and password, then log in.
if __name__ == '__main__':
    username = input('请输入用户名')
    password = input('请输入密码')
    g = Gli(username, password)
    g.login()
46 |
--------------------------------------------------------------------------------
/其他实战/【Python加密库】Demo/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-07-11 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【TCL金融】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【TCL金融】自动登录/auto_login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-10 Python: 3.7
4 |
5 | import requests
6 | import execjs.runtime_names
7 |
8 |
class SpiderLogin:
    """
    TCL personal finance login.
    https://weixin.tjinsuo.com/#login/mine
    """

    def __init__(self, user, pwd):
        """Keep the credentials, compile encryp.js and print the JS runtime name."""
        self.user, self.pwd = user, pwd
        self.js = None
        self.url = 'https://weixin.tjinsuo.com/service/user/login'
        self.load_js()
        print('引擎', execjs.get().name)

    def load_js(self):
        """Read encryp.js from the working directory and compile it."""
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
            self.js = execjs.compile(source)

    def auto_login(self):
        """Encrypt the password, post the login form and print the reply."""
        # The JS ``make`` function returns "<randKey>||<encrypted password>".
        rand_key, word = self.js.call('make', self.pwd).split('||')
        print(rand_key, word)

        fields = {'user': self.user, 'pwd': word, 'rand_key': rand_key}
        data = 'mobile={user}&password={pwd}&cipherkey=&message=&randKey={rand_key}'.format(**fields)

        headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'Host': 'weixin.tjinsuo.com',
            'terminalType': 'BEST_WX',
            'Accept': 'application/json'
        }

        response = requests.post(self.url, headers=headers, data=data)
        print(response.text)
        print(response)
48 |
49 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    username = input('请输入账号')
    password = input('密码')
    wcb = SpiderLogin(username, password)
    wcb.auto_login()
55 |
--------------------------------------------------------------------------------
/其他实战/【TCL金融】自动登录/ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【TCL金融】自动登录/ok.png
--------------------------------------------------------------------------------
/其他实战/【steam】自动登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-12-11 Python: 3.7
4 |
5 | import execjs
6 | import requests, json, re
7 |
8 |
def Get_parameters(username):
    """Request the RSA public key Steam issues for *username*.

    Only the password encryption step of the Steam login is handled here;
    extend as needed.

    :param username: Steam account name
    :return: ``(publickey_mod, publickey_exp)``, or ``None`` when the request
        fails or the reply carries no key material
    """
    import time

    url = "https://store.steampowered.com/login/getrsakey/"
    headers = {
        'User-Agent': 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
        'Host': 'store.steampowered.com',
        'Referer': 'https://store.steampowered.com/login/?redir=&redir_ssl=1',
        'Origin': 'https://store.steampowered.com'
    }
    data = {
        # Millisecond timestamp sent as the ``donotcache`` field.
        'donotcache': int(round(time.time() * 1000)),
        'username': username,
    }
    try:
        res = requests.post(url=url, headers=headers, data=data)
        # Parse the body once instead of once per field.
        payload = json.loads(res.text)
        publickey_mod = payload.get('publickey_mod')
        publickey_exp = payload.get('publickey_exp')
    except (requests.RequestException, ValueError, AttributeError) as err:
        print('访问失败', err)
        return None

    if not publickey_mod or not publickey_exp:
        # A reply without both key parts cannot be used for encryption.
        print('访问失败', payload)
        return None
    return publickey_mod, publickey_exp
34 |
35 |
def main(pwd, publickey_mod, publickey_exp):
    """
    Encrypt the password with the RSA key parts via ``get_pwd`` in execute.js.

    :param pwd: plain-text password
    :param publickey_mod: RSA modulus passed through to the JS function
    :param publickey_exp: RSA exponent passed through to the JS function
    :return sign: the value returned by the JS function
    """
    with open('execute.js', 'r', encoding='utf-8') as script:
        source = script.read()
        compiled = execjs.compile(source)
    print('引擎', execjs.get().name)
    return compiled.call('get_pwd', pwd, publickey_mod, publickey_exp)
48 |
49 |
# Script entry point: fetch the RSA key for the account, then print the encrypted password.
if __name__ == '__main__':
    username = input('请输入账户:')
    pwd = input('请输入密码:')
    key_parts = Get_parameters(username)
    if key_parts is None:
        # Get_parameters already printed the reason; stop here instead of
        # failing with "cannot unpack non-iterable NoneType object".
        raise SystemExit(1)
    publickey_mod, publickey_exp = key_parts
    sign = main(pwd, publickey_mod, publickey_exp)
    print(sign)
56 |
--------------------------------------------------------------------------------
/其他实战/【万创帮】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【万创帮】自动登录/login_ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【万创帮】自动登录/login_ok.png
--------------------------------------------------------------------------------
/其他实战/【万创帮】自动登录/spider_login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-03 Python: 3.7
4 |
5 | import json
6 | import requests
7 | import execjs.runtime_names
8 |
9 |
class SpiderLogin:
    """
    Login client for the WCB China mobile site (m.wcbchina.com).
    """

    def __init__(self, user, pwd):
        """Store the credentials and print the JavaScript runtime in use.

        :param user: account name
        :param pwd: plain-text password
        """
        self.user = user
        self.pwd = pwd
        self.url = 'https://m.wcbchina.com/login/other-login.html'
        print('引擎', execjs.get().name)

    def use_js(self):
        """Call encryp.js to build the request signature and encrypt the password.

        :return: ``(sign, timestamp, encrypted_password)``, or ``None`` when
            the JavaScript call fails
        """
        with open("encryp.js", "r", encoding="utf-8") as f:
            js = execjs.compile(f.read())

        try:
            sign, t = js.call("make_sigin")
            pwd = js.call("make_pwd", self.pwd)
            return sign, t, pwd
        except Exception:
            print('异常数据')
            return None

    def _build_payload(self, sign, t, pwd):
        """Assemble the JSON body for the login request.

        The original code sent the account name under ``password`` and the
        encrypted password under ``username``; the two values are now placed
        under the keys their names describe.
        """
        return {
            'auth': {'sign': sign, 'timestamp': t},
            'password': pwd,
            'username': self.user
        }

    def auto_login(self):
        """Post the login request and print the cookies and the response object."""
        params = self.use_js()
        if params is None:
            # use_js already reported the failure; unpacking None would raise.
            return
        sign, t, pwd = params
        headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'Referer': 'https://m.wcbchina.com/login/other-login.html'
        }
        pay_load = self._build_payload(sign, t, pwd)

        response = requests.post(self.url, headers=headers, data=json.dumps(pay_load))
        print(response.cookies)
        print(response)
51 |
52 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    username = input('请输入账号')
    password = input('密码')
    wcb = SpiderLogin(username, password)
    wcb.auto_login()
58 |
--------------------------------------------------------------------------------
/其他实战/【中关村】自动登录/README.md:
--------------------------------------------------------------------------------
1 | # 解密过程博客说明
2 |
3 | https://www.zhangkunzhi.com/?p=135
--------------------------------------------------------------------------------
/其他实战/【中关村】自动登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-07-29 Python: 3.7
4 |
5 |
6 | import requests
7 | import hashlib
8 | import time
9 | import json
10 |
11 | from urllib import parse
12 |
13 |
class ZGC:
    """
    Login client for zol.com.cn (Zhongguancun Online).

    Write-up of the analysis: https://www.zhangkunzhi.com/?p=135

    1. The password is hashed with MD5 (CryptoJS on the site).
    2. The request must carry cookies, including ``ip_ck``.
    """

    def __init__(self, username, password):
        """Store the credentials and the request headers."""
        self.username = username
        self.password = password
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        }

    def get_cookies(self):
        """Fetch the ``ipck`` value from the JSONP endpoint.

        :return: the URL-quoted ``ipck`` string, or ``None`` on any failure
        """
        _now = time.time()
        t = str(_now)[:7]
        _jsonp = int(round(_now * 1000))
        pick = 'http://js.zol.com.cn/pvn/pv.ht?&t={t}&c=&callback=_jsonp{_jsonp}'.format(t=t, _jsonp=_jsonp)
        try:
            content = requests.get(pick, headers=self.headers).text
            # Strip the JSONP wrapper: keep what lies between the first "("
            # and the final character.
            ipck = json.loads(content[content.find('(')+1:-1]).get('ipck')
            return parse.quote(ipck)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.
            print('cookies 获取失败')
            return None

    def login(self, ipck):
        """Post the login form.

        :param ipck: value for the ``ip_ck`` cookie (see ``get_cookies``)
        :return: the decoded JSON reply
        """
        _str_now = str(int(time.time()))
        login_url = 'http://service.zol.com.cn/user/ajax/login2014/login.php'
        data = {
            'userid': self.username,
            'pwd': self.make_md5(self.password),
            'is_auto': '1',
            'backUrl': 'http://www.zol.com.cn/'
        }
        cookies = {
            'Hm_lpvt_ae5edc2bc4fc71370807f6187f0a2dd0': _str_now,
            'Hm_lvt_ae5edc2bc4fc71370807f6187f0a2dd0': _str_now,
            'ip_ck': ipck,
            'vn': '1',
            'lv': _str_now,
            'z_pro_city': 's_provice%3Dzhongqing%26s_city%3Dzhongqing',
            'z_day': 'ixgo20%3D1'
        }

        response = requests.post(login_url, headers=self.headers, data=data, cookies=cookies)
        msg = json.loads(response.content)
        return msg

    @staticmethod
    def make_md5(_str):
        """Return the hex MD5 digest of ``_str + 'zol'`` (UTF-8 encoded)."""
        text = _str + 'zol'
        return hashlib.md5(text.encode(encoding='utf-8')).hexdigest()

    def main(self):
        """Fetch the cookie value, log in and print the server reply."""
        ipck = self.get_cookies()
        msg = self.login(ipck)
        print(msg)
84 |
85 |
# Script entry point: read the account and password, then run the login flow.
if __name__ == '__main__':
    user = input('请输入中关村账号')
    pwd = input('请输入中关村密码')
    zgc = ZGC(user, pwd)
    zgc.main()
91 |
--------------------------------------------------------------------------------
/其他实战/【京东】商品数据爬取/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-12-10 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【京东】商品数据爬取/geckodriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【京东】商品数据爬取/geckodriver
--------------------------------------------------------------------------------
/其他实战/【京东】商品数据爬取/selenium抓取.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-11 Python: 3.7
4 |
5 | from selenium import webdriver
6 | from selenium.webdriver.common.keys import Keys # 键盘按键操作
7 | import time
8 |
9 |
def get_goods(driver):
    """Print every product on the current results page and on all following pages.

    For each ``gl-item`` element the name, detail link, price and comment
    count are printed; then the "下一页" (next page) link is clicked and the
    process repeats.

    Pagination is a loop rather than one recursive call per page, so a long
    listing no longer runs into the recursion limit. As before, the first
    exception (typically the missing next-page link) ends the crawl quietly.

    :param driver: Selenium WebDriver positioned on a search result page
    """
    try:
        while True:
            goods = driver.find_elements_by_class_name('gl-item')

            for good in goods:
                detail_url = good.find_element_by_tag_name('a').get_attribute('href')

                p_name = good.find_element_by_css_selector('.p-name em').text.replace('\n', '')
                price = good.find_element_by_css_selector('.p-price i').text
                p_commit = good.find_element_by_css_selector('.p-commit a').text

                msg = '''
            商品 : %s
            链接 : %s
            价钱 :%s
            评论 :%s
            ''' % (p_name, detail_url, price, p_commit)

                print(msg, end='\n\n')

            button = driver.find_element_by_partial_link_text('下一页')
            button.click()
            # Give the next page a moment to load.
            time.sleep(1)
    except Exception:
        # Deliberate best-effort: any failure ends the crawl.
        return
36 |
37 |
def spider(url, keyword):
    """Open *url* in Firefox, search for *keyword* and print the results.

    :param url: page containing the search box with id ``key``
    :param keyword: text typed into the search box
    """
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        driver.implicitly_wait(3)  # implicit wait for element lookups
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys(keyword)
        input_tag.send_keys(Keys.ENTER)
        get_goods(driver)
    finally:
        # quit() ends the whole driver session; close() only closes the
        # current window and can leave the driver process running.
        driver.quit()
50 |
# Script entry point: search jd.com for the keyword and print the listing.
if __name__ == '__main__':
    spider('https://www.jd.com/', keyword='手机')
53 |
--------------------------------------------------------------------------------
/其他实战/【人人网】自动登录/login.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import re
4 | import execjs.runtime_names
5 |
6 |
class People:
    """Three-step login for renren.com: home page, encryption key, login."""

    def __init__(self, user, pwd):
        """
        Store the credentials, create the session and compile enc.js.

        :param user: user name
        :param pwd: password
        """
        self.username = user
        self.pwd = pwd
        self.ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'
        self.headers = {
            'User-Agent': self.ua,
            'Host': 'www.renren.com',
        }
        self.session = requests.session()
        self.json_data = ''

        print('【JS引擎】', execjs.get().name)
        with open("enc.js", "r", encoding="utf-8") as f:
            self.js = execjs.compile(f.read())

    def to_index(self):
        """
        Step 1 - visit the home page so the session receives its cookies.

        :return: None
        """
        response = self.session.get('http://www.renren.com/', headers=self.headers)
        print('【主页】', response)

    def get_key(self):
        """
        Step 2 - fetch the encryption parameters.

        The reply carries ``rkey`` and the values needed to encrypt the password.
        :return: the raw response text (JSON)
        """
        headers = {
            'Referer': 'http://login.renren.com/ajaxproxy.htm',
            'User-Agent': self.ua,
        }
        response = self.session.get('http://login.renren.com/ajax/getEncryptKey', headers=headers)
        print('【获取key】', response.text)
        return response.text

    @staticmethod
    def _extract_titles(html):
        """Return every ``<title>`` text found in *html*.

        NOTE(review): the pattern in the original source was split across two
        lines with its tags missing (an unterminated string literal); it is
        reconstructed here as a ``<title>`` match -- confirm.
        """
        return re.findall(r"<title>(.*?)</title>", html)

    def login(self, key_info):
        """
        Step 3 - log in.

        :param key_info: JSON text obtained in step 2
        :return: None
        """
        url = 'http://www.renren.com/ajaxLogin/login?1=1' + self.js.call('getTime')
        data = {
            'email': self.username,
            'icode': "",
            'origURL': 'http://www.renren.com/home',
            'domain': 'renren.com',
            'key_id': '1',
            'captcha_type': 'web_login',
            'password': self.get_password(key_info),
            'rkey': json.loads(key_info).get('rkey'),
            'f': ''
        }
        print('【登录data】', data)
        print('【登录URL】', url)
        print('【Cookies】', self.session.cookies)
        response = self.session.post(url, data=data, headers=self.headers)
        print('【返回信息】', response.text)
        # Load the home page afterwards and show its title as the login status.
        response = self.session.get('http://www.renren.com/home', headers=self.headers)
        print('【登录信息】', self._extract_titles(response.text))

    def get_password(self, key_info):
        """
        Encrypt the password by calling ``enc`` in enc.js.

        :param key_info: JSON text obtained in step 2
        :return: the value returned by the JS function
        """
        return self.js.call('enc', key_info, self.pwd)

    def start(self):
        """
        Run the three steps in order.

        :return: None
        """
        self.to_index()
        self.login(self.get_key())
92 |
93 |
# Script entry point: read the credentials and run the three-step login.
if __name__ == '__main__':
    """
    启动区域
    """
    username = input('用户名>>> ')
    password = input('密码>>> ')
    pp = People(username, password)
    pp.start()
102 |
--------------------------------------------------------------------------------
/其他实战/【企业名片】企业查询/qi_ming.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-11-08 Python: 3.7
4 |
5 | import requests
6 | import json
7 | import execjs.runtime_names
8 |
9 |
# Compile the decryption helper; encryp.js is read from the working directory.
with open('encryp.js', 'r', encoding='utf-8') as f:
    js = execjs.compile(f.read())

# Show which JavaScript runtime PyExecJS selected.
print('引擎', execjs.get().name)

# Form fields for the product list request: first page, 20 entries, all
# filter fields left empty.
data = {
    'time_interval': '',
    'tag': '',
    'tag_type': '',
    'province': '',
    'lunci': '',
    'page': '1',
    'num': '20',
    'unionid': '',
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
}

response = requests.post('https://vipapi.qimingpian.com/DataList/productListVip', data=data, headers=headers)

re_data = json.loads(response.text)

# The reply's ``encrypt_data`` field is handed to the JS ``get_info`` function.
# Note that ``data`` is rebound here from the request form to the JS result.
data = js.call('get_info', re_data.get('encrypt_data'))
# Decode backslash escape sequences in the returned text before printing.
print(data.encode('utf-8').decode('unicode_escape'))
36 |
37 |
--------------------------------------------------------------------------------
/其他实战/【国鑫所】自动登录/Login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-10 Python: 3.7
4 | import execjs.runtime_names
5 | import requests
6 |
7 |
class GuoXin:
    """
    Login client for gclfax (国鑫所).
    https://wechat.gclfax.com/html/register/login.html
    """

    def __init__(self, user, pwd):
        """Keep the credentials and compile the encryption script."""
        self.user, self.pwd = user, pwd
        self.url = 'https://wechat.gclfax.com/client/index.php'
        self.js = None
        self.init_js()

    def init_js(self):
        """Print the JS runtime name, then compile encryp.js from the working directory."""
        print('引擎', execjs.get().name)
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
            self.js = execjs.compile(source)

    def login(self):
        """Post the login form with the JS-encrypted password and print the reply."""
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'Host': 'wechat.gclfax.com',
            'Origin': 'https://wechat.gclfax.com',
            'Referer': 'https://wechat.gclfax.com/html/register/login.html'
        }
        # The password is encrypted by the ``test`` function of encryp.js.
        encrypted = self.js.call('test', self.pwd)
        form = {
            'OPT': '1',
            'name': self.user,
            'pwd': encrypted,
            'randomId': '',
            'code': '',
            'openid': '',
        }
        reply = requests.post(self.url, headers=request_headers, data=form)
        print(reply.text)
        print(reply)
44 |
45 |
# Script entry point: read the user name and password, then attempt the login.
if __name__ == '__main__':
    username = input('用户名')
    password = input('密码')
    gxs = GuoXin(username, password)
    gxs.login()
51 |
--------------------------------------------------------------------------------
/其他实战/【国鑫所】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【国鑫所】自动登录/login_ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【国鑫所】自动登录/login_ok.png
--------------------------------------------------------------------------------
/其他实战/【天眼查】模拟登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-06-18 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【天眼查】模拟登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-13 Python: 3.7
4 | import time
5 |
6 | from lxml import etree
7 | from selenium import webdriver
8 | from selenium.webdriver.common.by import By
9 | from selenium.webdriver.support.ui import WebDriverWait
10 | from selenium.webdriver.support import expected_conditions as EC
11 |
12 |
class TYC_Spider:
    """Log in to tianyancha.com with Selenium and print company names and phone numbers."""

    def __init__(self, username, password):
        """Set up the URLs, the Chrome driver and the credentials."""
        url = 'https://www.tianyancha.com/login'
        page_url = 'https://www.tianyancha.com/search/ohp1/p{page}?base=cq'
        self.page_url = page_url
        self.page = 1  # current page number
        self.url = url

        options = webdriver.ChromeOptions()
        # Drop the "enable-automation" switch to make detection less likely.
        options.add_experimental_option('excludeSwitches',
                                        ['enable-automation'])
        self.browser = webdriver.Chrome(executable_path='./chromedriver', options=options)
        self.wait = WebDriverWait(self.browser, 40)
        self.username = username
        self.password = password

    def run(self):
        """Log in with the password form, then crawl the result pages.

        Login failures and crawl failures are reported separately; the
        original wrapped both in one handler and printed "登陆失败" for either.
        """
        self.browser.get(self.url)
        try:
            # Switch the form to password login.
            use_pass = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="web-content"]/div/div[2]/div/div[2]/div/div[3]/div[1]/div[2]')))
            time.sleep(2)
            use_pass.click()
            username = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="web-content"]/div/div[2]/div/div[2]/div/div[3]/div[2]/div[2]/input')))
            password = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="web-content"]/div/div[2]/div/div[2]/div/div[3]/div[2]/div[3]/input')))
            input_to = self.wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="web-content"]/div/div[2]/div/div[2]/div/div[3]/div[2]/div[5]')))
            username.send_keys(self.username)
            password.send_keys(self.password)
            input_to.click()

            # The home page search box appearing means the login went through.
            self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="home-main-search"]')))
        except Exception:
            # quit() also stops the chromedriver process; close() does not.
            self.browser.quit()
            print("登陆失败")
            return

        print('登陆成功')
        try:
            self.go_page()
        except Exception as err:
            print('抓取失败', err)
        finally:
            self.browser.quit()

    def go_page(self):
        """Visit successive result pages until one yields no companies.

        The original never advanced ``self.page`` and called itself without
        end, requesting the same page until the recursion limit was hit.

        NOTE(review): as in the original, the first page requested is
        ``self.page + 1`` (page 2), so page 1 is never fetched -- confirm
        whether that is intended.
        """
        while True:
            self.page += 1
            self.browser.get(self.page_url.format(page=str(self.page)))  # "ohp" listing includes phones
            if not self.get_info():
                break

    def get_info(self):
        """Print company name and phone number for each entry on the current page.

        :return: number of company entries found on the page
        """
        html = self.browser.page_source
        etr = etree.HTML(html)
        divs = etr.xpath("//div[@class='search-item sv-search-company']")
        for div in divs:
            title = div.xpath('./div/div[3]/div[1]/a/text()')
            # The phone number sits in one of three places; try them in turn.
            phone = div.xpath('./div/div[3]/div[3]/div[1]/script/text()')
            if not phone:
                phone = div.xpath('./div/div[3]/div[3]/div[1]/span[2]/span/text()')
            if not phone:
                phone = div.xpath('./div/div[3]/div[4]/div[1]/script/text()')
            print(title, phone)
        # NOTE(review): the original indentation of this sleep could not be
        # recovered; it is applied once per page here.
        time.sleep(2)
        return len(divs)
86 |
87 |
# Script entry point: read the Tianyancha credentials and start the spider.
# The prompt used to ask for a Weibo user name, which this script does not use.
if __name__ == "__main__":
    name = input("请输入你的天眼查账号:")
    pas = input("请输入密码:")
    spider = TYC_Spider(name, pas)
    spider.run()
93 |
--------------------------------------------------------------------------------
/其他实战/【天翼】登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # Auth: Zok Email: 362416272@qq.com
3 | # Date: 2020/1/23
4 |
5 |
6 | import requests
7 | import re
8 | import execjs
9 |
10 |
# One shared session, so cookies received by earlier requests in login() are reused by later ones.
session = requests.session()
# Desktop Chrome User-Agent sent with each request made in login().
UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
13 |
14 |
def build_unify_login_url(html):
    """Rebuild the unifyAccountLogin URL from the link embedded in *html*.

    :raises AttributeError: when the page does not contain the expected link
    """
    # "&paras" had been garbled into the single character "¶" (the HTML
    # entity &para), so the pattern could never match and the rebuilt query
    # string was wrong.
    ret = re.search(r'sign=(.*?)&appId=(.*?)&paras=(.*?)&format=(.*?)&clientType=(.*?)&version=(.*?)">', html)
    return ('https://open.e.189.cn/api/logbox/oauth2/unifyAccountLogin.do?sign=' + ret.group(1)
            + '&appId=' + ret.group(2)
            + '&paras=' + ret.group(3)
            + '&format=' + ret.group(4)
            + '&clientType=' + ret.group(5)
            + '&version=' + ret.group(6))


def login(username, password):
    """Log in to e.189.cn and print the server reply.

    Both the user name and the password are encrypted by the ``make``
    function of v1.js. The login form parameters are scraped from the
    unifyAccountLogin page.

    :param username: account name
    :param password: plain-text password
    """
    with open('v1.js', 'r', encoding='utf-8') as f:
        js = execjs.compile(f.read())
    username = js.call('make', username)
    password = js.call('make', password)
    url = 'https://e.189.cn/index.do'
    login_url = 'https://open.e.189.cn/api/logbox/oauth2/loginSubmit.do'
    response = session.get(url, headers={"User-Agent": UA})

    url = build_unify_login_url(response.text)

    response = session.get(url, headers={"User-Agent": UA})
    text = response.text

    captchaToken = re.search(r"captchaToken' value='(.*?)'>", text).group(1)

    ret = re.search(r"clientType = '(.*?)'[\s\S]*?accountType = '(.*?)'[\s\S]*?appKey = '(.*?)'", text)
    clientType = ret.group(1)
    accountType = ret.group(2)
    appKey = ret.group(3)

    paramId = re.search(r'paramId = "(.*?)"', text).group(1)
    REQID = re.search(r'reqId = "(.*?)"', text).group(1)
    lt = re.search(r'lt = "(.*?)"', text).group(1)

    headers = {
        'User-Agent': UA,
        'Host': 'open.e.189.cn',
        'Origin': 'https://open.e.189.cn',
        'Referer': url,
        'REQID': REQID,
        'lt': lt,
    }
    data = {
        'appKey': appKey,
        'accountType': accountType,
        'validateCode': "",  # captcha text, left empty
        'captchaToken': captchaToken,
        'returnUrl': 'https://e.189.cn/user/loginMiddle.do?returnUrlMid=https://e.189.cn/user/index.do',
        'mailSuffix': '',
        'dynamicCheck': 'FALSE',
        'clientType': clientType,
        'cb_SaveName': '1',
        'isOauth2': 'false',
        'state': '',
        'paramId': paramId,
        'userName': username,
        'password': password,
    }
    response = session.post(login_url, headers=headers, data=data)

    print(response.text)
71 |
72 |
# Runs on import as well as on direct execution: show which JavaScript runtime
# PyExecJS selected and warn when it is not Node.js.
_runtime_name = execjs.get().name
print(_runtime_name)
if not _runtime_name == 'Node.js (V8)':
    print('请安装V8 引擎')

# Script entry point: prompt for the credentials and attempt the login.
if __name__ == '__main__':
    user = input('用户名>>>')
    pwd = input('密码>>>')
    login(user, pwd)
81 |
--------------------------------------------------------------------------------
/其他实战/【好莱客】参数解析/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【好莱客】参数解析/holike.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-07 Python: 3.7
4 | import execjs.runtime_names
5 | import requests
6 | import time
7 | import re
8 |
9 |
class MakeParam:
    """
    Build the encrypted login parameters for Holike OA.
    http://oa.holike.com/login.jsp
    """

    def __init__(self, name, pwd):
        self.name = name
        self.pwd = pwd
        self.js = None  # compiled encryp.js context, filled in by read_js()

        self.read_js()

    def get_key_vi(self):
        """Fetch the session script and derive the key material from it.

        :return: the result of ``get_key_iv`` in encryp.js (make_params reads its
                 ``security``, ``key`` and ``iv`` entries), or None when the session
                 script does not contain the expected ``return "...";`` statement
        """
        url = 'http://oa.holike.com/resource/js/session.jsp?_={t}&s_ajax=true'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
        }
        # The "_" query value is the current time in milliseconds.
        response = requests.get(url.format(t=int(round(time.time() * 1000))), headers=headers)
        found = re.search(r'return "(.*?)";', response.text)
        if found is None:
            print('获取key失败')
            return None
        return self.js.call('get_key_iv', found.group(1))

    def read_js(self):
        """Compile encryp.js (read from the current working directory) into ``self.js``."""
        with open('encryp.js', 'r', encoding='utf-8') as f:
            self.js = execjs.compile(f.read())

    def make_params(self):
        """Print ``j_username`` and the encrypted ``j_password``.

        Nothing is printed when the key material could not be fetched;
        get_key_vi has already reported that failure.
        """
        obj = self.get_key_vi()
        if obj is None:
            # Previously this fell through to obj.get(...) and died with AttributeError.
            return
        j_password = self.js.call("make_j_password", self.pwd, obj.get('security'), obj.get('key'), obj.get('iv'))

        msg = (
            "\n"
            "j_username: {user}\n"
            "j_password: {j_password}\n"
        ).format(user=self.name, j_password=j_password)
        print(msg)
49 |
50 |
# Script entry point: read the credentials and print the generated login parameters.
if __name__ == '__main__':
    username = input('请输入用户名')
    password = input('请输入密码')
    hk = MakeParam(username, password)
    hk.make_params()
56 |
--------------------------------------------------------------------------------
/其他实战/【好莱客】参数解析/ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【好莱客】参数解析/ok.png
--------------------------------------------------------------------------------
/其他实战/【小牛在线】登录参数生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【小牛在线】登录参数生成/make_param.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-29 Python: 3.7
4 |
5 | import execjs.runtime_names
6 |
7 | """
8 | 小牛在线,登陆密码参数解密
9 | https://www.xiaoniu88.com/user/login
10 | """
11 |
12 |
def init_js():
    """Compile encryp.js (read from the current working directory) and return the execjs context."""
    with open("encryp.js", "r", encoding="utf-8") as script:
        source = script.read()
    return execjs.compile(source)
16 |
17 |
def make_param(password):
    """Encrypt *password* with ``get_pwd`` from encryp.js and print the result."""
    encrypted = init_js().call('get_pwd', password)
    print('加密后密码', encrypted)
22 |
23 |
# Script entry point: read a plain-text password and print its encrypted form.
if __name__ == '__main__':
    password = input('明文密码')
    make_param(password)
27 |
--------------------------------------------------------------------------------
/其他实战/【开鑫贷】登陆参数生成/KaiXinDai.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-15 Python: 3.7
4 | import requests
5 | import execjs.runtime_names
6 |
7 |
class KaiXinDai:
    """
    Generate the encrypted login password for KaiXinDai.
    https://www.gkkxd.com/userAuth/login
    """
    def __init__(self, pwd):
        self.pwd = pwd
        self.js = None  # compiled encryp.js context, filled in by init_js()
        self.init_js()

    @staticmethod
    def get_dl():
        """Fetch the login page and return the ``value`` of its element with id ``dlmy``."""
        from lxml import etree
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
            'Host': 'www.kxjf.com',
            'Referer': 'https://www.gkkxd.com/userAuth/login',
        }
        response = requests.get('https://www.kxjf.com/user/login?mainSiteName=kxd', headers=headers)
        page = etree.HTML(response.text)
        values = page.xpath('//*[@id="dlmy"]/@value')
        return values[0]

    def init_js(self):
        """Compile encryp.js (read from the current working directory) into ``self.js``."""
        with open('encryp.js', 'r', encoding='utf-8') as script:
            source = script.read()
        self.js = execjs.compile(source)

    def make_param(self):
        """Print the password produced by ``test`` in encryp.js from the page's dlmy value and ``self.pwd``."""
        dlmy = self.get_dl()
        encrypted = self.js.call('test', dlmy, self.pwd)
        print('pwd生成', encrypted)
39 |
40 |
# Script entry point: read the user's password and print the generated login parameter.
if __name__ == '__main__':
    password = input('请输入用户密码')
    kxd = KaiXinDai(password)
    kxd.make_param()
45 |
--------------------------------------------------------------------------------
/其他实战/【开鑫贷】登陆参数生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【微信】登录参数生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-10 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【微信】登录参数生成/make_pwd.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-22 Python: 3.7
4 | import execjs.runtime_names
5 |
6 |
class WeXin(object):
    """
    Generate the encrypted form of a WeChat (mp.weixin.qq.com) login password.
    """

    def __init__(self):
        self.url = 'https://mp.weixin.qq.com/?token=&lang=zh_CN'
        # Report the JavaScript runtime PyExecJS selected.
        print('引擎', execjs.get().name)

    @staticmethod
    def make_pwd(pwd):
        """Print the result of ``make_pwd`` in encryp.js (read from the working directory) for *pwd*."""
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()

        encrypted = execjs.compile(source).call("make_pwd", pwd)
        print(encrypted)
23 |
24 |
# Script entry point: encrypt a placeholder password as a demonstration.
if __name__ == '__main__':
    pdd = WeXin()
    pdd.make_pwd('密码')
28 |
29 |
--------------------------------------------------------------------------------
/其他实战/【房价】房价获取/README.md:
--------------------------------------------------------------------------------
1 | # 概述
2 | 这不是一个完整的项目,是测试demo,可以获取区域内在售房产单套价格
3 |
4 |
5 |
6 | **代码只是测试了一个最新销售小区中的一栋楼的在售楼房价格**
7 |
8 | 如果需要更完整的,就联系作者
--------------------------------------------------------------------------------
/其他实战/【房价】房价获取/__pycache__/util.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【房价】房价获取/__pycache__/util.cpython-37.pyc
--------------------------------------------------------------------------------
/其他实战/【房价】房价获取/util.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # Auth: Zok Email: 362416272@qq.com
3 | # Date: 2020/2/21
4 |
5 |
6 | from pyDes import *
7 | import base64
8 |
# The embedded secret is longer than needed; only its first 24 bytes serve as the key.
_SECRET = b'hjkiuy6754edxc32890tfhjkw23xdea'
KEY = _SECRET[:24]
# Initialisation vector passed to triple_des together with KEY.
IV = b'jhf5632s'
11 |
12 |
def des3_encrypt(s):
    """
    3DES encryption (CBC mode, PKCS5 padding) with the module's KEY and IV.
    :param s: the original string
    :return: the cipher text, Base64-encoded, as a str
    """
    cipher = triple_des(KEY, CBC, IV, pad=None, padmode=PAD_PKCS5)
    raw = cipher.encrypt(s, padmode=PAD_PKCS5)
    encoded = base64.b64encode(raw)
    return encoded.decode('utf-8')
22 |
23 |
def des3_decrypt(s):
    """
    3DES decryption (CBC mode, PKCS5 padding) with the module's KEY and IV.
    :param s: the Base64-encoded cipher text
    :return: the plain text, decoded as UTF-8
    """
    raw = base64.b64decode(s)
    cipher = triple_des(KEY, CBC, IV, pad=None, padmode=PAD_PKCS5)
    plain = cipher.decrypt(raw, padmode=PAD_PKCS5)
    return plain.decode('utf-8')
34 |
35 |
def decrypt_str(s):
    """Decrypt *s* with des3_decrypt and unwrap the text hidden inside the result.

    The decrypted string is ``<payload><6-character trailer>``. The trailer, once its
    "==" is removed, is Base64 for ``"<a>_<b>"``: *a* characters are dropped from the
    front of the payload and *b* from its end. What remains is reversed Base64; the
    markers "##" and "{@mk7}" are removed from the decoded text.
    """
    info = des3_decrypt(s)
    payload, trailer = info[:-6], info[-6:]
    offsets = base64.b64decode(trailer.replace("==", "")).decode().split("_")
    trimmed = payload[int(offsets[0]):]
    trimmed = trimmed[: len(trimmed) - int(offsets[1])]
    decoded = base64.b64decode(trimmed[::-1]).decode('utf-8')
    return decoded.replace("##", "").replace("{@mk7}", "")
45 |
46 |
def make_str(enB):
    """
    Re-implementation of the app's string-scrambling step.

    The character codes of *enB* are summed and the sum modulo the length gives a
    pivot index. Characters at equal distance on either side of the pivot are then
    swapped, as far out as both positions stay inside the string; this is the same
    as reversing the widest slice centred on the pivot.

    Java reference for the summing step:
        for (byte item : enB.getBytes("UTF-8")) {
            sumResult = Long.valueOf(sumResult.longValue() + ((long) item));
        }

    :param enB: the text to scramble
    :return: the scrambled text; an empty input is returned unchanged
    """
    if not enB:
        # Nothing to scramble; also avoids the modulo-by-zero below.
        return enB
    # NOTE(review): ord() matches the Java UTF-8 byte sum only for ASCII input -
    # confirm that non-ASCII text is never passed in.
    pivot = sum(ord(ch) for ch in enB) % len(enB)
    # Number of swappable pairs: limited by the shorter side of the pivot.
    reach = min(pivot, len(enB) - 1 - pivot)
    start, stop = pivot - reach, pivot + reach + 1
    return enB[:start] + enB[start:stop][::-1] + enB[stop:]
72 |
73 |
def rep(source, index, rep_str):
    """
    Port of the Java-side replacement helper.
    :return: *source* with the character at *index* replaced by *rep_str*
    """
    before = source[:index]
    after = source[index + 1:]
    return before + rep_str + after
81 |
82 |
# Demo entry point: decrypt a sample payload and show the result
# (the value used to be computed and silently thrown away).
if __name__ == '__main__':
    print(decrypt_str("AaDaKV8GxE77rIScVyq7E0rebiFQjhrkq8PUcmR8A22NHhAW58pQkQ=="))
85 |
--------------------------------------------------------------------------------
/其他实战/【房天下】自动登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-26 Python: 3.7
4 |
5 | import execjs.runtime_names
6 | import requests
7 |
8 |
class Fang:
    """
    Automatic login for Fang.com.
    https://passport.fang.com/
    """

    def __init__(self, user, pwd):
        self.user, self.pwd = user, pwd
        self.api = 'https://passport.fang.com/login.api'
        self.js = None  # compiled encryp.js context, filled in by js_init()
        self.js_init()

    def js_init(self):
        """Report the JavaScript runtime in use, then compile encryp.js into ``self.js``."""
        print('引擎', execjs.get().name)
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
        self.js = execjs.compile(source)

    def login(self):
        """POST the account and the ``getPwd``-encrypted password; print the reply body and cookies."""
        encrypted = self.js.call('getPwd', self.pwd)
        form = {
            'uid': self.user,
            'pwd': encrypted,
            'Service': 'soufun-passport-web',
            'AutoLogin': '1'
        }
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
            'Origin': 'https://passport.fang.com',
            'Referer': 'https://passport.fang.com/',
        }
        response = requests.post(self.api, data=form, headers=request_headers)
        print(response.text)
        print(response.cookies)
42 |
43 |
# Script entry point: read the Fang.com account and password, then attempt the login.
if __name__ == '__main__':
    username = input('输入房天下账号')
    password = input('输入密码')
    f = Fang(username, password)
    f.login()
49 |
--------------------------------------------------------------------------------
/其他实战/【房天下】自动登录/ok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【房天下】自动登录/ok.png
--------------------------------------------------------------------------------
/其他实战/【新浪微博】密码解密/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-26 Python: 3.7
4 |
5 |
6 | import execjs
7 | import requests
8 | import json
9 | import re
10 |
11 |
def Get_parameters():
    """Fetch the values needed to encrypt a Weibo password.

    Weibo encrypts two login fields, the user name and the password. The user
    name is Base64-encoded; only the password side is handled here, which needs
    the ``pubkey``, ``servertime`` and ``nonce`` returned by the prelogin call.

    :return: tuple ``(pubkey, servertime, nonce)``, or None when the request or
             the parsing of its reply fails (the error is printed)
    """
    url = "https://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=MTc3MjM1NzI1OTA%3D&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.19)&_=1574300620782"

    headers = {
        'User-Agent': 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
        'Host': 'login.sina.com.cn',
        'Referer': 'https://www.weibo.com/login.php',
    }

    try:
        res = requests.get(url=url, headers=headers)
        # The reply is JSONP: sinaSSOController.preloginCallBack({...}).
        # A raw string keeps "\(" a regex escape instead of an invalid string escape.
        wrapped = re.search(r'sinaSSOController\.preloginCallBack\((.*?)\)', res.text, re.S)
        if wrapped is None:
            raise ValueError('prelogin reply is not wrapped in preloginCallBack(...)')
        new_data = json.loads(wrapped.group(1))
        servertime = new_data.get('servertime')
        nonce = new_data.get('nonce')
        pubkey = new_data.get('pubkey')
    except (requests.RequestException, ValueError, AttributeError) as err:
        # ValueError also covers malformed JSON; AttributeError a non-object JSON reply.
        print('访问失败', err)
        return None
    return pubkey, servertime, nonce
37 |
38 |
def main(pwd):
    """
    Encrypt a Weibo password with ``get_up`` from execute.js.

    :param pwd: plain-text password
    :return: the value returned by ``get_up``
    :raises RuntimeError: when the prelogin parameters could not be fetched
    """
    with open('execute.js', 'r', encoding='utf-8') as f:
        js = execjs.compile(f.read())

    print('引擎', execjs.get().name)
    params = Get_parameters()
    if params is None:
        # Get_parameters prints the cause and returns None; unpacking that
        # used to fail with an unrelated-looking TypeError.
        raise RuntimeError('could not fetch pubkey/servertime/nonce from the prelogin endpoint')
    publickey, servertime, nonce = params
    return js.call('get_up', pwd, publickey, servertime, nonce)
51 |
52 |
# Script entry point: read a password and print its encrypted form.
if __name__ == '__main__':
    pwd = input('请输入密码:')
    sign = main(pwd)
    print(sign)
57 |
--------------------------------------------------------------------------------
/其他实战/【时光网】登陆参数生成/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-11 Python: 3.7
4 | import execjs.runtime_names
5 |
6 |
class MTime:
    """
    Mtime login: generate the encrypted password parameter.
    https://m.mtime.cn/#!/member/signin
    """
    def __init__(self, name, pwd):
        self.name, self.pwd = name, pwd
        self.url = 'https://m.mtime.cn/Service/callback-comm.mi/user/login.api'
        self.js = None  # compiled encryp.js context, filled in by init_js()
        self.init_js()

    def init_js(self):
        """Report the JavaScript runtime in use, then compile encryp.js into ``self.js``."""
        print('引擎', execjs.get().name)
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
        self.js = execjs.compile(source)

    def make_pwd(self):
        """Print the result of ``get_pwd`` in encryp.js for the stored password."""
        encrypted = self.js.call('get_pwd', self.pwd)
        print(encrypted)
26 |
27 |
# Script entry point: read the credentials and print the encrypted password.
if __name__ == '__main__':
    username = input('请输入用户名')
    password = input('输入密码')
    mt = MTime(username, password)
    mt.make_pwd()
33 |
--------------------------------------------------------------------------------
/其他实战/【易通贷】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【易通贷】自动登录/auto_login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-26 Python: 3.7
4 |
5 | import requests
6 | import execjs.runtime_names
7 |
8 |
class YDT(object):
    """
    Automatic login for etongdai.
    """

    def __init__(self, user, pwd):
        self.user, self.pwd = user, pwd
        self.url = 'https://app.etongdai.com/login/verifylogin'
        # Report the JavaScript runtime PyExecJS selected.
        print('引擎', execjs.get().name)

    @staticmethod
    def make_pwd(pwd):
        """Return the result of ``make_js`` in encryp.js (read from the working directory) for *pwd*."""
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
        return execjs.compile(source).call("make_js", pwd)

    def make_data(self):
        """Return the login form fields, with the password already encrypted."""
        return {
            'loginName': self.user,
            'check': 'on',
            'next': 'null',
            'password': self.make_pwd(self.pwd),
        }

    def login(self):
        """POST the login form and print the reply body decoded as UTF-8."""
        response = requests.post(self.url, data=self.make_data())
        body = response.content.decode('utf-8')
        print(body)
41 |
42 |
# Script entry point: read the etongdai account and password, then attempt the login.
if __name__ == '__main__':
    username = input('请输入 易通贷账号')
    password = input('请输入 易通贷密码')
    ydt = YDT(username, password)
    ydt.login()
48 |
49 |
50 |
--------------------------------------------------------------------------------
/其他实战/【汽车之家】参数解密/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-26 Python: 3.7
4 |
5 | import execjs
6 |
7 |
def main(pwd):
    """Return the result of ``hex_md5`` in execute.js for *pwd*.

    Only the password parameter is covered; other request parameters are left
    to the caller.
    :param pwd: plain-text password
    :return: the value returned by ``hex_md5``
    """
    with open('execute.js', 'r', encoding='utf-8') as script:
        source = script.read()
    context = execjs.compile(source)

    print('引擎', execjs.get().name)

    return context.call('hex_md5', pwd)
20 |
21 |
# Script entry point: read a password and print its encrypted form.
if __name__ == '__main__':
    pwd = input('请输入你的密码:')
    print(main(pwd))
25 |
--------------------------------------------------------------------------------
/其他实战/【满级网】自动登录/auto_login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-26 Python: 3.7
4 |
5 | import requests
6 | import base64
7 | from Crypto.Cipher import PKCS1_v1_5 as Cipher_pksc1_v1_5
8 | from Crypto.PublicKey import RSA
9 |
10 |
class YX(object):
    """
    Automatic login for manjiwang, official site www.manjiwang.com
    http://www.manjiwang.com/Logins/BuyerLogin
    """

    def __init__(self, user, pwd):
        self.user = user
        self.pwd = pwd
        self.url = 'http://www.manjiwang.com/Logins/BuyerLogin'
        # Sent with the login request so it carries a browser User-Agent.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
            'Host': 'www.manjiwang.com',
        }
        # RSA public key (PEM) used to encrypt the password.
        self.public_key = """-----BEGIN PUBLIC KEY-----
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDC4wHerJc4BSst20Zb07lY9LeZss4OEEhe+SrnLyYy8hGquX/aTQNn+5wnV/+8ierKPgqPGIXPf1ZRww5/6yON+O7dAfJ7BRx85HneIWqwPCZToLck8DN8UXsBuXLMcG7tfMunnnZKenrPsAslN0eKvkYkvz4EPGdvmPwz0NCKXQIDAQAB
-----END PUBLIC KEY-----
"""

    def make_pwd(self):
        """Return the password RSA-encrypted (PKCS#1 v1.5) with ``self.public_key``, Base64-encoded."""
        rsa_key = RSA.importKey(self.public_key)
        cipher = Cipher_pksc1_v1_5.new(rsa_key)
        cipher_text = base64.b64encode(cipher.encrypt(self.pwd.encode()))
        return cipher_text.decode()

    def make_data(self):
        """Return the login form fields, with the password already encrypted."""
        return {
            'account': self.user,
            'password': self.make_pwd(),
            'returnUrl': '/'
        }

    def login(self):
        """POST the login form and print the reply body and cookies."""
        # self.headers used to be built but never sent with the request.
        response = requests.post(self.url, data=self.make_data(), headers=self.headers)
        print(response.text)
        print(response.cookies)
51 |
52 |
# Script entry point: read the account and password, then attempt the login.
if __name__ == '__main__':
    username = input('请输入账号')
    password = input('密码')
    yx = YX(username, password)
    yx.login()
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/其他实战/【百度】wap端sig生成/make_sig.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2020-01-13 Python: 3.7
4 |
5 | """
6 | wap端 sig 参数生成
7 | 应水友需求,帮忙弄的
8 | 需要 V8 引擎!
9 | """
10 |
11 | import execjs
12 | import os
13 |
# Show which JavaScript runtime PyExecJS selected (the module docstring asks for a V8 engine).
print(execjs.get().name)


# Locate v3_update.js next to this file. abspath() matters: when __file__ is a bare
# relative name (e.g. "python make_sig.py" on older interpreters) dirname() is '',
# and the former '' + '/v3_update.js' pointed at the filesystem root.
js_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'v3_update.js')
with open(js_path, encoding='utf-8') as f:
    js = execjs.compile(f.read())


# dv can stay fixed; it was generated from some random parameters.
dv = 'tk0.48553508531670751578885709447.0@mmy0VdnCHg9mlXM-7ZM-tbvB8YHXK3MIEg9WNa8V3x9Cqa5kqgOXcFOjca5BJWOB7eNIzY5k9j8VNKUk0~9F~~5rOiHXvivmzzHjJFMXubOG~W8VRln6~l9k0g9mlXM-7ZM-tbvB8YHXK3MIEg9WH~9V7x9Cql5kqgOXcFOjca5BJWOB7eNIzY5k9-9CRWUq__dy0ov8Cpy5k9j8S~W8Cpz9SlXM-7ZM-tbH-JSMIYaUktanm~F9VEg9WEj8VRgOXcFOjca5BJWOB7eNIzYUk0~9kHg9C9~5kEF8WqW9mlx-vvLwvB87Tr4hByj9G~F5kHyGynvrg~5Vty8CEW8Cqy8C9l8VH~8WEl8CHynkRz8WqK8kt-5Vq_jy~56JeOrJXLIKYOq__Hyr9m~~5k0K9k9g9WHj5k0K9Vqg9Cqy9m~lnCp~5k0K9Vqg9Cqa9q__'
username = '这是测试'  # user name (not passed to v3test below)
s_code = 'ilvw'  # captcha text
verifystring = 'jxOb3456654e9d67a5c02ab155fe9012fb44e5b90ae9b01ca02'  # returned by the home page

result = js.call('v3test', dv, s_code, verifystring)

print(result)
--------------------------------------------------------------------------------
/其他实战/【百度】网页找回密码/__pycache__/header.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【百度】网页找回密码/__pycache__/header.cpython-37.pyc
--------------------------------------------------------------------------------
/其他实战/【百度】网页找回密码/header.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-12-23 Python: 3.7
4 |
# Values shared by every header set below.
UA = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
REFERER = 'https://passport.baidu.com/?getpassindex'
LANGUAGE = 'zh-CN,zh;q=0.9'
CONNECTION = 'keep-alive'

_ACCEPT_HTML = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'


def _same_origin_headers(accept):
    """Return a fresh header dict for a same-origin, no-cors request with the given Accept value."""
    return {
        'Connection': CONNECTION,
        'User-Agent': UA,
        'Accept': accept,
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'no-cors',
        'Referer': REFERER,
        'Accept-Language': LANGUAGE,
    }


headers_get_phone = _same_origin_headers('*/*')


# NOTE(review): "Content-Lengt" is spelled this way in the original and is kept
# as-is - confirm whether the spelling (and the fixed '999') is intentional.
headers_token = {
    "Connection": CONNECTION,
    "Content-Lengt": '999',
    "Cache-Control": 'max-age=0',
    "Origin": "https://passport.baidu.com",
    "Upgrade-Insecure-Requests": '1',
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": UA,
    "Sec-Fetch-User": "?1",
    "Accept": _ACCEPT_HTML,
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-Mode": "navigate",
    "Referer": REFERER,
    "Accept-Language": LANGUAGE,
}

headers_img = _same_origin_headers('image/webp,image/apng,image/*,*/*;q=0.8')

headers_bds_token = {
    'Connection': CONNECTION,
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': UA,
    'Accept': _ACCEPT_HTML,
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-Mode': 'navigate',
    'Accept-Language': LANGUAGE,
}

headers_verify_str = _same_origin_headers('*/*')
66 |
--------------------------------------------------------------------------------
/其他实战/【百度】网页找回密码/验证码.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【百度】网页找回密码/验证码.png
--------------------------------------------------------------------------------
/其他实战/【百度】翻译/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-11-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【百度】翻译/translate.js:
--------------------------------------------------------------------------------
// Seed string read by a(): it is split at the "." into two numbers that enter the sign computation.
var i = "320305.131321201"
2 |
3 |
// Fold the operation string o into the number r, three characters per step:
//   o[pos]     "+" -> add the shifted value (result truncated to 32 bits), otherwise XOR it
//   o[pos + 1] "+" -> unsigned right shift of r, otherwise left shift
//   o[pos + 2] shift amount: a digit, or a letter from "a" upwards meaning 10, 11, ...
function n(r, o) {
    var limit = o.length - 2;
    for (var pos = 0; pos < limit; pos += 3) {
        var code = o.charAt(pos + 2);
        var amount = code >= "a" ? code.charCodeAt(0) - 87 : Number(code);
        var shifted;
        if (o.charAt(pos + 1) === "+") {
            shifted = r >>> amount;
        } else {
            shifted = r << amount;
        }
        if (o.charAt(pos) === "+") {
            r = (r + shifted) & 4294967295;
        } else {
            r = r ^ shifted;
        }
    }
    return r;
}
13 |
// Compute the sign string for the query text r, using the seed i and the mixer n().
// Returns "<w>.<w ^ S>", where S is the integer before the "." in the seed.
function a(r) {
    // Texts longer than 30 units are shortened to first 10 + middle 10 + last 10.
    // Surrogate pairs are kept whole while counting and slicing.
    var t = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === t) {
        var a = r.length;
        a > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(a / 2) - 5, 10) + r.substr(-10, 10))
    } else {
        // Split on the pairs, then interleave single characters and pairs back into u.
        // split("") already yields a fresh array; the former wrapper call e(...) referred
        // to a helper that does not exist in this file and threw a ReferenceError.
        for (var C = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), h = 0, f = C.length, u = []; f > h; h++)
            "" !== C[h] && u.push.apply(u, C[h].split("")),
            h !== f - 1 && u.push(t[h]);
        var g = u.length;
        g > 30 && (r = u.slice(0, 10).join("") + u.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + u.slice(-10).join(""))
    }
    // d spells "gtk". i is always set in this file, so the fallback that reads
    // o.common[d] (o is not defined here) is never evaluated.
    var l = void 0
        , d = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
    l = null !== i ? i : (i = o.common[d] || "") || "";
    // S and s are the two halves of the seed; c collects the UTF-8 bytes of r.
    for (var m = l.split("."), S = Number(m[0]) || 0, s = Number(m[1]) || 0, c = [], v = 0, F = 0; F < r.length; F++) {
        var p = r.charCodeAt(F);
        128 > p ? c[v++] = p : (2048 > p ? c[v++] = p >> 6 | 192 : (55296 === (64512 & p) && F + 1 < r.length && 56320 === (64512 & r.charCodeAt(F + 1)) ? (p = 65536 + ((1023 & p) << 10) + (1023 & r.charCodeAt(++F)),
            c[v++] = p >> 18 | 240,
            c[v++] = p >> 12 & 63 | 128) : c[v++] = p >> 12 | 224,
            c[v++] = p >> 6 & 63 | 128),
            c[v++] = 63 & p | 128)
    }
    // A is the operation string "+-a^+6" (applied after every byte), b is "+-3^+b+-f" (applied once at the end).
    for (var w = S, A = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), b = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), D = 0; D < c.length; D++)
        w += c[D],
        w = n(w, A);
    // Final mix: XOR with the second seed half, make non-negative, keep six decimal digits.
    return w = n(w, b),
        w ^= s,
        0 > w && (w = (2147483647 & w) + 2147483648),
        w %= 1e6,
        w.toString() + "." + (w ^ S)
}
--------------------------------------------------------------------------------
/其他实战/【百度】翻译/translation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-26 Python: 3.7
4 |
5 | # 本代码参考 github作者:CriseLYJ
6 |
7 | import requests
8 | import js2py
9 |
10 |
class FanYiSpider(object):
    """
    Baidu Translate client: signs the query with translate.js and posts it to the basetrans endpoint.
    """
    context = js2py.EvalJs()  # JavaScript evaluator, created once at class level

    def __init__(self, query):
        # Text to translate, target endpoint and the request headers.
        self.query = query
        self.url = "https://fanyi.baidu.com/basetrans"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Mobile Safari/537.36",
            "Referer": "https://fanyi.baidu.com/",
            "Cookie": "BAIDUID=714BFAAF02DA927F583935C7A354949A:FG=1; BIDUPSID=714BFAAF02DA927F583935C7A354949A; PSTM=1553390486; delPer=0; PSINO=5; H_PS_PSSID=28742_1463_21125_18559_28723_28557_28697_28585_28640_28604_28626_22160; locale=zh; from_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lvt_afd111fa62852d1f37001d1f980b6800=1553658863,1553766321,1553769980,1553770442; Hm_lpvt_afd111fa62852d1f37001d1f980b6800=1553770442; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1553766258,1553766321,1553769980,1553770442; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1553770442",
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "https://fanyi.baidu.com",
            "X-Requested-With": "XMLHttpRequest",
        }

    def make_sign(self):
        """Load translate.js into the evaluator and return ``a(query)``, the request sign."""
        with open("translate.js", "r", encoding="utf-8") as script:
            source = script.read()
        self.context.execute(source)

        return self.context.a(self.query)

    def make_data(self, sign):
        """Return the form fields for an English-to-Chinese translation request."""
        return {
            "query": self.query,
            "from": "en",
            "to": "zh",
            "token": "6f5c83b84d69ad3633abdf18abcb030d",
            "sign": sign
        }

    def get_content(self, data):
        """POST *data* and return the ``dst`` field of the first entry under ``trans`` in the JSON reply."""
        response = requests.post(
            url=self.url,
            headers=self.headers,
            data=data
        )
        payload = response.json()
        return payload["trans"][0]["dst"]

    @property
    def run(self):
        """Sign the query, send it and return the translated text (performs the request on access)."""
        signed_form = self.make_data(self.make_sign())
        return self.get_content(signed_form)
61 |
62 |
# Script entry point: read the text to translate and print the translation.
if __name__ == '__main__':
    key = input("输入翻译内容:")
    translate = FanYiSpider(key)
    print(translate.run)
67 |
--------------------------------------------------------------------------------
/其他实战/【百度】自动登录/README.md:
--------------------------------------------------------------------------------
1 | # 解密过程参考博客
2 |
3 | [博客链接](https://www.zhangkunzhi.com/?p=216)
--------------------------------------------------------------------------------
/其他实战/【百度】自动登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-05 Python: 3.7
4 |
5 | """
6 | 百度登陆参数比较多
7 |
8 | 这里是密码加密生成器
9 | """
10 |
11 | import js2py
12 |
13 |
class PingDuoDuoSpider(object):
    """
    Produce the encrypted form of a Baidu login password.
    """
    context = js2py.EvalJs()  # JavaScript evaluator, created once at class level

    def __init__(self):
        # Load encryp.js (from the working directory) into the evaluator.
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
        self.context.execute(source)

    def make(self, password):
        """Print the result of the script's ``test`` function for *password*."""
        encrypted = self.context.test(password)
        print(encrypted)
28 |
29 |
# Script entry point: read a password and print its encrypted form.
if __name__ == '__main__':
    pdd = PingDuoDuoSpider()

    key = input("输入密码")
    pdd.make(key)
35 |
--------------------------------------------------------------------------------
/其他实战/【百度街拍】图片下载/get_image.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-05 Python: 3.7
4 |
5 | import requests, time
6 | from urllib.parse import urlencode
7 | from urllib.request import urlretrieve
8 |
9 |
def getPage(offset):
    """Fetch one page of Baidu image-search results for the keyword in the query.

    :param offset: value of the ``pn`` query parameter (index of the first result)
    :return: the decoded JSON reply, or None when the request fails, the status
             is not 200, or the body is not valid JSON
    """
    data = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': '街拍',
        'cl': '2',
        'lm': '-1',
        'ie': 'utf - 8',
        'oe': 'utf - 8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',
        'hd': '',
        'latest': '',
        'copyright': '',
        'word': '街拍',
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': offset,
        'rn': '30',
        'gsm': '1e',
        '1551789143500': '',
    }
    # NOTE(review): the Accept-Language value below is the header's own name, which
    # looks like a copy-paste slip; kept unchanged - confirm the intended value.
    headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Encoding': 'deflate, br',
        'Accept-Language': 'Accept-Language',
        'Connection': 'keep-alive',
        'Cookie': 'BDqhfp=%E8%A1%97%E6%8B%8D%26%260-10-1undefined%26%260%26%261; BIDUPSID=7CA5F033CA22949F5FB6110DBC5DC1EE; BAIDUID=6DDE5BAA44763FD6C7CA84401CB19F36:FG=1; indexPageSugList=%5B%22%E8%A1%97%E6%8B%8D%22%5D; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; uploadTime=1551768107224; userFrom=null; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; firstShowTip=1; cleanHistoryStatus=0',
        'Host': 'image.baidu.com',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%A1%97%E6%8B%8D&oq=%E8%A1%97%E6%8B%8D&rsp=-1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6735.400 QQBrowser/10.2.2328.400',
        'X-Requested-With': 'XMLHttpRequest',
    }
    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
    try:
        # The query already travels in the URL; it is no longer duplicated as a GET body.
        res = requests.get(url, headers=headers)
    except requests.RequestException:
        return None
    res.encoding = 'utf-8'  # decode the page as UTF-8
    if res.status_code != 200:
        return None
    try:
        return res.json()
    except ValueError:
        # The body was not valid JSON; callers already treat None as "no data".
        return None
67 |
68 |
def getImage(json):
    """Yield the image URL and page title of every usable search result.

    :param json: decoded response as returned by ``getPage`` (a dict), or
        ``None`` when the request failed.
    :return: generator of ``{'image': <url>, 'title': <title>}`` dicts.
        Nothing is yielded when ``json`` is not a dict or has no ``data``.
    """
    if not isinstance(json, dict):
        return
    for item in json.get('data') or []:
        # Entries without an image URL (e.g. an empty dict) cannot be
        # downloaded, so they are skipped instead of being yielded as None.
        if not isinstance(item, dict) or not item.get('hoverURL'):
            continue
        yield {
            'image': item.get('hoverURL'),
            'title': item.get('fromPageTitleEnc'),
        }
81 |
82 |
def saveImage(item):
    """Download one image into ``./pic/`` using its title as the file name.

    :param item: dict with the keys ``'image'`` (URL) and ``'title'``, as
        produced by ``getImage``.
    :return: always ``None``; a failed download is reported on stdout
        instead of being silently ignored.
    """
    import os
    import re

    image_url = item.get('image')  # URL of the picture
    if not image_url:
        return None
    # Path separators and other characters that are illegal in file names
    # would make the save fail or write outside ./pic/.
    file_name = re.sub(r'[\\/:*?"<>|\r\n]', '_', str(item.get('title')))
    # The target directory is created on demand; previously a missing
    # ./pic made every download fail without any message.
    os.makedirs('./pic', exist_ok=True)
    try:
        urlretrieve(image_url, os.path.join('./pic', file_name + '.jpg'))
    except (OSError, ValueError) as err:
        # URLError/HTTPError are OSError subclasses; ValueError covers
        # malformed URLs.
        print('download failed:', image_url, err)
    return None
93 |
94 |
def main(offset):
    """Fetch the result page at ``offset``, then print and save each image."""
    page = getPage(offset)
    for entry in getImage(page):
        print(entry)
        saveImage(entry)
101 |
102 |
if __name__ == '__main__':
    # NOTE: walking five pages like this does not really work, because the
    # ``gsm`` value in the request data changes on every request.
    for page_index in range(5):
        main(offset=page_index * 30)
        time.sleep(1)
107 |
--------------------------------------------------------------------------------
/其他实战/【移动】登录参数生成/MakeParam.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-05 Python: 3.7
4 |
5 | import execjs.runtime_names
6 |
7 |
class MakeParam:
    """
    Encrypted login-parameter generator for China Mobile mail.
    Page: https://mail.10086.cn/
    """

    def __init__(self, name, pwd):
        self.name, self.pwd = name, pwd
        self.js = None  # compiled JS context, filled in by init_js()
        self.init_js()

    def init_js(self):
        """Print the active JS runtime and compile ``encryp.js``."""
        runtime = execjs.get()
        print('引擎', runtime.name)
        with open("encryp.js", "r", encoding="utf-8") as script:
            source = script.read()
        self.js = execjs.compile(source)

    def mk_params(self):
        """Compute the three login parameters through the JS context and print them."""
        call = self.js.call
        values = {
            'cguid': call("customerGetCGUID"),
            '_': call('sha1', self.name),
            'word': call('calcDigest', self.pwd),
        }
        template = """
cguid: {cguid}
_: {_}
password: {word}
"""
        print(template.format(**values))
36 |
37 |
if __name__ == '__main__':
    # Read the credentials from the console and print the generated parameters.
    login_name = input('输入用户名')
    login_pwd = input('输入密码')
    MakeParam(login_name, login_pwd).mk_params()
43 |
--------------------------------------------------------------------------------
/其他实战/【移动】登录参数生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【移动】登录参数生成/encryp.js:
--------------------------------------------------------------------------------
// SHA-1 digest of the string `a`, returned as 40 lowercase hex characters.
// Only the low 8 bits of every character code are hashed (`& 255`).
function sha1(a) {
    // Add two 32-bit integers with wrap-around, working on 16-bit halves.
    function b(a, b) {
        var c = (a & 65535) + (b & 65535);
        return (a >> 16) + (b >> 16) + (c >> 16) << 16 | c & 65535
    }

    // Pack the characters big-endian into an array of 32-bit words.
    for (var c = [], d = 0; d < 8 * a.length; d += 8)
        c[d >> 5] |= (a.charCodeAt(d / 8) & 255) << 24 - d % 32;
    a = 8 * a.length; // `a` is reused: message length in bits
    c[a >> 5] |= 128 << 24 - a % 32; // append the single padding bit
    c[(a + 64 >> 9 << 4) + 15] = a; // bit length goes into the last word of the final block
    a = Array(80); // `a` is reused again: 80-word message schedule
    // d, e, f, g, k hold the five running hash words; h walks the 16-word blocks.
    for (var d = 1732584193, e = -271733879, f = -1732584194, g = 271733878, k = -1009589776, h = 0; h < c.length; h +=
        16) {
        // l, m, n, p, q keep the hash words from before this block.
        for (var l = d, m = e, n = f, p = g, q = k, j = 0; 80 > j; j++) {
            // First 16 schedule words come from the block, the rest are the
            // XOR of four earlier words rotated left by one bit.
            a[j] = 16 > j ? c[h + j] : (a[j - 3] ^ a[j - 8] ^ a[j - 14] ^ a[j - 16]) << 1 | (a[j - 3] ^ a[j - 8] ^ a[j - 14] ^
                a[j - 16]) >>> 31;
            // r = rotl(d, 5) + round function(e, f, g) + k + a[j] + round constant;
            // the function and constant change every 20 rounds.
            var r = b(b(d << 5 | d >>> 27, 20 > j ? e & f | ~e & g : 40 > j ? e ^ f ^ g : 60 > j ? e & f | e & g | f & g : e ^
                    f ^ g), b(b(k, a[j]), 20 > j ? 1518500249 : 40 > j ? 1859775393 : 60 > j ? -1894007588 : -899497514)),
                // Shift the working words down by one position.
                k = g,
                g = f,
                f = e << 30 | e >>> 2,
                e = d,
                d = r
        }
        // Fold the block result into the running hash.
        d = b(d, l);
        e = b(e, m);
        f = b(f, n);
        g = b(g, p);
        k = b(k, q)
    }
    c = [d, e, f, g, k];
    a = "";
    // Emit every byte of the five words as two hex digits, high nibble first.
    for (d = 0; d < 4 * c.length; d++)
        a += "0123456789abcdef".charAt(c[d >> 2] >> 8 * (3 - d % 4) + 4 & 15) + "0123456789abcdef".charAt(c[d >> 2] >> 8 *
            (3 - d % 4) & 15);
    return a
}
39 |
40 |
// Add two 32-bit integers with wrap-around by summing the low and high
// 16-bit halves separately. Used by calcDigest below.
function a(a, c) {
    var d = (a & 65535) + (c & 65535);
    return (a >> 16) + (c >> 16) + (d >> 16) << 16 | d & 65535
}
45 |
// Digest of the string `b`, returned as 40 lowercase hex characters.
// Same round structure and constants as sha1() above, but the word array
// is pre-sized and zero-filled and character codes are NOT masked to 8
// bits. Assigned without `var`, so it becomes a global; relies on the
// global adder a().
calcDigest = function (b) {
    // c = number of 16-word blocks needed for message plus padding.
    for (var c = (b.length + 8 >> 6) + 1, d = Array(16 * c), e = 0; e < 16 * c; e++)
        d[e] = 0;
    // Pack the characters big-endian, four per word.
    for (e = 0; e < b.length; e++)
        d[e >> 2] |= b.charCodeAt(e) << 24 - 8 * (e & 3);
    d[e >> 2] |= 128 << 24 - 8 * (e & 3); // append the single padding bit
    d[16 * c - 1] = 8 * b.length; // message length in bits goes into the last word
    b = Array(80); // `b` is reused: 80-word message schedule
    // c, e, f, g, k hold the five running hash words; h walks the 16-word blocks.
    for (var c = 1732584193, e = -271733879, f = -1732584194, g = 271733878, k = -1009589776, h = 0; h < d.length; h +=
        16) {
        // l, m, n, p, q keep the hash words from before this block.
        for (var l = c, m = e, n = f, p = g, q = k, j = 0; 80 > j; j++) {
            // First 16 schedule words come from the block, the rest are the
            // XOR of four earlier words rotated left by one bit.
            b[j] = 16 > j ? d[h + j] : (b[j - 3] ^ b[j - 8] ^ b[j - 14] ^ b[j - 16]) << 1 | (b[j - 3] ^ b[j - 8] ^ b[j - 14] ^
                b[j - 16]) >>> 31;
            // r = rotl(c, 5) + round function(e, f, g) + k + b[j] + round constant.
            var r = a(a(c << 5 | c >>> 27, 20 > j ? e & f | ~e & g : 40 > j ? e ^ f ^ g : 60 > j ? e & f | e & g | f & g : e ^
                    f ^ g), a(a(k, b[j]), 20 > j ? 1518500249 : 40 > j ? 1859775393 : 60 > j ? -1894007588 : -899497514)),
                // Shift the working words down by one position.
                k = g,
                g = f,
                f = e << 30 | e >>> 2,
                e = c,
                c = r
        }
        // Fold the block result into the running hash.
        c = a(c, l);
        e = a(e, m);
        f = a(f, n);
        g = a(g, p);
        k = a(k, q)
    }
    d = [c, e, f, g, k];
    b = "";
    // Emit every byte of the five words as two hex digits, high nibble first.
    for (c = 0; c < 4 * d.length; c++)
        b += "0123456789abcdef".charAt(d[c >> 2] >> 8 * (3 - c % 4) + 4 & 15) + "0123456789abcdef".charAt(d[c >> 2] >> 8 *
            (3 - c % 4) & 15);
    return b
}
80 |
81 |
// Build a numeric id string from the current local time plus a random
// suffix: hours (2 digits), minutes (2), seconds (2), milliseconds (3),
// then Math.ceil(9999 * Math.random()) padded to 4 digits.
function customerGetCGUID() {
    // Left-pad the number `a` with zeros to `b` digits (2 when `b` is omitted).
    function a(a, b) {
        // e = how many zeros are missing; `a | 1` keeps Math.log away from 0.
        var e = (b || 2) - (1 + Math.floor(Math.log(a | 1) / Math.LN10 + 1E-15));
        return Array(e + 1).join("0") + a
    }

    var b = new Date;
    return "" + a(b.getHours()) + a(b.getMinutes()) + a(b.getSeconds()) + a(b.getMilliseconds(), 3) + a(Math.ceil(9999 *
        Math.random()), 4)
}
--------------------------------------------------------------------------------
/其他实战/【移动】登录参数生成/make_params.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【移动】登录参数生成/make_params.png
--------------------------------------------------------------------------------
/其他实战/【空中网】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【空中网】自动登录/encryp.js:
--------------------------------------------------------------------------------
// Encrypt `str` with the key string `pwd` and return the result as hex.
// Every character of `str` is XOR-ed with a byte taken from a linear
// congruential sequence seeded from `pwd` and a random salt; the salt is
// appended to the output as 8 hex digits.
// Returns null when `pwd` is null/empty or the derived multiplier is < 2.
function mk_pwd (str, pwd) {
    if (pwd == null || pwd.length <= 0) {
        return null
    }
    // prand starts as the decimal character codes of the key, concatenated.
    ;var prand = "";
    for (var i = 0; i < pwd.length; i++) {
        prand += pwd.charCodeAt(i).toString()
    }
    // The multiplier is built from five digits sampled at even spacing from prand.
    ;var sPos = Math.floor(prand.length / 5);
    var mult = parseInt(prand.charAt(sPos) + prand.charAt(sPos * 2) + prand.charAt(sPos * 3) + prand.charAt(sPos * 4) + prand.charAt(sPos * 5));
    var incr = Math.ceil(pwd.length / 2);
    var modu = Math.pow(2, 31) - 1;
    if (mult < 2) {
        return null
    }
    // Random salt below 100000000, appended to the digit string.
    ;var salt = Math.round(Math.random() * 1000000000) % 100000000;
    prand += salt;
    // Shrink prand until it has at most 10 characters.
    while (prand.length > 10) {
        var a = prand.substring(0, 1);
        var b = prand.substring(10, prand.length);
        if (b.length > 10) {
            prand = b
        } else {
            prand = (parseInt(a) + parseInt(b)).toString()
        }
    }
    // First step of the generator: prand = (mult * prand + incr) mod modu.
    ;prand = (mult * prand + incr) % modu;
    var enc_chr = "";
    var enc_str = "";
    for (var i = 0; i < str.length; i++) {
        // XOR the character code with the current generator value scaled to 0..255.
        enc_chr = parseInt(str.charCodeAt(i) ^ Math.floor((prand / modu) * 255));
        // Values below 16 get a leading zero so they take two hex digits.
        if (enc_chr < 16) {
            enc_str += "0" + enc_chr.toString(16)
        } else
            enc_str += enc_chr.toString(16);
        // Not part of the else branch: the generator advances on every iteration.
        prand = (mult * prand + incr) % modu
    }
    // Append the salt as 8 zero-padded hex digits.
    ;salt = salt.toString(16);
    while (salt.length < 8)
        salt = "0" + salt;
    enc_str += salt;
    return enc_str
}
--------------------------------------------------------------------------------
/其他实战/【空中网】自动登录/spider_login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-03 Python: 3.7
4 | import re
5 | import time
6 | import requests
7 | import execjs.runtime_names
8 |
9 |
class SpiderLogin:
    """
    Login spider for Kongzhong (空中网).

    Flow: fetch the ``dc`` key from the SSO endpoint, encrypt the password
    with ``mk_pwd`` from encryp.js, then call the ajax login endpoint.
    """

    def __init__(self, user, pwd):
        self.session = requests.session()
        self.user = user
        self.pwd = pwd
        # Millisecond timestamp, sent as the ``_`` query parameter.
        self.login_time = int(round(time.time() * 1000))
        # Not read by any method of this class; kept for existing callers.
        self.url = 'https://m.wcbchina.com/login/other-login.html'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'Host': 'sso.kongzhong.com',
            'Referer': 'https://passport.kongzhong.com/login'
        }

    def use_js(self, dc):
        """Encrypt the password with ``mk_pwd`` from encryp.js.

        :param dc: key string captured by ``get_dc``.
        :return: the encrypted password, or ``None`` when the JS call fails
            (``mk_pwd`` itself also returns null for an empty key).
        """
        with open("encryp.js", "r", encoding="utf-8") as f:
            js = execjs.compile(f.read())
        try:
            return js.call("mk_pwd", self.pwd, dc)
        except Exception:
            print('js 异常')
            return None

    def auto_login(self):
        """Log in and print the response cookies, body and status.

        Stops early when the ``dc`` key or the encrypted password could not
        be obtained, instead of sending the literal text "None" as password.
        """
        from urllib.parse import quote

        login_url = 'https://sso.kongzhong.com/ajaxLogin?j=j&&type=1&service=https://passport.kongzhong.com/&username={username}&password={password}&vcode=&toSave=0&_={_time}'
        dc = self.get_dc()
        if dc is None:
            return  # get_dc() has already reported the failure
        en_pwd = self.use_js(dc)
        if en_pwd is None:
            print('密码加密失败')
            return
        # The user name is typed in by the user, so it is escaped before it
        # goes into the query string; the encrypted password is plain hex.
        target = login_url.format(username=quote(self.user, safe=''), password=en_pwd, _time=self.login_time)
        response = self.session.get(target, headers=self.headers)
        print(response.cookies)
        print(response.text)
        print(response)

    def get_dc(self):
        """Capture the ``dc`` parameter from the SSO endpoint.

        :return: the ``dc`` string, or ``None`` when it is not present in
            the response text.
        """
        target = 'https://sso.kongzhong.com/ajaxLogin?j=j&jsonp=j&service=https://passport.kongzhong.com/&_={t}'.format(
            t=self.login_time)
        response = self.session.get(target, headers=self.headers)
        found = re.search(r'"dc":"(.*?)","kzmsg', response.text)
        if found is None:
            print('dc 捕获失败')
            return None
        return found.group(1)
61 |
if __name__ == '__main__':
    # Read the credentials from the console and attempt the login.
    account = input('请输入账号')
    secret = input('密码')
    SpiderLogin(account, secret).auto_login()
67 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/README.md:
--------------------------------------------------------------------------------
1 | ```
2 | .
3 | └── MeiTuan // -------美团-------
4 | ├── get_login_cookies.py // 基于pyppeteer登陆并获取cookies
5 | ├── parse_play_areas.py // 三级区域解析器(休闲板块)
6 | ├── parse_play_info.py // 休闲会所商铺数据解析
7 | ├── parse_hotel_info.py // 酒店基础数据解析
8 | ├── parse_hotel_comments.py // 酒店评论解析
9 | ├── create_food_token.py // 餐饮页Token生成器
10 | ├── parse_food_comments.py // 获取用户评论数据
11 | └── parse_food_info.py // 解析餐馆数据
12 |
13 | ```
14 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-06-18 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/create_food_token.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-21 Python: 3.7
4 |
5 | import json, zlib, base64, time
6 |
7 |
class MakeToken():
    """
    Token generator for the Meituan restaurant-list API.
    Verified working on 2019-04-21 according to the author.
    For academic exchange only; contact the author for removal on infringement.
    """

    def __init__(self, areaId, cityName, originUrl, page):
        self.areaId, self.cityName = areaId, cityName
        self.originUrl, self.page = originUrl, page
        self.uuid = 'c6eada3ffd8e444491e9.1555472928.3.0.0'  # Demo

    def join_sign(self):
        """Return the ``sign`` value: the query string, JSON-quoted, zlib-compressed, base64-encoded."""
        # request parameters
        template = 'areaId={areaId}&cateId=0&cityName={cityName}&dinnerCountAttrId=&optimusCode=1&originUrl={originUrl}&page={page}&partner=126&platform=1&riskLevel=1&sort=&userId=&uuid={uuid}'
        query = template.format(
            areaId=self.areaId,
            cityName=self.cityName,
            originUrl=self.originUrl,
            page=self.page,
            uuid=self.uuid,
        )
        packed = zlib.compress(json.dumps(query, ensure_ascii=False).encode("utf8"))
        return base64.b64encode(packed).decode("utf8")

    @property
    def join_token(self):
        """Return the token: the compact JSON of the fields below, zlib-compressed and base64-encoded."""
        fields = {
            'rId': 100900,
            'ver': '1.0.6',
            'ts': time.time(),
            'cts': time.time() + 110,
            'brVD': [1920, 315],
            'brR': [[1920, 1080], [1920, 1057], 24, 24],
            'bI': [self.originUrl, ""],
            'mT': [],
            'kT': [],
            'aT': [],
            'tT': [],
            'aM': '',
            'sign': self.join_sign(),
        }
        compact = json.dumps(fields, separators=(',', ':'), ensure_ascii=False)
        packed = zlib.compress(compact.encode("utf8"))
        return base64.b64encode(packed).decode("utf8")
51 |
52 |
if __name__ == '__main__':
    # Sample data for a quick manual check.
    demo = MakeToken(
        areaId='4581',
        cityName='重庆',
        originUrl='http://cq.meituan.com/meishi/b4581/',
        page='1',
    )
    print(demo.join_token)
62 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/get_login_cookies.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-05-21 Python: 3.7
4 | import asyncio
5 | import json
6 |
7 | from pyppeteer import launch
8 |
9 |
class MeiTuanCookies():
    """Log in to Meituan through pyppeteer and collect the session cookies."""

    def __init__(self, username, password):
        self.login_url = 'https://passport.meituan.com/account/unitivelogin'
        self.username = username
        self.password = password

    async def star(self):
        """Open the login page, submit the credentials and print the cookies.

        :return: the cookie string produced by ``get_cookie``.
        """
        browser = await launch()
        try:
            # NOTE(review): the spelling "Incognite" is kept from the original
            # call; confirm it matches the installed pyppeteer version.
            context = await browser.createIncogniteBrowserContext()
            page = await context.newPage()
            # Hide the webdriver flag; the value persists when the page reloads.
            await page.evaluateOnNewDocument('() =>{ Object.defineProperties(navigator,'
                                             '{ webdriver:{ get: () => false } }) }')

            await page.goto(self.login_url)
            await page.type('input#login-email', self.username)
            await page.type('input#login-password', self.password)
            await page.click('input.btn')
            return await self.get_cookie(page)
        finally:
            # The browser used to be left running after the cookies were read.
            await browser.close()

    async def get_cookie(self, page):
        """
        Read the cookies of ``page`` and print them as one header-style string.

        :param page: the page to read cookies from
        :return: the cookies joined as ``name=value;`` pairs
        """
        cookies_list = await page.cookies()
        cookies = ''.join(
            '{0}={1};'.format(cookie.get('name'), cookie.get('value'))
            for cookie in cookies_list
        )
        print(cookies)
        return cookies
42 |
43 |
if __name__ == '__main__':
    # Read the credentials, then drive the login coroutine to completion.
    account = input('美团账号')
    secret = input('密码')
    client = MeiTuanCookies(account, secret)
    asyncio.get_event_loop().run_until_complete(client.star())
50 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/parse_food_comments.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-04-17 Python: 3.7
4 |
5 | import requests
6 | import json
7 | import time
8 |
9 | from urllib import parse
10 |
11 |
class ParseComments(object):
    """Fetch and print the first page of user comments of a Meituan restaurant.

    Creating an instance immediately performs the request and prints the
    result.
    """

    def __init__(self, shop_id):
        self.shop_id = shop_id

        self.get_data()

    def get_data(self):
        """Request the comment API for ``self.shop_id`` and hand the body to ``parse``."""
        url_code = self.get_originUrl()

        url = 'http://www.meituan.com/meishi/api/poi/getMerchantComment?'
        params = {
            'platform': '1',
            'partner': '126',
            'originUrl': url_code,
            'riskLevel': '1',
            'optimusCode': '1',
            'id': self.shop_id,
            'offset': '0',
            'pageSize': '10',
            'sortType': '1',
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
        }
        response = requests.get(url=url, params=params, headers=headers)
        data = response.text
        self.parse(data)

    def get_originUrl(self):
        """Return the percent-encoded URL of the restaurant page.

        NOTE(review): the value is encoded again when it is passed through
        ``params``; confirm the API expects the double encoding.
        """
        return parse.quote_plus('http://www.meituan.com/meishi/' + self.shop_id + '/')

    def parse(self, data):
        """Print every comment found in the JSON text ``data``.

        A response without ``data`` or ``comments`` (or with them set to
        null) prints nothing instead of raising.
        """
        data_dict = json.loads(data)
        comments = (data_dict.get('data') or {}).get('comments') or []
        for item in comments:
            create_time = self.parse_time(item.get('commentTime'))
            print_str = """
评论用户:{userName}
评论时间:{create_time}
评论详情:{comment}
评论id:{reviewId}
""".format(userName=item.get('userName'), comment=item.get('comment'), create_time=create_time,
           reviewId=item.get('reviewId'))
            print(print_str)

    @staticmethod
    def parse_time(timeStamp):
        """Format a 13-digit (millisecond) epoch timestamp as local ``YYYY-MM-DD HH:MM:SS``."""
        time_stamp = float(int(timeStamp) / 1000)
        time_array = time.localtime(time_stamp)
        return time.strftime("%Y-%m-%d %H:%M:%S", time_array)
67 |
68 |
if __name__ == '__main__':
    # Constructing the parser performs the request and prints the comments.
    restaurant_id = input('请输入餐馆id')
    ParseComments(restaurant_id)
72 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/parse_hotel_comments.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-06-10 Python: 3.7
4 |
5 | """
6 | 解析酒店评论
7 | """
8 |
9 | import requests
10 | import json
11 | import time
12 |
13 |
class ParseComments(object):
    """Fetch and print the first page of comments of a Meituan hotel.

    Creating an instance immediately performs the request and prints the
    result.
    """
    def __init__(self, hotel_id):
        self.hotel_id = hotel_id
        self.get_data()

    def get_data(self):
        """Request the comment API for ``self.hotel_id`` and hand the body to ``parse``."""

        url = 'https://ihotel.meituan.com/group/v1/poi/comment/' + self.hotel_id + '?'
        params = {
            'sortType': 'default',
            'noempty': '1',
            'withpic': '0',
            'filter': 'all',
            'limit': '10',
            'offset': '0',
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
        }
        response = requests.get(url=url, params=params, headers=headers)
        data = response.text
        self.parse(data)

    def parse(self, data):
        """Print every comment (and its picture list) found in the JSON text ``data``.

        A response without ``data`` or ``feedback`` (or with them set to
        null) prints nothing instead of raising.
        """
        data_dict = json.loads(data)
        feedback = (data_dict.get('data') or {}).get('feedback') or []
        for item in feedback:
            create_time = self.parse_time(item.get('replytimestamp'))
            print_str = """
评论用户:{userName}
评论时间:{create_time}
评论详情:{comment}
满意度:{scoretext}
""".format(userName=item.get('username'), comment=item.get('comment'), create_time=create_time,
           scoretext=item.get('scoretext'))
            print(print_str)
            self.parse_pic(item)

    @staticmethod
    def parse_time(timeStamp):
        """Format an epoch timestamp as local ``YYYY-MM-DD HH:MM:SS``.

        The value is passed to ``time.localtime`` unchanged, i.e. it is
        treated as seconds.
        NOTE(review): the previous docstring said "13 digits"; confirm the
        API really delivers seconds here.
        """
        time_array = time.localtime(timeStamp)
        return time.strftime("%Y-%m-%d %H:%M:%S", time_array)

    def parse_pic(self, item):
        """Print and return the picture URLs of one comment at 750px width.

        Comments whose ``picinfo`` is missing or null yield an empty list;
        entries without a ``url`` are skipped.
        """
        pic_list = [
            i.get('url').replace('w.h', '750.0')
            for i in item.get('picinfo') or []
            if i.get('url')
        ]
        print(pic_list)
        return pic_list
65 |
66 |
if __name__ == '__main__':
    # Constructing the parser performs the request and prints the comments.
    hotel_id = input('请输入酒店id')
    ParseComments(hotel_id)
70 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/parse_hotel_info.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-06-05 Python: 3.7
4 |
5 | """
6 | 解析
7 | 美团酒店店铺的基础信息
8 | 该板块信息隐藏在get请求后的js中直接用正则匹配出信息再抽取出来
9 | """
10 | import requests
11 | import re
12 | import json
13 | import time
14 |
15 |
class ParseHotelInfo(object):
    """Print the basic information of a Meituan hotel.

    The data is embedded in the page as the JavaScript assignment
    ``window.__INITIAL_STATE__=...``; it is located with a regular
    expression and decoded as JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    }

    def __init__(self, p_id):
        self.p_id = p_id

    def go_to_hotel(self):
        """Download the hotel page and print the parsed information."""
        # Today's date is used both as check-in and check-out.
        now_day = time.strftime('%Y-%m-%d', time.localtime(time.time()))

        # Build the GET address.
        url = 'https://hotel.meituan.com/' + self.p_id + '/?ci=' + now_day + '&co=' + now_day
        data = requests.get(url, headers=self.headers).content.decode('utf-8')

        # Extract the useful region.
        info_dict = self._extract_initial_state(data)
        if info_dict is not None:
            self.parse_html(info_dict)
        else:
            print('访问失效')

    @staticmethod
    def _extract_initial_state(page_text):
        """Return the decoded ``window.__INITIAL_STATE__`` object, or ``None``.

        The previous pattern ended in a lazy group with nothing after it, so
        it always captured an empty string. The capture now runs up to the
        closing script tag.
        NOTE(review): assumes the assignment sits directly inside a
        ``<script>`` element — verify against a live page.
        """
        found = re.search(r'window\.__INITIAL_STATE__=(.*?)</script>', page_text, flags=re.DOTALL)
        if found is None:
            return None
        try:
            # Drop the statement's trailing semicolon, if any.
            return json.loads(found.group(1).strip().rstrip(';'))
        except ValueError:
            return None

    def parse_html(self, data_dict):
        """Print the hotel fields; sections missing from ``data_dict`` print as empty."""
        data = data_dict.get('poiData') or {}
        print('店名', data.get('name'))
        print('店铺id', data.get('poiid'))
        print('城市id', data.get('cityId'))
        print('地址', data.get('addr'))
        print('lng', data.get('lng'))
        print('lat', data.get('lat'))
        print('封面', (data.get('frontImg') or '').replace('w.h', '750.0'))
        print('wifi', data.get('wifi'))
        print('地区id', data.get('areaId'))
        print('地区名', data.get('areaName'))
        print('平均消费', data.get('avgPrice'))
        print('类别id', data.get('brandId'))
        print('类别名', data.get('brandName'))
        print('简介', data.get('introduction'))
        print('星级', data.get('highHotelStar'))
        print('舒适类型', data.get('hotelStar'))
        print('电话', [i.get('phone') for i in data.get('phoneList') or []])
        print('平均分', data.get('avgScore'))
        print('标签', data.get('poiAttrTagList'))
        print('城市名', data.get('cityName'))
        print('城市拼音', data.get('cityPinyin'))

        poi_data = data_dict.get('poiExt') or {}  # hotel details
        service_icons = (poi_data.get('serviceIconsInfo') or {}).get('serviceIcons') or []
        intro_items = (poi_data.get('hotelIntroInfo') or {}).get('poiExtendsInfos') or []
        print('服务', [i.get('attrDesc') for i in service_icons])
        print('酒店介绍', {i.get('attrDesc'): i.get('attrValue') for i in intro_items})
69 |
70 |
if __name__ == '__main__':
    # Prompt (in yellow) for the hotel ID; 41823880 is a known test value.
    print("""\033[1;33m请输入酒店ID \033[0m""")
    hotel_id = input('(链接末尾数字就是ID)')
    ParseHotelInfo(hotel_id).go_to_hotel()
77 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/parse_play_areas.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-05-05 Python: 3.7
4 |
5 | import requests
6 | import json
7 | import re
8 | from pypinyin import pinyin
9 |
10 |
class ParseAreas(object):
    """Print the district / street tree of a city's leisure section on Meituan.

    Creating an instance immediately performs the request and prints JSON.
    """

    def __init__(self, city_name):
        # First letter of the pinyin of every character, used as sub-domain.
        self.alphabet = "".join(syllable[0][0] for syllable in pinyin(city_name))

        self.get_data()

    def get_data(self):
        """Download the city's leisure page and pass it to ``parse``."""
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
        }
        target_url = 'https://{city}.meituan.com/xiuxianyule/'.format(city=self.alphabet)
        page = requests.get(target_url, headers=request_headers)
        self.parse(page.text, target_url)

    @staticmethod
    def parse(data, url):
        """Extract city and area information from ``data`` and print it as JSON.

        ``url`` is the page address; district and street links are built by
        appending ``b<id>/`` to it. An empty object is printed when the
        expected block is not found.
        """
        result = {}
        found = re.search(r'"city":{"id":(.*?),"name":"(.*?)","pinyin".*?"area":(.*?),"category":', data)
        if found:
            city_id, city_name, area_json = found.groups()
            result = {'城市': city_name, '城市ID': city_id}
            nodes = json.loads(area_json).get('children')  # area information
            districts = []
            result['区'] = districts

            for node in nodes:
                # "Recommended business districts" is not a real district.
                if node.get('name') == '推荐商圈':
                    continue
                # second level: district
                entry = {
                    '区名': node.get('name'),
                    '区ID': node.get('id'),
                    '区链接': url + 'b' + str(node.get('id')) + '/',
                }
                streets = node.get('children')
                if streets:
                    # third level: streets
                    entry['街道'] = [
                        {
                            '街道名': street.get('name'),
                            '街道ID': street.get('id'),
                            '街道链接': url + 'b' + str(street.get('id')) + '/',
                        }
                        for street in streets
                    ]
                districts.append(entry)

        print(json.dumps(result, ensure_ascii=False))
57 |
58 |
if __name__ == '__main__':
    # Show the usage banner (yellow), then parse the city typed by the user.
    banner = """
\033[1;33m娱乐板块区域解析
请输入城市名例如 北京
返回json格式\033[0m
"""
    print(banner)
    city = input('输入城市名')
    ParseAreas(city)
67 |
--------------------------------------------------------------------------------
/其他实战/【美团】数据解析、token生成/parse_play_info.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-05-08 Python: 3.7
4 |
5 | """
6 | 解析
7 | 美团休闲娱乐商铺信息
8 | 该板块信息隐藏在get请求后的js中直接用正则匹配出信息再抽取出来
9 | """
10 | import requests
11 | import re
12 | import json
13 |
14 |
class ParsePlayInfo(object):
    """Print the details of a Meituan leisure / entertainment shop.

    The data is embedded in the page's JavaScript and is pulled out with a
    regular expression. Creating an instance immediately performs the
    request and prints the result.
    """
    target_url = 'http://www.meituan.com/xiuxianyule/{p_id}/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    }

    def __init__(self, restaurant_id):
        self.restaurant_id = str(restaurant_id)

        self.go_to_restaurant()

    def go_to_restaurant(self):
        """Download the shop page and print the parsed fields."""
        page_url = self.target_url.format(p_id=self.restaurant_id)
        page_text = requests.get(page_url, headers=self.headers).text

        # Extract the useful region.
        found = re.search(r'"params":{"poiInfo":(.*?)},"fallbackPara', page_text, flags=re.DOTALL)
        if not found:
            print('访问失效')
            return
        self.parse_html(json.loads(found.group(1)))

    def parse_html(self, data):
        """Print every field of the ``poiInfo`` dict ``data``."""
        print('商铺ID', self.restaurant_id)
        # (label, key) pairs printed in this order.
        # wifi: 1 = available, 0 = none.
        # park: free text such as "5 free parking spaces", empty when none.
        plain_fields = (
            ('城市ID', 'catId'),
            ('城市', 'cityName'),
            ('城市拼音', 'cityPy'),
            ('店铺', 'shopName'),
            ('评分', 'score'),
            ('平均消费', 'avgPrice'),
            ('地址', 'address'),
            ('电话', 'phone'),
            ('营业时间', 'openTime'),
            ('封面图片', 'headIcon'),
            ('wifi', 'wifi'),
            ('停车', 'park'),
            ('经度', 'lng'),
            ('纬度', 'lat'),
        )
        for label, key in plain_fields:
            print(label, data.get(key))
        # The third breadcrumb title starts with the city name; strip it off.
        crumb_title = data.get('breadCrumbNavDTOList')[2].get('title')
        print('类型', crumb_title[len(data.get('cityName')):])

        albums = [picture.get('url') for picture in data.get('albumDTOList')]
        print('相册', albums)
62 |
63 |
if __name__ == '__main__':
    # Prompt (in yellow) for the shop ID; constructing the parser does the rest.
    banner = """
\033[1;33m请输入商铺ID \033[0m
"""
    print(banner)
    shop_id = input('(商铺网址末尾数字就是ID)')
    ParsePlayInfo(shop_id)
70 |
--------------------------------------------------------------------------------
/其他实战/【试客联盟】登录/login.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-11-23 Python: 3.7
4 |
5 |
6 | import execjs
7 | import requests, re
8 |
9 | s = requests.Session()
10 |
11 |
def main(pwd):
    """Encrypt ``pwd`` with ``get_pwd`` from execute.js.

    Author's note: the ``res_n`` parameter is read from the web page, but
    debugging showed that it is in fact constant.

    :param pwd: plain-text password
    :return: the value returned by the JS function ``get_pwd``
    """
    with open('execute.js', 'r', encoding='utf-8') as script:
        context = execjs.compile(script.read())

    print('引擎', execjs.get().name)
    return context.call('get_pwd', pwd)
23 |
24 |
def login(sign_pwd, username):
    """Post the login form through the shared session and print the reply.

    :param sign_pwd: encrypted password produced by ``main``
    :param username: account name
    """
    form = dict(
        username=username,
        password=sign_pwd,
        vcode='',
        to='http://user.shikee.com/',
    )
    reply = s.post(url="http://login.shikee.com/check/?&_1574394219820", data=form)
    reply.encoding = "utf-8"
    print(reply.text)
36 |
37 |
def home():
    """Fetch the buyer home page and print the greeting shown to the logged-in user.

    Prints a notice instead of raising when the greeting is not on the page
    (for example because the login failed).
    """
    home_url = "http://user.shikee.com/buyer"
    response = s.get(home_url)
    html = response.content.decode('utf-8')
    # The previous pattern was a plain string literal broken across several
    # physical lines (a syntax error), hard-coded the unread count "1 条",
    # and indexed [0] on the result, which crashes when nothing matches.
    # NOTE(review): the markup that surrounded the greeting is not
    # recoverable here, so this pattern anchors on the visible text only —
    # verify against the live page.
    found = re.search(r'您好!(.*?)您有未读提醒', html, re.S)
    if found is None:
        print('greeting not found; login may have failed')
        return
    # Strip any tags that sit between the two text anchors.
    data = re.sub(r'<[^>]+>', '', found.group(1)).strip()
    print(data)
47 |
if __name__ == '__main__':
    # Read the credentials, encrypt the password, log in, then show the home page.
    account = input('请输入账户:')
    plain_pwd = input('请输入密码:')
    signed_pwd = main(plain_pwd)
    print('正在登录....')
    login(signed_pwd, account)
    home()
55 |
--------------------------------------------------------------------------------
/其他实战/【谷雨】数字解密/GuYu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-25 Python: 3.7
4 |
5 | import requests
6 | import os
7 | from fontTools.ttLib import TTFont
8 |
9 |
class Font:
    """
    Digit decoder for the obfuscated web font of https://guyujiezi.com/

    According to the author, the current font's XML applies a shift, which
    ``get_wo`` undoes.
    """
    def __init__(self, uri):
        self.url = uri
        self.filename = uri.split('/')[-1]
        self.font = None  # TTFont instance, set by check()
        self._list = []   # decoded digit for every glyph, set by get_wo()

    def _xml_filename(self):
        """Return the font file name with its extension replaced by ``.xml``."""
        # str.replace() on the extension text also rewrote any other
        # occurrence of it inside the name; only the suffix is swapped now.
        return os.path.splitext(self.filename)[0] + '.xml'

    def check(self):
        """Download the font if it is not on disk, load it and dump it as XML."""
        if not os.path.isfile(self.filename):
            resp = requests.get(self.url)
            # Do not store an HTTP error page as if it were the font file.
            resp.raise_for_status()
            with open(self.filename, 'wb') as f:
                f.write(resp.content)
        # Load the font and save it as XML.
        self.font = TTFont(self.filename)
        self.font.saveXML(self._xml_filename())

    def get_wo(self):
        """Build the digit table from the font's first cmap sub-table."""
        self.check()
        ph = self.font['cmap']
        _dict = ph.tables[0].cmap
        # 1. take the values of the mapping as a list
        # 2. take the last two characters of each value as an int
        # 3. subtract 17 to undo the shift and rebuild the list
        self._list = [int(i[-2:]) - 17 for i in _dict.values()]
        print(list(_dict.values()))
        print(self._list)

    def parse(self, number):
        """Decode the digits shown on the page and print the real number.

        :param number: string of digits as displayed on the page
        :return: the decoded number as an int
        """
        decoded = int(''.join(str(self._list[int(num)]) for num in number))
        print('最终展示字', decoded)
        return decoded
54 |
55 |
if __name__ == '__main__':
    decoder = Font("https://guyujiezi.com/fonts/2DLw9u/3iZbr8.woff")
    decoder.get_wo()
    # Try it with a number as it is displayed on the page.
    decoder.parse('947')
61 |
62 |
63 |
--------------------------------------------------------------------------------
/其他实战/【豆瓣】自动登录/DouBan.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2020-01-08 Python: 3.7
4 |
5 | import requests
6 | import re
7 |
8 |
class DouBan:
    """Log in to Douban with a user name and password and verify the result."""

    def __init__(self, name, pwd):
        self.name = name.strip()
        self.pwd = pwd.strip()
        self.headers = {
            'Origin': 'https://accounts.douban.com',
            'Host': 'accounts.douban.com',
            'Referer': 'https://accounts.douban.com/passport/login_popup?login_source=anony',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
        }
        self.login_url = 'https://accounts.douban.com/j/mobile/login/basic'
        self.index_url = "https://www.douban.com/"
        # One session (it was created twice before) shared by login() and
        # check(), so the login cookies reach the home-page request.
        self.session = requests.session()

    def login(self):
        """Post the credentials to the login endpoint; cookies stay in the session."""
        data = {
            'ck': '',
            'name': self.name,
            'password': self.pwd,
            'remember': 'false',
            'ticket': '',
        }
        self.session.post(self.login_url, data=data, headers=self.headers)

    def check(self):
        """Load the home page and report whether an account name is shown."""
        self.headers['Host'] = 'www.douban.com'
        response = self.session.get(self.index_url, headers=self.headers)
        # The previous pattern was a raw string broken across two physical
        # lines (a syntax error) and sat inside a bare ``except``.
        # NOTE(review): the tag that preceded the account name is not
        # recoverable here; ``<span>`` is assumed — verify against the page.
        found = re.search(r'<span>([^<]*?)的帐号', response.text)
        if found:
            print('【登录成功】', found.group(1))
        else:
            print('【登录失败】')
42 |
43 |
if __name__ == '__main__':
    # Read the credentials, log in, then verify the login on the home page.
    account = input('豆瓣用户名 >>>')
    secret = input('密码 >>>')
    client = DouBan(account, secret)
    client.login()
    client.check()
50 |
--------------------------------------------------------------------------------
/其他实战/【逗游】自动登录/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【逗游】自动登录/douyou.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-08-01 Python: 3.7
4 |
5 | import js2py
6 | import requests
7 | import json
8 |
9 |
class DouYou:
    """Log in to doyo.cn using a password hash computed by the site's JS."""

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        'Referer': 'http://www.doyo.cn/passport/login'
    }

    def __init__(self, username, password):
        """Keep the credentials and create the JS context used for hashing."""
        self.context = js2py.EvalJs()  # JavaScript runtime driven from Python
        self.username = username
        self.password = password

    def make_password(self):
        """Return the encrypted password string, or None on failure.

        Fetches a nonce/timestamp pair, loads ``encryp.js`` into the JS
        context and calls its ``get_value`` with the password, nonce and
        timestamp.
        """
        # Only Exception is caught: the SystemExit raised by get_token() must
        # propagate instead of being swallowed (the original bare ``except``
        # caught it and let login continue with no password).
        try:
            nonce, ts = self.get_token()
        except Exception:
            print('获取token失败')
            return None

        try:
            with open("encryp.js", "r", encoding="utf-8") as f:
                self.context.execute(f.read())
            # NOTE(review): ``get_value`` is presumably a function defined by
            # encryp.js and exposed through the context -- confirm.
            return self.context.get_value(self.password, nonce, ts)
        except Exception:
            # Reported separately so a JS/file problem is not mistaken for a
            # token failure.
            print('密码加密失败')
            return None

    def get_token(self):
        """Fetch the login nonce and timestamp for ``self.username``.

        :return: ``(nonce, ts)`` taken from the JSON response.
        :raises SystemExit: when the response carries no truthy ``result``.
        """
        get_token_url = 'http://www.doyo.cn/User/Passport/token'
        # Passed as params so the username is URL-encoded by requests.
        params = {'username': self.username, 'random': '0.1428378278012199'}
        result = json.loads(requests.get(get_token_url, params=params).text)
        if not result.get('result'):
            print('获取token失败')
            # Same effect as the original exit(), without depending on the
            # interactive helper installed by the ``site`` module.
            raise SystemExit
        return result.get('nonce'), result.get('ts')

    def login(self):
        """Submit the login form and print the outcome."""
        password_hash = self.make_password()
        if password_hash is None:
            # Without a hash the request cannot succeed; do not send it.
            print('登陆失败')
            return

        login_url = 'http://www.doyo.cn/passport/login'
        data = {
            'username': self.username,
            'password': password_hash,
            'remberme': '1',
            'next': 'aHR0cCUzQSUyRiUyRnd3dy5kb3lvLmNuJTJG'
        }
        response = requests.post(login_url, data=data, headers=self.headers)
        info = json.loads(response.text)
        if info.get('result'):
            print('登陆成功 | 用户等级:{level} 用户id:{uid}'.format(level=info.get('level'), uid=info.get('uid')))
        else:
            print('登陆失败')
63 |
64 |
if __name__ == '__main__':
    from getpass import getpass

    user = input('输入逗游账号')
    # Read the password without echoing it to the terminal.
    pwd = getpass('输入密码')
    dy = DouYou(user, pwd)
    dy.login()
70 |
--------------------------------------------------------------------------------
/其他实战/【金逸电影】自动注册/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【金逸电影】自动注册/register.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wkunzhi/Python3-Spider/5188ca4056bb94d956df9ddbeb42c765ebe9819a/其他实战/【金逸电影】自动注册/register.png
--------------------------------------------------------------------------------
/其他实战/【金逸电影】自动注册/register.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-04 Python: 3.7
4 |
5 | import requests
6 | import execjs.runtime_names
7 |
8 |
class JinYiRegister:
    """
    Jinyi Cinema registration.
    http://www.jycinema.com/wap/#/register
    """

    def __init__(self, phone):
        """Store the request headers, endpoint URL and phone number."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        }
        self.url = 'http://www.jycinema.com/frontUIWebapp/appserver/photoMessageService/newsSendMessage'
        self.phone = phone

    @staticmethod
    def js_make(json_data):
        """Encrypt *json_data* with ``getEncryption`` from ``encryp.js``.

        :return: the value returned by the JS call, or None if the call
            raised (a message is printed in that case).
        """
        with open('encryp.js', 'r', encoding='utf-8') as f:
            js = execjs.compile(f.read())
        try:
            return js.call("getEncryption", json_data)
        except Exception:
            print('js 异常')
            return None

    def _build_payload(self):
        """Return the JSON text that is handed to the JS encryption routine."""
        # str() lets the phone be given as an int as well as a str; the
        # number is embedded unquoted, exactly as before.
        return '{"mobileNumber": ' + str(self.phone) + ', "channelId": 7, "channelCode": "J0005", "memberId": ""}'

    def register(self):
        """Encrypt the payload, POST it and print the decoded response."""
        params = self.js_make(self._build_payload())
        if params is None:
            # Encryption failed (already reported); a request without
            # ``params`` would be meaningless, so stop here.
            return
        # NOTE(review): 'Origin' and 'Referer' are sent as form fields, not
        # as HTTP headers; kept as-is -- confirm whether headers were meant.
        data = {
            'params': params,
            'Origin': 'http://www.jycinema.com',
            'Referer': 'http://www.jycinema.com/wap/',
        }
        response = requests.post(self.url, data=data, headers=self.headers)
        print(response.content.decode('utf-8'))
40 |
41 |
if __name__ == '__main__':
    # Prompt for the phone number to register and submit it straight away.
    JinYiRegister(input('请输入待注册手机号')).register()
46 |
--------------------------------------------------------------------------------
/其他实战/【青海移动】登陆参数生成/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-10-06 Python: 3.7
4 |
5 |
--------------------------------------------------------------------------------
/其他实战/【青海移动】登陆参数生成/make_param.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # __author__ = "zok" 362416272@qq.com
3 | # Date: 2019-09-12 Python: 3.7
4 | import execjs.runtime_names
5 |
6 |
class QinHaiYiDong:
    """
    Qinghai Mobile
    login parameter encryption
    https://www.iqhmall.cn/shopweb/logon/logon
    """

    def __init__(self, user, pwd):
        """Store the credentials and compile the JS helper."""
        self.js = None
        self.user = user
        self.pwd = pwd
        self.init_js()

    def init_js(self):
        """Print the active JS engine name and compile ``encryp.js``."""
        print('引擎', execjs.get().name)
        with open("encryp.js", "r", encoding="utf-8") as f:
            self.js = execjs.compile(f.read())

    def make_param(self):
        """Encrypt the password with the JS ``test`` function.

        The result is printed, as before, and now also returned so callers
        can actually use it.

        :return: the value produced by the JS call.
        """
        encrypted = self.js.call('test', self.pwd)
        print(encrypted)
        return encrypted
26 |
27 |
if __name__ == '__main__':
    # Run once with hard-coded values and print the encrypted parameter.
    QinHaiYiDong('17327362817', '123123123').make_param()
31 |
--------------------------------------------------------------------------------
/其他实战/【餐饮】查询信息/FoodInfo.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # Time : 2020/01/16
3 | # Author : Zok
4 | # Email : 362416272@qq.com
5 |
6 | import requests
7 | import re
8 | import json
9 | from copyheaders import headers_raw_to_dict
10 |
11 |
12 | class Food:
13 | """
14 | 根据输入美团餐馆名,解析参观基础信息
15 | """
16 | def __init__(self):
17 | self.headers = headers_raw_to_dict(b"""
18 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
19 | Accept-Encoding: gzip, deflate, br
20 | Accept-Language: zh-CN,zh;q=0.9
21 | Cache-Control: max-age=0
22 | Connection: keep-alive
23 | Cookie: _lxsdk_s=16fb0ce3a0d-4cf-d9e-cf2%7C%7C1
24 | Host: www.meituan.com
25 | Sec-Fetch-Mode: navigate
26 | Sec-Fetch-Site: none
27 | Sec-Fetch-User: ?1
28 | Upgrade-Insecure-Requests: 1
29 | User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36
30 | """)
31 |
32 | def get_info(self, url):
33 | response = requests.get(url, headers=self.headers)
34 | data = json.loads(re.search(r'